Bug fixes and added a script for changes in final html
author amit@thunder
Tue, 23 Feb 2010 19:56:48 +0530
changeset 26 1846ab4ebdda
parent 25 ed38dd9bdb50
child 27 cb14131583c6
Bug fixes and added a script for changes in final html
SEESenv/scripts/finalhtml.py
SEESenv/scripts/rst2docbook.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SEESenv/scripts/finalhtml.py	Tue Feb 23 19:56:48 2010 +0530
@@ -0,0 +1,45 @@
+import glob
+import lxml
+import re
+import os
+from BeautifulSoup import BeautifulSoup
+import time
+repo='/home/amit/testdocbook2/'
+
+
+def finalchanges(file_name,html_string):
+    """some of the final changes that need to do be done on the html before creating the final usable page in the hgbook project"""	    
+#    print html_string    
+    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
+    reg_obj=re.compile('<head>.*</head>',re.DOTALL)    
+    html_string=reg_obj.sub(replace_string, html_string,re.DOTALL)
+    html_string=re.sub('><a name',' id', html_string)	
+    soup=BeautifulSoup(html_string.decode('ascii','ignore'))    
+    ch_name=os.path.split(file_name)[1].split('.')[0]
+    print ch_name
+    ch_title=re.split('[0-9]*',ch_name)[1]    
+    title_string='Chapter. '+ ch_title
+    soup.html.head.title.string.replaceWith(title_string) 
+    div=soup.html.div
+        
+    try:
+        del(div['title'])
+        div['id'] = ch_name
+    except TypeError:
+        print file_name  
+    return soup
+
+if __name__=='__main__':
+	file_names=glob.glob(repo+'ch*.html')
+	for file_name in file_names:
+            file_obj=open(file_name,'r')
+            soup=finalchanges(file_name,file_obj.read())
+      	    time.sleep(1)
+	    file_obj=open(file_name,'w')
+	    file_obj.write(soup.prettify())	
+
+
+
+
+
+	
--- a/SEESenv/scripts/rst2docbook.py	Tue Feb 23 19:43:04 2010 +0530
+++ b/SEESenv/scripts/rst2docbook.py	Tue Feb 23 19:56:48 2010 +0530
@@ -6,6 +6,7 @@
 sees documentation.
 """
 import xml.etree.ElementTree as ET
+from lxml import etree as ET2
 import os 
 import re
 import subprocess
@@ -41,55 +42,88 @@
 
 
 """
-def convert2docbook(xml_string):
-	""" convert to docbook from xml converted using rst2xml using regex replacements """		
-	xml_string=re.sub('<strong>','<emphasis role="strong">', xml_string)   
-	xml_string=re.sub('<literal_block xml:space="preserve">','<programlisting>',xml_string)
-	xml_string=re.sub('</literal_block>','</programlisting>',xml_string)
-	xml_string=re.sub('<paragraph>' ,'<para>',xml_string)
-	xml_string=re.sub('</paragraph>' ,'</para>',xml_string)  
-	xml_string=re.sub('</strong>' ,'</emphasis>',xml_string)
-	xml_string=re.sub('<bullet_list bullet="[-*+]">','<itemizedlist mark="*">',xml_string,)
-	xml_string=re.sub('</bullet_list>','</itemizedlist>',xml_string)
-	xml_string=re.sub('<list_item>','<listitem>',xml_string)	
-	xml_string=re.sub('</list_item>','</listitem>',xml_string)	
-	xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="" suffix=".">', '<orderedlist numeration="arabic">',xml_string)
- 	xml_string=re.sub('</enumerated_list>', '</orderedlist>',xml_string)
-	xml_string=re.sub('<block_quote>', '',xml_string)	
-	xml_string=re.sub('</block_quote>', '',xml_string) 	
-	xml_string=re.sub('<definition_list>', '',xml_string)	
-	xml_string=re.sub('</definition_list>', '',xml_string) 	
-	xml_string=re.sub('<entry>', '',xml_string)	
-	xml_string=re.sub('</entry>', '',xml_string) 	
-	xml_string=re.sub('<row>', '',xml_string)	
-	xml_string=re.sub('</row>', '',xml_string)	
-	xml_string=re.sub('<tbody>', '',xml_string)	
-	xml_string=re.sub('</tbody>', '',xml_string)	
-	xml_string=re.sub('<table>', '',xml_string)			
-	xml_string=re.sub('</table>', '',xml_string)	
-	xml_string=re.sub('<thead>', '',xml_string)	
-	xml_string=re.sub('</thead>', '',xml_string)	
-#	xml_string=re.sub('<tgroup cols="[1-90]*"><colspec colwidth="[1-90]*"/><colspec colwidth="[1-90]*"/><colspec colwidth="[1-90]"/>', '',xml_string)
-#	xml_string=re.sub('</tgroup', '',xml_string)	
-	chapter= ET.Element("chapter")
-	article=ET.SubElement(chapter,"article")
-	articleinfo=ET.SubElement(article,"articleinfo")
+def convert2docbook(name ,xml_string):
+    """changing tags to convert the xml to docbook"""	
+        
+    xml_string=re.sub('<strong>','<emphasis role="strong">', xml_string)   
+    xml_string=re.sub('</strong>' ,'</emphasis>',xml_string)   
+    xml_string=re.sub('<literal_block xml:space="preserve">','<literal_block xml:space="preserve">\n',xml_string)	
+    xml_string=re.sub('<literal_block xml:space="preserve">','<programlisting>',xml_string)
+    xml_string=re.sub('</literal_block>','</programlisting>',xml_string)
+    xml_string=re.sub('<paragraph>' ,'<para>',xml_string)
+    xml_string=re.sub('</paragraph>' ,'</para>',xml_string)  
+    
+    xml_string=re.sub('<bullet_list bullet="[-*+]">','<itemizedlist mark="*">',xml_string,)
+    xml_string=re.sub('</bullet_list>','</itemizedlist>',xml_string)
+    xml_string=re.sub('<list_item>','<listitem>',xml_string)	
+    xml_string=re.sub('</list_item>','</listitem>',xml_string)	
+    xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="." suffix=".">', '<orderedlist numeration="arabic">',xml_string)
+    xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="" suffix=".">',  '<orderedlist numeration="arabic">',xml_string)  
+    xml_string=re.sub('</enumerated_list>', '</orderedlist>',xml_string)
+    xml_string=re.sub('<line_block>', '',xml_string)
+    xml_string=re.sub('</line_block>', '',xml_string)
+    xml_string=re.sub('<line>', '',xml_string)
+    xml_string=re.sub('</line>', '',xml_string)   
+    xml_string=re.sub('<block_quote>', '',xml_string)
+    xml_string=re.sub('</block_quote>', '',xml_string)
+    xml_string=re.sub('<title_reference>', '',xml_string)
+    xml_string=re.sub('</title_reference>', '',xml_string)	
+    xml_string=re.sub('<definition>', '',xml_string)
+    xml_string=re.sub('</definition>', '',xml_string)
+    xml_string=re.sub('<definition_list_item>', '',xml_string)	
+    xml_string=re.sub('</definition_list_item>', '',xml_string)
+    xml_string=re.sub('<definition_list>', '',xml_string)	
+    xml_string=re.sub('</definition_list>', '',xml_string)	
+    xml_string=re.sub('<term>', '',xml_string)
+    xml_string=re.sub('</term>', '',xml_string)
+    xml_string=re.sub('<entry>', '',xml_string)	
+    xml_string=re.sub('</entry>', '',xml_string) 	
+    xml_string=re.sub('<row>', '',xml_string)	
+    xml_string=re.sub('</row>', '',xml_string)	
+    xml_string=re.sub('<tbody>', '',xml_string)	
+    xml_string=re.sub('</tbody>', '',xml_string)	
+    xml_string=re.sub('<table>', '',xml_string)			
+    xml_string=re.sub('</table>', '',xml_string)	
+    xml_string=re.sub('<thead>', '',xml_string)	
+    xml_string=re.sub('</thead>', '',xml_string)	
+    
+    xml_string=re.sub('<tgroup.*"/>', '',xml_string)
+    xml_string=re.sub('</tgroup>', '',xml_string)	
+    chapter= ET.Element("chapter")
+    article=ET.SubElement(chapter,"article")
+    articleinfo=ET.SubElement(article,"articleinfo")
 		
-	try:	
-		tree = ET.fromstring(xml_string)
-	except:
-		print "xml_string problem"	
-	#	print "fromstring"	
-	try:    
-	        title= ET.SubElement(articleinfo,"title") 
-		title.text=tree.items()[1][1]
-	except:
-	        pass    
-	article.insert(1,tree)
-	xml_string=ET.tostring(chapter)
-	xml_string=re.sub('<document [A-Za-z=/_."]*>' ,'',xml_string)
-	xml_string=re.sub('</document>' ,'',xml_string)
-	return xml_string
+    try:	
+    
+#        print name    
+        tree = ET.fromstring(xml_string)
+    except:
+        pass        
+        #print name        
+               
+      #  tree=ET2.fromstring(xml_string)    
+        #print "xml_string problem"	
+        
+    #   print "fromstring"	
+    try:    
+        title= ET.SubElement(articleinfo,"title") 
+        title_element=tree.find('title')
+        title.text=title_element.text
+        article.insert(1,tree)	
+    except:
+        if name not in ('ch12index.xml',):        
+            title= ET.SubElement(articleinfo,"title") 
+            section_element=tree.find('section')
+            title_element=section_element.find('title')
+            title.text=title_element.text
+            article.insert(1,tree)                
+            
+    xml_string=ET.tostring(chapter)
+    xml_string=xml_string.replace('\\',' ')    
+    xml_string=re.sub('<document[-A-Za-z=/_" .0-9]*>' ,'',xml_string)
+    xml_string=re.sub('</document>' ,'',xml_string)
+#    xml_string=re.sub('</section></section></section>' ,'</section></section>',xml_string)    
+    return xml_string
 
     
 
@@ -97,17 +131,19 @@
 
 
 if __name__=='__main__':
-	for name in names:
-		#print name        
-		xml_string=open(name,'r').read()
-		#xml_string=open('ch9Using_Linux_Tools.xml','r').read()
-		xml_string=convert2docbook(xml_string)
-		docbook_file=name.split('.')[0]+'.docbook'
-		f=open(docbook_file,'w')
-		try:        
-		    f.write(xml_string)
-		except:
-		        pass
+        
+    for name in names:
+           
+    #print name        
+    #xml_string=open(name,'r').read()
+        xml_string=open(name,'r').read()
+        xml_string=convert2docbook(name,xml_string)
+        docbook_file=name.split('.')[0]+'.docbook'
+        f=open(docbook_file,'w')
+        try:        
+	        f.write(xml_string)
+        except:
+	        pass
 
 
 
@@ -121,3 +157,22 @@
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+