Using Docutils for the conversion instead of regex
author amit@thunder
Mon, 12 Apr 2010 04:02:20 +0530
changeset 46 7f011b42609c
parent 45 b5bff924ef69
child 47 e50530e32ac0
Using Docutils for the conversion instead of regex
SEESenv/scripts/rst2docbook.py
--- a/SEESenv/scripts/rst2docbook.py	Wed Mar 10 17:39:26 2010 +0530
+++ b/SEESenv/scripts/rst2docbook.py	Mon Apr 12 04:02:20 2010 +0530
@@ -13,138 +13,173 @@
 import os
 import pkg_resources
 import glob, os, re, sys
+from docbook import Writer
+from docutils.core import publish_file 
+
+#repo="/home/hg/repos/test_review/sttp/"
 repo=sys.argv[1]
-names = glob.glob(os.path.join(repo , 'ch*.xml'))
-"""
+#names = glob.glob(os.path.join(repo , '*.rst'))
+tmp_folder="/home/hg/repos/SEES-hacks/temp/"
+
+
 chapterno=0
 
-def convert2xml(file):
-#    print folder,subfolder,file
-    global chapterno
-    name=file.split('/')[-1]
-    name=str(chapterno)+name.split('.')[0]
-#    full_file=os.path.join(folder,file)    
-#    if file.endswith('.rst'):    
-    print file	    
-    xml_file=name+'.xml'        
-    command="rst2xml.py %s > %s" %(file , xml_file)
-    print command        
-    a=subprocess.Popen(command , shell=True)
+# def convert2xml(file):
+# #    print folder,subfolder,file
+#     global chapterno
+#     name=file.split('/')[-1]
+#     name=str(chapterno)+name.split('.')[0]
+# #    full_file=os.path.join(folder,file)    
+# #    if file.endswith('.rst'):    
+#     print file	    
+#     xml_file=name+'.xml'        
+#     command="rst2xml.py %s > %s" %(file , xml_file)
+#     print command        
+#     a=subprocess.Popen(command , shell=True)
 	   	
 
 
-def walk(repo):
-    global chapterno
-    mainfolder='/home/amit/sttp_latest/'    
-    for readline in open('index.config','r').readlines():
-        chapterno+=1		
-        filename=mainfolder+readline
-        convert2xml(filename)
+# def walk(repo):
+#     global chapterno
+#     mainfolder='/home/amit/sttp_latest/'    
+#     for readline in open('index.config','r').readlines():
+#         chapterno+=1		
+#         filename=mainfolder+readline
+#         convert2xml(filename)
+
 
 
-"""
-def convert2docbook(name ,xml_string):
-    """changing tags to convert the xml to docbook"""	
+# def convert2docbook(name ,xml_string):
+#     """changing tags to convert the xml to docbook"""	
         
-    xml_string=re.sub('<strong>','<emphasis role="strong">', xml_string)   
-    xml_string=re.sub('</strong>' ,'</emphasis>',xml_string)   
-    xml_string=re.sub('<literal_block xml:space="preserve">','<literal_block xml:space="preserve">\n',xml_string)	
-    xml_string=re.sub('<literal_block xml:space="preserve">','<programlisting>',xml_string)
-    xml_string=re.sub('</literal_block>','</programlisting>',xml_string)
-    xml_string=re.sub('<paragraph>' ,'<para>',xml_string)
-    xml_string=re.sub('</paragraph>' ,'</para>',xml_string)  
+#     xml_string=re.sub('<strong>','<emphasis role="strong">', xml_string)   
+#     xml_string=re.sub('</strong>' ,'</emphasis>',xml_string)   
+#     xml_string=re.sub('<literal_block xml:space="preserve">','<literal_block xml:space="preserve">\n',xml_string)	
+#     xml_string=re.sub('<literal_block xml:space="preserve">','<programlisting>',xml_string)
+#     xml_string=re.sub('</literal_block>','</programlisting>',xml_string)
+#     xml_string=re.sub('<paragraph>' ,'<para>',xml_string)
+#     xml_string=re.sub('</paragraph>' ,'</para>',xml_string)  
+    
+#     xml_string=re.sub('<bullet_list bullet="[-*+]">','<itemizedlist mark="*">',xml_string,)
+#     xml_string=re.sub('</bullet_list>','</itemizedlist>',xml_string)
+#     xml_string=re.sub('<list_item>','<listitem>',xml_string)	
+#     xml_string=re.sub('</list_item>','</listitem>',xml_string)	
+#     xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="." suffix=".">', '<orderedlist numeration="arabic">',xml_string)
+#     xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="" suffix=".">',  '<orderedlist numeration="arabic">',xml_string)  
+#     xml_string=re.sub('</enumerated_list>', '</orderedlist>',xml_string)
+#     xml_string=re.sub('<line_block>', '',xml_string)
+#     xml_string=re.sub('</line_block>', '',xml_string)
+#     xml_string=re.sub('<line>', '',xml_string)
+#     xml_string=re.sub('</line>', '',xml_string)   
+#     xml_string=re.sub('<block_quote>', '',xml_string)
+#     xml_string=re.sub('</block_quote>', '',xml_string)
+#     xml_string=re.sub('<title_reference>', '',xml_string)
+#     xml_string=re.sub('</title_reference>', '',xml_string)	
+#     xml_string=re.sub('<definition>', '',xml_string)
+#     xml_string=re.sub('</definition>', '',xml_string)
+#     xml_string=re.sub('<definition_list_item>', '',xml_string)	
+#     xml_string=re.sub('</definition_list_item>', '',xml_string)
+#     xml_string=re.sub('<definition_list>', '',xml_string)	
+#     xml_string=re.sub('</definition_list>', '',xml_string)	
+#     xml_string=re.sub('<term>', '',xml_string)
+#     xml_string=re.sub('</term>', '',xml_string)
+#     xml_string=re.sub('<entry>', '',xml_string)	
+#     xml_string=re.sub('</entry>', '',xml_string) 	
+#     xml_string=re.sub('<row>', '',xml_string)	
+#     xml_string=re.sub('</row>', '',xml_string)	
+#     xml_string=re.sub('<tbody>', '',xml_string)	
+#     xml_string=re.sub('</tbody>', '',xml_string)	
+#     xml_string=re.sub('<table>', '',xml_string)			
+#     xml_string=re.sub('</table>', '',xml_string)	
+#     xml_string=re.sub('<thead>', '',xml_string)	
+#     xml_string=re.sub('</thead>', '',xml_string)	
     
-    xml_string=re.sub('<bullet_list bullet="[-*+]">','<itemizedlist mark="*">',xml_string,)
-    xml_string=re.sub('</bullet_list>','</itemizedlist>',xml_string)
-    xml_string=re.sub('<list_item>','<listitem>',xml_string)	
-    xml_string=re.sub('</list_item>','</listitem>',xml_string)	
-    xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="." suffix=".">', '<orderedlist numeration="arabic">',xml_string)
-    xml_string=re.sub('<enumerated_list enumtype="arabic" prefix="" suffix=".">',  '<orderedlist numeration="arabic">',xml_string)  
-    xml_string=re.sub('</enumerated_list>', '</orderedlist>',xml_string)
-    xml_string=re.sub('<line_block>', '',xml_string)
-    xml_string=re.sub('</line_block>', '',xml_string)
-    xml_string=re.sub('<line>', '',xml_string)
-    xml_string=re.sub('</line>', '',xml_string)   
-    xml_string=re.sub('<block_quote>', '',xml_string)
-    xml_string=re.sub('</block_quote>', '',xml_string)
-    xml_string=re.sub('<title_reference>', '',xml_string)
-    xml_string=re.sub('</title_reference>', '',xml_string)	
-    xml_string=re.sub('<definition>', '',xml_string)
-    xml_string=re.sub('</definition>', '',xml_string)
-    xml_string=re.sub('<definition_list_item>', '',xml_string)	
-    xml_string=re.sub('</definition_list_item>', '',xml_string)
-    xml_string=re.sub('<definition_list>', '',xml_string)	
-    xml_string=re.sub('</definition_list>', '',xml_string)	
-    xml_string=re.sub('<term>', '',xml_string)
-    xml_string=re.sub('</term>', '',xml_string)
-    xml_string=re.sub('<entry>', '',xml_string)	
-    xml_string=re.sub('</entry>', '',xml_string) 	
-    xml_string=re.sub('<row>', '',xml_string)	
-    xml_string=re.sub('</row>', '',xml_string)	
-    xml_string=re.sub('<tbody>', '',xml_string)	
-    xml_string=re.sub('</tbody>', '',xml_string)	
-    xml_string=re.sub('<table>', '',xml_string)			
-    xml_string=re.sub('</table>', '',xml_string)	
-    xml_string=re.sub('<thead>', '',xml_string)	
-    xml_string=re.sub('</thead>', '',xml_string)	
+#     xml_string=re.sub('<tgroup.*"/>', '',xml_string)
+#     xml_string=re.sub('</tgroup>', '',xml_string)	
+#     chapter= ET.Element("chapter")
+#     article=ET.SubElement(chapter,"article")
+#     articleinfo=ET.SubElement(article,"articleinfo")
+		
+#     try:	
     
-    xml_string=re.sub('<tgroup.*"/>', '',xml_string)
-    xml_string=re.sub('</tgroup>', '',xml_string)	
-    chapter= ET.Element("chapter")
-    article=ET.SubElement(chapter,"article")
-    articleinfo=ET.SubElement(article,"articleinfo")
-		
-    try:	
-    
-#        print name    
-        tree = ET.fromstring(xml_string)
-    except:
-        pass        
-        #print name        
+# #        print name    
+#         tree = ET.fromstring(xml_string)
+#     except:
+#         pass        
+#         #print name        
                
-      #  tree=ET2.fromstring(xml_string)    
-        #print "xml_string problem"	
+#       #  tree=ET2.fromstring(xml_string)    
+#         #print "xml_string problem"	
         
-    #   print "fromstring"	
-    try:    
-        title= ET.SubElement(articleinfo,"title") 
-        title_element=tree.find('title')
-        title.text=title_element.text
-        article.insert(1,tree)	
-    except:
-        if name not in ('ch12index.xml',):        
-            title= ET.SubElement(articleinfo,"title") 
-            section_element=tree.find('section')
-            title_element=section_element.find('title')
-            title.text=title_element.text
-            article.insert(1,tree)                
+#     #   print "fromstring"	
+#     try:    
+#         title= ET.SubElement(articleinfo,"title") 
+#         title_element=tree.find('title')
+#         title.text=title_element.text
+#         article.insert(1,tree)	
+#     except:
+#         if name not in ('ch12index.xml',):        
+#             title= ET.SubElement(articleinfo,"title") 
+#             section_element=tree.find('section')
+#             title_element=section_element.find('title')
+#             title.text=title_element.text
+#             article.insert(1,tree)                
             
-    xml_string=ET.tostring(chapter)
-    xml_string=xml_string.replace('\\',' ')    
-    xml_string=re.sub('<document[-A-Za-z=/_" .0-9:]*>' ,'',xml_string)
-    xml_string=re.sub('</document>' ,'',xml_string)
-#    xml_string=re.sub('</section></section></section>' ,'</section></section>',xml_string)    
-    return xml_string
+#     xml_string=ET.tostring(chapter)
+#     xml_string=xml_string.replace('\\',' ')    
+#     xml_string=re.sub('<document[-A-Za-z=/_" .0-9:]*>' ,'',xml_string)
+#     xml_string=re.sub('</document>' ,'',xml_string)
+# #    xml_string=re.sub('</section></section></section>' ,'</section></section>',xml_string)    
+#     return xml_string
 
+def convert2docbook(file_name):
+    global chapterno    
+    file_name=file_name.split()[0]
+    name=file_name.split('/')[-1]
+   
+    xml_file_temp='/'.join(file_name.split('/')[:-2])
+     
+    name='ch'+str(chapterno)+name.split('.')[0]
+    docbook_file=tmp_folder+name+'.docbook'
+    print docbook_file   
+    writer=Writer()
+#    try:    
+    publish_file(source_path=file_name, destination_path=docbook_file,parser_name='restructuredtext', writer=writer) 
+#    except :
+#        pass    
     
 
 
 
 
-if __name__=='__main__':
+
+
+
         
-    for name in names:
+    # for name in names:
            
-    #print name        
-    #xml_string=open(name,'r').read()
-        xml_string=open(name,'r').read()
-        xml_string=convert2docbook(name,xml_string)
-        docbook_file=name.split('.')[0]+'.docbook'
-        f=open(docbook_file,'w')
-        try:        
-	        f.write(xml_string)
-        except:
-	        pass
+    # #print name        
+    # #xml_string=open(name,'r').read()
+    #     xml_string=open(name,'r').read()
+    #     xml_string=convert2docbook(name,xml_string)
+    #     docbook_file=name.split('.')[0]+'.docbook'
+    #     f=open(docbook_file,'w')
+    #     try:        
+    #             f.write(xml_string)
+    #     except:
+    #             pass
+def main():
+    
+    global chapterno
+#    mainfolder='/home/hg/repos/sttp/'    
+    for readline in open('/home/hg/repos/SEES-hacks/index.config','r').readlines():
+        chapterno+=1		
+        filename=repo+readline
+        print filename        
+        convert2docbook(filename)
+
+if __name__=='__main__':
+    main()
 
 
 
@@ -173,7 +208,3 @@
 
 
 
-
-
-
-