SEESenv/scripts/rst2docbook.py
author amit@thunder
Thu, 25 Feb 2010 18:53:51 +0530
changeset 30 f66b0a5ebf40
parent 26 1846ab4ebdda
child 33 bc535262231d
permissions -rw-r--r--
Changes in Path

#!/usr/bin/python
"""
Just a hack to convert rst to xml and then to docbook.
May not contain all the required elements of a docbook.
Just done to make it run for the specific rst for our
sees documentation.
"""
import xml.etree.ElementTree as ET
from lxml import etree as ET2
import os 
import re
import subprocess
import os
import pkg_resources
import glob, os, re, sys
# Paths of the chapter XML files (ch*.xml) to convert; the __main__ block
# at the bottom of this file iterates over this list. The search directory
# is a hard-coded absolute path.
names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.xml')
"""
chapterno=0

def convert2xml(file):
#    print folder,subfolder,file
    global chapterno
    name=file.split('/')[-1]
    name=str(chapterno)+name.split('.')[0]
#    full_file=os.path.join(folder,file)    
#    if file.endswith('.rst'):    
    print file	    
    xml_file=name+'.xml'        
    command="rst2xml.py %s > %s" %(file , xml_file)
    print command        
    a=subprocess.Popen(command , shell=True)
	   	


def walk(repo):
    global chapterno
    mainfolder='/home/amit/sttp_latest/'    
    for readline in open('index.config','r').readlines():
        chapterno+=1		
        filename=mainfolder+readline
        convert2xml(filename)


"""
# Ordered (regex, replacement) pairs renaming source tags to DocBook tags.
_TAG_REWRITES = (
    ('<strong>', '<emphasis role="strong">'),
    ('</strong>', '</emphasis>'),
    # A newline is emitted right after the opening tag of each listing.
    ('<literal_block xml:space="preserve">', '<programlisting>\n'),
    ('</literal_block>', '</programlisting>'),
    ('<paragraph>', '<para>'),
    ('</paragraph>', '</para>'),
    ('<bullet_list bullet="[-*+]">', '<itemizedlist mark="*">'),
    ('</bullet_list>', '</itemizedlist>'),
    ('<list_item>', '<listitem>'),
    ('</list_item>', '</listitem>'),
    # "." is a regex wildcard here: an empty or one-character prefix and any
    # one-character suffix are accepted.
    ('<enumerated_list enumtype="arabic" prefix=".?" suffix=".">',
     '<orderedlist numeration="arabic">'),
    ('</enumerated_list>', '</orderedlist>'),
)

# Tags whose opening and closing markers are removed while their content is
# kept in place.
_DROPPED_TAGS = (
    'line_block', 'line', 'block_quote', 'title_reference',
    'definition', 'definition_list_item', 'definition_list', 'term',
    'entry', 'row', 'tbody', 'table', 'thead',
)
_DROPPED_TAG_RE = re.compile('</?(?:%s)>' % '|'.join(_DROPPED_TAGS))

# Removes a <tgroup ...> opening tag together with the <colspec .../>
# elements that directly follow it, and the closing </tgroup>. The match is
# bounded by tag delimiters so it cannot swallow unrelated content that
# happens to sit on the same line.
_TGROUP_RE = re.compile(r'<tgroup[^>]*>(?:\s*<colspec[^>]*/>)*|</tgroup>')

# Opening <document ...> wrapper, whatever its attribute values contain.
_DOCUMENT_OPEN_RE = re.compile(r'<document(?:\s[^>]*)?>')

# File names (without directory) that are allowed to have no title at all.
_UNTITLED_CHAPTERS = ('ch12index.xml',)


def convert2docbook(name, xml_string):
    """Rewrite an XML chapter into a DocBook ``<chapter>`` string.

    The input tags are renamed or dropped by plain text substitution, the
    result is parsed, and the parsed document is placed inside a
    ``chapter/article`` skeleton whose ``articleinfo/title`` is copied from
    the document's own ``<title>`` or, failing that, from the ``<title>`` of
    its first ``<section>``.  The ``<document>`` wrapper itself is removed
    from the serialized result.

    Args:
        name: Path or file name of the chapter being converted.  Only its
            base name is used, to recognise chapters that may lack a title.
        xml_string: XML text of the chapter (presumably rst2xml.py output;
            the tag names handled here match that format).

    Returns:
        The DocBook markup as a string.  For a chapter listed in
        ``_UNTITLED_CHAPTERS`` that has no top-level title, only the empty
        skeleton is returned and the chapter body is not included.

    Raises:
        ValueError: if no title is found and the chapter is not listed in
            ``_UNTITLED_CHAPTERS``.
        Exception: whatever ``ET.fromstring`` raises when the rewritten text
            is not well-formed XML; it is no longer swallowed.
    """
    for pattern, replacement in _TAG_REWRITES:
        xml_string = re.sub(pattern, replacement, xml_string)
    xml_string = _DROPPED_TAG_RE.sub('', xml_string)
    xml_string = _TGROUP_RE.sub('', xml_string)

    chapter = ET.Element("chapter")
    article = ET.SubElement(chapter, "article")
    articleinfo = ET.SubElement(article, "articleinfo")
    # Created exactly once, so the section fallback below cannot leave a
    # stray empty <title/> behind.
    title = ET.SubElement(articleinfo, "title")

    # Parse errors propagate to the caller instead of surfacing later as an
    # unrelated NameError on an unbound variable.
    tree = ET.fromstring(xml_string)

    # glob hands over full paths, so compare the base name only.
    may_be_untitled = os.path.basename(name) in _UNTITLED_CHAPTERS

    title_element = tree.find('title')
    if title_element is None and not may_be_untitled:
        section_element = tree.find('section')
        if section_element is not None:
            title_element = section_element.find('title')
        if title_element is None:
            raise ValueError(
                '%s: no <title> found in the document or its first section'
                % name)

    if title_element is not None:
        title.text = title_element.text
        article.insert(1, tree)

    serialized = ET.tostring(chapter)
    if not isinstance(serialized, str):
        # Python 3 returns bytes; the default output encoding is ASCII.
        serialized = serialized.decode('ascii')
    # Backslashes are blanked throughout the output, as the original code
    # did; the reason is not recorded in this file.
    serialized = serialized.replace('\\', ' ')
    serialized = _DOCUMENT_OPEN_RE.sub('', serialized)
    serialized = serialized.replace('</document>', '')
    return serialized

    




if __name__=='__main__':
    # Convert every chapter file listed in `names` and write the result next
    # to its input, with the extension replaced by ".docbook".
    for name in names:
        with open(name, 'r') as source:
            xml_string = source.read()
        xml_string = convert2docbook(name, xml_string)
        # splitext strips only the final extension, so a dot elsewhere in
        # the path cannot truncate the output file name.
        docbook_file = os.path.splitext(name)[0] + '.docbook'
        try:
            with open(docbook_file, 'w') as target:
                target.write(xml_string)
        except (IOError, OSError) as err:
            # Writing stays best-effort, but a failure is now reported
            # instead of being silently discarded; remaining chapters are
            # still processed.
            sys.stderr.write('could not write %s: %s\n' % (docbook_file, err))