SEES-hacks: SEESenv/scripts/rst2docbook.py@bc535262231d


#!/usr/bin/python
"""
Just a hack to convert rst to xml and then to docbook.
It may not contain all the elements required of a docbook document.
It was written only to make the conversion work for the specific rst
files of our SEES documentation.
"""
import xml.etree.ElementTree as ET
from lxml import etree as ET2
import os 
import re
import subprocess
import os
import pkg_resources
import glob, os, re, sys
# Paths of the chapter XML files that the __main__ block at the bottom of this
# file reads and converts to DocBook (presumably rst2xml output -- confirm).
# The glob is evaluated at import time against a hard-coded absolute path.
names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.xml')
"""
chapterno=0

def convert2xml(file):
#    print folder,subfolder,file
    global chapterno
    name=file.split('/')[-1]
    name=str(chapterno)+name.split('.')[0]
#    full_file=os.path.join(folder,file)    
#    if file.endswith('.rst'):    
    print file	    
    xml_file=name+'.xml'        
    command="rst2xml.py %s > %s" %(file , xml_file)
    print command        
    a=subprocess.Popen(command , shell=True)
	   	


def walk(repo):
    global chapterno
    mainfolder='/home/amit/sttp_latest/'    
    for readline in open('index.config','r').readlines():
        chapterno+=1		
        filename=mainfolder+readline
        convert2xml(filename)


"""
# Ordered (regex, replacement) pairs renaming docutils tags to DocBook tags.
# They are applied to the raw XML text, before it is parsed.
_DOCBOOK_RENAMES = (
    ('<strong>', '<emphasis role="strong">'),
    ('</strong>', '</emphasis>'),
    # A newline is inserted right after the opening <programlisting> tag.
    ('<literal_block xml:space="preserve">', '<programlisting>\n'),
    ('</literal_block>', '</programlisting>'),
    ('<paragraph>', '<para>'),
    ('</paragraph>', '</para>'),
    ('<bullet_list bullet="[-*+]">', '<itemizedlist mark="*">'),
    ('</bullet_list>', '</itemizedlist>'),
    ('<list_item>', '<listitem>'),
    ('</list_item>', '</listitem>'),
    # The dots are regex wildcards, so any one-character prefix/suffix matches.
    ('<enumerated_list enumtype="arabic" prefix="." suffix=".">',
     '<orderedlist numeration="arabic">'),
    ('<enumerated_list enumtype="arabic" prefix="" suffix=".">',
     '<orderedlist numeration="arabic">'),
    ('</enumerated_list>', '</orderedlist>'),
)

# Elements whose opening and closing tags are dropped while their content is
# kept; only attribute-less opening tags (``<tag>``) are matched.
_DROPPED_TAGS = (
    'line_block', 'line', 'block_quote', 'title_reference',
    'definition', 'definition_list_item', 'definition_list', 'term',
    'entry', 'row', 'tbody', 'table', 'thead',
)


def convert2docbook(name, xml_string):
    """Rewrite an XML string into the DocBook-like markup used by this script.

    Tags are renamed or stripped textually, the result is parsed and wrapped
    as ``<chapter><article><articleinfo><title>..</title></articleinfo>``
    followed by the document content, and the ``<document>`` wrapper tags are
    removed from the serialized output.  Backslashes in the output are
    replaced by spaces.

    The title text is taken from the document's own ``<title>`` child, or
    failing that from the ``<title>`` of its first ``<section>``.  When
    neither exists the title is left empty and the content is still embedded.

    Parameters:
        name: name of the file being converted.  It is only compared against
            ``'ch12index.xml'``: for that name a document without a top-level
            title yields a chapter with an empty title and no content.
        xml_string: the XML text to convert.

    Returns:
        The converted markup as a ``str``.

    Raises:
        Whatever ``ET.fromstring`` raises when the rewritten text is not
        well-formed XML (``ET.ParseError`` on current Pythons).
    """
    for pattern, replacement in _DOCBOOK_RENAMES:
        xml_string = re.sub(pattern, replacement, xml_string)

    for tag in _DROPPED_TAGS:
        xml_string = xml_string.replace('<%s>' % tag, '')
        xml_string = xml_string.replace('</%s>' % tag, '')

    # Remove the <tgroup ...> opening tag together with the <colspec .../>
    # elements that directly follow it.  A greedy '<tgroup.*"/>' would also
    # swallow every element up to the last self-closing tag on the same line.
    xml_string = re.sub(r'<tgroup[^>]*>(?:\s*<colspec[^>]*/>)*', '',
                        xml_string)
    xml_string = xml_string.replace('</tgroup>', '')

    chapter = ET.Element("chapter")
    article = ET.SubElement(chapter, "article")
    articleinfo = ET.SubElement(article, "articleinfo")
    # Exactly one <title> is created, whichever source supplies its text.
    title = ET.SubElement(articleinfo, "title")

    # Parse errors propagate: swallowing them would only resurface later as
    # an unrelated NameError on the unbound tree.
    tree = ET.fromstring(xml_string)

    include_body = True
    title_element = tree.find('title')
    if title_element is None:
        # NOTE(review): the __main__ block passes full glob paths, so this
        # comparison only matches when the bare file name is passed -- confirm
        # whether os.path.basename(name) was intended.
        if name in ('ch12index.xml',):
            include_body = False
        else:
            section_element = tree.find('section')
            if section_element is not None:
                title_element = section_element.find('title')

    if title_element is not None:
        title.text = title_element.text
    if include_body:
        article.insert(1, tree)

    xml_string = ET.tostring(chapter)
    if not isinstance(xml_string, str):
        # Python 3 returns bytes; the default us-ascii encoding emits
        # character references for everything outside ASCII.
        xml_string = xml_string.decode('ascii')
    xml_string = xml_string.replace('\\', ' ')
    # Drop the docutils <document ...> wrapper whatever its attributes hold;
    # '>' inside attribute values is escaped by the serializer.
    xml_string = re.sub('<document[^>]*>', '', xml_string)
    xml_string = xml_string.replace('</document>', '')
    return xml_string

    




if __name__=='__main__':
    # Convert every globbed XML file and write the result next to it with a
    # '.docbook' extension.
    for name in names:
        with open(name, 'r') as xml_file:
            xml_string = xml_file.read()
        xml_string = convert2docbook(name, xml_string)
        # splitext only strips the final extension, so dots elsewhere in the
        # path do not truncate the output name.
        docbook_file = os.path.splitext(name)[0] + '.docbook'
        with open(docbook_file, 'w') as out_file:
            try:
                out_file.write(xml_string)
            except (IOError, OSError, UnicodeError) as err:
                # Writing stays best-effort: report the failure and carry on
                # with the remaining files instead of hiding it.
                sys.stderr.write('%s: could not write docbook output: %s\n'
                                 % (docbook_file, err))
author	amit@thunder
	Sun, 28 Feb 2010 16:22:19 +0530
changeset 33	bc535262231d
parent 30	f66b0a5ebf40
child 36	a542eb905ced
permissions	-rw-r--r--