39 filename=mainfolder+readline |
40 filename=mainfolder+readline |
40 convert2xml(filename) |
41 convert2xml(filename) |
41 |
42 |
42 |
43 |
43 """ |
44 """ |
# Regex rewrites from docutils XML tags to their DocBook counterparts.
# They are applied in order with ``re.sub``, so each pattern is a regular
# expression, not a literal string.
_DOCBOOK_TAG_PATTERNS = (
    ('<strong>', '<emphasis role="strong">'),
    ('</strong>', '</emphasis>'),
    # The listing body starts on its own line.
    ('<literal_block xml:space="preserve">', '<programlisting>\n'),
    ('</literal_block>', '</programlisting>'),
    ('<paragraph>', '<para>'),
    ('</paragraph>', '</para>'),
    ('<bullet_list bullet="[-*+]">', '<itemizedlist mark="*">'),
    ('</bullet_list>', '</itemizedlist>'),
    ('<list_item>', '<listitem>'),
    ('</list_item>', '</listitem>'),
    # The dots inside prefix="."/suffix="." are regex wildcards, so any
    # single-character prefix/suffix is accepted.
    ('<enumerated_list enumtype="arabic" prefix="." suffix=".">',
     '<orderedlist numeration="arabic">'),
    ('<enumerated_list enumtype="arabic" prefix="" suffix=".">',
     '<orderedlist numeration="arabic">'),
    ('</enumerated_list>', '</orderedlist>'),
    # Table scaffolding is stripped one tag at a time.  A single greedy
    # '<tgroup.*"/>' would also swallow everything up to the last
    # self-closing element on the same line.
    ('<tgroup[^>]*>', ''),
    ('</tgroup>', ''),
    ('<colspec[^>]*>', ''),
)

# Attribute-less wrapper elements whose opening and closing tags are simply
# removed; their content is kept in place.
_DROPPED_TAGS = (
    'line_block', 'line', 'block_quote', 'title_reference',
    'definition', 'definition_list_item', 'definition_list', 'term',
    'entry', 'row', 'tbody', 'table', 'thead',
)

# Files that may be emitted without a title and without a body.
_UNTITLED_DOCUMENTS = ('ch12index.xml',)


def convert2docbook(name, xml_string):
    """Convert docutils XML text into a DocBook chapter string.

    The docutils tags are first rewritten textually (see
    ``_DOCBOOK_TAG_PATTERNS`` and ``_DROPPED_TAGS``).  The result is then
    parsed and wrapped as ``<chapter><article><articleinfo><title>``, with
    the parsed document appended to ``<article>``.  Finally the outer
    ``<document ...>`` wrapper tags are stripped from the serialised text.

    ``ET`` is assumed to be an ElementTree-compatible module imported
    elsewhere in this file.

    Parameters:
        name: file name of the document being converted.  It is used for
            error messages and to recognise the files listed in
            ``_UNTITLED_DOCUMENTS``.
        xml_string: the docutils XML text.

    Returns:
        The DocBook markup as a string.  The title is taken from the
        document's own ``<title>`` or, failing that, from the ``<title>``
        of its first ``<section>``.  For a file in ``_UNTITLED_DOCUMENTS``
        that cannot be parsed or has no document title, only the chapter
        skeleton with an empty title is returned.

    Raises:
        ValueError: if the rewritten text cannot be parsed, or no title can
            be found, for a file not listed in ``_UNTITLED_DOCUMENTS``.
    """
    for pattern, replacement in _DOCBOOK_TAG_PATTERNS:
        xml_string = re.sub(pattern, replacement, xml_string)
    for tag in _DROPPED_TAGS:
        xml_string = xml_string.replace('<%s>' % tag, '')
        xml_string = xml_string.replace('</%s>' % tag, '')

    chapter = ET.Element("chapter")
    article = ET.SubElement(chapter, "article")
    articleinfo = ET.SubElement(article, "articleinfo")
    # Created exactly once, so the section-title fallback below cannot leave
    # an empty duplicate <title> behind.
    title = ET.SubElement(articleinfo, "title")

    title_optional = name in _UNTITLED_DOCUMENTS

    try:
        tree = ET.fromstring(xml_string)
    except Exception as error:
        # The concrete parse-error class depends on which ElementTree
        # implementation ``ET`` is bound to, so it is caught broadly and
        # re-raised with the file name attached.
        if not title_optional:
            raise ValueError(
                "%s: cannot parse converted XML: %s" % (name, error))
        tree = None

    title_source = None
    if tree is not None:
        title_source = tree.find('title')
        if title_source is None and not title_optional:
            first_section = tree.find('section')
            if first_section is not None:
                title_source = first_section.find('title')
            if title_source is None:
                raise ValueError(
                    "%s: no document or section title found" % name)

    if title_source is not None:
        title.text = title_source.text
        article.append(tree)

    xml_string = ET.tostring(chapter)
    if not isinstance(xml_string, str):
        # Some ElementTree versions return bytes; the text operations below
        # need a native string.
        xml_string = xml_string.decode('ascii')

    xml_string = xml_string.replace('\\', ' ')
    # Strip the docutils <document ...> wrapper, keeping its children.
    xml_string = re.sub('<document[-A-Za-z=/_" .0-9]*>', '', xml_string)
    xml_string = re.sub('</document>', '', xml_string)
    return xml_string
93 |
127 |
94 |
128 |
95 |
129 |
96 |
130 |
97 |
131 |
98 |
132 |
if __name__ == '__main__':
    # Convert every file listed in ``names`` (defined elsewhere in this
    # file) and write the result next to it as ``<stem>.docbook``, where the
    # stem is the part of the name before its first dot.
    import sys

    for name in names:
        # Context managers close both files even when a step fails.
        with open(name, 'r') as source_file:
            xml_string = source_file.read()

        xml_string = convert2docbook(name, xml_string)
        docbook_file = name.split('.')[0] + '.docbook'

        with open(docbook_file, 'w') as target_file:
            try:
                target_file.write(xml_string)
            except (IOError, UnicodeError) as error:
                # Writing stays best-effort so one bad file does not stop
                # the batch, but the failure is reported instead of hidden.
                sys.stderr.write(
                    "could not write %s: %s\n" % (docbook_file, error))
111 |
147 |
112 |
148 |
113 |
149 |
114 |
150 |
115 |
151 |