1 import glob |
1 import glob |
2 #import lxml |
2 #import lxml |
3 import re |
3 import re |
4 import os |
4 import os |
5 from BeautifulSoup import BeautifulSoup |
5 from BeautifulSoup import BeautifulSoup ,NavigableString |
6 import time |
6 import time |
7 import sys |
7 import sys |
|
8 import xml.etree.ElementTree as ET |
|
9 import xml |
|
10 |
# Directory that is globbed for the generated chapter pages (ch*.html); the
# matching chapter XML files are looked up next to them.
repo='/home/hg/repos/SEES-hacks/temp/'
# Alternative location kept from the original source (disabled):
#repo='/home/amit/testdocbook2/'
9 |
13 |
|
def sort_doubledigit(chapter_names, repo_dir=None):
    """Move the ch10..ch19 chapter files behind all other chapter files.

    A plain string sort puts e.g. ``ch10foo.html`` before ``ch2foo.html``.
    This function keeps the relative order of the entries but moves every
    path matching ``<repo_dir>/ch1[0-9]*.html`` to the end of the list.
    Only chapters 10-19 are recognised; higher chapter numbers are left
    where they are.

    Parameters:
        chapter_names: list of chapter file paths; it is reordered in place.
        repo_dir: directory prefix the paths start with. Defaults to the
            module-level ``repo``.

    Returns:
        The same list object, reordered.
    """
    if repo_dir is None:
        repo_dir = repo
    # Compile once, and escape the literal parts so that characters in the
    # directory name (or the dot before "html") are not treated as regex
    # metacharacters.
    reg_obj = re.compile(
        re.escape(os.path.join(repo_dir, 'ch1')) + r'[0-9].*\.html')

    # Partition instead of remove()/append() during iteration: mutating the
    # list while looping over it skips the element that follows each move.
    single_digit = []
    double_digit = []
    for item in chapter_names:
        if reg_obj.match(item):
            double_digit.append(item)
        else:
            single_digit.append(item)

    # Slice-assign so callers holding a reference to the list see the result.
    chapter_names[:] = single_digit + double_digit
    return chapter_names
10 |
22 |
def _neighbour_links(file_name, html_src_folder):
    """Return (previous_href, previous_label, next_href, next_label) for file_name.

    The chapter order is the sorted list of ch*.html files in ``repo`` with
    the ch10..ch19 files moved to the end. A missing neighbour yields an
    empty href and an empty label.
    """
    chapter_names = glob.glob(os.path.join(repo, 'ch*.html'))
    chapter_names.sort()
    chapter_names = sort_doubledigit(chapter_names)
    current_chapter_index = chapter_names.index(file_name)

    previous_chapter = ''
    previous_string = ''
    # index > 0 (not index-1 > 0): the second chapter must link back to the first.
    if current_chapter_index > 0:
        previous_chapter = os.path.join(
            html_src_folder,
            os.path.basename(chapter_names[current_chapter_index - 1]))
        previous_string = '<<<'

    next_chapter = ''
    next_string = ''
    # Explicit bounds check instead of a bare except around the index lookup.
    if current_chapter_index + 1 < len(chapter_names):
        next_chapter = os.path.join(
            html_src_folder,
            os.path.basename(chapter_names[current_chapter_index + 1]))
        next_string = '>>>'

    return previous_chapter, previous_string, next_chapter, next_string


def _chapter_title(file_name, ch_name):
    """Return the chapter title from the sibling XML file, or a fallback built from ch_name."""
    # Build the XML path from the directory and the base name separately so a
    # dot in a directory name cannot truncate the path.
    chapter_xml = os.path.join(os.path.dirname(file_name), ch_name + '.xml')
    try:
        xml_file = open(chapter_xml, 'r')
        try:
            xml_tree = ET.fromstring(xml_file.read())
        finally:
            xml_file.close()
        title_tag = xml_tree.find('title')
        if title_tag is None:
            # No <title> directly under the root: use the first child's title.
            title_tag = list(xml_tree)[0].find('title')
        current_chapter_title = title_tag.text
        print(current_chapter_title)
        return current_chapter_title
    except Exception:
        # Best effort, as in the original: any failure to read or parse the
        # XML falls back to a title derived from the file name.
        # '[0-9]+' splits identically to the original '[0-9]*' on Python 2
        # and does not split on empty matches on newer interpreters.
        parts = re.split('[0-9]+', ch_name)
        if len(parts) > 1:
            ch_title = parts[1]
        else:
            # Name without digits: use it whole instead of raising IndexError.
            ch_title = ch_name
        return 'Chapter. ' + ch_title


def finalchanges(file_name,html_string):
    """Apply the final fix-ups to one generated chapter page of the hgbook project.

    Steps performed on ``html_string``:
      * the whole <head>...</head> section is replaced by a fixed head that
        pulls in the /review/support stylesheet and scripts;
      * every '><a name' is rewritten to ' id';
      * the <title> text is replaced by the chapter title (read from the
        chapter's XML file, or derived from the file name);
      * the first <div> loses its 'title' attribute and gets the chapter
        base name as 'id';
      * a navigation table with previous/next chapter links is inserted at
        the top of <body>.

    Parameters:
        file_name: path of the chapter HTML file; it must be one of the
            ch*.html files found in ``repo`` (ValueError otherwise).
        html_string: the page content as a byte string.

    Returns:
        The modified BeautifulSoup document.
    """
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    ch_name=os.path.split(file_name)[1].split('.')[0]
    html_src_folder="review/html/"

    previous_chapter, previous_string, next_chapter, next_string = \
        _neighbour_links(file_name, html_src_folder)
    current_chapter_title = _chapter_title(file_name, ch_name)

    body_add_string="""<div><table width="100%%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter %s</th></tr><tr><td width="20%%" align="left"><a accesskey="p" href="%s">%s</a></td><th width="60%%" align="center"> </th><td width="20%%" align="right"> <a accesskey="n" href="%s">%s</a></td></tr></table></div>"""%(current_chapter_title,previous_chapter,previous_string,next_chapter,next_string)

    reg_obj=re.compile('<head>.*</head>',re.DOTALL)
    # The flag belongs to compile(); the third positional argument of sub()
    # is a replacement count, so re.DOTALL must not be passed there.
    html_string=reg_obj.sub(replace_string, html_string)
    html_string=re.sub('><a name',' id', html_string)
    # NOTE(review): decoding as ASCII with 'ignore' drops every non-ASCII
    # byte although the new head declares utf-8 -- confirm this is intended.
    soup=BeautifulSoup(html_string.decode('ascii','ignore'))
    soup.html.head.title.string.replaceWith(current_chapter_title)
    div=soup.html.div

    try:
        del(div['title'])
        div['id'] = ch_name
    except TypeError:
        # No <div> in the document (div is None): report the file and go on.
        print(file_name)

    soup.html.body.insert(0,NavigableString(body_add_string))

    return soup
32 |
101 |
if __name__=='__main__':
    # Rewrite every chapter page found in the repo directory in place.
    file_names=glob.glob(os.path.join(repo,'ch*.html'))
    for file_name in file_names:
        # Read and close explicitly so the handle is released before the
        # same path is reopened for writing.
        file_obj=open(file_name,'r')
        try:
            html_string=file_obj.read()
        finally:
            file_obj.close()
        soup=finalchanges(file_name,html_string)
        # NOTE(review): one-second pause kept from the original; its purpose
        # is not evident from this file.
        time.sleep(1)
        file_obj=open(file_name,'w')
        try:
            # Same output as "print >>file_obj, soup": str(soup) plus newline.
            file_obj.write(str(soup)+'\n')
        finally:
            # Closing guarantees the page is flushed to disk.
            file_obj.close()
        print(file_name)
42 |
111 |
43 |
112 |
44 |
113 |