equal
deleted
inserted
replaced
|
1 import glob |
|
2 import lxml |
|
3 import re |
|
4 import os |
|
5 from BeautifulSoup import BeautifulSoup |
|
6 import time |
|
7 repo='/home/amit/testdocbook2/' |
|
8 |
|
9 |
|
def finalchanges(file_name,html_string):
    """Apply the final fix-ups to a generated chapter page for the hgbook project.

    The steps, in order:

    * the whole ``<head>...</head>`` section is replaced with a fixed head;
    * every ``><a name`` sequence is rewritten to `` id``;
    * the page title is set to ``'Chapter. '`` followed by the text found
      after the first run of digits in the chapter name;
    * the first ``<div>`` inside ``<html>`` loses its ``title`` attribute and
      receives the chapter name as its ``id``.

    Args:
        file_name: Path of the HTML file. Its base name, up to the first
            ``.``, is used as the chapter name.
        html_string: Contents of that file as a byte string.

    Returns:
        The modified ``BeautifulSoup`` document. If the document has no
        ``<div>``, the file name is printed and the document is returned
        without the ``<div>`` changes.
    """
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    # DOTALL belongs to the compiled pattern. The third positional argument
    # of Pattern.sub() is ``count``, so the flag must not be passed there.
    reg_obj=re.compile('<head>.*</head>',re.DOTALL)
    html_string=reg_obj.sub(replace_string, html_string)
    html_string=re.sub('><a name',' id', html_string)
    # NOTE(review): decoding as ASCII with 'ignore' silently drops every
    # non-ASCII byte, although the replacement head declares charset=utf-8.
    # Left unchanged because the real input encoding is not visible here --
    # confirm and switch to the proper codec.
    soup=BeautifulSoup(html_string.decode('ascii','ignore'))
    ch_name=os.path.split(file_name)[1].split('.')[0]
    print(ch_name)
    # Split on '[0-9]+' rather than '[0-9]*': the starred form depends on how
    # re.split treats empty matches, which differs between Python versions.
    name_parts=re.split('[0-9]+',ch_name)
    # A chapter name without digits yields a single part; fall back to the
    # whole name instead of raising IndexError.
    ch_title=name_parts[1] if len(name_parts)>1 else ch_name
    title_string='Chapter. '+ ch_title
    soup.html.head.title.string.replaceWith(title_string)
    div=soup.html.div
    if div is None:
        # Nothing to retag: report the offending file and carry on.
        print(file_name)
        return soup
    del div['title']
    div['id'] = ch_name
    return soup
|
31 |
|
if __name__=='__main__':
    # Rewrite every chapter page found in the repository directory in place.
    for file_name in glob.glob(repo+'ch*.html'):
        # Close the read handle before the same path is reopened for writing.
        with open(file_name,'r') as file_obj:
            html_string=file_obj.read()
        soup=finalchanges(file_name,html_string)
        # Render before the file is truncated, so that a failure while
        # rendering leaves the original page intact.
        rendered=soup.prettify()
        # NOTE(review): the purpose of this pause is not evident from the
        # code; presumably a workaround for the previously unclosed read
        # handle -- confirm before removing.
        time.sleep(1)
        with open(file_name,'w') as file_obj:
            file_obj.write(rendered)
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |