author | amit@thunder |
Thu, 25 Feb 2010 00:45:20 +0530 | |
changeset 28 | 514098969b11 |
parent 26 | 1846ab4ebdda |
child 29 | 5ce5b22a9a0b |
permissions | -rw-r--r-- |
26
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
1 |
import glob |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
2 |
import lxml |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
3 |
import re |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
4 |
import os |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
5 |
from BeautifulSoup import BeautifulSoup |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
6 |
import time |
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
7 |
# Directory whose chapter files (ch*.html) the script globs and rewrites in place.
repo = '/home/amit/testdocbook2/'
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
8 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
9 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
10 |
def finalchanges(file_name, html_string):
    """Apply the final fix-ups to one chapter's HTML for the hgbook project.

    The following changes are made, in order:

    * the whole ``<head>...</head>`` element is replaced with a fixed head
      that references the ``/review/support/`` stylesheet, favicon and
      scripts;
    * every literal ``><a name`` sequence is rewritten to `` id``;
    * the page title is set to ``'Chapter. '`` plus the chapter title;
    * the first ``<div>`` found under ``<html>`` loses its ``title``
      attribute and gets the chapter name as its ``id``.

    file_name   -- path of the chapter file.  The chapter name is its base
                   name up to the first '.', and the chapter title is the
                   text after the first run of digits in that name.  When
                   the name contains no digits the whole chapter name is
                   used as the title.
    html_string -- the HTML text of that file.

    Returns the modified BeautifulSoup document.  The chapter name is
    always printed; the file name is printed as well when the document has
    no ``<div>`` to update.
    """
    # The <title> text in this head is only a placeholder; it is replaced
    # with the real chapter title further down.
    replace_string = (
        '<head>'
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        '<title>Chapter 2. Basic Python</title>'
        '<link rel="stylesheet" href="/review/support/styles.css" type="text/css">'
        '<meta name="generator" content="DocBook XSL Stylesheets V1.74.3">'
        '<link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png">'
        '<script type="text/javascript" src="/review/support/jquery-min.js"></script>'
        '<script type="text/javascript" src="/review/support/form.js"></script>'
        '<script type="text/javascript" src="/review/support/hsbook.js"></script>'
        '</head>'
    )
    reg_obj = re.compile('<head>.*</head>', re.DOTALL)
    # The third positional argument of a compiled pattern's sub() is `count`,
    # not `flags`; DOTALL is already part of the compiled pattern.
    html_string = reg_obj.sub(replace_string, html_string)
    html_string = re.sub('><a name', ' id', html_string)

    # NOTE(review): decoding as ASCII with 'ignore' silently drops every
    # non-ASCII character although the new head declares utf-8.  Kept as-is
    # because the encoding of the incoming HTML is not known here -- confirm
    # and decode with the real encoding instead.
    soup = BeautifulSoup(html_string.decode('ascii', 'ignore'))

    ch_name = os.path.split(file_name)[1].split('.')[0]
    print(ch_name)

    # Split on runs of digits ('+', not '*', so the pattern can never match
    # the empty string) and take the text after the first run.
    name_parts = re.split('[0-9]+', ch_name)
    if len(name_parts) > 1:
        ch_title = name_parts[1]
    else:
        # No digits in the name: use the whole name rather than failing.
        ch_title = ch_name
    title_string = 'Chapter. ' + ch_title
    soup.html.head.title.string.replaceWith(title_string)

    div = soup.html.div
    if div is None:
        # No <div> in the document: report the file and leave it unchanged.
        print(file_name)
    else:
        del div['title']
        div['id'] = ch_name
    return soup
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
31 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
32 |
if __name__=='__main__':
    # Run finalchanges() over every chapter file under `repo` and write the
    # prettified result back to the same file.
    for file_name in glob.glob(repo+'ch*.html'):
        # Read the whole file and close it before reopening it for writing.
        file_obj = open(file_name, 'r')
        try:
            html_string = file_obj.read()
        finally:
            file_obj.close()

        soup = finalchanges(file_name, html_string)

        # NOTE(review): purpose of this pause is not evident from the code;
        # kept so the script's timing is unchanged -- confirm and remove.
        time.sleep(1)

        file_obj = open(file_name, 'w')
        try:
            file_obj.write(soup.prettify())
        finally:
            # Close explicitly so the output is flushed to disk.
            file_obj.close()
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
40 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
41 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
42 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
43 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
44 |
|
1846ab4ebdda
Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff
changeset
|
45 |