# Some small changes ... bug fixes
import glob
#import lxml
import re
import os
from BeautifulSoup import BeautifulSoup ,NavigableString
import time
import sys
import xml.etree.ElementTree as ET
import xml
# Directory holding the chapter HTML files (ch*.html) that this script
# rewrites in place; the commented-out line below is an alternative location.
repo='/home/hg/repos/SEES-hacks/temp/'
#repo='/home/amit/testdocbook2/'
def sort_doubledigit(chapter_names, repo_dir=None):
    """Move the two-digit chapters (ch10..ch19) to the end of the list.

    A plain lexical sort places ``ch10...`` before ``ch2...``.  Every path
    matching ``<repo_dir>/ch1<digit>...html`` is therefore removed from its
    current position, has each ``ch1`` rewritten to ``chn1`` and is appended
    after the remaining chapters.  Relative order inside both groups is kept.

    Args:
        chapter_names: list of chapter HTML paths.
        repo_dir: directory the chapter files live in.  Defaults to the
            module-level ``repo``.

    Returns:
        A new list; ``chapter_names`` itself is not modified.
    """
    if repo_dir is None:
        repo_dir = repo
    # Escape the directory so that characters such as '.' or '+' in the path
    # are matched literally, and compile the pattern once, outside the loop.
    prefix = re.escape(os.path.join(repo_dir, ''))
    double_digit = re.compile(prefix + r'ch1[0-9].*\.html')

    single_digit = []
    renamed = []
    for item in chapter_names:
        if double_digit.match(item):
            # Same rewrite finalchanges() applies when it looks a file up,
            # so the two must stay in sync.
            renamed.append(item.replace('ch1', 'chn1'))
        else:
            single_digit.append(item)
    # Partition instead of slicing off the first len(renamed) entries: the
    # slice silently dropped the wrong items whenever the two-digit chapters
    # were not at the front of the input.
    return single_digit + renamed
def _neighbour_chapters(chapter_names, file_name):
    """Return the (previous, next) chapter file names; '' where there is none."""
    try:
        index = chapter_names.index(file_name)
    except ValueError:
        # Two-digit chapters are stored under their 'chn1...' alias
        # (see sort_doubledigit), so retry with the same rewrite.
        index = chapter_names.index(re.sub('ch1', 'chn1', file_name))
    previous_chapter = ''
    next_chapter = ''
    if index > 0:
        previous_chapter = os.path.basename(chapter_names[index - 1])
    if index + 1 < len(chapter_names):
        next_chapter = os.path.basename(chapter_names[index + 1])
    return previous_chapter, next_chapter


def _chapter_title(chapter_xml, ch_name):
    """Return the title read from chapter_xml, else one derived from ch_name."""
    try:
        root = ET.parse(chapter_xml).getroot()
        title_tag = root.find('title')
        if title_tag is None:
            # No <title> directly under the root: use the first child's.
            title_tag = root[0].find('title')
        title = title_tag.text
    except Exception:
        # Best effort: a missing or unparsable XML file, or one without a
        # usable <title>, falls back to the file-name derived title below.
        title = None
    if title:
        print(title)
        return title
    # Fallback: the text following the chapter number, e.g.
    # 'ch4strings_dicts' -> 'strings_dicts'.
    parts = re.split('[0-9]+', ch_name)
    if len(parts) > 1:
        return parts[1]
    return ch_name


def finalchanges(file_name, html_string):
    """Apply the final fix-ups to a generated chapter page.

    The <head> element is replaced by a fixed one that pulls in the review
    stylesheet and scripts, the page title is set to the chapter title, a
    navigation header linking the previous/next chapter is put at the top of
    <body> and a copyright footer at its end.

    Args:
        file_name: path of the chapter HTML file; it must be one of the
            ``ch*.html`` files found in ``repo``.
        html_string: the current HTML content of that file.

    Returns:
        The modified BeautifulSoup document.

    Raises:
        ValueError: if file_name is not among the chapter files in ``repo``.
    """
    # Replacement <head>: brings in the stylesheet and javascript.  The
    # <title> in it is a placeholder that is overwritten further down.
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    add_footer_string="""<div class="footer">
© Copyright 2009, Prabhu Ramachandran, Asokan Pichai, Shantanu Choudhary, Puneeth Chaganti, Santosh G. Vattam, Kadambari Devararajan, Madhusudan.C.S.</div>"""

    ch_name = os.path.basename(file_name).split('.')[0]

    # Previous / next chapter links.
    chapter_names = glob.glob(os.path.join(repo, 'ch*.html'))
    chapter_names.sort()
    chapter_names = sort_doubledigit(chapter_names)
    previous_chapter, next_chapter = _neighbour_chapters(chapter_names, file_name)
    # NOTE(review): for two-digit chapters these names carry the 'chn1'
    # alias produced by sort_doubledigit -- confirm that the link targets
    # are really meant to use it.
    previous_string = ''
    if previous_chapter:
        previous_string = '<<<'
    next_string = ''
    if next_chapter:
        next_string = '>>>'

    # Chapter title: the XML file is expected next to the HTML file.  Build
    # the path from the directory and base name so that a '.' in a directory
    # name cannot truncate it.
    chapter_xml = os.path.join(os.path.dirname(file_name), ch_name + '.xml')
    current_chapter_title = _chapter_title(chapter_xml, ch_name)

    body_add_string="""<div><table width="100%%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter %s</th></tr><tr><td width="20%%" align="left"><a accesskey="p" href="%s">%s</a></td><th width="60%%" align="center"> </th><td width="20%%" align="right"> <a accesskey="n" href="%s">%s</a></td></tr></table></div>"""%(current_chapter_title,previous_chapter,previous_string,next_chapter,next_string)

    # The flag belongs to compile(); the third positional argument of sub()
    # is the substitution count, so it must not be passed there.
    head_pattern = re.compile('<head>.*</head>', re.DOTALL)
    html_string = head_pattern.sub(replace_string, html_string)
    # Turns '...><a name="x"' into '... id="x"' (presumably so the anchor
    # name ends up as the id of the enclosing tag).
    html_string = re.sub('><a name', ' id', html_string)

    # Non-ASCII bytes are dropped before parsing.
    soup = BeautifulSoup(html_string.decode('ascii', 'ignore'))
    soup.html.head.title.string.replaceWith(current_chapter_title)

    div = soup.html.div
    try:
        del div['title']
        div['id'] = ch_name
    except TypeError:
        # Raised when the page has no <div> at all (div is None); report
        # the file and carry on.
        print(file_name)

    body = soup.html.body
    body.insert(0, NavigableString(body_add_string))
    # The footer goes after everything else in <body>.
    body.insert(len(body.contents), NavigableString(add_footer_string))
    return soup
if __name__=='__main__':
    # Rewrite the selected chapter page(s) in place.
    file_names = glob.glob(os.path.join(repo, 'ch4strings_dicts.html'))
    for file_name in file_names:
        # Read the whole page and close the handle before the same path is
        # reopened for writing.
        file_obj = open(file_name, 'r')
        try:
            html_string = file_obj.read()
        finally:
            file_obj.close()
        soup = finalchanges(file_name, html_string)
        # NOTE(review): pause kept from the original script; its purpose is
        # not evident from this file.
        time.sleep(1)
        file_obj = open(file_name, 'w')
        try:
            # Same output as "print >>file_obj, soup": the serialised
            # document followed by a newline.
            file_obj.write(str(soup) + '\n')
        finally:
            # Close explicitly so the page is flushed to disk.
            file_obj.close()
# add_footer_string="""<div class="footer">
# © Copyright 2009, Prabhu Ramachandran, Asokan Pichai, Shantanu Choudhary, Puneeth Chaganti, Santosh G. Vattam, Kadambari Devararajan, Madhusudan.C.S.</div></body>"""
# print file_name
# file_obj_footer_read=open(file_name,'r')
# html_string=file_obj_footer_read.read()
# #time.sleep(1)
# #print html_string
# print len(html_string)
# print re.findall("</div>",html_string)
# html_string=re.sub('</body>','abshdaskbd',html_string)
# file_obj_footer_write=open(file_name,'w')
# file_obj_footer_write.write(html_string)