# Changed paths that depend on the repo location to be taken from the script;
# also changed how the soup is printed.
import glob
#import lxml
import re
import os
from BeautifulSoup import BeautifulSoup
import time
import sys
# Directory that the script entry point globs for the ch*.html chapter pages
# it rewrites in place (hard-coded absolute path).
repo='/home/hg/repos/SEES-hacks/temp/'
def finalchanges(file_name,html_string):
    """Apply the final fix-ups to a chapter page before it is used in the hgbook project.

    The whole <head> element is replaced by a fixed head that links the
    /review/support stylesheet and scripts, every '><a name' sequence is
    rewritten to ' id', the <title> text is set to 'Chapter. <title>', and the
    first <div> found under <html> loses its 'title' attribute and receives
    the chapter name as its 'id'.

    file_name   -- path of the chapter file; the base name up to the first
                   '.' is used as the chapter name.
    html_string -- markup of the page as a byte string.

    Returns the modified BeautifulSoup tree.  If the page has no <div>, the
    file name is printed and the tree is returned without the id change.
    """
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    # DOTALL lets '.' cross newlines so a multi-line <head> is matched.  The
    # flag belongs to compile(); the third positional argument of sub() is
    # 'count', so it must not be passed there again.
    head_re=re.compile(r'<head>.*</head>',re.DOTALL)
    html_string=head_re.sub(replace_string,html_string)
    html_string=re.sub('><a name',' id',html_string)
    # NOTE(review): non-ASCII bytes are silently dropped here even though the
    # replacement head declares utf-8 -- confirm that this loss is intended.
    soup=BeautifulSoup(html_string.decode('ascii','ignore'))
    ch_name=os.path.split(file_name)[1].split('.')[0]
    print(ch_name)
    # The title is the text following the first run of digits in the chapter
    # name.  '[0-9]+' never matches the empty string, so the split does not
    # depend on how the interpreter treats empty matches.
    name_parts=re.split('[0-9]+',ch_name)
    if len(name_parts)>1:
        ch_title=name_parts[1]
    else:
        # No digits in the name: fall back to the whole chapter name instead
        # of failing with an IndexError.
        ch_title=ch_name
    title_string='Chapter. '+ch_title
    soup.html.head.title.string.replaceWith(title_string)
    div=soup.html.div
    if div is None:
        # No <div> to tag; report which file was affected and carry on.
        print(file_name)
    else:
        del div['title']
        div['id']=ch_name
    return soup
if __name__=='__main__':
    # Rewrite in place every ch*.html page found in the repo directory.
    for file_name in glob.glob(os.path.join(repo,'ch*.html')):
        # Read the whole page and close the handle before the same path is
        # reopened for writing.
        with open(file_name,'r') as file_obj:
            html_string=file_obj.read()
        soup=finalchanges(file_name,html_string)
        # NOTE(review): pause kept from the original; its purpose is not
        # evident from this file -- confirm whether it is still needed.
        time.sleep(1)
        # The with-block guarantees the output is flushed and closed; this
        # writes exactly what 'print >>file_obj, soup' wrote.
        with open(file_name,'w') as file_obj:
            file_obj.write(str(soup))
            file_obj.write('\n')