SEESenv/scripts/finalhtml.py
changeset 26 1846ab4ebdda
child 29 5ce5b22a9a0b
equal deleted inserted replaced
25:ed38dd9bdb50 26:1846ab4ebdda
       
     1 import glob
       
     2 import lxml
       
     3 import re
       
     4 import os
       
     5 from BeautifulSoup import BeautifulSoup
       
     6 import time
       
     7 repo='/home/amit/testdocbook2/'
       
     8 
       
     9 
       
    10 def finalchanges(file_name,html_string):
       
    11     """some of the final changes that need to do be done on the html before creating the final usable page in the hgbook project"""	    
       
    12 #    print html_string    
       
    13     replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
       
    14     reg_obj=re.compile('<head>.*</head>',re.DOTALL)    
       
    15     html_string=reg_obj.sub(replace_string, html_string,re.DOTALL)
       
    16     html_string=re.sub('><a name',' id', html_string)	
       
    17     soup=BeautifulSoup(html_string.decode('ascii','ignore'))    
       
    18     ch_name=os.path.split(file_name)[1].split('.')[0]
       
    19     print ch_name
       
    20     ch_title=re.split('[0-9]*',ch_name)[1]    
       
    21     title_string='Chapter. '+ ch_title
       
    22     soup.html.head.title.string.replaceWith(title_string) 
       
    23     div=soup.html.div
       
    24         
       
    25     try:
       
    26         del(div['title'])
       
    27         div['id'] = ch_name
       
    28     except TypeError:
       
    29         print file_name  
       
    30     return soup
       
    31 
       
    32 if __name__=='__main__':
       
    33 	file_names=glob.glob(repo+'ch*.html')
       
    34 	for file_name in file_names:
       
    35             file_obj=open(file_name,'r')
       
    36             soup=finalchanges(file_name,file_obj.read())
       
    37       	    time.sleep(1)
       
    38 	    file_obj=open(file_name,'w')
       
    39 	    file_obj.write(soup.prettify())	
       
    40 
       
    41 
       
    42 
       
    43 
       
    44 
       
    45