SEESenv/scripts/finalhtml.py
changeset 44 d0e9b52bda73
parent 41 e54725be4df6
child 45 b5bff924ef69
equal deleted inserted replaced
43:134b87b382f5 44:d0e9b52bda73
     1 import glob
     1 import glob
     2 #import lxml
     2 #import lxml
     3 import re
     3 import re
     4 import os
     4 import os
     5 from BeautifulSoup import BeautifulSoup
     5 from BeautifulSoup import BeautifulSoup ,NavigableString
     6 import time
     6 import time
     7 import sys
     7 import sys
       
     8 import xml.etree.ElementTree as ET
       
     9 import xml
       
    10 
     8 repo='/home/hg/repos/SEES-hacks/temp/'
    11 repo='/home/hg/repos/SEES-hacks/temp/'
       
    12 #repo='/home/amit/testdocbook2/'
     9 
    13 
       
    14 def sort_doubledigit(chapter_names):
       
    15     for item in chapter_names:
       
    16         reg_obj=re.compile(os.path.join(repo,'ch1[0-9].*.html'))
       
    17         if (reg_obj.match(item)):
       
    18             item_tmp=item
       
    19             chapter_names.remove(item)
       
    20             chapter_names.append(item_tmp)
       
    21     return chapter_names
    10 
    22 
    11 def finalchanges(file_name,html_string):
    23 def finalchanges(file_name,html_string):
    12     """some of the final changes that need to do be done on the html before creating the final usable page in the hgbook project"""	    
    24     """some of the final changes that need to do be done on the html before creating the final usable page in the hgbook project"""	    
    13 #    print html_string    
    25 #    print html_string    
    14     replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    26     replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
       
    27     ch_name=os.path.split(file_name)[1].split('.')[0]
       
    28     chapter_names_unsorted=glob.glob(os.path.join(repo,'ch*.html'))
       
    29     chapter_names_unsorted.sort()    
       
    30     chapter_names_sorted=chapter_names_unsorted
       
    31 #    print chapter_names_sorted
       
    32     chapter_names_sorted=sort_doubledigit(chapter_names_sorted)
       
    33     chapter_names=chapter_names_sorted
       
    34     previous_string='<<<'
       
    35     next_string='>>>'
       
    36     html_src_folder="review/html/"
       
    37     current_chapter_index=chapter_names.index(file_name)
       
    38 
       
    39     current_chapter=os.path.join(html_src_folder,chapter_names[current_chapter_index].split('/')[-1])
       
    40     if (current_chapter_index-1>0):
       
    41         previous_chapter=os.path.join(html_src_folder,chapter_names[current_chapter_index-1].split('/')[-1])
       
    42     else:
       
    43         previous_chapter=''
       
    44         previous_string=''
       
    45     try :  
       
    46         next_chapter=os.path.join(html_src_folder,chapter_names[current_chapter_index+1].split('/')[-1])
       
    47     except:
       
    48         next_string=''
       
    49         next_chapter=''
       
    50     
       
    51     ch_name_tmp=file_name.split('.')[0]
       
    52     chapter_xml=ch_name_tmp+'.xml'
       
    53        
       
    54 
       
    55     try:    
       
    56         xml_file =open(chapter_xml,'r').read()
       
    57         xml_tree=ET.fromstring(xml_file)
       
    58         try:
       
    59             title_tag=xml_tree.find('title')
       
    60             current_chapter_title=title_tag.text
       
    61         except:
       
    62             section=xml_tree.getchildren()[0]
       
    63             title_tag=section.find('title')
       
    64             current_chapter_title=title_tag.text
       
    65 
       
    66         print current_chapter_title
       
    67         
       
    68 #        soup.html.body.insert(0,NavigableString(body_add_string))
       
    69 
       
    70 
       
    71     except :
       
    72         ch_title=re.split('[0-9]*',ch_name)[1]    
       
    73         title_string='Chapter. '+ ch_title
       
    74         current_chapter_title=title_string        
       
    75     
       
    76     
       
    77 
       
    78     body_add_string="""<div><table width="100%%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter %s</th></tr><tr><td width="20%%" align="left"><a accesskey="p" href="%s">%s</a></td><th width="60%%" align="center"> </th><td width="20%%" align="right"> <a accesskey="n" href="%s">%s</a></td></tr></table></div>"""%(current_chapter_title,previous_chapter,previous_string,next_chapter,next_string)
       
    79         
       
    80         
       
    81 
       
    82 
       
    83     
    15     reg_obj=re.compile('<head>.*</head>',re.DOTALL)    
    84     reg_obj=re.compile('<head>.*</head>',re.DOTALL)    
    16     html_string=reg_obj.sub(replace_string, html_string,re.DOTALL)
    85     html_string=reg_obj.sub(replace_string, html_string,re.DOTALL)
    17     html_string=re.sub('><a name',' id', html_string)	
    86     html_string=re.sub('><a name',' id', html_string)	
    18     soup=BeautifulSoup(html_string.decode('ascii','ignore'))    
    87     soup=BeautifulSoup(html_string.decode('ascii','ignore'))    
    19     ch_name=os.path.split(file_name)[1].split('.')[0]
    88     soup.html.head.title.string.replaceWith(current_chapter_title) 
    20     print ch_name
       
    21     ch_title=re.split('[0-9]*',ch_name)[1]    
       
    22     title_string='Chapter. '+ ch_title
       
    23     soup.html.head.title.string.replaceWith(title_string) 
       
    24     div=soup.html.div
    89     div=soup.html.div
    25         
    90         
    26     try:
    91     try:
    27         del(div['title'])
    92         del(div['title'])
    28         div['id'] = ch_name
    93         div['id'] = ch_name
    29     except TypeError:
    94     except TypeError:
    30         print file_name  
    95         print file_name  
       
    96     
       
    97     soup.html.body.insert(0,NavigableString(body_add_string))
       
    98 
       
    99 
    31     return soup
   100     return soup
    32 
   101 
    33 if __name__=='__main__':
   102 if __name__=='__main__':
    34 	file_names=glob.glob(os.path.join(repo,'ch*.html'))
   103 	file_names=glob.glob(os.path.join(repo,'ch*.html'))
    35 	for file_name in file_names:
   104 	for file_name in file_names:
    36             file_obj=open(file_name,'r')
   105             file_obj=open(file_name,'r')
    37             soup=finalchanges(file_name,file_obj.read())
   106             soup=finalchanges(file_name,file_obj.read())
    38       	    time.sleep(1)
   107       	    time.sleep(1)
    39 	    file_obj=open(file_name,'w')
   108 	    file_obj=open(file_name,'w')
    40 	    print >>file_obj ,soup
   109 	    print >>file_obj ,soup
    41 
   110             print file_name
    42 
   111 
    43 
   112 
    44 	
   113