SEESenv/scripts/finalhtml.py
author amit@thunder
Wed, 10 Mar 2010 17:39:26 +0530
changeset 45 b5bff924ef69
parent 44 d0e9b52bda73
child 49 3b5f1341d6c6
permissions -rw-r--r--
Some more changes to the soup are made in final html; also, comment.html has been changed so the links don't appear.

import glob
#import lxml
import re
import os
from BeautifulSoup import BeautifulSoup ,NavigableString
import time
import sys
import xml.etree.ElementTree as ET
import xml

# Directory that is searched for the 'ch*.html' chapter pages; its path is
# also used as the prefix of the regex that recognises two-digit chapters.
repo='/home/hg/repos/SEES-hacks/temp/'
#repo='/home/amit/testdocbook2/'

def sort_doubledigit(chapter_names, repo_dir=None):
    """Return chapter paths with chapters 10-19 moved after the others.

    A plain lexical sort puts ``ch10...`` before ``ch2...``.  Every path
    that matches ``<repo_dir>/ch1[0-9]...html`` is moved to the end of the
    list (relative order preserved) and has ``ch1`` rewritten to ``chn1``.
    The renamed entries therefore no longer equal the on-disk file names;
    a caller looking up such a chapter must apply the same substitution.

    Arguments:
        chapter_names -- list of chapter file paths; it is not modified.
        repo_dir      -- directory the paths live in.  Defaults to the
                         module-level ``repo``.

    Returns a new list: non-matching paths first, renamed two-digit
    chapters last.
    """
    if repo_dir is None:
        repo_dir = repo

    # Escape the directory so that regex metacharacters in the path are
    # matched literally; joining with '' guarantees a trailing separator.
    # The pattern is compiled once instead of once per item.
    two_digit = re.compile(
        re.escape(os.path.join(repo_dir, '')) + r'ch1[0-9].*\.html')

    # Partition explicitly rather than slicing off the first N entries:
    # the two-digit chapters are not guaranteed to be at the front
    # (e.g. 'ch1.html' sorts before 'ch10.html').
    others = []
    renamed = []
    for item in chapter_names:
        if two_digit.match(item):
            renamed.append(re.sub('ch1', 'chn1', item))
        else:
            others.append(item)

    return others + renamed





def _chapter_title(chapter_xml, ch_name):
    """Return the chapter title read from *chapter_xml*, else one derived from *ch_name*.

    The title is the text of the ``title`` element found directly under
    the XML root or, failing that, under the root's first child.  When the
    file cannot be read or parsed, or no title element is found, the title
    is whatever follows the first run of digits in *ch_name* (or *ch_name*
    itself when it contains no digits).
    """
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(chapter_xml, 'r') as xml_file:
            xml_tree = ET.fromstring(xml_file.read())

        title_tag = xml_tree.find('title')
        if title_tag is None:
            # No title directly under the root: look in the first child.
            title_tag = xml_tree[0].find('title')
        current_chapter_title = title_tag.text

        # Kept inside the try block on purpose: a failure while printing
        # also falls back to the name-derived title, as it always did.
        print(current_chapter_title)
        return current_chapter_title
    except Exception:
        # Best-effort lookup: any failure above (missing file, malformed
        # XML, no title element) falls back to the file name.  Unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        pieces = re.split('[0-9]+', ch_name)
        return pieces[1] if len(pieces) > 1 else ch_name


def finalchanges(file_name, html_string):
    """Apply the final changes that turn a chapter page into the usable
    page of the hgbook project.

    The ``<head>`` section is replaced by a fixed one, ``><a name`` is
    rewritten to `` id``, the page title is set to the chapter title, the
    first ``div`` loses its ``title`` attribute and gets the chapter name
    as ``id``, and a navigation table linking to the previous and next
    chapter is inserted at the top of ``<body>``.

    Arguments:
        file_name   -- path of the chapter HTML file; it must be one of the
                       ``ch*.html`` files found in ``repo``.
        html_string -- contents of that file.

    Returns the modified BeautifulSoup document.

    Raises ValueError when *file_name* is not among the chapter files
    found in ``repo``.
    """
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    ch_name = os.path.split(file_name)[1].split('.')[0]
    chapter_names = sort_doubledigit(
        sorted(glob.glob(os.path.join(repo, 'ch*.html'))))
    previous_string = '<<<'
    next_string = '>>>'

    try:
        current_chapter_index = chapter_names.index(file_name)
    except ValueError:
        # Two-digit chapters are listed under their 'chn1...' placeholder
        # name by sort_doubledigit, so look the file up under that name.
        current_chapter_index = chapter_names.index(
            re.sub('ch1', 'chn1', file_name))

    # NOTE(review): for two-digit chapters the names taken from
    # chapter_names carry the 'chn1' placeholder, so the generated hrefs
    # use that name too -- confirm this is what the site expects.
    if current_chapter_index > 0:
        previous_chapter = chapter_names[current_chapter_index - 1].split('/')[-1]
    else:
        # First chapter: no "previous" link text or target.
        previous_chapter = ''
        previous_string = ''
    if current_chapter_index + 1 < len(chapter_names):
        next_chapter = chapter_names[current_chapter_index + 1].split('/')[-1]
    else:
        # Last chapter: no "next" link text or target.
        next_chapter = ''
        next_string = ''

    # The XML source sits next to the HTML file.  Build its path from the
    # directory and the chapter name so that a dot in a directory name
    # cannot truncate the path.
    chapter_xml = os.path.join(os.path.dirname(file_name), ch_name) + '.xml'
    current_chapter_title = _chapter_title(chapter_xml, ch_name)

    body_add_string="""<div><table width="100%%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter %s</th></tr><tr><td width="20%%" align="left"><a accesskey="p" href="%s">%s</a></td><th width="60%%" align="center"> </th><td width="20%%" align="right"> <a accesskey="n" href="%s">%s</a></td></tr></table></div>"""%(current_chapter_title,previous_chapter,previous_string,next_chapter,next_string)

    # DOTALL belongs to compile(); the third positional argument of sub()
    # is ``count``, so the flag must not be passed there.
    reg_obj = re.compile('<head>.*</head>', re.DOTALL)
    html_string = reg_obj.sub(replace_string, html_string)
    html_string = re.sub('><a name', ' id', html_string)

    # NOTE(review): non-ASCII bytes are silently dropped here although the
    # replacement head declares utf-8 -- confirm this is intended.
    soup = BeautifulSoup(html_string.decode('ascii', 'ignore'))
    soup.html.head.title.string.replaceWith(current_chapter_title)
    div = soup.html.div

    try:
        del(div['title'])
        div['id'] = ch_name
    except TypeError:
        # Raised when the page has no div at all (div is None); report the
        # offending file and carry on.
        print(file_name)

    soup.html.body.insert(0, NavigableString(body_add_string))

    return soup

if __name__ == '__main__':
    # Rewrite every chapter page found in ``repo`` in place.
    file_names = glob.glob(os.path.join(repo, 'ch*.html'))
    for file_name in file_names:
        # Read and close the file before reopening it for writing.
        with open(file_name, 'r') as file_obj:
            soup = finalchanges(file_name, file_obj.read())
        # NOTE(review): pause kept from the original script; its purpose is
        # not evident from this file -- confirm whether it is still needed.
        time.sleep(1)
        # The context manager flushes and closes the rewritten page.
        with open(file_name, 'w') as file_obj:
            file_obj.write(str(soup) + '\n')
        print(file_name)