SEESenv/scripts/finalhtml.py
author amit@thunder
Wed, 10 Mar 2010 00:04:25 +0530
changeset 44 d0e9b52bda73
parent 41 e54725be4df6
child 45 b5bff924ef69
permissions -rw-r--r--
Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     1
import glob
39
bc65d8802897 Bug fix in myrst .. so that it does not fail at not finding a file
amit@thunder
parents: 32
diff changeset
     2
#import lxml
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     3
import re
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     4
import os
44
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
     5
from BeautifulSoup import BeautifulSoup ,NavigableString
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     6
import time
41
e54725be4df6 Changed paths dependent on repo location to be taken from the script also changed how the soup is printed
amit@thunder
parents: 40
diff changeset
     7
import sys
44
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
     8
import xml.etree.ElementTree as ET
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
     9
import xml
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
    10
41
e54725be4df6 Changed paths dependent on repo location to be taken from the script also changed how the soup is printed
amit@thunder
parents: 40
diff changeset
    11
repo='/home/hg/repos/SEES-hacks/temp/'
44
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
    12
#repo='/home/amit/testdocbook2/'
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    13
44
d0e9b52bda73 Changed the algorithm for getting the titles ... Also added the ability to navigate to the next chapters
amit@thunder
parents: 41
diff changeset
    14
def sort_doubledigit(chapter_names):
    """Move the chapter files numbered 10-19 to the end of the list.

    A plain lexicographic sort places ``ch10...`` .. ``ch19...`` before
    ``ch2...``.  This performs a stable partition: every path that does not
    match ``<repo>/ch1[0-9]*.html`` keeps its relative order at the front,
    and the matching paths follow, also in their original relative order.

    The list is reordered in place and the same list object is returned.

    chapter_names -- list of chapter HTML paths located under ``repo``.
    """
    # Compile once per call (``repo`` is a module-level setting) instead of
    # once per item, and escape the directory so it is matched literally.
    reg_obj = re.compile(os.path.join(re.escape(repo), r'ch1[0-9].*\.html'))

    # Build two new lists rather than removing/appending while iterating:
    # mutating the list under the loop skips items and re-visits moved ones.
    leading = []
    double_digit = []
    for item in chapter_names:
        if reg_obj.match(item):
            double_digit.append(item)
        else:
            leading.append(item)

    chapter_names[:] = leading + double_digit
    return chapter_names
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    22
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    23
def _chapter_title_from_xml(chapter_xml):
    """Return the text of the ``title`` element in the XML file *chapter_xml*.

    The title is looked up directly under the root element first and, if it
    is not there, under the root's first child.  Raises whatever ``open`` or
    the XML parser raise, ``IndexError`` if the root has no children, and
    ``AttributeError`` if no title element is found in either place.
    """
    with open(chapter_xml, 'r') as xml_handle:
        xml_tree = ET.fromstring(xml_handle.read())

    title_tag = xml_tree.find('title')
    if title_tag is None:
        # No title on the root itself: try the first child element.
        title_tag = list(xml_tree)[0].find('title')
    return title_tag.text


def finalchanges(file_name,html_string):
    """Apply the final fix-ups to a chapter's HTML and return the parsed soup.

    The changes made are:
      * the whole ``<head>`` is replaced by a fixed head that pulls in the
        review stylesheet and scripts;
      * ``><a name`` is rewritten to `` id``;
      * the ``<title>`` is set to the chapter title, read from the sibling
        ``.xml`` file or, failing that, derived from the file name;
      * the first ``div`` under ``html`` loses its ``title`` attribute and
        gets the chapter name as ``id``;
      * a navigation table linking to the previous/next chapter is inserted
        at the top of ``<body>``.

    file_name   -- path of the chapter HTML file; it must be one of the
                   ``ch*.html`` files found in ``repo``.
    html_string -- the current contents of that file.

    Returns the BeautifulSoup object for the modified document.
    Raises ValueError if *file_name* is not among the chapter files in
    ``repo``.
    """
    replace_string="""<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    # Base name of the file up to its first dot, e.g. 'ch1intro'.
    ch_name = os.path.split(file_name)[1].split('.')[0]

    # Chapter files in reading order: lexicographic, then ch10-ch19 moved
    # behind the single-digit chapters.
    chapter_names = glob.glob(os.path.join(repo, 'ch*.html'))
    chapter_names.sort()
    chapter_names = sort_doubledigit(chapter_names)

    previous_string = '<<<'
    next_string = '>>>'
    html_src_folder = "review/html/"
    current_chapter_index = chapter_names.index(file_name)

    # Every chapter except the first links back to its predecessor.  (The
    # earlier test ``index-1>0`` also dropped the link for the second one.)
    if current_chapter_index > 0:
        previous_chapter = os.path.join(
            html_src_folder,
            os.path.basename(chapter_names[current_chapter_index - 1]))
    else:
        previous_chapter = ''
        previous_string = ''

    # Every chapter except the last links forward to its successor.
    if current_chapter_index + 1 < len(chapter_names):
        next_chapter = os.path.join(
            html_src_folder,
            os.path.basename(chapter_names[current_chapter_index + 1]))
    else:
        next_chapter = ''
        next_string = ''

    # The XML file sits next to the HTML file and shares its base name.
    # Only the base name is cut at the first dot, so a dot in a directory
    # name no longer truncates the path.
    chapter_xml = os.path.join(os.path.dirname(file_name), ch_name + '.xml')

    try:
        current_chapter_title = _chapter_title_from_xml(chapter_xml)
        print(current_chapter_title)
    except Exception:
        # Best effort: a missing/unparsable XML file or a missing title must
        # not abort the run.  Fall back to the text that follows the chapter
        # number in the file name ('ch10intro' -> 'intro').
        name_parts = re.split('[0-9]+', ch_name)
        if len(name_parts) > 1:
            ch_title = name_parts[1]
        else:
            ch_title = ch_name
        current_chapter_title = 'Chapter. ' + ch_title

    body_add_string="""<div><table width="100%%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter %s</th></tr><tr><td width="20%%" align="left"><a accesskey="p" href="%s">%s</a></td><th width="60%%" align="center"> </th><td width="20%%" align="right"> <a accesskey="n" href="%s">%s</a></td></tr></table></div>"""%(current_chapter_title,previous_chapter,previous_string,next_chapter,next_string)

    # DOTALL belongs to compile(); the third positional argument of sub()
    # is ``count``, not ``flags``, so it is not repeated there.
    reg_obj = re.compile('<head>.*</head>', re.DOTALL)
    html_string = reg_obj.sub(replace_string, html_string)
    html_string = re.sub('><a name', ' id', html_string)

    # NOTE(review): decoding as ASCII with 'ignore' silently drops every
    # non-ASCII character although the new head declares utf-8 -- confirm
    # that this is intended.
    soup = BeautifulSoup(html_string.decode('ascii', 'ignore'))
    soup.html.head.title.string.replaceWith(current_chapter_title)

    div = soup.html.div
    try:
        del(div['title'])
        div['id'] = ch_name
    except TypeError:
        # ``div`` is None when the document has no div; report the file.
        print(file_name)

    soup.html.body.insert(0, NavigableString(body_add_string))

    return soup
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
   101
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
   102
if __name__=='__main__':
    # Rewrite every chapter HTML file in ``repo`` in place with the output
    # of finalchanges().
    file_names = glob.glob(os.path.join(repo, 'ch*.html'))
    for file_name in file_names:
        # Read the whole file and close it before it is reopened for
        # writing.
        with open(file_name, 'r') as file_obj:
            html_string = file_obj.read()
        soup = finalchanges(file_name, html_string)
        # NOTE(review): the reason for this one-second pause is not evident
        # from this file; it is kept unchanged.
        time.sleep(1)
        # Same output as ``print >>file_obj, soup``: the string form of the
        # soup followed by a newline; the handle is closed (and flushed)
        # when the block exits.
        with open(file_name, 'w') as file_obj:
            file_obj.write(str(soup) + '\n')
        print(file_name)
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
   111
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
   112
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
   113