SEESenv/scripts/finalhtml.py
author amit@thunder
Tue, 02 Mar 2010 17:07:14 +0530
changeset 41 e54725be4df6
parent 40 ef147a79b098
child 44 d0e9b52bda73
permissions -rw-r--r--
Changed paths dependent on repo location to be taken from the script also changed how the soup is printed
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     1
import glob
39
bc65d8802897 Bug fix in myrst .. so that it does not fail at not finding a file
amit@thunder
parents: 32
diff changeset
     2
#import lxml
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     3
import re
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     4
import os
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     5
from BeautifulSoup import BeautifulSoup
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     6
import time
41
e54725be4df6 Changed paths dependent on repo location to be taken from the script also changed how the soup is printed
amit@thunder
parents: 40
diff changeset
     7
import sys
e54725be4df6 Changed paths dependent on repo location to be taken from the script also changed how the soup is printed
amit@thunder
parents: 40
diff changeset
     8
# Directory that the script entry point globs for 'ch*.html' chapter pages.
repo = '/home/hg/repos/SEES-hacks/temp/'
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
     9
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    10
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    11
def finalchanges(file_name,html_string):
    """Apply the final fix-ups to one generated chapter page.

    The changes made, in order:

    * the whole <head>...</head> section is replaced by a fixed header that
      pulls in the /review/support stylesheet and scripts;
    * every '><a name' sequence is rewritten to ' id';
    * the page <title> is set to 'Chapter. ' followed by the part of the
      chapter name that comes after its number;
    * the first <div> inside <html> loses its 'title' attribute and gets
      the chapter name as its 'id'.

    file_name   -- path of the chapter file; its base name up to the first
                   '.' is the chapter name (for example 'ch2intro').
    html_string -- the page's HTML; it is decoded in here, so a Python 2
                   byte string is expected.

    Returns the modified BeautifulSoup document.

    Side effects: prints the chapter name, and prints file_name instead of
    failing when updating the <div> raises TypeError (for example because
    no <div> was found).
    """
    replace_string = """<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Chapter 2. Basic Python</title><link rel="stylesheet" href="/review/support/styles.css" type="text/css"><meta name="generator" content="DocBook XSL Stylesheets V1.74.3"><link rel="shortcut icon" type="image/png" href="/review/support/figs/favicon.png"><script type="text/javascript" src="/review/support/jquery-min.js"></script><script type="text/javascript" src="/review/support/form.js"></script><script type="text/javascript" src="/review/support/hsbook.js"></script></head>"""
    reg_obj = re.compile('<head>.*</head>', re.DOTALL)
    # Flags belong to compile(). The third positional argument of sub() is
    # `count`, so passing re.DOTALL there only meant "at most 16
    # replacements" and never set a flag.
    html_string = reg_obj.sub(replace_string, html_string)
    # The search text has no regex metacharacters, so a plain replace does
    # the same job as re.sub here.
    html_string = html_string.replace('><a name', ' id')
    # NOTE(review): non-ASCII bytes are silently dropped here even though
    # the injected <head> declares utf-8 -- confirm this is intentional.
    soup = BeautifulSoup(html_string.decode('ascii', 'ignore'))
    ch_name = os.path.split(file_name)[1].split('.')[0]
    print(ch_name)
    # Split on '[0-9]+' rather than '[0-9]*': a pattern that can match the
    # empty string only worked because older interpreters skip empty
    # matches when splitting; newer ones split between every character.
    name_parts = re.split('[0-9]+', ch_name)
    if len(name_parts) > 1:
        ch_title = name_parts[1]
    else:
        # No chapter number in the name: use the whole name as the title
        # instead of failing with IndexError.
        ch_title = ch_name
    title_string = 'Chapter. ' + ch_title
    # The placeholder title from replace_string is overwritten here.
    soup.html.head.title.string.replaceWith(title_string)
    div = soup.html.div

    try:
        del(div['title'])
        div['id'] = ch_name
    except TypeError:
        # div is None when the page has no <div>; report the file and
        # return the document without the id change.
        print(file_name)
    return soup
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    32
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    33
if __name__=='__main__':
    # Rewrite, in place, every chapter page found directly under `repo`.
    file_names = glob.glob(os.path.join(repo, 'ch*.html'))
    for file_name in file_names:
        # Read the whole page and close the handle before the same path is
        # reopened for writing below.
        file_obj = open(file_name, 'r')
        try:
            html_string = file_obj.read()
        finally:
            file_obj.close()
        soup = finalchanges(file_name, html_string)
        # NOTE(review): the reason for this one-second pause per file is
        # not visible in this script -- confirm it is still needed.
        time.sleep(1)
        file_obj = open(file_name, 'w')
        try:
            # Same bytes as `print >>file_obj, soup`: str(soup) followed by
            # a newline. Closing explicitly guarantees the data is flushed.
            file_obj.write(str(soup) + '\n')
        finally:
            file_obj.close()
26
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    41
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    42
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    43
1846ab4ebdda Bug fixes and added a script for changes in final html
amit@thunder
parents:
diff changeset
    44