SEESenv/scripts/autoid.py
changeset 33 bc535262231d
parent 31 06a02dd3966f
equal deleted inserted replaced
32:de7ac08f237b 33:bc535262231d
     7 import glob, os, re, sys
     7 import glob, os, re, sys
     8 
     8 
     9 tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
     9 tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
    10 untagged = re.compile('<para>')
    10 untagged = re.compile('<para>')
    11 
    11 
       
    12 script_folder='/home/hg/repos/SEES-hacks/temp/'
    12 names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook') 
    13 names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook') 
    13 # First pass: find the highest-numbered paragraph ID.
    14 # First pass: find the highest-numbered paragraph ID.
    14 
    15 
    15 
    16 
    16 chapter=None
    17 chapter=None
    17 seen = set()
    18 seen = set()
    18 errs = 0
    19 errs = 0
    19 beginning="p_list= "
    20 beginning="p_list= "
    20 
    21 
    21 id_file=open('p_list.py','w')
    22 id_file=open(script_folder+'p_list.py','w') 
    22 dictionary={}
    23 dictionary={}
    23 id_list=[]
    24 id_list=[]
    24 for name in names:
    25 for name in names:
    25     for m in tagged.finditer(open(name).read()):
    26     for m in tagged.finditer(open(name).read()):
    26         i = int(m.group(1),16)
    27         i = int(m.group(1),16)
    35 def retag(s):
    36 def retag(s):
    36     global biggest_id
    37     global biggest_id
    37     global chapter   
    38     global chapter   
    38     biggest_id += 1
    39     biggest_id += 1
    39      
    40      
    40     id_name="%s_%x" % (chapter,biggest_id)   
    41     id_name="%s_%x" % (chapter.split('/')[-1],biggest_id)   
    41     id_list.append(id_name)    
    42     id_list.append(id_name)    
    42     
    43     
    43     return '<para id="%s">' %id_name
    44     return '<para id="%s">' %id_name
    44 
    45 
    45 # Second pass: add IDs to paragraphs that currently lack them.
    46 # Second pass: add IDs to paragraphs that currently lack them.
    48     biggest_id=0 
    49     biggest_id=0 
    49     chapter=name.split('.')[0]    
    50     chapter=name.split('.')[0]    
    50     id_list=[]    
    51     id_list=[]    
    51     f = open(name).read()
    52     f = open(name).read()
    52     f1 = untagged.sub(retag, f )
    53     f1 = untagged.sub(retag, f )
    53     dictionary[chapter]=id_list       
    54     dictionary[chapter.split('/')[-1]]=id_list       
    54     if f1 != f:
    55     if f1 != f:
    55         tmpname = name + '.tmp'
    56         tmpname = name + '.tmp'
    56         fp = open(tmpname, 'w')
    57         fp = open(tmpname, 'w')
    57         fp.write(f1)
    58         fp.write(f1)
    58         fp.close()
    59         fp.close()