SEESenv/scripts/autoid.py
changeset 2 52d12eb31c30
parent 0 8083d21c0020
child 31 06a02dd3966f
equal deleted inserted replaced
1:672eaaab9204 2:52d12eb31c30
       
     1 #!/usr/bin/env python
       
     2 #
       
     3 # Add unique ID attributes to para tags.  This script should only be
       
     4 # run by one person, since otherwise it introduces the possibility of
       
     5 # chaotic conflicts among tags.
       
     6 
       
     7 import glob, os, re, sys
       
     8 
       
     9 tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
       
    10 untagged = re.compile('<para>')
       
    11 
       
    12 names = glob.glob('ch*.docbook') 
       
    13 # First pass: find the highest-numbered paragraph ID.
       
    14 
       
    15 
       
    16 chapter=None
       
    17 seen = set()
       
    18 errs = 0
       
    19 beginning="p_list= "
       
    20 
       
    21 id_file=open('p_list.py','w')
       
    22 dictionary={}
       
    23 id_list=[]
       
    24 for name in names:
       
    25     for m in tagged.finditer(open(name).read()):
       
    26         i = int(m.group(1),16)
       
    27         if i in seen:
       
    28             print >> sys.stderr, '%s: duplication of ID %s' % (name, i)
       
    29             errs += 1
       
    30         seen.add(i)
       
    31         if i > biggest_id:
       
    32             biggest_id = i
       
    33 
       
    34 
       
    35 def retag(s):
       
    36     global biggest_id
       
    37     global chapter   
       
    38     biggest_id += 1
       
    39      
       
    40     id_name="%s_%x" % (chapter,biggest_id)   
       
    41     id_list.append(id_name)    
       
    42     
       
    43     return '<para id="%s">' %id_name
       
    44 
       
    45 # Second pass: add IDs to paragraphs that currently lack them.
       
    46 
       
    47 for name in names:
       
    48     biggest_id=0 
       
    49     chapter=name.split('.')[0]    
       
    50     id_list=[]    
       
    51     f = open(name).read()
       
    52     f1 = untagged.sub(retag, f )
       
    53     dictionary[chapter]=id_list       
       
    54     if f1 != f:
       
    55         tmpname = name + '.tmp'
       
    56         fp = open(tmpname, 'w')
       
    57         fp.write(f1)
       
    58         fp.close()
       
    59         os.rename(tmpname, name)
       
    60 p_lists_string=beginning+str(dictionary)
       
    61 id_file.write(p_lists_string)
       
    62 sys.exit(errs)