SEESenv/scripts/autoid.py
author amit@thunder
Mon, 12 Apr 2010 15:12:41 +0530
changeset 49 3b5f1341d6c6
parent 33 bc535262231d
permissions -rw-r--r--
Some small changes ... bug fixes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     1
#!/usr/bin/env python
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     2
#
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     3
# Add unique ID attributes to para tags.  This script should only be
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     4
# run by one person, since otherwise it introduces the possibility of
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     5
# chaotic conflicts among tags.
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     6
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     7
import glob, os, re, sys
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     8
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
     9
# Matches <para> tags that already carry a legacy "x_<hex>" id; group 1 is the hex part.
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Matches bare <para> tags (no attributes), i.e. the ones the second pass will tag.
untagged = re.compile('<para>')

script_folder = '/home/hg/repos/SEES-hacks/temp/'
names = glob.glob(script_folder + 'ch*.docbook')

# First pass: find the highest-numbered paragraph ID.

# Path of the chapter being processed, minus extension; set by the second
# pass and read by retag().
chapter = None
seen = set()        # legacy IDs (as ints) encountered so far, across all chapters
errs = 0            # number of duplicated legacy IDs; used as the exit status
beginning = "p_list= "

# Opened (and therefore truncated) up front; written once the second pass is done.
id_file = open(script_folder + 'p_list.py', 'w')
dictionary = {}     # chapter basename -> list of IDs allocated for that chapter
id_list = []
# Must be bound before the loop: the comparison below otherwise raises
# NameError on the first tagged paragraph.
biggest_id = 0
for name in names:
    # Read through a context manager so the handle is closed promptly.
    with open(name) as chapter_file:
        text = chapter_file.read()
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # Report the ID in the same "x_<hex>" form it has in the file.
            sys.stderr.write('%s: duplication of ID x_%x\n' % (name, i))
            errs += 1
        seen.add(i)
        if i > biggest_id:
            biggest_id = i
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
    34
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
    35
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
    36
def retag(s):
    """Return a ``<para>`` opening tag carrying a freshly allocated ID.

    Intended as the replacement callback for ``untagged.sub``: ``s`` is the
    match object for a bare ``<para>`` tag and is not inspected.

    Side effects: increments the module-level ``biggest_id`` counter and
    appends the new ID to the module-level ``id_list``.  The ID has the form
    ``<name>_<counter in hex>``, where ``<name>`` is the part of the
    module-level ``chapter`` path after its last '/'.
    """
    # Only biggest_id is rebound here; chapter is read and id_list is
    # mutated in place, so neither needs a global declaration.
    global biggest_id
    biggest_id += 1

    id_name = "%s_%x" % (chapter.split('/')[-1], biggest_id)
    id_list.append(id_name)

    return '<para id="%s">' % id_name
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
    45
8083d21c0020 The first commit of all the required files for the review app
amit@thunder
parents:
diff changeset
    46
# Second pass: add IDs to paragraphs that currently lack them.

for name in names:
    # Drop the extension from the file name only, so that a '.' in a
    # directory name cannot truncate the path.
    folder, filename = os.path.split(name)
    chapter = os.path.join(folder, filename.split('.')[0])
    # Same expression retag() uses for the ID prefix, so keys and IDs agree.
    chapter_key = chapter.split('/')[-1]

    with open(name) as chapter_file:
        f = chapter_file.read()

    # Continue numbering after any "<chapter>_<hex>" IDs already present, so
    # that re-running on a partly tagged chapter cannot hand out an ID that
    # is already in use.  With no such IDs the counter starts at 0 as before.
    existing = re.findall(
        '<para[^>]* id="%s_([0-9a-f]+)"' % re.escape(chapter_key), f)
    biggest_id = max([int(h, 16) for h in existing] or [0])

    # NOTE(review): id_list only holds IDs allocated during this run, and
    # p_list.py is rewritten from scratch, so a re-run records just the new
    # IDs -- confirm whether consumers of p_list expect the full list.
    id_list = []
    f1 = untagged.sub(retag, f)
    dictionary[chapter_key] = id_list
    if f1 != f:
        # Write to a sibling temp file and rename over the original, so a
        # failed write never leaves a half-written chapter behind.
        tmpname = name + '.tmp'
        with open(tmpname, 'w') as fp:
            fp.write(f1)
        os.rename(tmpname, name)

p_lists_string = beginning + str(dictionary)
id_file.write(p_lists_string)
# Close explicitly so the mapping is flushed to disk before exiting.
id_file.close()
# Exit status is the number of duplicate IDs found in the first pass.
sys.exit(errs)