author | amit@shrike.aero.iitb.ac.in |
Wed, 14 Apr 2010 14:38:29 +0530 | |
changeset 54 | ce987056033c |
parent 33 | bc535262231d |
permissions | -rw-r--r-- |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
1 |
#!/usr/bin/env python |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
2 |
# |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
3 |
# Add unique ID attributes to para tags. This script should only be |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
4 |
# run by one person, since otherwise it introduces the possibility of |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
5 |
# chaotic conflicts among tags. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
6 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
7 |
import glob, os, re, sys |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
8 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
9 |
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
10 |
untagged = re.compile('<para>') |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
11 |
|
33 | 12 |
script_folder='/home/hg/repos/SEES-hacks/temp/' |
31 | 13 |
names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook') |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
14 |
# First pass: find the highest-numbered paragraph ID. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
15 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
16 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
17 |
chapter=None |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
18 |
seen = set() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
19 |
errs = 0 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
20 |
beginning="p_list= " |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
21 |
|
33 | 22 |
id_file=open(script_folder+'p_list.py','w') |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
23 |
dictionary={} |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
24 |
id_list=[] |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
25 |
for name in names: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
26 |
for m in tagged.finditer(open(name).read()): |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
27 |
i = int(m.group(1),16) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
28 |
if i in seen: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
29 |
print >> sys.stderr, '%s: duplication of ID %s' % (name, i) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
30 |
errs += 1 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
31 |
seen.add(i) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
32 |
if i > biggest_id: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
33 |
biggest_id = i |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
34 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
35 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
36 |
def retag(s): |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
37 |
global biggest_id |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
38 |
global chapter |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
39 |
biggest_id += 1 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
40 |
|
33 | 41 |
id_name="%s_%x" % (chapter.split('/')[-1],biggest_id) |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
42 |
id_list.append(id_name) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
43 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
44 |
return '<para id="%s">' %id_name |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
45 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
46 |
# Second pass: add IDs to paragraphs that currently lack them. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
47 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
48 |
for name in names: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
49 |
biggest_id=0 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
50 |
chapter=name.split('.')[0] |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
51 |
id_list=[] |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
52 |
f = open(name).read() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
53 |
f1 = untagged.sub(retag, f ) |
33 | 54 |
dictionary[chapter.split('/')[-1]]=id_list |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
55 |
if f1 != f: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
56 |
tmpname = name + '.tmp' |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
57 |
fp = open(tmpname, 'w') |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
58 |
fp.write(f1) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
59 |
fp.close() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
60 |
os.rename(tmpname, name) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
61 |
p_lists_string=beginning+str(dictionary) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
62 |
id_file.write(p_lists_string) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
63 |
sys.exit(errs) |