| author | amit@shrike.aero.iitb.ac.in |
| Wed, 14 Apr 2010 14:38:29 +0530 | |
| changeset 54 | ce987056033c |
| parent 33 | bc535262231d |
| permissions | -rw-r--r-- |
|
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
1 |
#!/usr/bin/env python |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
2 |
# |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
3 |
# Add unique ID attributes to para tags. This script should only be |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
4 |
# run by one person, since otherwise it introduces the possibility of |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
5 |
# chaotic conflicts among tags. |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
6 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
7 |
import glob, os, re, sys |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
8 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
9 |
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
10 |
untagged = re.compile('<para>')
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
11 |
|
| 33 | 12 |
script_folder='/home/hg/repos/SEES-hacks/temp/' |
| 31 | 13 |
names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook')
|
|
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
14 |
# First pass: find the highest-numbered paragraph ID. |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
15 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
16 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
17 |
chapter=None |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
18 |
seen = set() |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
19 |
errs = 0 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
20 |
beginning="p_list= " |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
21 |
|
| 33 | 22 |
id_file=open(script_folder+'p_list.py','w') |
|
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
23 |
dictionary={}
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
24 |
id_list=[] |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
25 |
for name in names: |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
26 |
for m in tagged.finditer(open(name).read()): |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
27 |
i = int(m.group(1),16) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
28 |
if i in seen: |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
29 |
print >> sys.stderr, '%s: duplication of ID %s' % (name, i) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
30 |
errs += 1 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
31 |
seen.add(i) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
32 |
if i > biggest_id: |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
33 |
biggest_id = i |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
34 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
35 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
36 |
def retag(s): |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
37 |
global biggest_id |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
38 |
global chapter |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
39 |
biggest_id += 1 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
40 |
|
| 33 | 41 |
id_name="%s_%x" % (chapter.split('/')[-1],biggest_id)
|
|
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
42 |
id_list.append(id_name) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
43 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
44 |
return '<para id="%s">' %id_name |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
45 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
46 |
# Second pass: add IDs to paragraphs that currently lack them. |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
47 |
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
48 |
for name in names: |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
49 |
biggest_id=0 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
50 |
chapter=name.split('.')[0]
|
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
51 |
id_list=[] |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
52 |
f = open(name).read() |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
53 |
f1 = untagged.sub(retag, f ) |
| 33 | 54 |
dictionary[chapter.split('/')[-1]]=id_list
|
|
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
55 |
if f1 != f: |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
56 |
tmpname = name + '.tmp' |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
57 |
fp = open(tmpname, 'w') |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
58 |
fp.write(f1) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
59 |
fp.close() |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
60 |
os.rename(tmpname, name) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
61 |
p_lists_string=beginning+str(dictionary) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
62 |
id_file.write(p_lists_string) |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
63 |
sys.exit(errs) |