author | amit@thunder |
Fri, 05 Feb 2010 23:42:24 +0530 | |
changeset 1 | 672eaaab9204 |
parent 0 | 8083d21c0020 |
permissions | -rwxr-xr-x |
0
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
1 |
#!/usr/bin/env python |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
2 |
# |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
3 |
# Add unique ID attributes to para tags. This script should only be |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
4 |
# run by one person, since otherwise it introduces the possibility of |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
5 |
# chaotic conflicts among tags. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
6 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
7 |
import glob, os, re, sys |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
8 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
9 |
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
10 |
untagged = re.compile('<para>') |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
11 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
12 |
names = glob.glob('ch*.xml') + glob.glob('app*.xml') |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
13 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
14 |
# First pass: find the highest-numbered paragraph ID. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
15 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
16 |
biggest_id = 0 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
17 |
seen = set() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
18 |
errs = 0 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
19 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
20 |
for name in names: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
21 |
for m in tagged.finditer(open(name).read()): |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
22 |
i = int(m.group(1),16) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
23 |
if i in seen: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
24 |
print >> sys.stderr, '%s: duplication of ID %s' % (name, i) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
25 |
errs += 1 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
26 |
seen.add(i) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
27 |
if i > biggest_id: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
28 |
biggest_id = i |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
29 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
30 |
def retag(s): |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
31 |
global biggest_id |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
32 |
biggest_id += 1 |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
33 |
return '<para id="x_%x">' % biggest_id |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
34 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
35 |
# Second pass: add IDs to paragraphs that currently lack them. |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
36 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
37 |
for name in names: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
38 |
f = open(name).read() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
39 |
f1 = untagged.sub(retag, f) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
40 |
if f1 != f: |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
41 |
tmpname = name + '.tmp' |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
42 |
fp = open(tmpname, 'w') |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
43 |
fp.write(f1) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
44 |
fp.close() |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
45 |
os.rename(tmpname, name) |
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
46 |
|
8083d21c0020
The first commit of all the required files for the review app
amit@thunder
parents:
diff
changeset
|
47 |
sys.exit(errs) |