equal
deleted
inserted
replaced
7 import glob, os, re, sys |
7 import glob, os, re, sys |
8 |
8 |
9 tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M) |
9 tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M) |
10 untagged = re.compile('<para>') |
10 untagged = re.compile('<para>') |
11 |
11 |
|
12 script_folder='/home/hg/repos/SEES-hacks/temp/' |
12 names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook') |
13 names = glob.glob('/home/hg/repos/SEES-hacks/temp/ch*.docbook') |
13 # First pass: find the highest-numbered paragraph ID. |
14 # First pass: find the highest-numbered paragraph ID. |
14 |
15 |
15 |
16 |
16 chapter=None |
17 chapter=None |
17 seen = set() |
18 seen = set() |
18 errs = 0 |
19 errs = 0 |
19 beginning="p_list= " |
20 beginning="p_list= " |
20 |
21 |
21 id_file=open('p_list.py','w') |
22 id_file=open(script_folder+'p_list.py','w') |
22 dictionary={} |
23 dictionary={} |
23 id_list=[] |
24 id_list=[] |
24 for name in names: |
25 for name in names: |
25 for m in tagged.finditer(open(name).read()): |
26 for m in tagged.finditer(open(name).read()): |
26 i = int(m.group(1),16) |
27 i = int(m.group(1),16) |
35 def retag(s): |
36 def retag(s): |
36 global biggest_id |
37 global biggest_id |
37 global chapter |
38 global chapter |
38 biggest_id += 1 |
39 biggest_id += 1 |
39 |
40 |
40 id_name="%s_%x" % (chapter,biggest_id) |
41 id_name="%s_%x" % (chapter.split('/')[-1],biggest_id) |
41 id_list.append(id_name) |
42 id_list.append(id_name) |
42 |
43 |
43 return '<para id="%s">' %id_name |
44 return '<para id="%s">' %id_name |
44 |
45 |
45 # Second pass: add IDs to paragraphs that currently lack them. |
46 # Second pass: add IDs to paragraphs that currently lack them. |
48 biggest_id=0 |
49 biggest_id=0 |
49 chapter=name.split('.')[0] |
50 chapter=name.split('.')[0] |
50 id_list=[] |
51 id_list=[] |
51 f = open(name).read() |
52 f = open(name).read() |
52 f1 = untagged.sub(retag, f ) |
53 f1 = untagged.sub(retag, f ) |
53 dictionary[chapter]=id_list |
54 dictionary[chapter.split('/')[-1]]=id_list |
54 if f1 != f: |
55 if f1 != f: |
55 tmpname = name + '.tmp' |
56 tmpname = name + '.tmp' |
56 fp = open(tmpname, 'w') |
57 fp = open(tmpname, 'w') |
57 fp.write(f1) |
58 fp.write(f1) |
58 fp.close() |
59 fp.close() |