#!/usr/bin/env python
#
# Add unique ID attributes to para tags. This script should only be
# run by one person, since otherwise it introduces the possibility of
# chaotic conflicts among tags.
import glob, os, re, sys
# Matches <para> tags that already carry an ID of the form "x_<hex>";
# group 1 is the hexadecimal counter.
tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Matches bare <para> tags, i.e. the ones that still need an ID.
untagged = re.compile('<para>')

names = glob.glob('ch*.docbook')

# First pass: find the highest-numbered paragraph ID.
#
# Only "x_<hex>" IDs are recognised here, whereas retag() issues
# "<chapter>_<hex>" IDs and the second pass re-seeds the counter for every
# chapter, so in practice this scan mainly reports duplicated "x_" IDs.

chapter = None
# The counter must exist before the scan compares against it; without this
# the first matching ID raised a NameError.
biggest_id = 0
seen = set()
errs = 0
beginning = "p_list= "
# Kept open at module level: the ID lists are written to it at the end of
# the second pass.
id_file = open('p_list.py', 'w')
dictionary = {}
id_list = []

for name in names:
    # Read through a context manager so the handle is closed promptly.
    with open(name) as src:
        text = src.read()
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # sys.stderr.write behaves the same on Python 2 and 3, unlike
            # the "print >>" statement.
            sys.stderr.write('%s: duplication of ID %s\n' % (name, i))
            errs += 1
        seen.add(i)
        if i > biggest_id:
            biggest_id = i
def retag(s):
    """Return an ID-carrying ``<para>`` tag for one bare ``<para>`` match.

    Used as the replacement callback for ``untagged.sub``; the match
    object ``s`` itself is not inspected.  Each call advances the
    module-level ``biggest_id`` counter, combines the current ``chapter``
    prefix with the counter in lowercase hex, records the resulting ID in
    the module-level ``id_list`` and returns the new tag text.
    """
    global biggest_id
    biggest_id = biggest_id + 1
    # chapter and id_list are only read/mutated, never rebound, so they
    # need no global declaration.
    new_id = "%s_%x" % (chapter, biggest_id)
    id_list.append(new_id)
    return '<para id="%s">' % new_id
# Second pass: add IDs to paragraphs that currently lack them.

for name in names:
    # The part of the file name before the first dot is the ID prefix.
    chapter = name.split('.')[0]
    id_list = []
    with open(name) as src:
        f = src.read()
    # Continue numbering after the highest "<chapter>_<hex>" ID already
    # present in this file, so that running the script again never hands
    # out an ID twice.  A file without such IDs starts from 0, which makes
    # the first new ID "<chapter>_1".
    existing = re.findall(
        '<para[^>]* id="%s_([0-9a-f]+)"' % re.escape(chapter), f)
    biggest_id = max([int(h, 16) for h in existing] or [0])
    # retag() bumps biggest_id and appends each new ID to id_list.
    f1 = untagged.sub(retag, f)
    # NOTE(review): only IDs assigned during this run are recorded, so
    # p_list.py does not list IDs that were already in the file -- confirm
    # that this is what consumers of p_list.py expect.
    dictionary[chapter] = id_list
    if f1 != f:
        # Write the new text to a temporary file first, then rename it
        # over the original.
        tmpname = name + '.tmp'
        with open(tmpname, 'w') as fp:
            fp.write(f1)
        os.rename(tmpname, name)

# Emit the per-chapter ID lists as a Python assignment: "p_list= {...}".
p_lists_string = beginning + str(dictionary)
id_file.write(p_lists_string)
# Close explicitly so the data is flushed to disk before exiting.
id_file.close()
# The exit status is the number of duplicate IDs found in the first pass.
sys.exit(errs)