#!/usr/bin/env python
#
# Add unique ID attributes to para tags.  This script should only be
# run by one person, since otherwise it introduces the possibility of
# chaotic conflicts among tags.
#
# For every ch*.docbook file in the current directory, each bare <para>
# tag is rewritten as <para id="<chapter>_<hex number>">, where <chapter>
# is the file name up to its first dot.  The IDs assigned during this run
# are also written to p_list.py as "p_list= {chapter: [ids...]}".

import glob
import os
import re
import sys

# A paragraph that already carries an ID of the form "<prefix>_<hex>".
# Group 1 is the prefix (e.g. "x" or "ch01"), group 2 the hex number.
tagged = re.compile(r'<para[^>]* id="([^"]*)_([0-9a-f]+)"[^>]*>')

# A paragraph with no attributes at all; only these receive a new ID.
untagged = re.compile(r'<para>')


def scan_ids(text):
    """Return a list of (prefix, number) pairs for the tagged paragraphs.

    `number` is the hexadecimal suffix of the ID converted to an int, so
    "ch01_0a" and "ch01_a" yield the same pair.
    """
    return [(m.group(1), int(m.group(2), 16)) for m in tagged.finditer(text)]


def tag_paragraphs(text, chapter, highest):
    """Give every bare <para> in `text` a fresh ID for `chapter`.

    Numbering continues after `highest`, the largest number already used
    with this chapter's prefix, so existing IDs are never reissued.

    Returns a tuple (new_text, new_ids), where new_ids lists the IDs that
    were assigned, in document order.
    """
    new_ids = []

    def retag(match):
        # len(new_ids) counts the IDs handed out so far in this call.
        id_name = "%s_%x" % (chapter, highest + len(new_ids) + 1)
        new_ids.append(id_name)
        return '<para id="%s">' % id_name

    return untagged.sub(retag, text), new_ids


def _read(name):
    """Return the full contents of the file `name`."""
    with open(name) as fp:
        return fp.read()


def main():
    """Tag all chapters and return the number of duplicate IDs found."""
    # Sorted so that processing order (and p_list.py) is reproducible.
    names = sorted(glob.glob('ch*.docbook'))

    # First pass: report duplicate IDs and find, for each ID prefix, the
    # highest number already in use anywhere in the book.
    texts = {}
    seen = set()
    highest = {}
    errs = 0
    for name in names:
        texts[name] = _read(name)
        for prefix, number in scan_ids(texts[name]):
            if (prefix, number) in seen:
                sys.stderr.write('%s: duplication of ID %s\n'
                                 % (name, '%s_%x' % (prefix, number)))
                errs += 1
            seen.add((prefix, number))
            if number > highest.get(prefix, 0):
                highest[prefix] = number

    # Second pass: add IDs to paragraphs that currently lack them.
    dictionary = {}
    for name in names:
        chapter = name.split('.')[0]
        original = texts[name]
        updated, new_ids = tag_paragraphs(original, chapter,
                                          highest.get(chapter, 0))
        dictionary[chapter] = new_ids
        if new_ids:
            # Two files can share a chapter prefix (e.g. "ch01.docbook"
            # and "ch01.appendix.docbook"); advance the counter so the
            # next such file does not reuse these numbers.
            highest[chapter] = highest.get(chapter, 0) + len(new_ids)
        if updated != original:
            # Write to a temporary file and rename it over the original,
            # so an interrupted run never leaves a half-written chapter.
            tmpname = name + '.tmp'
            with open(tmpname, 'w') as fp:
                fp.write(updated)
            os.rename(tmpname, name)

    # Written only after all chapters were processed, so a failure above
    # does not leave an empty p_list.py behind.
    with open('p_list.py', 'w') as id_file:
        id_file.write("p_list= " + str(dictionary))

    return errs


if __name__ == '__main__':
    # Exit statuses wrap modulo 256; cap the count so that a large number
    # of errors can never be reported as success.
    sys.exit(min(main(), 255))