#!/usr/bin/env python
#
# Add unique ID attributes to para tags.  This script should only be
# run by one person, since otherwise it introduces the possibility of
# chaotic conflicts among tags.
#
# Provenance: added as SEESenv/scripts/autoid.py (hg 52d12eb31c30).
#
# For every ch*.docbook file in the current directory, each bare
# <para> tag is rewritten to <para id="<chapter>_<hex>">, and the IDs
# assigned during this run are written to p_list.py as
# "p_list= {chapter: [ids...]}".  The exit status is the number of
# duplicated "x_" style IDs found.

import glob
import os
import re
import sys

# Paragraphs carrying an "x_<hex>" ID; only used for duplicate detection.
tagged = re.compile(r'<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
# Paragraphs with no attributes at all; these are the ones that get an ID.
untagged = re.compile(r'<para>')


def read_text(path):
    """Return the full contents of *path*, closing the file afterwards."""
    with open(path) as fp:
        return fp.read()


def count_duplicate_ids(names):
    """Report duplicated "x_" paragraph IDs across *names*.

    Each repeated ID is reported on stderr together with the file in
    which the repetition was seen.  Returns the number of repetitions.
    """
    seen = set()
    errs = 0
    for name in names:
        for m in tagged.finditer(read_text(name)):
            i = int(m.group(1), 16)
            if i in seen:
                sys.stderr.write('%s: duplication of ID %s\n' % (name, i))
                errs += 1
            seen.add(i)
    # The original also tracked the largest ID here, but read the
    # counter before assigning it (NameError) and then reset it before
    # use, so that bookkeeping has been dropped.
    return errs


def highest_existing_id(text, chapter):
    """Return the largest "<chapter>_<hex>" paragraph ID in *text*, or 0."""
    pattern = re.compile(
        r'<para[^>]* id="%s_([0-9a-f]+)"[^>]*>' % re.escape(chapter))
    # "or [0]" keeps max() happy when the chapter has no such IDs yet.
    return max([int(m.group(1), 16) for m in pattern.finditer(text)] or [0])


def tag_paragraphs(text, chapter, start=0):
    """Give every bare <para> in *text* an ID of the form chapter_hex.

    Numbering begins at ``start + 1``.  Returns ``(new_text, new_ids)``
    where *new_ids* lists the assigned IDs in document order.
    """
    new_ids = []

    def retag(_match):
        # The next number follows from how many IDs were handed out so far.
        id_name = '%s_%x' % (chapter, start + len(new_ids) + 1)
        new_ids.append(id_name)
        return '<para id="%s">' % id_name

    return untagged.sub(retag, text), new_ids


def main():
    """Tag all chapters, write p_list.py, and return the error count."""
    names = glob.glob('ch*.docbook')

    # First pass: look for duplicated IDs among already-tagged paragraphs.
    errs = count_duplicate_ids(names)

    # Second pass: add IDs to paragraphs that currently lack them.
    dictionary = {}
    for name in names:
        chapter = name.split('.')[0]
        original = read_text(name)
        # Continue after any IDs this chapter already has, so that a
        # re-run does not hand out the same ID twice.
        start = highest_existing_id(original, chapter)
        updated, id_list = tag_paragraphs(original, chapter, start)
        # NOTE(review): only IDs assigned in this run are recorded, so a
        # re-run overwrites p_list.py without the earlier IDs -- confirm
        # that consumers of p_list.py expect this.
        dictionary[chapter] = id_list
        if updated != original:
            # Write to a temporary file and rename it over the original
            # so a failed write does not leave a truncated chapter.
            tmpname = name + '.tmp'
            with open(tmpname, 'w') as fp:
                fp.write(updated)
            os.rename(tmpname, name)

    # Written last so a failure above does not leave an empty p_list.py.
    with open('p_list.py', 'w') as id_file:
        id_file.write('p_list= ' + str(dictionary))
    return errs


if __name__ == '__main__':
    sys.exit(main())