SEES-hacks: comparison SEESenv/scripts/autoid.py

equal deleted inserted replaced

-:672eaaab9204
+:52d12eb31c30
+#!/usr/bin/env python
+#
+# Add unique ID attributes to para tags.  This script should only be
+# run by one person, since otherwise it introduces the possibility of
+# chaotic conflicts among tags.
+import glob, os, re, sys
+tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
+untagged = re.compile('<para>')
+names = glob.glob('ch*.docbook')
+# First pass: find the highest-numbered paragraph ID.
+chapter=None
+seen = set()
+errs = 0
+beginning="p_list= "
+id_file=open('p_list.py','w')
+dictionary={}
+id_list=[]
+for name in names:
+for m in tagged.finditer(open(name).read()):
+i = int(m.group(1),16)
+if i in seen:
+print >> sys.stderr, '%s: duplication of ID %s' % (name, i)
+errs += 1
+seen.add(i)
+if i > biggest_id:
+biggest_id = i
+def retag(s):
+global biggest_id
+global chapter
+biggest_id += 1
+id_name="%s_%x" % (chapter,biggest_id)
+id_list.append(id_name)
+return '<para id="%s">' %id_name
+# Second pass: add IDs to paragraphs that currently lack them.
+for name in names:
+biggest_id=0
+chapter=name.split('.')[0]
+id_list=[]
+f = open(name).read()
+f1 = untagged.sub(retag, f )
+dictionary[chapter]=id_list
+if f1 != f:
+tmpname = name + '.tmp'
+fp = open(tmpname, 'w')
+fp.write(f1)
+fp.close()
+os.rename(tmpname, name)
+p_lists_string=beginning+str(dictionary)
+id_file.write(p_lists_string)
+sys.exit(errs)