scripts/autoid.py
changeset 2 52d12eb31c30
parent 1 672eaaab9204
child 3 6cee07c589cb
--- a/scripts/autoid.py	Fri Feb 05 23:42:24 2010 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#!/usr/bin/env python
-#
-# Add unique ID attributes to para tags.  This script should only be
-# run by one person, since otherwise it introduces the possibility of
-# chaotic conflicts among tags.
-
-import glob, os, re, sys
-
-tagged = re.compile('<para[^>]* id="x_([0-9a-f]+)"[^>]*>', re.M)
-untagged = re.compile('<para>')
-
-names = glob.glob('ch*.docbook') 
-# First pass: find the highest-numbered paragraph ID.
-
-
-chapter=None
-seen = set()
-errs = 0
-beginning="p_list= "
-
-id_file=open('p_list.py','w')
-dictionary={}
-id_list=[]
-for name in names:
-    for m in tagged.finditer(open(name).read()):
-        i = int(m.group(1),16)
-        if i in seen:
-            print >> sys.stderr, '%s: duplication of ID %s' % (name, i)
-            errs += 1
-        seen.add(i)
-        if i > biggest_id:
-            biggest_id = i
-
-
-def retag(s):
-    global biggest_id
-    global chapter   
-    biggest_id += 1
-     
-    id_name="%s_%x" % (chapter,biggest_id)   
-    id_list.append(id_name)    
-    
-    return '<para id="%s">' %id_name
-
-# Second pass: add IDs to paragraphs that currently lack them.
-
-for name in names:
-    biggest_id=0 
-    chapter=name.split('.')[0]    
-    id_list=[]    
-    f = open(name).read()
-    f1 = untagged.sub(retag, f )
-    dictionary[chapter]=id_list       
-    if f1 != f:
-        tmpname = name + '.tmp'
-        fp = open(tmpname, 'w')
-        fp.write(f1)
-        fp.close()
-        os.rename(tmpname, name)
-p_lists_string=beginning+str(dictionary)
-id_file.write(p_lists_string)
-sys.exit(errs)