web/hgbook/converter.py
changeset 2 52d12eb31c30
parent 1 672eaaab9204
child 3 6cee07c589cb
--- a/web/hgbook/converter.py	Fri Feb 05 23:42:24 2010 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-from lxml import etree
-from lxml import html
-from lxml.cssselect import CSSSelector
-import md5
-import sys
-
-
-args = sys.argv[1:]
-
-# django stuff
-from django.core.management import setup_environ
-import settings # Assumed to be in the same directory.
-setup_environ(settings)       # ugly django collateral effects :(
-from comments.models import Element
-
-doc_id = 'MMSC'
-sel = CSSSelector('div.chapter p, pre, h1, table.equation')
-chapter_sel = CSSSelector('div.chapter')
-
-try:
-    filename = args[0]
-except IndexError:
-    raise IndexError("Usage: %s <path-to-html-file>" % __file__)
-
-tree = etree.parse(filename, html.HTMLParser(remove_blank_text=True))
-root = tree.getroot()
-
-chapter = chapter_sel(root)[0]
-chapter_title = chapter.get('id').split(':')[1]
-chapter_hash = md5.new(chapter.get('id').encode('utf8')).hexdigest()
-
-chapter.set('id', chapter_hash)
-
-for element in sel(root):
-    hsh_source = element.text or element.get('alt') or etree.tostring(element)
-
-    if hsh_source:
-        hsh_source_encoded = hsh_source.encode('utf8')
-        hsh = md5.new(hsh_source_encoded).hexdigest()
-        element.set('id', '%s-%s' % (chapter_hash, hsh))
-    
-        # create the commentable element in the DB
-        e = Element()
-        e.id = '%s-%s' % (chapter_hash, hsh)
-        e.chapter = chapter_hash
-        e.title = chapter_title
-        e.save()
-
-
-
-print etree.tostring(root)      # pipe to a file if you wish
-