# HG changeset patch # User Sverre Rabbelier # Date 1243298259 -7200 # Node ID 4cc66ab098e8444ab03d4a6817be70539b0f3513 # Parent 616df973e457a15183f37eee3e3cf0477b6df32f# Parent 3156760b4d269dd3c88adda4509eed2ce4702944 remove accidental head diff -r 3156760b4d26 -r 4cc66ab098e8 app/app.yaml.template --- a/app/app.yaml.template Mon May 25 23:42:15 2009 +0200 +++ b/app/app.yaml.template Tue May 26 02:37:39 2009 +0200 @@ -46,6 +46,14 @@ - url: /json static_dir: json +- url: /admin/shell.* + script: shell/shell.py + login: admin + +- url: /static + static_dir: shell/static + expiration: 1d + - url: /.* script: main.py diff -r 3156760b4d26 -r 4cc66ab098e8 app/gae_django.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/gae_django.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,61 @@ +#!/usr/bin/python2.5 +# +# Copyright 2008 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing Melange Django 1.0+ configuration for Google App Engine. +""" + +import logging +import os +import sys + +__authors__ = [ + # alphabetical order by last name, please + '"Pawel Solyga" ', + ] + + +# Remove the standard version of Django. +for k in [k for k in sys.modules if k.startswith('django')]: + del sys.modules[k] + +# Force sys.path to have our own directory first, in case we want to import +# from it. 
This lets us replace the built-in Django +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) + +sys.path.insert(0, os.path.abspath('django.zip')) + +# Force Django to reload its settings. +from django.conf import settings +settings._target = None + +# Must set this env var before importing any part of Django +os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' + +import django.core.signals +import django.db + +# Log errors. +def log_exception(*args, **kwds): + """Function used for logging exceptions. + """ + logging.exception('Exception in request:') + +# Log all exceptions detected by Django. +django.core.signals.got_request_exception.connect(log_exception) + +# Unregister the rollback event handler. +django.core.signals.got_request_exception.disconnect( + django.db._rollback_on_exception) diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/__init__.py diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/comment.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/comment.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,40 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP specific Comment Model. 
+""" + +__authors__ = [ + '"Madhusudan.C.S" ', +] + + +from google.appengine.ext import db + +from django.utils.translation import ugettext + +import soc.models.comment + + +class GHOPComment(soc.models.comment.Comment): + """GHOP Comment model for tasks, extends the basic Comment model. + """ + + #: Property containing the human readable string that should be + #: shown for the comment when something in the task changes, + #: code.google.com issue tracker style + change_in_task = db.StringProperty(required=True, + verbose_name=ugettext('Changes in the task')) diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/org_prize_assignment.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/org_prize_assignment.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,54 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP PrizePerOrg Model. +""" + +__authors__ = [ + '"Madhusudan.C.S" ', +] + + +from google.appengine.ext import db + +import soc.models.base + +import ghop.models.organization +import ghop.models.program + + +class GHOPOrgPrizeAssignment(soc.models.base.ModelWithFieldAttributes): + """Model for prizes assigned to Students by an Organization. 
+ """ + + #: Program to which these winners belong to + program = db.ReferenceProperty(reference_class=ghop.models.program.GHOPProgram, + required=True, + collection_name='program_prizes') + + #: Organization to which these winners belong to + org = db.ReferenceProperty( + reference_class=ghop.models.organization.GHOPOrganization, + required=True, collection_name='organization_prizes') + + #: Ordered list of winners(reference to Student entities) for the given + #: organization under the specified program + winners = db.ListProperty(item_type=db.Key, default=[]) + + #: unordered list of runner-ups(reference to Student entities) for the given + #: organization under the specified program + runner_ups = db.ListProperty(item_type=db.Key, default=[]) + diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/organization.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/organization.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,35 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP specific Organization Model. +""" + +__authors__ = [ + '"Madhusudan.C.S" ', +] + + +from google.appengine.ext import db + +import soc.models.organization + + +class GHOPOrganization(soc.models.organization.Organization): + """GHOP Organization model extends the basic Organization model. + """ + + #: Property that stores the amount of tasks the organization can publish. 
+ task_quota_limit = db.IntegerProperty(required=False, default=0) diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/program.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/program.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,76 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP specific Program Model. +""" + +__authors__ = [ + '"Madhusudan.C.S" ', +] + + +from google.appengine.ext import db + +from django.utils.translation import ugettext + +import soc.models.program + + +class GHOPProgram(soc.models.program.Program): + """GHOP Program model extends the basic Program model. + """ + + #: Property that contains the latest date of birth before which a Student + #: can participate + student_min_age = db.DateTimeProperty(required=False) + student_min_age.help_text = ugettext( + 'Minimum age of the student to sign-up. Given by the latest birthdate allowed') + + #: Required property containing the number of Tasks Students can work + #: on simultaneously. 
For GHOP it is 1 + nr_simultaneous_tasks = db.IntegerProperty( + required=True, default=1, + verbose_name=ugettext('Simultaneous tasks')) + nr_simultaneous_tasks.help_text = ugettext( + 'Number of tasks students can work on simultaneously in the program.') + + #: Property containing the number of winners per Organization + nr_winners = db.IntegerProperty( + required=True, default=0, + verbose_name=ugettext('Winners per organization')) + nr_winners.help_text = ugettext( + 'Number of winners an organization can announce.') + + #: Property containing the number of runner ups per Organization + nr_runnerups = db.IntegerProperty( + required=True, default=0, + verbose_name=ugettext('Runner-ups per organization')) + nr_runnerups.help_text = ugettext( + 'Number of runner-ups an organization can announce.') + + #: A list of difficulty levels that can be assigned for each Task created + task_difficulties = db.StringListProperty( + required=True, default=[''], + verbose_name=ugettext('Difficulty levels')) + task_difficulties.help_text = ugettext( + 'List all the difficulty levels that can be assigned to a task.') + + #: A list of task types that a Task can belong to + task_types = db.StringListProperty( + required=True, default=['Any'], + verbose_name=ugettext('Task Types')) + task_types.help_text = ugettext( + 'List all the types a task can be in.') diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/task.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/task.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,181 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP Task Model. +""" + +__authors__ = [ + '"Madhusudan.C.S" ', + '"Lennard de Rijk" ', +] + + +from google.appengine.ext import db + +from django.utils.translation import ugettext + +import soc.models.linkable +import soc.models.role +import soc.models.student +import soc.models.user + +import ghop.models.program + + +class GHOPTask(soc.models.linkable.Linkable): + """Model for a task used in GHOP workflow. + + The scope property of Linkable will be set to the Organization to which + this task belongs to. A link_id will be generated automatically and will + have no specific meaning other than identification. + """ + + #: Required field indicating the "title" of the task + title = db.StringProperty(required=True, + verbose_name=ugettext('Title')) + title.help_text = ugettext('Title of the task') + + #: Required field containing the description of the task + description = db.TextProperty(required=True, + verbose_name=ugettext('Description')) + description.help_text = ugettext('Complete description of the task') + + #: Field indicating the difficulty level of the Task. This is not + #: mandatory so the it can be assigned at any later stage. + #: The options are configured by a Program Admin. + difficulty = db.StringProperty(required=False, + verbose_name=ugettext('Difficulty')) + difficulty.help_text = ugettext('Difficulty Level of the task') + + #: Required field which contains the type of the task. These types are + #: configured by a Program Admin. 
+ type = db.StringListProperty(required=True, + verbose_name=ugettext('Task Type')) + type.help_text = ugettext('Type of the task') + + #: A field which contains time allowed for completing the task (in hours) + #: from the moment that this task has been assigned to a Student + time_to_complete = db.IntegerProperty(required=True, + verbose_name=('Time to Complete')) + time_to_complete.help_text = ugettext( + 'Time allowed to complete the task, in hours, once it is claimed') + + #: List of Mentors assigned to this task. A Mentor who creates this + #: task is assigned as the Mentor by default. An Org Admin will have + #: to assign a Mentor upon task creation. + mentors = db.ListProperty(item_type=db.Key, default=[]) + + #: User profile to whom this task has been claimed by. This field + #: is mandatory for claimed tasks + user = db.ReferenceProperty(reference_class=soc.models.user.User, + required=False, + collection_name='assigned_tasks') + + #: Student profile to whom this task is currently assigned to. If the user + #: has registered as a Student than this field will be filled in. This field + #: is mandatory for all Tasks in the closed state. + student = db.ReferenceProperty(reference_class=soc.models.student.Student, + required=False, + collection_name='assigned_tasks') + + #: Program in which this Task has been created + program = db.ReferenceProperty(reference_class=ghop.models.program.GHOPProgram, + required=True, + collection_name='tasks') + + #: Required property which holds the state, the Task is currently in. + #: This is a hidden field not shown on forms. Handled by logic internally. + #: The state can be one of the following: + #: unapproved: If Task is created by a Mentor, this is the automatically + #: assigned state. + #: unpublished: This Task is not published yet. + #: open: This Task is open and ready to be claimed. + #: reopened: This Task has been claimed but never finished and has been + #: reopened. 
+ #: claim_requested: A Student has requested to claim this task. + #: claimed: This Task has been claimed and someone is working on it. + #: action_needed: Work on this Task must be submitted for review within + #: 24 hours. + #: closed: Work on this Task has been completed to the org's content. + #: awaiting_registration: Student has completed work on this task, but + #: needs to complete Student registration before this task is closed. + #: needs_work: This work on this Tasks needs a bit more brushing up. This + #: state is followed by a Mentor review. + #: needs_review: Student has submitted work for this task and it should + #: be reviewed by a Mentor. + status = db.StringProperty( + required=True, verbose_name=ugettext('Status'), + choices=['unapproved', 'unpublished', 'open', 'reopened', + 'claim_requested', 'claimed', 'action_needed', + 'closed', 'awaiting_registration', 'needs_work', + 'needs_review'], + default='unapproved') + + #: A field which indicates if the Task was ever in the Reopened state. + #: True indicates that its state was Reopened once, false indicated that it + #: has never been in the Reopened state. + was_reopened = db.BooleanProperty(default=False, + verbose_name=ugettext('Has been reopened')) + + #: This field is set to the next deadline that will have consequences for + #: this Task. For instance this will store a DateTime property which will + #: tell when this Task should be completed. 
+ deadline = db.DateTimeProperty(required=False, + verbose_name=ugettext('Deadline')) + + #: Required field containing the Mentor/Org Admin who created this task + created_by = db.ReferenceProperty(reference_class=soc.models.role.Role, + required=True, + collection_name='created_tasks', + verbose_name=ugettext('Created by')) + + #: Date when the proposal was created + created_on = db.DateTimeProperty(required=True, auto_now_add=True, + verbose_name=ugettext('Created on')) + + #: Required field containing the Mentor/Org Admin who last edited this + #: task. It changes only when Mentor/Org Admin changes title, description, + #: difficulty, type, time_to_complete. + modified_by = db.ReferenceProperty(reference_class=soc.models.role.Role, + required=True, + collection_name='edited_tasks', + verbose_name=ugettext('Modified by')) + + #: Date when the proposal was last modified, should be set manually on edit + modified_on = db.DateTimeProperty(required=True, auto_now_add=True, + verbose_name=ugettext('Modified on')) + + #: A field which holds the entire history of this task in JSON. The + #: structure of this JSON string is as follows: + #: { + #: timestamp1: { + #: "user": User reference + #: "student": Student reference + #: ... + #: "state": "Unapproved" + #: ... + #: "edited_by": Role reference + #: + #: } + #: timestamp2: { + #: "state": "Unpublished" + #: } + #: } + #: First dictionary item holds the values for all the properties in this + #: model. The subsequent items hold the properties that changed at the + #: timestamp given by the key. + #: Reference properties will be stored by calling str() on their Key. + history = db.TextProperty(required=True, default='') diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/timeline.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/timeline.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,62 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP specific Timeline Model. +""" + +__authors__ = [ + '"Madhusudan.C.S" ', +] + + +from google.appengine.ext import db + +from django.utils.translation import ugettext + +import soc.models.timeline + + +class GHOPTimeline(soc.models.timeline.Timeline): + """GHOP Timeline model extends the basic Timeline model. It implements + the GHOP specific timeline entries. + """ + + task_claim_deadline = db.DateTimeProperty( + verbose_name=ugettext('Task Claim Deadline date')) + task_claim_deadline.help_text = ugettext( + 'No tasks can be claimed after this date.' 
+ 'Work on claimed tasks can continue.') + + stop_all_work = db.DateTimeProperty( + verbose_name=ugettext('Work Submission Deadline date')) + stop_all_work.help_text = ugettext( + 'All work must stop by this date.') + + winner_selection_start = db.DateTimeProperty( + verbose_name=ugettext('Winner Selection Start date')) + winner_selection_start.help_text = ugettext( + 'Organizations start choosing their winners.') + + winner_selection_end = db.DateTimeProperty( + verbose_name=ugettext('Winner Selection End date')) + winner_selection_end.help_text = ugettext( + 'Organizations must have completed choosing their winners.') + + winner_announcement = db.DateTimeProperty( + verbose_name=ugettext('Winner Announcement date')) + winner_announcement.help_text = ugettext( + 'All winners are announced.') + diff -r 3156760b4d26 -r 4cc66ab098e8 app/ghop/models/work_submission.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/ghop/models/work_submission.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,72 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the GHOP WorkSubmission Model. 
+""" + +__authors__ = [ + '"Madhusudan.C.S" ', + '"Lennard de Rijk" ', +] + + +from google.appengine.ext import db + +from django.utils.translation import ugettext + +import soc.models.linkable +import soc.models.user + +import ghop.models.program +import ghop.models.task + + +class GHOPWorkSubmission(soc.models.linkable.Linkable): + """Model for work submissions for a task by students. + + Scope will be set to the Organization to which this work has been submitted. + """ + + #: Task to which this work was submitted + task = db.ReferenceProperty(reference_class=ghop.models.task.GHOPTask, + required=True, + collection_name='work_submissions') + + #: User who submitted this work + user = db.ReferenceProperty(reference_class=soc.models.user.User, + required=True, + collection_name='work_submissions') + + #: Program to which this work belongs to + program = db.ReferenceProperty(reference_class=ghop.models.program.GHOPProgram, + required=True, + collection_name='work_submissions') + + #: Property allowing you to store information about your work + information = db.TextProperty( + required=True, verbose_name=ugettext('Info')) + information.help_text = ugettext( + 'Information about the work you submit for this task') + + #: Property containing an URL to this work or more information about it + url_to_work = db.LinkProperty( + required=False, verbose_name=ugettext('URL to your Work')) + url_to_work.help_text = ugettext( + 'URL to a resource containing your work or more information about it') + + #: Property containing the date when the work was submitted + submitted_on = db.DateTimeProperty(required=True, auto_now_add=True, + verbose_name=ugettext('Submitted on')) diff -r 3156760b4d26 -r 4cc66ab098e8 app/htmlsanitizer/BeautifulSoup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/htmlsanitizer/BeautifulSoup.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,2000 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" 
+http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2009, Leonard Richardson + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.1.0.1" +__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" +__license__ = "New-style BSD" + +import codecs +import markupbase +import types +import re +from HTMLParser import HTMLParser, HTMLParseError +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +# First, the classes that represent markup elements. + +def sob(unicode, encoding): + """Returns either the given Unicode string or its encoding.""" + if encoding is None: + return unicode + else: + return unicode.encode(encoding) + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.contents.index(self) + if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + # We're replacing this element with one of its siblings. + index = self.parent.contents.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. 
+ myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + self.parent.contents.remove(self) + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if (isinstance(newChild, basestring) + or isinstance(newChild, unicode)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent != None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent == self: + index = self.find(newChild) + if index and index < position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + else: + # Build a SoupStrainer + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. 
+ .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (unicode(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.decode().encode(encoding) + + def decodeGivenEventualEncoding(self, eventualEncoding): + return self + +class CData(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'' + +class ProcessingInstruction(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + output = self + if u'%SOUP-ENCODING%' in output: + output = self.substituteEncoding(output, eventualEncoding) + return u'' + +class Comment(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'' + +class Declaration(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'' + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def 
_invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + def convert(kval): + "Converts HTML, XML and numeric entities in the attribute value." 
+ k, val = kval + if val is None: + return kval + return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, val)) + self.attrs = map(convert, self.attrs) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. 
tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.decode(eventualEncoding=encoding) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __unicode__(self): + return self.decode() + + def __str__(self): + return self.encode() + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decode(prettyPrint, indentLevel, encoding).encode(encoding) + + def decode(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Returns a string or 
Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding.""" + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isString(val): + if (self.containsSubstitutions + and eventualEncoding is not None + and '%SOUP-ENCODING%' in val): + val = self.substituteEncoding(val, eventualEncoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + if val is None: + # Handle boolean attributes. 
+ decoded = key + else: + decoded = fmt % (key, val) + attrs.append(decoded) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % self.name + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.decodeContents(prettyPrint, indentContents, + eventualEncoding) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (self.name, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + contents = [i for i in self.contents] + for i in contents: + if isinstance(i, Tag): + i.decompose() + else: + i.extract() + self.extract() + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.encode(encoding, True) + + def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decodeContents(prettyPrint, indentLevel).encode(encoding) + + def decodeContents(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Renders the contents of this tag as a string in the given + encoding. 
If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.decodeGivenEventualEncoding(eventualEncoding) + elif isinstance(c, Tag): + s.append(c.decode(prettyPrint, indentLevel, eventualEncoding)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods. Will go away in 4.0. + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + # 3.x compatibility methods. Will go away in 4.0. 
+ def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + if encoding is None: + return self.decodeContents(prettyPrint, indentLevel, encoding) + else: + return self.encodeContents(encoding, prettyPrint, indentLevel) + + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + def childGenerator(self): + if not len(self.contents): + raise StopIteration + current = self.contents[0] + while current: + yield current + current = current.nextSibling + raise StopIteration + +# Next, a couple classes to represent queries and their results. 
+class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isString(attrs): + kwargs['class'] = attrs + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if isList(markup) and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. 
+ elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ + isString(markup): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst == True and type(matchAgainst) == types.BooleanType: + result = markup != None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup is not None and not isString(markup): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif (isList(matchAgainst) + and (markup is not None or not isString(matchAgainst))): + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isString(markup): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. 
+ +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return ((hasattr(l, '__iter__') and not isString(l)) + or (type(l) in (types.ListType, types.TupleType))) + +def isString(s): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is stringlike.""" + try: + return isinstance(s, unicode) or isinstance(s, basestring) + except NameError: + return isinstance(s, str) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion) and not isString(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class HTMLParserBuilder(HTMLParser): + + def __init__(self, soup): + HTMLParser.__init__(self) + self.soup = soup + + # We inherit feed() and reset(). 
+ + def handle_starttag(self, name, attrs): + if name == 'meta': + self.soup.extractCharsetFromMeta(attrs) + else: + self.soup.unknown_starttag(name, attrs) + + def handle_endtag(self, name): + self.soup.unknown_endtag(name) + + def handle_data(self, content): + self.soup.handle_data(content) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.soup.endData() + self.handle_data(text) + self.soup.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.soup.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.soup.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.soup.convertXMLEntities: + data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.soup.convertHTMLEntities and \ + not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. 
+ # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = HTMLParser.parse_declaration(self, i) + except HTMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + + +class BeautifulStoneSoup(Tag): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] 
+ + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. + STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False, + builder=HTMLParserBuilder): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + HTMLParser will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + HTMLParser, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke HTMLParser: + +
(No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + self.builder = builder(self) + self.reset() + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed. + self.builder = None # So can the builder. + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not isList(self.markupMassage): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.builder.reset() + + self.builder.feed(markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableString): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def 
pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

FooBar *

* should pop to 'p', not 'b'. +

FooBar *

* should pop to 'table', not 'p'. +

Foo

Bar *

* should pop to 'tr', not 'p'. + +

    • *
    • * should pop to 'ul', not the first 'li'. +
  • ** should pop to 'table', not the first 'tr' + tag should + implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. 
+ #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def extractCharsetFromMeta(self, attrs): + self.unknown_starttag('meta', attrs) + + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

    tag should implicitly close the previous

    tag. + +

    Para1

    Para2 + should be transformed into: +

    Para1

    Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a

    tag should _not_ implicitly close the previous +
    tag. + + Alice said:
    Bob said:
    Blah + should NOT be transformed into: + Alice said:
    Bob said:
    Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a
    , + but not close a tag in another table. + +
    BlahBlah + should be transformed into: +
    BlahBlah + but, + Blah
    Blah + should NOT be transformed into + Blah
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. 
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def extractCharsetFromMeta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. 
+ newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + Foo" + soup = BeautifulSoup(text) + self.assertEqual(soup.script.contents[0], "if (iThis is an example of an HTML tag<&<&") + +class OperatorOverload(SoupTest): + "Our operators do it all! Call now!" + + def testTagNameAsFind(self): + "Tests that referencing a tag name as a member delegates to find()." + soup = BeautifulSoup('foobarRed herring') + self.assertEqual(soup.b.i, soup.find('b').find('i')) + self.assertEqual(soup.b.i.string, 'bar') + self.assertEqual(soup.b['id'], '1') + self.assertEqual(soup.b.contents[0], 'foo') + self.assert_(not soup.a) + + #Test the .fooTag variant of .foo. + self.assertEqual(soup.bTag.iTag.string, 'bar') + self.assertEqual(soup.b.iTag.string, 'bar') + self.assertEqual(soup.find('b').find('i'), soup.bTag.iTag) + +class NestableEgg(SoupTest): + """Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!""" + + def testParaInsideBlockquote(self): + soup = BeautifulSoup('

    Foo

    Bar') + self.assertEqual(soup.blockquote.p.b.string, 'Foo') + self.assertEqual(soup.blockquote.b.string, 'Foo') + self.assertEqual(soup.find('p', recursive=False).string, 'Bar') + + def testNestedTables(self): + text = """

    Here's another table: +
    Juicy text
    """ + soup = BeautifulSoup(text) + self.assertEquals(soup.table.table.td.string, 'Juicy text') + self.assertEquals(len(soup.findAll('table')), 2) + self.assertEquals(len(soup.table.findAll('table')), 1) + self.assertEquals(soup.find('table', {'id' : 2}).parent.parent.parent.name, + 'table') + + text = "
    Foo
    " + soup = BeautifulSoup(text) + self.assertEquals(soup.table.tr.td.div.table.contents[0], "Foo") + + text = """FooBar + Baz
    """ + soup = BeautifulSoup(text) + self.assertEquals(soup.table.thead.tr.contents[0], "Foo") + + def testBadNestedTables(self): + soup = BeautifulSoup("
    ") + self.assertEquals(soup.table.tr.table.tr['id'], 'nested') + +class CleanupOnAisleFour(SoupTest): + """Here we test cleanup of text that breaks HTMLParser or is just + obnoxious.""" + + def testSelfClosingtag(self): + self.assertEqual(BeautifulSoup("Foo
    Bar").find('br').decode(), + '
    ') + + self.assertSoupEquals('

    test1
    test2

    ', + '

    test1
    test2

    ') + + text = '

    test1test2' + soup = BeautifulStoneSoup(text) + self.assertEqual(soup.decode(), + '

    test1test2

    ') + + soup = BeautifulStoneSoup(text, selfClosingTags='selfclosing') + self.assertEqual(soup.decode(), + '

    test1test2

    ') + + def testSelfClosingTagOrNot(self): + text = "http://foo.com/" + self.assertEqual(BeautifulStoneSoup(text).decode(), text) + self.assertEqual(BeautifulSoup(text).decode(), + 'http://foo.com/') + + def testBooleanAttributes(self): + text = "" + self.assertSoupEquals(text, text) + + def testCData(self): + xml = "foobar" + self.assertSoupEquals(xml, xml) + r = re.compile("foo.*bar") + soup = BeautifulSoup(xml) + self.assertEquals(soup.find(text=r).string, "foobar") + self.assertEquals(soup.find(text=r).__class__, CData) + + def testComments(self): + xml = "foobaz" + self.assertSoupEquals(xml) + r = re.compile("foo.*bar") + soup = BeautifulSoup(xml) + self.assertEquals(soup.find(text=r).string, "foobar") + self.assertEquals(soup.find(text="foobar").__class__, Comment) + + def testDeclaration(self): + xml = "foobaz" + self.assertSoupEquals(xml) + r = re.compile(".*foo.*bar") + soup = BeautifulSoup(xml) + text = "DOCTYPE foobar" + self.assertEquals(soup.find(text=r).string, text) + self.assertEquals(soup.find(text=text).__class__, Declaration) + + namespaced_doctype = ('' + 'foo') + soup = BeautifulSoup(namespaced_doctype) + self.assertEquals(soup.contents[0], + 'DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd"') + self.assertEquals(soup.html.contents[0], 'foo') + + def testEntityConversions(self): + text = "<<sacré bleu!>>" + soup = BeautifulStoneSoup(text) + self.assertSoupEquals(text) + + xmlEnt = BeautifulStoneSoup.XML_ENTITIES + htmlEnt = BeautifulStoneSoup.HTML_ENTITIES + xhtmlEnt = BeautifulStoneSoup.XHTML_ENTITIES + + soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) + self.assertEquals(soup.decode(), "<>") + + soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) + self.assertEquals(soup.decode(), "<>") + + soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) + self.assertEquals(soup.decode(), u"<>") + + # Make sure the "XML", "HTML", and "XHTML" settings work. 
+ text = "<™'" + soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) + self.assertEquals(soup.decode(), u"<™'") + + soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) + self.assertEquals(soup.decode(), u"<\u2122'") + + soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt) + self.assertEquals(soup.decode(), u"<\u2122'") + + def testNonBreakingSpaces(self): + soup = BeautifulSoup("  ", + convertEntities=BeautifulStoneSoup.HTML_ENTITIES) + self.assertEquals(soup.decode(), u"\xa0\xa0") + + def testWhitespaceInDeclaration(self): + self.assertSoupEquals('', '') + + def testJunkInDeclaration(self): + self.assertSoupEquals('a', 'a') + + def testIncompleteDeclaration(self): + self.assertSoupEquals('ac') + + def testEntityReplacement(self): + self.assertSoupEquals('hello there') + + def testEntitiesInAttributeValues(self): + self.assertSoupEquals('', '', + encoding='utf-8') + self.assertSoupEquals('', '', + encoding='utf-8') + + soup = BeautifulSoup('', + convertEntities=BeautifulStoneSoup.HTML_ENTITIES) + self.assertEquals(soup.decode(), u'') + + uri = "http://crummy.com?sacré&bleu" + link = '' % uri + + soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) + self.assertEquals(soup.decode(), + link.replace("é", u"\xe9")) + + uri = "http://crummy.com?sacré&bleu" + link = '' % uri + soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) + self.assertEquals(soup.a['href'], + uri.replace("é", u"\xe9")) + + def testNakedAmpersands(self): + html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES} + soup = BeautifulStoneSoup("AT&T ", **html) + self.assertEquals(soup.decode(), 'AT&T ') + + nakedAmpersandInASentence = "AT&T was Ma Bell" + soup = BeautifulStoneSoup(nakedAmpersandInASentence,**html) + self.assertEquals(soup.decode(), \ + nakedAmpersandInASentence.replace('&','&')) + + invalidURL = 'foo' + validURL = invalidURL.replace('&','&') + soup = BeautifulStoneSoup(invalidURL) + self.assertEquals(soup.decode(), validURL) + + 
soup = BeautifulStoneSoup(validURL) + self.assertEquals(soup.decode(), validURL) + + +class EncodeRed(SoupTest): + """Tests encoding conversion, Unicode conversion, and Microsoft + smart quote fixes.""" + + def testUnicodeDammitStandalone(self): + markup = "\x92" + dammit = UnicodeDammit(markup) + self.assertEquals(dammit.unicode, "") + + hebrew = "\xed\xe5\xec\xf9" + dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) + self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') + self.assertEquals(dammit.originalEncoding, 'iso-8859-8') + + def testGarbageInGarbageOut(self): + ascii = "a" + asciiSoup = BeautifulStoneSoup(ascii) + self.assertEquals(ascii, asciiSoup.decode()) + + unicodeData = u"\u00FC" + utf8 = unicodeData.encode("utf-8") + self.assertEquals(utf8, '\xc3\xbc') + + unicodeSoup = BeautifulStoneSoup(unicodeData) + self.assertEquals(unicodeData, unicodeSoup.decode()) + self.assertEquals(unicodeSoup.foo.string, u'\u00FC') + + utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8') + self.assertEquals(utf8, utf8Soup.encode('utf-8')) + self.assertEquals(utf8Soup.originalEncoding, "utf-8") + + utf8Soup = BeautifulStoneSoup(unicodeData) + self.assertEquals(utf8, utf8Soup.encode('utf-8')) + self.assertEquals(utf8Soup.originalEncoding, None) + + + def testHandleInvalidCodec(self): + for bad_encoding in ['.utf8', '...', 'utF---16.!']: + soup = BeautifulSoup(u"RäksmörgÃ¥s".encode("utf-8"), + fromEncoding=bad_encoding) + self.assertEquals(soup.originalEncoding, 'utf-8') + + def testUnicodeSearch(self): + html = u'

    Räksmörgås

    ' + soup = BeautifulSoup(html) + self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') + + def testRewrittenXMLHeader(self): + euc_jp = '\n\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n\n' + utf8 = "\n\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n\n" + soup = BeautifulStoneSoup(euc_jp) + if soup.originalEncoding != "euc-jp": + raise Exception("Test failed when parsing euc-jp document. " + "If you're running Python >=2.4, or you have " + "cjkcodecs installed, this is a real problem. " + "Otherwise, ignore it.") + + self.assertEquals(soup.originalEncoding, "euc-jp") + self.assertEquals(soup.renderContents('utf-8'), utf8) + + old_text = "\x92" + new_text = "" + self.assertSoupEquals(old_text, new_text) + + def testRewrittenMetaTag(self): + no_shift_jis_html = '''\n
    \n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n
    ''' + soup = BeautifulSoup(no_shift_jis_html) + + # Beautiful Soup used to try to rewrite the meta tag even if the + # meta tag got filtered out by the strainer. This test makes + # sure that doesn't happen. + strainer = SoupStrainer('pre') + soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) + self.assertEquals(soup.contents[0].name, 'pre') + + meta_tag = ('') + shift_jis_html = ( + '\n%s\n' + '' + '
    \n'
    +            '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
    +            '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
    +            '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
    +            '
    ') % meta_tag + soup = BeautifulSoup(shift_jis_html) + if soup.originalEncoding != "shift-jis": + raise Exception("Test failed when parsing shift-jis document " + "with meta tag '%s'." + "If you're running Python >=2.4, or you have " + "cjkcodecs installed, this is a real problem. " + "Otherwise, ignore it." % meta_tag) + self.assertEquals(soup.originalEncoding, "shift-jis") + + content_type_tag = soup.meta['content'] + self.assertEquals(content_type_tag[content_type_tag.find('charset='):], + 'charset=%SOUP-ENCODING%') + content_type = str(soup.meta) + index = content_type.find('charset=') + self.assertEqual(content_type[index:index+len('charset=utf8')+1], + 'charset=utf-8') + content_type = soup.meta.encode('shift-jis') + index = content_type.find('charset=') + self.assertEqual(content_type[index:index+len('charset=shift-jis')], + 'charset=shift-jis'.encode()) + + self.assertEquals(soup.encode('utf-8'), ( + '\n' + '\n' + '' + '
    \n'
    +                '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
    +                '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
    +                '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
    +                '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
    +                '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
    +                '
    ')) + self.assertEquals(soup.encode("shift-jis"), + shift_jis_html.replace('x-sjis'.encode(), + 'shift-jis'.encode())) + + isolatin = """Sacr\xe9 bleu!""" + soup = BeautifulSoup(isolatin) + + utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) + utf8 = utf8.replace("\xe9", "\xc3\xa9") + self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') + + def testHebrew(self): + iso_8859_8= '\nHebrew (ISO 8859-8) in Visual Directionality\n\n\n\n\n\n

    Hebrew (ISO 8859-8) in Visual Directionality

    \n\xed\xe5\xec\xf9\n\n' + utf8 = '\nHebrew (ISO 8859-8) in Visual Directionality\n\n\n

    Hebrew (ISO 8859-8) in Visual Directionality

    \n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n\n' + soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") + self.assertEquals(soup.encode('utf-8'), utf8) + + def testSmartQuotesNotSoSmartAnymore(self): + self.assertSoupEquals("\x91Foo\x92 ", + '‘Foo’ ') + + def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): + smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" + soup = BeautifulSoup(smartQuotes) + self.assertEquals(soup.decode(), + 'Il a dit, ‹Sacré bleu!›') + soup = BeautifulSoup(smartQuotes, convertEntities="html") + self.assertEquals(soup.encode('utf-8'), + 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') + + def testDontSeeSmartQuotesWhereThereAreNone(self): + utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" + self.assertSoupEquals(utf_8, encoding='utf-8') + + +class Whitewash(SoupTest): + """Test whitespace preservation.""" + + def testPreservedWhitespace(self): + self.assertSoupEquals("
       
    ") + self.assertSoupEquals("
     woo  
    ") + + def testCollapsedWhitespace(self): + self.assertSoupEquals("

    ", "

    ") + + +if __name__ == '__main__': + unittest.main() diff -r 3156760b4d26 -r 4cc66ab098e8 app/htmlsanitizer/HtmlSanitizer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/htmlsanitizer/HtmlSanitizer.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,575 @@ +# -*- coding: UTF-8 -*- +""" +some input filters, for regularising the html fragments from screen scraping and +browser-based editors into some semblance of sanity + +TODO: turn the messy setting[method_name]=True filter syntax into a list of cleaning methods to invoke, so that they can be invoked in a specific order and multiple times. + +AUTHORS: +Dan MacKinlay - https://launchpad.net/~dan-possumpalace +Collin Grady - http://launchpad.net/~collin-collingrady +Andreas Gustafsson - https://bugs.launchpad.net/~gson +HÃ¥kan W - https://launchpad.net/~hwaara-gmail +""" + +import BeautifulSoup +import re +import sys + +# Python 2.4 compatibility +try: any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +""" +html5lib compatibility. Basically, we need to know that this still works whether html5lib +is imported or not. Should run complete suites of tests for both possible configs - +or test in virtual environments, but for now a basic sanity check will do. +>>> if html5: +>>> c=Cleaner(html5=False) +>>> c(u'

    foo

    ) +u'

    foo

    ' +""" +try: + import html5lib + from html5lib import sanitizer, treebuilders + parser = html5lib.HTMLParser( + tree=treebuilders.getTreeBuilder("beautifulsoup"), + tokenizer=sanitizer.HTMLSanitizer + ) + html5 = True +except ImportError: + html5 = False + +ANTI_JS_RE=re.compile('j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:', re.IGNORECASE) +#These tags and attrs are sufficently liberal to let microformats through... +#it ruthlessly culls all the rdf, dublin core metadata and so on. +valid_tags = dict.fromkeys('p i em strong b u a h1 h2 h3 pre abbr br img dd dt ol ul li span sub sup ins del blockquote table tr td th address cite'.split()) #div? +valid_attrs = dict.fromkeys('href src rel title'.split()) +valid_schemes = dict.fromkeys('http https'.split()) +elem_map = {'b' : 'strong', 'i': 'em'} +attrs_considered_links = dict.fromkeys("src href".split()) #should include +#courtesy http://developer.mozilla.org/en/docs/HTML:Block-level_elements +block_elements = dict.fromkeys(["p", "h1","h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "address", "blockquote", "dl", "div", "fieldset", "form", "hr", "noscript", "table"]) + +#convenient default filter lists. 
+paranoid_filters = ["strip_comments", "strip_tags", "strip_attrs", + "strip_schemes", "rename_tags", "wrap_string", "strip_empty_tags", "strip_empty_tags", ] +complete_filters = ["strip_comments", "rename_tags", "strip_tags", "strip_attrs", + "strip_cdata", "strip_schemes", "wrap_string", "strip_empty_tags", "rebase_links", "reparse"] + +#set some conservative default string processings +default_settings = { + "filters" : paranoid_filters, + "block_elements" : block_elements, #xml or None for a more liberal version + "convert_entities" : "html", #xml or None for a more liberal version + "valid_tags" : valid_tags, + "valid_attrs" : valid_attrs, + "valid_schemes" : valid_schemes, + "attrs_considered_links" : attrs_considered_links, + "elem_map" : elem_map, + "wrapping_element" : "p", + "auto_clean" : False, + "original_url" : "", + "new_url" : "", + "html5" : html5 +} +#processes I'd like but haven't implemented +#"encode_xml_specials", "ensure complete xhtml doc", "ensure_xhtml_fragment_only" +# and some handling of permitted namespaces for tags. for RDF, say. maybe. + +XML_ENTITIES = { u"'" : u"'", + u'"' : u""", + u"&" : u"&", + u"<" : u"<", + u">" : u">" + } +LINE_EXTRACTION_RE = re.compile(".+", re.MULTILINE) +BR_EXTRACTION_RE = re.compile("", re.MULTILINE) + +class Stop: + """ + handy class that we use as a stop input for our state machine in lieu of falling + off the end of lists + """ + pass + + +class Cleaner(object): + r""" + powerful and slow arbitrary HTML sanitisation. can deal (i hope) with most XSS + vectors and layout-breaking badness. + Probably overkill for content from trusted sources; defaults are accordingly + set to be paranoid. + >>> bad_html = '

    content>> good_html = u'

    content

    ' + >>> c = Cleaner() + >>> c.string = bad_html + >>> c.clean() + >>> c.string == good_html + True + + Also supports shorthand syntax: + >>> c = Cleaner() + >>> c(bad_html) == c(good_html) + True + """ + + def __init__(self, string_or_soup="", *args, **kwargs): + self.settings=default_settings.copy() + self.settings.update(kwargs) + if args : + self.settings['filters'] = args + super(Cleaner, self).__init__(string_or_soup, *args, **kwargs) + self.string = string_or_soup + + def __call__(self, string = None, **kwargs): + """ + convenience method allowing one-step calling of an instance and returning + a cleaned string. + + TODO: make this method preserve internal state- perhaps by creating a new + instance. + + >>> s = 'input string' + >>> c1 = Cleaner(s, auto_clean=True) + >>> c2 = Cleaner("") + >>> c1.string == c2(s) + True + + """ + self.settings.update(kwargs) + if not string == None : + self.string = string + self.clean() + return self.string + + def _set_contents(self, string_or_soup): + if isinstance(string_or_soup, BeautifulSoup.BeautifulSoup) : + self._set_soup(string_or_soup) + else : + self._set_string(string_or_soup) + + def _set_string(self, html_fragment_string): + if self.settings['html5']: + s = parser.parse(html_fragment_string).body + else: + s = BeautifulSoup.BeautifulSoup( + html_fragment_string, + convertEntities=self.settings['convert_entities']) + self._set_soup(s) + + def _set_soup(self, soup): + """ + Does all the work of set_string, but bypasses a potential autoclean to avoid + loops upon internal string setting ops. 
+ """ + self._soup = BeautifulSoup.BeautifulSoup( + '' + ) + self.root=self._soup.contents[0] + + if len(soup.contents) : + backwards_soup = [i for i in soup.contents] + backwards_soup.reverse() + else : + backwards_soup = [] + for i in backwards_soup : + i.extract() + self.root.insert(0, i) + + def set_string(self, string) : + ur""" + sets the string to process and does the necessary input encoding too + really intended to be invoked as a property. + note the godawful rootrootroot element which we need because the + BeautifulSoup object has all the same methods as a Tag, but + behaves differently, silently failing on some inserts and appends + + >>> c = Cleaner(convert_entities="html") + >>> c.string = 'é' + >>> c.string + u'\xe9' + >>> c = Cleaner(convert_entities="xml") + >>> c.string = u'é' + >>> c.string + u'é' + """ + self._set_string(string) + if len(string) and self.settings['auto_clean'] : self.clean() + + def get_string(self): + return unicode(self.root.renderContents()) + + string = property(get_string, set_string) + + def clean(self): + """ + invoke all cleaning processes stipulated in the settings + """ + for method in self.settings['filters'] : + try : + getattr(self, method)() + except NotImplementedError : + sys.stderr.write('Warning, called unimplemented method %s' % method + '\n') + + def strip_comments(self): + r""" + XHTML comments are used as an XSS attack vector. they must die. + + >>> c = Cleaner("", "strip_comments") + >>> c('

    text More text

    ') + u'

    text More text

    ' + """ + for comment in self.root.findAll( + text = lambda text: isinstance(text, BeautifulSoup.Comment)): + comment.extract() + + def strip_cdata(self): + for cdata in self.root.findAll( + text = lambda text: isinstance(text, BeautifulSoup.CData)): + cdata.extract() + + def strip_tags(self): + r""" + ill-considered tags break our layout. they must die. + >>> c = Cleaner("", "strip_tags", auto_clean=True) + >>> c.string = '
    A B C
    ' + >>> c.string + u'A B C' + >>> c.string = '
    A
    B C
    ' + >>> c.string + u'A B C' + >>> c.string = '
    A
    B C
    ' + >>> c.string + u'A
    B C' + >>> c.string = '

    A

    B C

    ' + >>> c.string + u'

    A B C

    ' + >>> c.string = 'A
    B
    C
    D
    E
    F
    G' + >>> c.string + u'ABCDEFG' + >>> c.string = '
    B
    C
    D
    E
    F
    ' + >>> c.string + u'BCDEF' + """ + # Beautiful Soup doesn't support dynamic .findAll results when the tree is + # modified in place. + # going backwards doesn't seem to help. + # so find one at a time + while True : + next_bad_tag = self.root.find( + lambda tag : not tag.name in (self.settings['valid_tags']) + ) + if next_bad_tag : + self.disgorge_elem(next_bad_tag) + else: + break + + def strip_attrs(self): + """ + preserve only those attributes we need in the soup + >>> c = Cleaner("", "strip_attrs") + >>> c('
    A B C
    ') + u'
    A B C
    ' + """ + for tag in self.root.findAll(True): + tag.attrs = [(attr, val) for attr, val in tag.attrs + if attr in self.settings['valid_attrs']] + + def _all_links(self): + """ + finds all tags with link attributes sequentially. safe against modification + of said attributes in-place. + """ + start = self.root + while True: + tag = start.findNext( + lambda tag : any( + [(tag.get(i) for i in self.settings['attrs_considered_links'])] + )) + if tag: + start = tag + yield tag + else : + break + + def strip_schemes(self): + """ + >>> c = Cleaner("", "strip_schemes") + >>> c('') + u'' + >>> c('foo') + u'foo' + """ + for tag in self._all_links() : + for key in self.settings['attrs_considered_links'] : + scheme_bits = tag.get(key, u"").split(u':',1) + if len(scheme_bits) == 1 : + pass #relative link + else: + if not scheme_bits[0] in self.settings['valid_schemes'] : + del(tag[key]) + + def br_to_p(self): + """ + >>> c = Cleaner("", "br_to_p") + >>> c('

    A
    B

    ') + u'

    A

    B

    ' + >>> c('A
    B') + u'

    A

    B

    ' + """ + block_elems = self.settings['block_elements'] + block_elems['br'] = None + block_elems['p'] = None + + while True : + next_br = self.root.find('br') + if not next_br: break + parent = next_br.parent + self.wrap_string('p', start_at=parent, block_elems = block_elems) + while True: + useless_br=parent.find('br', recursive=False) + if not useless_br: break + useless_br.extract() + if parent.name == 'p': + self.disgorge_elem(parent) + + def rename_tags(self): + """ + >>> c = Cleaner("", "rename_tags", elem_map={'i': 'em'}) + >>> c('AB') + u'AB' + """ + for tag in self.root.findAll(self.settings['elem_map']) : + tag.name = self.settings['elem_map'][tag.name] + + def wrap_string(self, wrapping_element = None, start_at=None, block_elems=None): + """ + takes an html fragment, which may or may not have a single containing element, + and guarantees what the tag name of the topmost elements are. + TODO: is there some simpler way than a state machine to do this simple thing? + >>> c = Cleaner("", "wrap_string") + >>> c('A B CD') + u'

    A B CD

    ' + >>> c('A

    B C

    D') + u'

    A

    B C

    D

    ' + """ + if not start_at : start_at = self.root + if not block_elems : block_elems = self.settings['block_elements'] + e = (wrapping_element or self.settings['wrapping_element']) + paragraph_list = [] + children = [elem for elem in start_at.contents] + children.append(Stop()) + + last_state = 'block' + paragraph = BeautifulSoup.Tag(self._soup, e) + + for node in children : + if isinstance(node, Stop) : + state = 'end' + elif hasattr(node, 'name') and node.name in block_elems: + state = 'block' + else: + state = 'inline' + + if last_state == 'block' and state == 'inline': + #collate inline elements + paragraph = BeautifulSoup.Tag(self._soup, e) + + if state == 'inline' : + paragraph.append(node) + + if ((state <> 'inline') and last_state == 'inline') : + paragraph_list.append(paragraph) + + if state == 'block' : + paragraph_list.append(node) + + last_state = state + + #can't use append since it doesn't work on empty elements... + paragraph_list.reverse() + for paragraph in paragraph_list: + start_at.insert(0, paragraph) + + def strip_empty_tags(self): + """ + strip out all empty tags + TODO: depth-first search + >>> c = Cleaner("", "strip_empty_tags") + >>> c('

    A

    B

    ') + u'

    A

    B

    ' + >>> c('

    ') + u'

    ' + """ + tag = self.root + while True: + next_tag = tag.findNext(True) + if not next_tag: break + if next_tag.contents or next_tag.attrs: + tag = next_tag + continue + next_tag.extract() + + def rebase_links(self, original_url="", new_url ="") : + if not original_url : original_url = self.settings.get('original_url', '') + if not new_url : new_url = self.settings.get('new_url', '') + raise NotImplementedError + + # Because of its internal character set handling, + # the following will not work in Beautiful soup and is hopefully redundant. + # def encode_xml_specials(self, original_url="", new_url ="") : + # """ + # BeautifulSoup will let some dangerous xml entities hang around + # in the navigable strings. destroy all monsters. + # >>> c = Cleaner(auto_clean=True, encode_xml_specials=True) + # >>> c('<<<<<') + # u'<<<<' + # """ + # for string in self.root.findAll(text=True) : + # sys.stderr.write("root" +"\n") + # sys.stderr.write(str(self.root) +"\n") + # sys.stderr.write("parent" +"\n") + # sys.stderr.write(str(string.parent) +"\n") + # new_string = unicode(string) + # sys.stderr.write(string +"\n") + # for special_char in XML_ENTITIES.keys() : + # sys.stderr.write(special_char +"\n") + # string.replaceWith( + # new_string.replace(special_char, XML_ENTITIES[special_char]) + # ) + + + def disgorge_elem(self, elem): + """ + remove the given element from the soup and replaces it with its own contents + actually tricky, since you can't replace an element with an list of elements + using replaceWith + >>> disgorgeable_string = 'A B C' + >>> c = Cleaner() + >>> c.string = disgorgeable_string + >>> elem = c._soup.find('em') + >>> c.disgorge_elem(elem) + >>> c.string + u'A B C' + >>> c.string = disgorgeable_string + >>> elem = c._soup.find('body') + >>> c.disgorge_elem(elem) + >>> c.string + u'A B C' + >>> c.string = '
    A
    B C
    ' + >>> elem = c._soup.find(id="inner") + >>> c.disgorge_elem(elem) + >>> c.string + u'
    A B C
    ' + """ + if elem == self.root : + raise AttributeError, "Can't disgorge root" + + # With in-place modification, BeautifulSoup occasionally can return + # elements that think they are orphans + # this lib is full of workarounds, but it's worth checking + parent = elem.parent + if parent == None: + raise AttributeError, "AAAAAAAAGH! NO PARENTS! DEATH!" + + i = None + for i in range(len(parent.contents)) : + if parent.contents[i] == elem : + index = i + break + + elem.extract() + + #the proceeding method breaks horribly, sporadically. + # for i in range(len(elem.contents)) : + # elem.contents[i].extract() + # parent.contents.insert(index+i, elem.contents[i]) + # return + self._safe_inject(parent, index, elem.contents) + + def _safe_inject(self, dest, dest_index, node_list): + #BeautifulSoup result sets look like lists but don't behave right + # i.e. empty ones are still True, + if not len(node_list) : return + node_list = [i for i in node_list] + node_list.reverse() + for i in node_list : + dest.insert(dest_index, i) + + +class Htmlator(object) : + """ + converts a string into a series of html paragraphs + """ + settings = { + "encode_xml_specials" : True, + "is_plaintext" : True, + "convert_newlines" : False, + "make_links" : True, + "auto_convert" : False, + "valid_schemes" : valid_schemes, + } + def __init__(self, string = "", **kwargs): + self.settings.update(kwargs) + super(Htmlator, self).__init__(string, **kwargs) + self.string = string + + def _set_string(self, string): + self.string = string + if self.settings['auto_convert'] : self.convert() + + def _get_string(self): + return unicode(self._soup) + + string = property(_get_string, _set_string) + + def __call__(self, string): + """ + convenience method supporting one-step calling of an instance + as a string cleaning function + """ + self.string = string + self.convert() + return self.string + + def convert(self): + for method in ["encode_xml_specials", "convert_newlines", + "make_links"] : + if 
self.settings(method) : + getattr(self, method)() + + def encode_xml_specials(self) : + for char in XML_ENTITIES.keys() : + self.string.replace(char, XML_ENTITIES[char]) + + def make_links(self): + raise NotImplementedError + + def convert_newlines(self) : + self.string = ''.join([ + '

    ' + line + '

    ' for line in LINE_EXTRACTION_RE.findall(self.string) + ]) + +def _test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": + _test() + + +# def cast_input_to_soup(fn): +# """ +# Decorate function to handle strings as BeautifulSoups transparently +# """ +# def stringy_version(input, *args, **kwargs) : +# if not isinstance(input,BeautifulSoup) : +# input=BeautifulSoup(input) +# return fn(input, *args, **kwargs) +# return stringy_version diff -r 3156760b4d26 -r 4cc66ab098e8 app/htmlsanitizer/LICENSE-BeautifulSoup --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/htmlsanitizer/LICENSE-BeautifulSoup Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,32 @@ +Copyright (c) 2004-2009, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. diff -r 3156760b4d26 -r 4cc66ab098e8 app/htmlsanitizer/LICENSE-HtmlSanitizer --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/htmlsanitizer/LICENSE-HtmlSanitizer Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,23 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + + 3. The names of the authors may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT, +INC. 
OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff -r 3156760b4d26 -r 4cc66ab098e8 app/htmlsanitizer/__init__.py diff -r 3156760b4d26 -r 4cc66ab098e8 app/main.py --- a/app/main.py Mon May 25 23:42:15 2009 +0200 +++ b/app/main.py Tue May 26 02:37:39 2009 +0200 @@ -29,42 +29,7 @@ from google.appengine.ext.webapp import util - -# Remove the standard version of Django. -for k in [k for k in sys.modules if k.startswith('django')]: - del sys.modules[k] - -# Force sys.path to have our own directory first, in case we want to import -# from it. This lets us replace the built-in Django -sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) - -sys.path.insert(0, os.path.abspath('django.zip')) - -ultimate_sys_path = None - -# Force Django to reload its settings. -from django.conf import settings -settings._target = None - -# Must set this env var before importing any part of Django -os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' - -import django.core.handlers.wsgi -import django.core.signals -import django.db - -# Log errors. -def log_exception(*args, **kwds): - """Function used for logging exceptions. - """ - logging.exception('Exception in request:') - -# Log all exceptions detected by Django. -django.core.signals.got_request_exception.connect(log_exception) - -# Unregister the rollback event handler. -django.core.signals.got_request_exception.disconnect( - django.db._rollback_on_exception) +import gae_django def profile_main_as_html(): @@ -117,15 +82,17 @@ def real_main(): """Main program without profiling. 
""" - global ultimate_sys_path - if ultimate_sys_path is None: - ultimate_sys_path = list(sys.path) - else: - sys.path[:] = ultimate_sys_path + import django.core.handlers.wsgi # Create a Django application for WSGI. application = django.core.handlers.wsgi.WSGIHandler() + from soc.modules import callback + from soc.modules import core + + callback.registerCore(core.Core()) + callback.getCore().registerModuleCallbacks() + # Run the WSGI CGI handler with that application. util.run_wsgi_app(application) diff -r 3156760b4d26 -r 4cc66ab098e8 app/settings.py --- a/app/settings.py Mon May 25 23:42:15 2009 +0200 +++ b/app/settings.py Tue May 26 02:37:39 2009 +0200 @@ -100,6 +100,7 @@ os.path.join(ROOT_PATH, 'ghop', 'templates'), os.path.join(ROOT_PATH, 'gsoc', 'templates'), os.path.join(ROOT_PATH, 'soc', 'templates'), + os.path.join(ROOT_PATH, 'shell', 'templates'), ) INSTALLED_APPS = ( @@ -109,3 +110,6 @@ # 'django.contrib.sessions', # 'django.contrib.sites', ) + +MODULE_FMT = 'soc.modules.%s' +MODULES = [] diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/shell/README Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,17 @@ +An interactive, stateful AJAX shell that runs Python code on the server. + +Part of http://code.google.com/p/google-app-engine-samples/. + +May be run as a standalone app or in an existing app as an admin-only handler. +Can be used for system administration tasks, as an interactive way to try out +APIs, or as a debugging aid during development. + +The logging, os, sys, db, and users modules are imported automatically. + +Interpreter state is stored in the datastore so that variables, function +definitions, and other values in the global and local namespaces can be used +across commands. + +To use the shell in your app, copy shell.py, static/*, and templates/* into +your app's source directory. Then, copy the URL handlers from app.yaml into +your app.yaml. 
diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/__init__.py diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/shell.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/shell/shell.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,317 @@ +#!/usr/bin/python +# +# Copyright 2007 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +An interactive, stateful AJAX shell that runs Python code on the server. + +Part of http://code.google.com/p/google-app-engine-samples/. + +May be run as a standalone app or in an existing app as an admin-only handler. +Can be used for system administration tasks, as an interactive way to try out +APIs, or as a debugging aid during development. + +The logging, os, sys, db, and users modules are imported automatically. + +Interpreter state is stored in the datastore so that variables, function +definitions, and other values in the global and local namespaces can be used +across commands. + +To use the shell in your app, copy shell.py, static/*, and templates/* into +your app's source directory. Then, copy the URL handlers from app.yaml into +your app.yaml. + +TODO: unit tests! 
+""" + +import logging +import new +import os +import pickle +import sys +import traceback +import types +import wsgiref.handlers + +from django.template import loader +from google.appengine.api import users +from google.appengine.ext import db +from google.appengine.ext import webapp +from google.appengine.ext.webapp import template + +import django.template +import gae_django + + +# Set to True if stack traces should be shown in the browser, etc. +_DEBUG = True + +# The entity kind for shell sessions. Feel free to rename to suit your app. +_SESSION_KIND = '_Shell_Session' + +# Types that can't be pickled. +UNPICKLABLE_TYPES = ( + types.ModuleType, + types.TypeType, + types.ClassType, + types.FunctionType, + ) + +# Unpicklable statements to seed new sessions with. +INITIAL_UNPICKLABLES = [ + 'import logging', + 'import os', + 'import sys', + 'from google.appengine.ext import db', + 'from google.appengine.api import users', + ] + + +class ShellSession(db.Model): + """A shell session. Stores the session's globals. + + Each session globals is stored in one of two places: + + If the global is picklable, it's stored in the parallel globals and + global_names list properties. (They're parallel lists to work around the + unfortunate fact that the datastore can't store dictionaries natively.) + + If the global is not picklable (e.g. modules, classes, and functions), or if + it was created by the same statement that created an unpicklable global, + it's not stored directly. Instead, the statement is stored in the + unpicklables list property. On each request, before executing the current + statement, the unpicklable statements are evaluated to recreate the + unpicklable globals. + + The unpicklable_names property stores all of the names of globals that were + added by unpicklable statements. When we pickle and store the globals after + executing a statement, we skip the ones in unpicklable_names. + + Using Text instead of string is an optimization. 
We don't query on any of + these properties, so they don't need to be indexed. + """ + global_names = db.ListProperty(db.Text) + globals = db.ListProperty(db.Blob) + unpicklable_names = db.ListProperty(db.Text) + unpicklables = db.ListProperty(db.Text) + + def set_global(self, name, value): + """Adds a global, or updates it if it already exists. + + Also removes the global from the list of unpicklable names. + + Args: + name: the name of the global to remove + value: any picklable value + """ + blob = db.Blob(pickle.dumps(value)) + + if name in self.global_names: + index = self.global_names.index(name) + self.globals[index] = blob + else: + self.global_names.append(db.Text(name)) + self.globals.append(blob) + + self.remove_unpicklable_name(name) + + def remove_global(self, name): + """Removes a global, if it exists. + + Args: + name: string, the name of the global to remove + """ + if name in self.global_names: + index = self.global_names.index(name) + del self.global_names[index] + del self.globals[index] + + def globals_dict(self): + """Returns a dictionary view of the globals. + """ + return dict((name, pickle.loads(val)) + for name, val in zip(self.global_names, self.globals)) + + def add_unpicklable(self, statement, names): + """Adds a statement and list of names to the unpicklables. + + Also removes the names from the globals. + + Args: + statement: string, the statement that created new unpicklable global(s). + names: list of strings; the names of the globals created by the statement. + """ + self.unpicklables.append(db.Text(statement)) + + for name in names: + self.remove_global(name) + if name not in self.unpicklable_names: + self.unpicklable_names.append(db.Text(name)) + + def remove_unpicklable_name(self, name): + """Removes a name from the list of unpicklable names, if it exists. 
+ + Args: + name: string, the name of the unpicklable global to remove + """ + if name in self.unpicklable_names: + self.unpicklable_names.remove(name) + + +class FrontPageHandler(webapp.RequestHandler): + """Creates a new session and renders the shell.html template. + """ + + def get(self): + # set up the session. TODO: garbage collect old shell sessions + session_key = self.request.get('session') + if session_key: + session = ShellSession.get(session_key) + else: + # create a new session + session = ShellSession() + session.unpicklables = [db.Text(line) for line in INITIAL_UNPICKLABLES] + session_key = session.put() + + template_file = os.path.join(os.path.dirname(__file__), 'templates', + 'shell.html') + session_url = '/?session=%s' % session_key + vars = { 'server_software': os.environ['SERVER_SOFTWARE'], + 'python_version': sys.version, + 'session': str(session_key), + 'user': users.get_current_user(), + 'login_url': users.create_login_url(session_url), + 'logout_url': users.create_logout_url(session_url), + } + + rendered = loader.render_to_string('shell.html', dictionary=vars) + # rendered = webapp.template.render(template_file, vars, debug=_DEBUG) + self.response.out.write(rendered) + + +class StatementHandler(webapp.RequestHandler): + """Evaluates a python statement in a given session and returns the result. + """ + + def get(self): + self.response.headers['Content-Type'] = 'text/plain' + + # extract the statement to be run + statement = self.request.get('statement') + if not statement: + return + + # the python compiler doesn't like network line endings + statement = statement.replace('\r\n', '\n') + + # add a couple newlines at the end of the statement. this makes + # single-line expressions such as 'class Foo: pass' evaluate happily. 
+ statement += '\n\n' + + # log and compile the statement up front + try: + logging.info('Compiling and evaluating:\n%s' % statement) + compiled = compile(statement, '', 'single') + except: + self.response.out.write(traceback.format_exc()) + return + + # create a dedicated module to be used as this statement's __main__ + statement_module = new.module('__main__') + + # use this request's __builtin__, since it changes on each request. + # this is needed for import statements, among other things. + import __builtin__ + statement_module.__builtins__ = __builtin__ + + # load the session from the datastore + session = ShellSession.get(self.request.get('session')) + + # swap in our custom module for __main__. then unpickle the session + # globals, run the statement, and re-pickle the session globals, all + # inside it. + old_main = sys.modules.get('__main__') + try: + sys.modules['__main__'] = statement_module + statement_module.__name__ = '__main__' + + # re-evaluate the unpicklables + for code in session.unpicklables: + exec code in statement_module.__dict__ + + # re-initialize the globals + for name, val in session.globals_dict().items(): + try: + statement_module.__dict__[name] = val + except: + msg = 'Dropping %s since it could not be unpickled.\n' % name + self.response.out.write(msg) + logging.warning(msg + traceback.format_exc()) + session.remove_global(name) + + # run! 
+ old_globals = dict(statement_module.__dict__) + try: + old_stdout = sys.stdout + old_stderr = sys.stderr + try: + sys.stdout = self.response.out + sys.stderr = self.response.out + exec compiled in statement_module.__dict__ + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + except: + self.response.out.write(traceback.format_exc()) + return + + # extract the new globals that this statement added + new_globals = {} + for name, val in statement_module.__dict__.items(): + if name not in old_globals or val != old_globals[name]: + new_globals[name] = val + + if True in [isinstance(val, UNPICKLABLE_TYPES) + for val in new_globals.values()]: + # this statement added an unpicklable global. store the statement and + # the names of all of the globals it added in the unpicklables. + session.add_unpicklable(statement, new_globals.keys()) + logging.debug('Storing this statement as an unpicklable.') + + else: + # this statement didn't add any unpicklables. pickle and store the + # new globals back into the datastore. + for name, val in new_globals.items(): + if not name.startswith('__'): + session.set_global(name, val) + + finally: + sys.modules['__main__'] = old_main + + session.put() + + +def main(): + """Main program. + """ + + application = webapp.WSGIApplication( + [('/admin/shell', FrontPageHandler), + ('/admin/shell/shell.do', StatementHandler)], debug=_DEBUG) + wsgiref.handlers.CGIHandler().run(application) + + +if __name__ == '__main__': + main() diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/static/shell.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/shell/static/shell.js Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,195 @@ +// Copyright 2007 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @fileoverview + * Javascript code for the interactive AJAX shell. + * + * Part of http://code.google.com/p/google-app-engine-samples/. + * + * Includes a function (shell.runStatement) that sends the current python + * statement in the shell prompt text box to the server, and a callback + * (shell.done) that displays the results when the XmlHttpRequest returns. + * + * Also includes cross-browser code (shell.getXmlHttpRequest) to get an + * XmlHttpRequest. + */ + +/** + * Shell namespace. + * @type {Object} + */ +var shell = {} + +/** + * The shell history. history is an array of strings, ordered oldest to + * newest. historyCursor is the current history element that the user is on. + * + * The last history element is the statement that the user is currently + * typing. When a statement is run, it's frozen in the history, a new history + * element is added to the end of the array for the new statement, and + * historyCursor is updated to point to the new element. + * + * @type {Array} + */ +shell.history = ['']; + +/** + * See {shell.history} + * @type {number} + */ +shell.historyCursor = 0; + +/** + * A constant for the XmlHttpRequest 'done' state. + * @type Number + */ +shell.DONE_STATE = 4; + +/** + * A cross-browser function to get an XmlHttpRequest object. 
+ * + * @return {XmlHttpRequest?} a new XmlHttpRequest + */ +shell.getXmlHttpRequest = function() { + if (window.XMLHttpRequest) { + return new XMLHttpRequest(); + } else if (window.ActiveXObject) { + try { + return new ActiveXObject('Msxml2.XMLHTTP'); + } catch(e) { + return new ActiveXObject('Microsoft.XMLHTTP'); + } + } + + return null; +}; + +/** + * This is the prompt textarea's onkeypress handler. Depending on the key that + * was pressed, it will run the statement, navigate the history, or update the + * current statement in the history. + * + * @param {Event} event the keypress event + * @return {Boolean} false to tell the browser not to submit the form. + */ +shell.onPromptKeyPress = function(event) { + var statement = document.getElementById('statement'); + + if (this.historyCursor == this.history.length - 1) { + // we're on the current statement. update it in the history before doing + // anything. + this.history[this.historyCursor] = statement.value; + } + + // should we pull something from the history? + if (event.shiftKey && event.keyCode == 38 /* up arrow */) { + if (this.historyCursor > 0) { + statement.value = this.history[--this.historyCursor]; + } + return false; + } else if (event.shiftKey && event.keyCode == 40 /* down arrow */) { + if (this.historyCursor < this.history.length - 1) { + statement.value = this.history[++this.historyCursor]; + } + return false; + } else if (!event.altKey) { + // probably changing the statement. update it in the history. + this.historyCursor = this.history.length - 1; + this.history[this.historyCursor] = statement.value; + } + + // should we submit? + var ctrlEnter = (document.getElementById('submit_key').value == 'ctrl-enter'); + if (event.keyCode == 13 /* enter */ && !event.altKey && !event.shiftKey && + event.ctrlKey == ctrlEnter) { + return this.runStatement(); + } +}; + +/** + * The XmlHttpRequest callback. If the request succeeds, it adds the command + * and its resulting output to the shell history div. 
+ * + * @param {XmlHttpRequest} req the XmlHttpRequest we used to send the current + * statement to the server + */ +shell.done = function(req) { + if (req.readyState == this.DONE_STATE) { + var statement = document.getElementById('statement') + statement.className = 'prompt'; + + // add the command to the shell output + var output = document.getElementById('output'); + + output.value += '\n>>> ' + statement.value; + statement.value = ''; + + // add a new history element + this.history.push(''); + this.historyCursor = this.history.length - 1; + + // add the command's result + var result = req.responseText.replace(/^\s*|\s*$/g, ''); // trim whitespace + if (result != '') + output.value += '\n' + result; + + // scroll to the bottom + output.scrollTop = output.scrollHeight; + if (output.createTextRange) { + var range = output.createTextRange(); + range.collapse(false); + range.select(); + } + } +}; + +/** + * This is the form's onsubmit handler. It sends the python statement to the + * server, and registers shell.done() as the callback to run when it returns. + * + * @return {Boolean} false to tell the browser not to submit the form. + */ +shell.runStatement = function() { + var form = document.getElementById('form'); + + // build a XmlHttpRequest + var req = this.getXmlHttpRequest(); + if (!req) { + document.getElementById('ajax-status').innerHTML = + "Your browser doesn't support AJAX. :("; + return false; + } + + req.onreadystatechange = function() { shell.done(req); }; + + // build the query parameter string + var params = ''; + for (i = 0; i < form.elements.length; i++) { + var elem = form.elements[i]; + if (elem.type != 'submit' && elem.type != 'button' && elem.id != 'caret') { + var value = escape(elem.value).replace(/\+/g, '%2B'); // escape ignores + + params += '&' + elem.name + '=' + value; + } + } + + // send the request and tell the user. + document.getElementById('statement').className = 'prompt processing'; + req.open(form.method, form.action + '?' 
+ params, true); + req.setRequestHeader('Content-type', + 'application/x-www-form-urlencoded;charset=UTF-8'); + req.send(null); + + return false; +}; diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/static/spinner.gif Binary file app/shell/static/spinner.gif has changed diff -r 3156760b4d26 -r 4cc66ab098e8 app/shell/templates/shell.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/shell/templates/shell.html Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,124 @@ + + + + + Interactive Shell + + + + + + +

    Interactive server-side Python shell + (original source) +

    +

    + Return to main home +

    + + + +
    + + + + + + + + +

    + +

    +{% if user %} + {{ user.nickname }} + (log out) +{% else %} + log in +{% endif %} + | Shift-Up/Down for history | + + +

    + + + + + + diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/content/css/ui.datetimepicker-090304.css diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/cron/job.py --- a/app/soc/cron/job.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/cron/job.py Tue May 26 02:37:39 2009 +0200 @@ -29,8 +29,10 @@ from google.appengine.runtime import DeadlineExceededError from soc.cron import student_proposal_mailer +from soc.cron import unique_user_id_adder from soc.models.job import Job + class Error(Exception): """Base class for all exceptions raised by this module. """ @@ -67,6 +69,10 @@ student_proposal_mailer.setupStudentProposalMailing self.tasks['sendStudentProposalMail'] = \ student_proposal_mailer.sendStudentProposalMail + self.tasks['setupUniqueUserIdAdder'] = \ + unique_user_id_adder.setupUniqueUserIdAdder + self.tasks['addUniqueUserIds'] = \ + unique_user_id_adder.addUniqueUserIds def claimJob(self, job_key): """A transaction to claim a job. diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/cron/unique_user_id_adder.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/cron/unique_user_id_adder.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,135 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cron job handler for adding unique user id. 
+""" + +__authors__ = [ + '"Pawel Solyga" ', + ] + + +from google.appengine.ext import db +from google.appengine.api import users +from soc.logic.models.job import logic as job_logic +from soc.logic.models.priority_group import logic as priority_logic +from soc.logic.models.user import logic as user_logic +from soc.models.user import User + + +# amount of users to create jobs for before updating +DEF_USER_STEP_SIZE = 10 + + +class TempUserWithUniqueId(db.Model): + """Helper model for temporary storing User Property with unique id. + """ + user = db.UserProperty(required=True) + + +def emailToAccountAndUserId(address): + """Return a stable user_id string based on an email address, or None if + the address is not a valid/existing google account. + """ + user = users.User(address) + key = TempUserWithUniqueId(user=user).put() + obj = TempUserWithUniqueId.get(key) + return (obj, obj.user.user_id()) + + +def setupUniqueUserIdAdder(job_entity): + """Job that setup jobs that will add unique user ids to all Users. 
+ + Args: + job_entity: a Job entity with key_data set to + [last_completed_user] + """ + + from soc.cron.job import FatalJobError + + user_fields = {'user_id': None} + + if len(key_data) == 1: + # start where we left off + user_fields['__key__ >'] = key_data[0] + + m_users = user_logic.getForFields(user_fields, + limit=DEF_USER_STEP_SIZE) + + # set the default fields for the jobs we are going to create + priority_group = priority_logic.getGroup(priority_logic.CONVERT) + job_fields = { + 'priority_group': priority_group, + 'task_name': 'addUniqueUserIds'} + + job_query_fields = job_fields.copy() + + while m_users: + # for each user create a adder job + for user in m_users: + + job_query_fields['key_data'] = user.key() + adder_job = job_logic.getForFields(job_query_fields, unique=True) + + if not adder_job: + # this user doesn't have unique id yet + job_fields['key_data'] = [user.key()] + job_logic.updateOrCreateFromFields(job_fields) + + # update our own job + last_user_key = m_users[-1].key() + + if len(key_data) == 1: + key_data[0] = last_student_key + else: + key_data.append(last_student_key) + + updated_job_fields = {'key_data': key_data} + job_logic.updateEntityProperties(job_entity, updated_job_fields) + + # rinse and repeat + user_fields['__key__ >'] = last_user_key + m_users = student_logic.getForFields(user_fields, + limit=DEF_USER_STEP_SIZE) + + # we are finished + return + + +def addUniqueUserIds(job_entity): + """Job that will add unique user id to a User. + + Args: + job_entity: a Job entity with key_data set to [user_key] + """ + + from soc.cron.job import FatalJobError + + user_keyname = job_entity.key_data[0].name() + user_entity = user_logic.getFromKeyName(user_keyname) + + if not user_entity: + raise FatalJobError('The User with keyname %s does not exist!' 
% ( + user_keyname)) + + # add unique user id + account, user_id = emailToAccountAndUserId(user_entity.account.email()) + user_entity.account = account + user_entity.user_id = user_id + user_entity.put() + + # we are done here + return \ No newline at end of file diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/logic/accounts.py --- a/app/soc/logic/accounts.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/logic/accounts.py Tue May 26 02:37:39 2009 +0200 @@ -35,6 +35,13 @@ return normalizeAccount(account) if (account and normalize) else account +def getCurrentUserId(): + """Returns a unique id of the current user. + """ + + return users.get_current_user().user_id() + + def normalizeAccount(account): """Returns a normalized version of the specified account. """ @@ -46,6 +53,7 @@ return users.User(email=normalized) + def denormalizeAccount(account): """Returns a denormalized version of the specified account. """ @@ -58,6 +66,7 @@ return users.User(email=denormalized) + def isDeveloper(account=None): """Returns True if a Google Account is a Developer with special privileges. diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/logic/cleaning.py --- a/app/soc/logic/cleaning.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/logic/cleaning.py Tue May 26 02:37:39 2009 +0200 @@ -21,10 +21,11 @@ '"Todd Larsen" ', '"Sverre Rabbelier" ', '"Lennard de Rijk" ', + '"Pawel Solyga" ', ] -import feedparser +from htmlsanitizer import HtmlSanitizer from google.appengine.api import users @@ -379,16 +380,25 @@ def wrapped(self): """Decorator wrapper method. """ + from HTMLParser import HTMLParseError content = self.cleaned_data.get(field_name) + # clean_html_content is called when writing data into GAE rather than + # when reading data from GAE. This short-circuiting of the sanitizer + # only affects html authored by developers. The isDeveloper test for + # example allows developers to add javascript. 
if user_logic.isDeveloper(): return content - - sanitizer = feedparser._HTMLSanitizer('utf-8') - sanitizer.feed(content) - content = sanitizer.output() - content = content.decode('utf-8') + + try: + cleaner = HtmlSanitizer.Cleaner() + cleaner.string = content + cleaner.clean() + except HTMLParseError, msg: + raise forms.ValidationError(msg) + + content = cleaner.string content = content.strip().replace('\r\n', '\n') return content diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/logic/helper/notifications.py --- a/app/soc/logic/helper/notifications.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/logic/helper/notifications.py Tue May 26 02:37:39 2009 +0200 @@ -191,6 +191,7 @@ 'scope_path': to_user.link_id } + import soc.logic.models.notification key_name = model_logic.notification.logic.getKeyNameFromFields(fields) # create and put a new notification in the datastore @@ -204,6 +205,8 @@ notification_entity: Notification about which the message should be sent """ + import soc.views.models.notification + # create the url to show this notification notification_url = "http://%(host)s%(index)s" % { 'host' : os.environ['HTTP_HOST'], diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/logic/models/base.py --- a/app/soc/logic/models/base.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/logic/models/base.py Tue May 26 02:37:39 2009 +0200 @@ -25,6 +25,8 @@ ] +import logging + from google.appengine.ext import db from django.utils.translation import ugettext @@ -324,7 +326,13 @@ query = self.getQueryForFields(filter=filter, order=order) - result = query.fetch(limit, offset) + try: + result = query.fetch(limit, offset) + except db.NeedIndexError, exception: + result = [] + logging.exception("%s, model: %s filter: %s, order: %s" % + (exception, self._model, filter, order)) + # TODO: send email if unique: return result[0] if result else None diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/logic/models/user.py --- a/app/soc/logic/models/user.py Mon May 25 23:42:15 2009 +0200 +++ 
b/app/soc/logic/models/user.py Tue May 26 02:37:39 2009 +0200 @@ -75,6 +75,20 @@ return self.getForAccount(account) + def getForCurrentUserId(self): + """Retrieves the user entity for the currently logged in user id. + + If there is no user logged in, or they have no valid associated User + entity, None is returned. + """ + + user_id = accounts.getCurrentUserId() + + if not user_id: + return None + + return self.getForUserId(user_id) + def getForAccount(self, account): """Retrieves the user entity for the specified account. @@ -94,6 +108,23 @@ return self.getForFields(filter=fields, unique=True) + def getForUserId(self, user_id): + """Retrieves the user entity for the specified user id. + + If there is no user logged in, or they have no valid associated User + entity, None is returned. + """ + + if not user_id: + raise base.InvalidArgumentError + + fields = { + 'user_id': user_id, + 'status':'valid', + } + + return self.getForFields(filter=fields, unique=True) + def isDeveloper(self, account=None, user=None): """Returns true iff the specified user is a Developer. 
diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/models/seed_db.py --- a/app/soc/models/seed_db.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/models/seed_db.py Tue May 26 02:37:39 2009 +0200 @@ -139,7 +139,7 @@ def seed(self, i, entities=None, current_user=None, gsoc2009=None): properties = { - 'key_name': 'google/gsoc2009/%04d' % i, + 'key_name': 'google/gsoc2009/org_%04d' % i, 'link_id': 'org_%04d' % i, 'name': 'Organization %04d' % i, 'short_name': 'Org %04d' % i, @@ -176,6 +176,58 @@ gsoc2009=gsoc2009) +class OrgApplicationSeeder(Seeder): + def type(self): + return OrgApplication + + def commonSeedArgs(self, request): + _, current_user = ensureUser() + gsoc2009 = Program.get_by_key_name('google/gsoc2009') + + if not gsoc2009: + raise Error('Run seed_db first') + + status = request.GET.get('status', 'pre-accepted') + + return dict(current_user=current_user, + gsoc2009=gsoc2009, + status=status) + + + def seed(self, i, entities=None, current_user=None, gsoc2009=None, + status=None): + properties = { + 'key_name': 'google/gsoc2009/org_%04d' % i, + 'link_id': 'org_%04d' % i, + 'name': 'Org App %04d' % i, + 'scope_path': 'google/gsoc2009', + 'scope': gsoc2009, + 'status': status, + 'applicant': current_user, + 'home_page': 'http://www.google.com', + 'email': 'org@example.com', + 'irc_channel': '#care', + 'pub_mailing_list': 'http://groups.google.com', + 'dev_mailing_list': 'http://groups.google.com', + 'description': 'This is an awesome org!', + 'why_applying': 'Because we can', + 'member_criteria': 'They need to be awesome', + 'license_name': 'Apache License, 2.0', + 'ideas': 'http://code.google.com/p/soc/issues', + 'contrib_disappears': 'We use google to find them', + 'member_disappears': 'See above', + 'encourage_contribs': 'We offer them cookies.', + 'continued_contribs': 'We promise them a cake.', + 'agreed_to_admin_agreement': True, + } + + org_application = OrgApplication(**properties) + if entities is None: + org_application.put() + else: + 
entities.append(org_application) + + def seed(request, *args, **kwargs): """Seeds the datastore with some default values. """ @@ -322,9 +374,9 @@ } for i in range(10): - org_app_properties['key_name'] = 'google/gsoc2009/wannabe_%d' % i - org_app_properties['link_id'] = 'wannabe_%d' % i - org_app_properties['name'] = 'Wannabe %d' % i + org_app_properties['key_name'] = 'google/gsoc2009/org_%04d' % i + org_app_properties['link_id'] = 'org_%04d' % i + org_app_properties['name'] = 'Org App %04d' % i entity = OrgApplication(**org_app_properties) entity.put() @@ -479,8 +531,8 @@ raise Error('Run seed_db first') properties = { - 'key_name': 'google/gsoc2009/org_app_%d' % i, - 'link_id': 'org_app_%d' % i, + 'key_name': 'google/gsoc2009/org_%d' % i, + 'link_id': 'org_%d' % i, 'name': 'Org App %d' % i, 'scope_path': 'google/gsoc2009', 'scope': gsoc2009, @@ -577,16 +629,16 @@ def seed_student(request, i): """Returns the properties for a new student entity. """ - + gsoc2009 = Program.get_by_key_name('google/gsoc2009') user = User.get_by_key_name('user_%d' % i) - + if not gsoc2009: raise Error('Run seed_db first') - + if not user: raise Error('Run seed_many for at least %d users first.' % i) - + properties = { 'key_name':'google/gsoc2009/student_%d' % i, 'link_id': 'student_%d' % i, @@ -627,13 +679,13 @@ mentor = Mentor.get_by_key_name('google/gsoc2009/org_%d/mentor' % i) user = User.get_by_key_name('user_%d' % i) student = Student.get_by_key_name('google/gsoc2009/student_%d' % i) - + if not user: raise Error('Run seed_many for at least %d users first.' % i) if not student: raise Error('Run seed_many for at least %d students first.' % i) - + if not org: raise Error('Run seed_many for at least %d orgs first.' % i) @@ -669,6 +721,7 @@ SEEDABLE_MODEL_TYPES = { 'user' : UserSeeder(), 'organization' : OrganizationSeeder(), + 'org_application' : OrgApplicationSeeder(), } @@ -716,7 +769,7 @@ # so, we look for what's after the _ and turn it into an int. 
link_id = highest_instance.link_id if '_' in link_id: - start_index = int(link_id.split('_')[1]) + 1 + start_index = int(link_id.split('_')[-1]) + 1 else: # couldn't find seeded_entities; guessing there are none start_index = 0 diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/models/user.py --- a/app/soc/models/user.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/models/user.py Tue May 26 02:37:39 2009 +0200 @@ -71,6 +71,9 @@ verbose_name=ugettext('User account')) account.help_text = ugettext( 'A valid Google Account.') + + #: Google Account unique user id + user_id = db.StringProperty(required=False) #: A list (possibly empty) of former Google Accounts associated with #: this User. diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/modules/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/modules/__init__.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,17 @@ +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This is the main modules module. +""" diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/modules/callback.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/modules/callback.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,40 @@ +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing Melange callbacks. +""" + +__authors__ = [ + '"Sverre Rabbelier" ', + '"Lennard de Rijk" ', + ] + + +CORE = None + + +def registerCore(core): + """Registers the specified callback as core. + """ + + global CORE + CORE = core + + +def getCore(): + """Returns the Core handler. + """ + + global CORE + return CORE diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/modules/core.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/modules/core.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,235 @@ +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The Melange Core module. +""" + +__authors__ = [ + '"Sverre Rabbelier" ', + '"Lennard de Rijk" ', + ] + + +from django.conf.urls import defaults + +import settings +import soc.cache.sidebar + + +class Error(Exception): + """Error class for the callback module. + """ + + pass + + +class APIVersionMismatch(Error): + """Error raised when API version mismatches. + """ + + MISMATCH_MSG_FMT = "API mismatch, expected '%d', got '%d'." 
+ + def __init__(self, expected, actual): + """Instantiates a new exception with a customized message. + """ + + msg = self.MISMATCH_MSG_FMT % (expected, actual) + super(APIVersionMismatch, self).__init__(msg) + + +class MissingService(Error): + """Error raised when a required service is missing. + """ + + MISSING_SERVICE_FMT = "Required service '%s' is not registered, known: %s" + + def __init__(self, service, services): + """Instantiates a new exception with a customized message. + """ + + msg = self.MISSING_SERVICE_FMT % (service, services) + super(MissingService, self).__init__(msg) + + +class NonUniqueService(Error): + """Error raised when a required service is missing. + """ + + NON_UNIQUE_SERVICE_FMT = "Unique service '%s' called a second time, known: %s." + + def __init__(self, service, services): + """Instantiates a new exception with a customized message. + """ + + msg = self.NON_UNIQUE_SERVICE_FMT % (service, services) + super(NonUniqueService, self).__init__(msg) + + +class Core(object): + """The core handler that controls the Melange API. + """ + + def __init__(self): + """Creates a new instance of the Core. + """ + + self.API_VERSION = 1 + + self.registered_callbacks = [] + self.capability = [] + self.services = [] + + self.sitemap = [] + self.sidebar = [] + + ## + ## internal + ## + + def getService(self, callback, service): + """Retrieves the specified service from the callback if supported. + + Args: + callback: the callback to retrieve the capability from + service: the service to retrieve + """ + + if not hasattr(callback, service): + return False + + func = getattr(callback, service) + + if not callable(func): + return False + + return func + + ## + ## Core code + ## + + def getPatterns(self): + """Returns the Django patterns for this site. 
+ """ + + self.callService('registerWithSitemap', True) + return defaults.patterns(None, *self.sitemap) + + @soc.cache.sidebar.cache + def getSidebar(self, id, user): + """Constructs a sidebar for the current user. + """ + + self.callService('registerWithSidebar', True) + + sidebar = [] + + for i in self.sidebar: + menus = i(id, user) + + for menu in (menus if menus else []): + sidebar.append(menu) + + return sorted(sidebar, key=lambda x: x.get('group')) + + def callService(self, service, unique, *args, **kwargs): + """Calls the specified service on all callbacks. + """ + + if unique and (service in self.services): + return + + results = [] + + for callback in self.registered_callbacks: + func = self.getService(callback, service) + if not func: + continue + + result = func(*args, **kwargs) + results.append(result) + + self.services.append(service) + return results + + def registerModuleCallbacks(self): + """Retrieves all callbacks for the modules of this site. + + Callbacks for modules without a version number or the wrong API_VERSION + number are dropped. They won't be called. + """ + + fmt = settings.MODULE_FMT + modules = ['soc_core'] + settings.MODULES + modules = [__import__(fmt % i, fromlist=['']) for i in modules] + + for callback_class in [i.getCallback() for i in modules]: + if callback_class.API_VERSION != self.API_VERSION: + raise callback.APIVersionMismatch(self.API_VERSION, + callback_class.API_VERSION) + + + callback = callback_class(self) + self.registered_callbacks.append(callback) + + return True + + ## + ## Module code + ## + + def registerCapability(self, capability): + """Registers the specified capability. + """ + + self.capabilities.append(capability) + + def requireCapability(self, capability): + """Requires that the specified capability is present. 
+ """ + + if capability in self.capabilities: + return True + + raise MissingCapability(capability, self.capability) + + def requireService(self, service): + """Requires that the specified service has been called. + """ + + if service in self.services: + return True + + raise MissingService(service, self.services) + + def requireUniqueService(self, service): + """Requires that the specified service is called exactly once. + """ + + if service not in self.services: + return True + + raise NonUniqueService(service, self.services) + + def registerSitemapEntry(self, entries): + """Registers the specified entries with the sitemap. + """ + + self.sitemap.extend(entries) + + def registerSidebarEntry(self, entry): + """Registers the specified entry with the sidebar. + """ + + self.sidebar.append(entry) diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/modules/soc_core/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/modules/soc_core/__init__.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,29 @@ +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This is the main modules module. 
+""" + +__authors__ = [ + '"Sverre Rabbelier" ', + '"Lennard de Rijk" ', + ] + + + +from soc.modules.soc_core import callback + +def getCallback(): + return callback.Callback diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/modules/soc_core/callback.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/soc/modules/soc_core/callback.py Tue May 26 02:37:39 2009 +0200 @@ -0,0 +1,129 @@ +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing the core callback. 
+""" + +__authors__ = [ + '"Sverre Rabbelier" ', + '"Lennard de Rijk" ', + ] + + +from soc.modules import callback + +from soc.views.models import club +from soc.views.models import club_app +from soc.views.models import club_admin +from soc.views.models import club_member +from soc.views.models import cron +from soc.views.models import document +from soc.views.models import host +from soc.views.models import job +from soc.views.models import mentor +from soc.views.models import notification +from soc.views.models import organization +from soc.views.models import org_admin +from soc.views.models import org_app +from soc.views.models import priority_group +from soc.views.models import program +from soc.views.models import request +from soc.views.models import site +from soc.views.models import sponsor +from soc.views.models import student +from soc.views.models import student_project +from soc.views.models import student_proposal +from soc.views.models import timeline +from soc.views.models import user +from soc.views.models import user_self + + +class Callback(object): + """Callback object that handles interaction between the core. + """ + + API_VERSION = 1 + + def __init__(self, core): + """Initializes a new Callback object for the specified core. + """ + + self.core = core + + # disable clubs + self.enable_clubs = False + + def registerWithSitemap(self): + """Called by the server when sitemap entries should be registered. 
+ """ + + self.core.requireUniqueService('registerWithSitemap') + + if self.enable_clubs: + self.core.registerSitemapEntry(club.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(club_admin.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(club_app.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(club_member.view.getDjangoURLPatterns()) + + self.core.registerSitemapEntry(cron.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(document.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(host.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(job.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(mentor.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(notification.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(organization.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(org_admin.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(org_app.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(priority_group.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(program.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(request.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(site.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(sponsor.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(student.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(student_project.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(student_proposal.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(timeline.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(user_self.view.getDjangoURLPatterns()) + self.core.registerSitemapEntry(user.view.getDjangoURLPatterns()) + + def registerWithSidebar(self): + """Called by the server when sidebar entries should be registered. 
+ """ + + self.core.requireUniqueService('registerWithSidebar') + + if self.enable_clubs: + self.core.registerSidebarEntry(club.view.getSidebarMenus) + self.core.registerSidebarEntry(club.view.getExtraMenus) + self.core.registerSidebarEntry(club_admin.view.getSidebarMenus) + self.core.registerSidebarEntry(club_member.view.getSidebarMenus) + self.core.registerSidebarEntry(club_app.view.getSidebarMenus) + + self.core.registerSidebarEntry(user_self.view.getSidebarMenus) + self.core.registerSidebarEntry(site.view.getSidebarMenus) + self.core.registerSidebarEntry(user.view.getSidebarMenus) + self.core.registerSidebarEntry(sponsor.view.getSidebarMenus) + self.core.registerSidebarEntry(sponsor.view.getExtraMenus) + self.core.registerSidebarEntry(host.view.getSidebarMenus) + self.core.registerSidebarEntry(request.view.getSidebarMenus) + self.core.registerSidebarEntry(program.view.getSidebarMenus) + self.core.registerSidebarEntry(program.view.getExtraMenus) + self.core.registerSidebarEntry(student.view.getSidebarMenus) + self.core.registerSidebarEntry(student_project.view.getSidebarMenus) + self.core.registerSidebarEntry(student_proposal.view.getSidebarMenus) + self.core.registerSidebarEntry(organization.view.getSidebarMenus) + self.core.registerSidebarEntry(organization.view.getExtraMenus) + self.core.registerSidebarEntry(org_admin.view.getSidebarMenus) + self.core.registerSidebarEntry(mentor.view.getSidebarMenus) + self.core.registerSidebarEntry(org_app.view.getSidebarMenus) diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/club_admin/manage.html --- a/app/soc/templates/soc/club_admin/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/club_admin/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@
    {% endblock %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/club_member/manage.html --- a/app/soc/templates/soc/club_member/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/club_member/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@ {% endblock %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/host/manage.html --- a/app/soc/templates/soc/host/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/host/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@ {% endblock %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/mentor/manage.html --- a/app/soc/templates/soc/mentor/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/mentor/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@ {% endblock %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/models/edit.html --- a/app/soc/templates/soc/models/edit.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/models/edit.html Tue May 26 02:37:39 2009 +0200 @@ -58,11 +58,17 @@ {% endif %} {% endblock %} {% if entity %} {% block delete_button %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/notification/list/row.html --- a/app/soc/templates/soc/notification/list/row.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/notification/list/row.html Tue May 26 02:37:39 2009 +0200 @@ -11,5 +11,5 @@ - + diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/org_admin/manage.html --- a/app/soc/templates/soc/org_admin/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/org_admin/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@ {% endblock %} diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/templates/soc/student/manage.html --- a/app/soc/templates/soc/student/manage.html Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/templates/soc/student/manage.html Tue May 26 02:37:39 2009 +0200 @@ -23,7 +23,17 @@ {% endblock %} diff -r 3156760b4d26 
-r 4cc66ab098e8 app/soc/views/helper/lists.py --- a/app/soc/views/helper/lists.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/helper/lists.py Tue May 26 02:37:39 2009 +0200 @@ -22,6 +22,7 @@ '"Pawel Solyga" ', ] +import logging from soc.logic import dicts from soc.logic.models.user import logic as user_logic @@ -60,8 +61,6 @@ OFFSET_KEY = 'offset_%d' LIMIT_KEY = 'limit_%d' -OFFSET_KEYNAME_KEY = 'offset_keyname_%d' -REVERSE_DIRECTION_KEY = 'reverse_sort_direction_%d' def makeOffsetKey(limit_idx): @@ -72,14 +71,6 @@ return LIMIT_KEY % limit_idx -def makeOffsetKeynameKey(limit_idx): - return OFFSET_KEYNAME_KEY % limit_idx - - -def makeReverseDirectionKey(limit_idx): - return REVERSE_DIRECTION_KEY % limit_idx - - def getListParameters(request, list_index): """Retrieves, converts and validates values for one list @@ -119,44 +110,30 @@ else: limit = min(DEF_MAX_PAGINATION, limit) - result = dict(limit=limit, offset=offset) - offset_keyname_key = makeOffsetLinkidKey(list_index) - offset_keyname = request.GET.get(offset_keyname_key, '') - # TODO(dbentley): URL unescape - result['offset_keyname'] = offset_keyname - - reverse_direction = makeReverseDirectionKey(list_index) in request.GET - result['reverse_direction'] = reverse_direction - - return result + return dict(limit=limit, offset=offset) -class LinkCreator(object): - """A way to create links for a page. +def generateLinkFromGetArgs(request, offset_and_limits): + """Constructs the get args for the url. """ - def __init__(self, request, list_idx, limit): - self.path = request.path - self.base_params = dict( - i for i in request.GET.iteritems() if - i[0].startswith('offset_') or i[0].startswith('limit_')) - self.idx = list_idx - self.base_params[makeLimitKey(self.idx)] = limit + + args = ["%s=%s" % (k, v) for k, v in offset_and_limits.iteritems()] + link_suffix = '?' 
+ '&'.join(args) + + return request.path + link_suffix + - def create(self, offset_keyname=None, export=False, reverse_direction=False): - params = self.base_params.copy() - if offset_linkid is not None: - # TODO(dbentley): URL encode - if offset_linkid == '': - try: - del params[makeOffsetLinkidKey(self.idx)] - except KeyError: - pass - else: - params[makeOffsetLinkidKey(self.idx)]=offset_linkid - if reverse_direction: - params[makeReverseDirectionKey(self.idx)]=True - link_suffix = '&'.join('%s=%s' % (k, v) for k, v in params.iteritems()) - return '%s?%s' % (self.path, link_suffix) +def generateLinkForRequest(request, base_params, updated_params): + """Create a link to the same page as request but with different params + + Params: + request: the request for the page + base_params: the base parameters + updated_params: the parameters to update + """ + params = base_params.copy() + params.update(updated_params) + return generateLinkFromGetArgs(request, params) def getListContent(request, params, filter=None, order=None, @@ -193,38 +170,16 @@ 'last': offset of the last item in the list } """ - + # TODO(dbentley): this appears to be unnecessary indirection, + # as we only use this logic for getForFields, which is never overridden logic = params['logic'] - limit_key = makeLimitKey(idx) - offset_key = makeOffsetKey(idx) - offset_keyname_key = makeOffsetKeynameKey(idx) - reverse_direction_key = makeReverseDirectionKey(idx) + limit_key, offset_key = makeLimitKey(idx), makeOffsetKey(idx) list_params = getListParameters(request, idx) - - limit = list_params['limit'] - offset = list_params['offset'] - offset_keyname = list_params['offset_keyname'] - reverse_direction = list_params['reverse_direction'] - - pagination_form = makePaginationForm(request, limit, limit_key) - - if offset_keyname: - if filter is None: - filter = {} - - if reverse_direction: - filter['__key__ <'] = offset_keyname - else: - filter['__key__ >'] = offset_keyname - - if order is None: - order = [] - if 
reverse_direction: - order.append('-__key__') - else: - order.append('__key__') + limit, offset = list_params['limit'], list_params['offset'] + pagination_form = makePaginationForm(request, list_params['limit'], + limit_key) # Fetch one more to see if there should be a 'next' link data = logic.getForFields(filter=filter, limit=limit+1, offset=offset, @@ -234,60 +189,46 @@ return None more = len(data) > limit - if reverse_direction: - data.reverse() if more: - if reverse_direction: - data = data[1:] - else: - data = data[:limit] - - should_have_next_link = True - if not reverse_direction and not more: - should_have_next_link = False - - # Calculating should_have_previous_link is tricky. It's possible we could - # be creating a previous link to a page that would have 0 entities. - # That would be suboptimal; what's a better way? - should_have_previous_link = False - if offset_keyname: - should_have_previous_link = True - if reverse_direction and not more: - should_have_previous_link = False - - if data: - first_key_name = data[0].key().name_or_id() - last_key_name = data[-1].key().name_or_id() - else: - first_key_name = None - last_key_name = None + del data[limit:] newest = next = prev = export_link = '' - link_creator = LinkCreator(request, idx, limit) + base_params = dict(i for i in request.GET.iteritems() if + i[0].startswith('offset_') or i[0].startswith('limit_')) if params.get('list_key_order'): - export_link = link_creator.create(export=True) + export_link = generateLinkForRequest(request, base_params, {'export' : idx}) - if should_have_next_link: - next = link_creator.create(offset_keyname=last_key_name) + if more: + # TODO(dbentley): here we need to implement a new field "last_key" + next = generateLinkForRequest(request, base_params, {offset_key : offset+limit, + limit_key : limit}) - if should_have_previous_link: - prev = link_creator.create(offset_keyname=first_key_name, - reverse_direction=True) + if offset > 0: + # TODO(dbentley): here we need to 
implement previous in the good way. + prev = generateLinkForRequest(request, base_params, + { offset_key : max(0, offset-limit), + limit_key : limit }) - newest = link_creator.create(offset_keyname='') + if offset > limit: + # Having a link to the first doesn't make sense on the first page (we're on + # it). It also doesn't make sense on the second page (because the first + # page is the previous page). - # TODO(dbentley): add a "last" link (which is now possible because we can - # query with a reverse keyname sorting + # NOTE(dbentley): I personally disagree that it's simpler to do that way, + # because sometimes you want to go to the first page without having to + # consider what page you're on now. + newest = generateLinkForRequest(request, base_params, {offset_key : 0, + limit_key : limit}) content = { 'idx': idx, 'data': data, 'export': export_link, - 'first': first_key_name, - 'last': last_key_name, + 'first': offset+1, + 'last': len(data) > 1 and offset+len(data) or None, 'logic': logic, 'limit': limit, 'newest': newest, diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/helper/params.py --- a/app/soc/views/helper/params.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/helper/params.py Tue May 26 02:37:39 2009 +0200 @@ -128,7 +128,7 @@ new_params['missing_redirect'] = '/%(url_name)s/create' % params new_params['delete_redirect'] = '/%(url_name)s/list' % params new_params['invite_redirect'] = '/request/list' - new_params['edit_cancel_redirect'] = '/%(url_name)s/list' % params + # new_params['cancel_redirect'] = '/%(url_name)s/list' % params new_params['public_redirect'] = None new_params['sidebar'] = None diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/helper/responses.py --- a/app/soc/views/helper/responses.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/helper/responses.py Tue May 26 02:37:39 2009 +0200 @@ -33,6 +33,7 @@ from soc.logic import system from soc.logic.models import site from soc.logic.models.user import logic as user_logic +from 
soc.modules import callback from soc.views import helper from soc.views.helper import redirects from soc.views.helper import templates @@ -125,7 +126,7 @@ context['sign_in'] = users.create_login_url(request.path) context['sign_out'] = users.create_logout_url(request.path) - context['sidebar_menu_items'] = sidebar.getSidebar(account, user) + context['sidebar_menu_items'] = callback.getCore().getSidebar(account, user) context['gae_version'] = system.getAppVersion() context['soc_release'] = system.getMelangeVersion() diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/models/base.py --- a/app/soc/views/models/base.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/models/base.py Tue May 26 02:37:39 2009 +0200 @@ -41,7 +41,8 @@ from soc.views.helper import redirects from soc.views.helper import requests from soc.views.helper import responses -from soc.views import sitemap +from soc.views.sitemap import sidebar +from soc.views.sitemap import sitemap import soc.cache.logic import soc.logic @@ -914,7 +915,7 @@ context['entity_type_plural'] = params['name_plural'] context['entity_type_short'] = params['name_short'] context['entity_type_url'] = params['url_name'] - context['edit_cancel_redirect'] = params.get('edit_cancel_redirect') + context['cancel_redirect'] = params.get('cancel_redirect') context['return_url'] = request.path if params.get('export_content_type') and entity: @@ -952,7 +953,7 @@ of _getSidebarItems on how it uses it. 
""" - return sitemap.sidebar.getSidebarMenus(id, user, params=params) + return sidebar.getSidebarMenus(id, user, params=params) @decorators.merge_params def getDjangoURLPatterns(self, params=None): @@ -967,5 +968,5 @@ params: a dict with params for this View """ - return sitemap.sitemap.getDjangoURLPatterns(params) + return sitemap.getDjangoURLPatterns(params) diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/models/organization.py --- a/app/soc/views/models/organization.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/models/organization.py Tue May 26 02:37:39 2009 +0200 @@ -554,6 +554,11 @@ ap_list = lists.getListContent(request, ap_params, filter, idx=0, need_content=True) + # this is a temporary fix for sorting Student Projects + # by Student name until we have a view that default + # sorts it self by name (right now we can't do such query) + ap_list['data'].sort(key=lambda sp: sp.student.name().lower()) + contents = [] if ap_list: diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/models/student_project.py --- a/app/soc/views/models/student_project.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/models/student_project.py Tue May 26 02:37:39 2009 +0200 @@ -552,7 +552,7 @@ responses.useJavaScript(context, params['js_uses_all']) context['page_name'] = page_name # cancel should go to the public view - params['edit_cancel_redirect'] = redirects.getPublicRedirect(entity, params) + params['cancel_redirect'] = redirects.getPublicRedirect(entity, params) if request.POST: return self.stEditPost(request, context, params, entity, **kwargs) diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/sitemap/build.py --- a/app/soc/views/sitemap/build.py Mon May 25 23:42:15 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -#!/usr/bin/python2.5 -# -# Copyright 2008 the Melange authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module that constructs the sitemap. -""" - -__authors__ = [ - '"Sverre Rabbelier" ', - ] - - -from django.conf.urls import defaults - -#from soc.views.models import club -#from soc.views.models import club_app -#from soc.views.models import club_admin -#from soc.views.models import club_member -from soc.views.models import cron -from soc.views.models import document -from soc.views.models import host -from soc.views.models import job -from soc.views.models import mentor -from soc.views.models import notification -from soc.views.models import organization -from soc.views.models import org_admin -from soc.views.models import org_app -from soc.views.models import priority_group -from soc.views.models import program -from soc.views.models import request -from soc.views.models import site -from soc.views.models import sponsor -from soc.views.models import student -from soc.views.models import student_project -from soc.views.models import student_proposal -from soc.views.models import timeline -from soc.views.models import user -from soc.views.models import user_self - -from soc.views.sitemap import sidebar -from soc.views.sitemap import sitemap - - -# TODO: instead of commenting out club stuff, make it depend on a setting - - -sidebar.addMenu(user_self.view.getSidebarMenus) -#sidebar.addMenu(club.view.getSidebarMenus) -#sidebar.addMenu(club.view.getExtraMenus) -#sidebar.addMenu(club_admin.view.getSidebarMenus) -#sidebar.addMenu(club_member.view.getSidebarMenus) -#sidebar.addMenu(club_app.view.getSidebarMenus) -sidebar.addMenu(site.view.getSidebarMenus) 
-sidebar.addMenu(user.view.getSidebarMenus) -#sidebar.addMenu(document.view.getSidebarMenus) -sidebar.addMenu(sponsor.view.getSidebarMenus) -sidebar.addMenu(sponsor.view.getExtraMenus) -sidebar.addMenu(host.view.getSidebarMenus) -sidebar.addMenu(request.view.getSidebarMenus) -sidebar.addMenu(program.view.getSidebarMenus) -sidebar.addMenu(program.view.getExtraMenus) -sidebar.addMenu(student.view.getSidebarMenus) -sidebar.addMenu(student_project.view.getSidebarMenus) -sidebar.addMenu(student_proposal.view.getSidebarMenus) -sidebar.addMenu(organization.view.getSidebarMenus) -sidebar.addMenu(organization.view.getExtraMenus) -sidebar.addMenu(org_admin.view.getSidebarMenus) -sidebar.addMenu(mentor.view.getSidebarMenus) -sidebar.addMenu(org_app.view.getSidebarMenus) - -#sitemap.addPages(club.view.getDjangoURLPatterns()) -#sitemap.addPages(club_admin.view.getDjangoURLPatterns()) -#sitemap.addPages(club_app.view.getDjangoURLPatterns()) -#sitemap.addPages(club_member.view.getDjangoURLPatterns()) -sitemap.addPages(cron.view.getDjangoURLPatterns()) -sitemap.addPages(document.view.getDjangoURLPatterns()) -sitemap.addPages(host.view.getDjangoURLPatterns()) -sitemap.addPages(job.view.getDjangoURLPatterns()) -sitemap.addPages(mentor.view.getDjangoURLPatterns()) -sitemap.addPages(notification.view.getDjangoURLPatterns()) -sitemap.addPages(organization.view.getDjangoURLPatterns()) -sitemap.addPages(org_admin.view.getDjangoURLPatterns()) -sitemap.addPages(org_app.view.getDjangoURLPatterns()) -sitemap.addPages(priority_group.view.getDjangoURLPatterns()) -sitemap.addPages(program.view.getDjangoURLPatterns()) -sitemap.addPages(request.view.getDjangoURLPatterns()) -sitemap.addPages(site.view.getDjangoURLPatterns()) -sitemap.addPages(sponsor.view.getDjangoURLPatterns()) -sitemap.addPages(student.view.getDjangoURLPatterns()) -sitemap.addPages(student_project.view.getDjangoURLPatterns()) -sitemap.addPages(student_proposal.view.getDjangoURLPatterns()) 
-sitemap.addPages(timeline.view.getDjangoURLPatterns()) -sitemap.addPages(user_self.view.getDjangoURLPatterns()) -sitemap.addPages(user.view.getDjangoURLPatterns()) - - -def getPatterns(): - """Retrieves all the url patterns of this site. - """ - return defaults.patterns(None, *sitemap.SITEMAP) diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/sitemap/sidebar.py --- a/app/soc/views/sitemap/sidebar.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/sitemap/sidebar.py Tue May 26 02:37:39 2009 +0200 @@ -24,38 +24,11 @@ from soc.views import out_of_band -import soc.cache.sidebar - -SIDEBAR = [] SIDEBAR_ACCESS_ARGS = ['SIDEBAR_CALLING'] SIDEBAR_ACCESS_KWARGS = {'SIDEBAR_CALLING': True} -def addMenu(callback): - """Adds a callback to the menu builder. - - The callback should return a list of menu's when called. - """ - global SIDEBAR - SIDEBAR.append(callback) - - -@soc.cache.sidebar.cache -def getSidebar(id, user): - """Constructs a sidebar for the current user. - """ - - sidebar = [] - - for callback in SIDEBAR: - menus = callback(id, user) - - for menu in (menus if menus else []): - sidebar.append(menu) - - return sorted(sidebar, key=lambda x: x.get('group')) - def getSidebarItems(params): """Retrieves a list of sidebar entries for this view. diff -r 3156760b4d26 -r 4cc66ab098e8 app/soc/views/sitemap/sitemap.py --- a/app/soc/views/sitemap/sitemap.py Mon May 25 23:42:15 2009 +0200 +++ b/app/soc/views/sitemap/sitemap.py Tue May 26 02:37:39 2009 +0200 @@ -22,17 +22,6 @@ ] -SITEMAP = [] - - -def addPages(pages): - """Adds the specified pages to the sitemap. - """ - - global SITEMAP - SITEMAP += pages - - def getDjangoURLPatterns(params): """Retrieves a list of sidebar entries for this View. 
diff -r 3156760b4d26 -r 4cc66ab098e8 app/urls.py --- a/app/urls.py Mon May 25 23:42:15 2009 +0200 +++ b/app/urls.py Tue May 26 02:37:39 2009 +0200 @@ -18,15 +18,15 @@ __authors__ = [ '"Augie Fackler" ', '"Todd Larsen" ', + '"Sverre Rabbelier" ', '"Lennard de Rijk" ', '"Pawel Solyga" ', ] -from soc.views.sitemap import build +from soc.modules import callback - -urlpatterns = build.getPatterns() +urlpatterns = callback.getCore().getPatterns() # define the error handlers handler404 = 'django.views.defaults.page_not_found' diff -r 3156760b4d26 -r 4cc66ab098e8 scripts/build.sh --- a/scripts/build.sh Mon May 25 23:42:15 2009 +0200 +++ b/scripts/build.sh Tue May 26 02:37:39 2009 +0200 @@ -10,8 +10,8 @@ DEFAULT_APP_BUILD=../build DEFAULT_APP_FOLDER="../app" -DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml main.py settings.py urls.py" -DEFAULT_APP_DIRS="soc ghop gsoc feedparser python25src reflistprop jquery ranklist json" +DEFAULT_APP_FILES="app.yaml cron.yaml index.yaml main.py settings.py shell.py urls.py gae_django.py" +DEFAULT_APP_DIRS="soc ghop gsoc feedparser python25src reflistprop jquery ranklist shell json htmlsanitizer" DEFAULT_ZIP_FILES="tiny_mce.zip" APP_BUILD=${APP_BUILD:-"${DEFAULT_APP_BUILD}"} @@ -20,6 +20,17 @@ APP_DIRS=${APP_DIRS:-"${DEFAULT_APP_DIRS}"} ZIP_FILES=${ZIP_FILES:-"${DEFAULT_ZIP_FILES}"} + +if [ "$1" != "--skip-pylint" ]; then + cd pylint + bash do_pylint.sh --silent + if [ "$?" != "1" ] ; then + echo ' Build failed. Build script encountered pylint errors.' + exit 1 + fi + cd .. 
+fi + if [ -e $APP_FOLDER ] ; then cd $APP_FOLDER else diff -r 3156760b4d26 -r 4cc66ab098e8 scripts/pylint/do_pylint.sh --- a/scripts/pylint/do_pylint.sh Mon May 25 23:42:15 2009 +0200 +++ b/scripts/pylint/do_pylint.sh Tue May 26 02:37:39 2009 +0200 @@ -36,7 +36,6 @@ PROJ_DIR=$(cd "$PROJ_DIR"; pwd) APP_DIR="${PROJ_DIR}/app" -# Note: We will add ghop and gsoc modules once there something in there CHECK_MODULES="soc reflistprop settings.py urls.py main.py" PYLINTRC=$(dirname "$0")/pylintrc @@ -60,3 +59,4 @@ done pylint $SILENT_ARGS $ARGS $CHECK_MODULES_PATHS +exit $? \ No newline at end of file diff -r 3156760b4d26 -r 4cc66ab098e8 scripts/stats.py --- a/scripts/stats.py Mon May 25 23:42:15 2009 +0200 +++ b/scripts/stats.py Tue May 26 02:37:39 2009 +0200 @@ -277,6 +277,21 @@ job_logic.updateOrCreateFromFields(job_fields) +def startUniqueUserIdConversion(): + """Creates the job that is responsible for adding unique user ids. + """ + + from soc.logic.models.job import logic as job_logic + from soc.logic.models.priority_group import logic as priority_logic + + priority_group = priority_logic.getGroup(priority_logic.CONVERT) + job_fields = { + 'priority_group': priority_group, + 'task_name': 'setupUniqueUserIdAdder'} + + job_logic.updateOrCreateFromFields(job_fields) + + def reviveJobs(amount): """Sets jobs that are stuck in 'aborted' to waiting. @@ -357,6 +372,87 @@ cPickle.dump(target, f) +def acceptedStudentsCSVExport(csv_filename, program_key_name): + """Exports all accepted Students for particular program into CSV file. 
+ """ + # TODO(Pawel.Solyga): Add additional Program parameter to this method + # so we export students from different programs + # TODO(Pawel.SOlyga): Make it universal so it works with both GHOP + # and GSoC programs + + from soc.models.student_project import StudentProject + from soc.models.student import Student + from soc.models.organization import Organization + + getStudentProjects = getEntities(StudentProject) + student_projects = getStudentProjects() + student_projects_amount = len(student_projects) + print "Fetched %d Student Projects." % student_projects_amount + print "Fetching Student entities from Student Projects." + accepted_students = {} + student_organization = {} + counter = 0 + for sp_key in student_projects.keys(): + key = student_projects[sp_key].student.key().name() + accepted_students[key] = student_projects[sp_key].student + org_name = student_projects[sp_key].scope.name + student_organization[key] = org_name + counter += 1 + print str(counter) + '/' + str(student_projects_amount) + ' ' + key + ' (' + org_name + ')' + print "All Student entities fetched." + + students_key_order = ['link_id', 'given_name', 'surname', + 'name_on_documents', 'email', 'res_street', 'res_city', 'res_state', + 'res_country', 'res_postalcode', 'phone', 'ship_street', 'ship_city', + 'ship_state', 'ship_country', 'ship_postalcode', 'birth_date', + 'tshirt_size', 'tshirt_style', 'name', 'school_name', 'school_country', + 'major', 'degree'] + + print "Preparing Students data for export." + students_data = [accepted_students[i].toDict(students_key_order) for i in accepted_students.keys()] + + print "Adding organization name to Students data." + for student in students_data: + student['organization'] = student_organization[program_key_name + '/' + student['link_id']] + + students_key_order.append('organization') + + saveDataToCSV(csv_filename, students_data, students_key_order) + print "Accepted Students exported to %s file." 
% csv_filename + + +def saveDataToCSV(csv_filename, data, key_order): + """Saves data in order into CSV file. + + This is a helper function used with acceptedStudentsCSVExport(). + """ + + import csv + import StringIO + + from soc.logic import dicts + + file_handler = StringIO.StringIO() + + writer = csv.DictWriter(file_handler, key_order, dialect='excel') + writer.writerow(dicts.identity(key_order)) + + # encode the data to UTF-8 to ensure compatibility + for row_dict in data: + for key in row_dict.keys(): + value = row_dict[key] + if isinstance(value, basestring): + row_dict[key] = value.encode("utf-8") + else: + row_dict[key] = str(value) + writer.writerow(row_dict) + + csv_data = file_handler.getvalue() + csv_file = open(csv_filename, 'w') + csv_file.write(csv_data) + csv_file.close() + + def main(args): """Main routine. """ @@ -411,6 +507,8 @@ 'startSpam': startSpam, 'reviveJobs': reviveJobs, 'deidleJobs': deidleJobs, + 'acceptedStudentsCSVExport': acceptedStudentsCSVExport, + 'startUniqueUserIdConversion': startUniqueUserIdConversion, } interactive.remote(args, context) diff -r 3156760b4d26 -r 4cc66ab098e8 tests/run.py --- a/tests/run.py Mon May 25 23:42:15 2009 +0200 +++ b/tests/run.py Tue May 26 02:37:39 2009 +0200 @@ -9,6 +9,7 @@ os.path.join(appengine_location, 'lib', 'django'), os.path.join(appengine_location, 'lib', 'webob'), os.path.join(appengine_location, 'lib', 'yaml', 'lib'), + os.path.join(appengine_location, 'lib', 'antlr3'), appengine_location, os.path.join(HERE, 'app'), os.path.join(HERE, 'thirdparty', 'coverage'), @@ -32,7 +33,9 @@ def afterTest(self, test): from google.appengine.api import apiproxy_stub_map datastore = apiproxy_stub_map.apiproxy.GetStub('datastore') - datastore.Clear() + # clear datastore iff one is available + if datastore is not None: + datastore.Clear() def main(): diff -r 3156760b4d26 -r 4cc66ab098e8 tests/test_functional.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_functional.py Tue May 26 02:37:39 
2009 +0200 @@ -0,0 +1,100 @@ +#!/usr/bin/python2.5 +# +# Copyright 2009 the Melange authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +__authors__ = [ + '"Matthew Wilkes" ', + ] + + +from gaeftest.test import FunctionalTestCase + +from zope.testbrowser import browser + +import os.path + + +class MelangeFunctionalTestCase(FunctionalTestCase): + """A base class for all functional tests in Melange. + + Tests MUST NOT be defined here, but the superclass requires a path + attribute that points to the app.yaml. Utility functions MAY be + declared here to be shared by all functional tests, but any + overridden unittest methods MUST call the superclass version. + """ + + path = os.path.abspath(__file__+"/../../app/app.yaml") + + +class TestBranding(MelangeFunctionalTestCase): + """Tests that ensure Melange properly displays attribution. + + Other notices, as required by the project and/or law, are tested + here as well. + """ + + def test_attribution(self): + """Ensure that the front page asserts that it is a Melange app. + """ + + tb = browser.Browser() + tb.open("http://127.0.0.1:8080/site/show/site") + + self.assertTrue("Powered by Melange" in tb.contents) + + +class TestLogin(MelangeFunctionalTestCase): + """Tests that check the login system is functioning correctly. + + Also tests that users go through the correct registration workflow. + """ + + def test_firstLogin(self): + """Ensure that new users are prompted to create a profile. 
+ + Also test that only new users are prompted. + """ + + tb = browser.Browser() + tb.open("http://127.0.0.1:8080") + + tb.getLink("Sign in").click() + self.assertTrue("login" in tb.url) + + # fill in dev_appserver login form + tb.getForm().getControl("Email").value = "newuser@example.com" + tb.getForm().getControl("Login").click() + + self.assertTrue(tb.url.endswith("/show/site")) + self.assertTrue('Please create ' + 'User Profile in order to view this page' in tb.contents) + + tb.getLink("User Profile").click() + + # fill in the user profile + cp = tb.getForm(action="create_profile") + cp.getControl(name="link_id").value = "exampleuser" + cp.getControl(name="name").value = "Example user" + cp.getControl("Save").click() + + # if all is well, we go to the edit page + self.assertTrue("edit_profile" in tb.url) + + tb.open("http://127.0.0.1:8080") + + # call to action no longer on front page + self.assertFalse('Please create ' + 'User Profile in order to view this page' in tb.contents) \ No newline at end of file
    foo Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    - {% if edit_cancel_redirect %} - - {% else %} - - {% endif %} + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    {{ list.item.subject }}
    {{ list.item.created_on }}
    {{ list.item.created_on|date:"jS F Y H:i" }}
    Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}
    Please select the appropriate action:
    - + {% if cancel_redirect %} + + {% endif %} + value="Cancel"/> + {% else %} + + {% endif %}