diff -r 57b4279d8c4e -r 03e267d67478 app/django/core/serializers/xml_serializer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app/django/core/serializers/xml_serializer.py Fri Jul 18 18:22:23 2008 +0000 @@ -0,0 +1,237 @@ +""" +XML serializer. +""" + +from django.conf import settings +from django.core.serializers import base +from django.db import models +from django.utils.xmlutils import SimplerXMLGenerator +from django.utils.encoding import smart_unicode +from xml.dom import pulldom + +class Serializer(base.Serializer): + """ + Serializes a QuerySet to XML. + """ + + def indent(self, level): + if self.options.get('indent', None) is not None: + self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent', None) * level) + + def start_serialization(self): + """ + Start serialization -- open the XML document and the root element. + """ + self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET)) + self.xml.startDocument() + self.xml.startElement("django-objects", {"version" : "1.0"}) + + def end_serialization(self): + """ + End serialization -- end the document. + """ + self.indent(0) + self.xml.endElement("django-objects") + self.xml.endDocument() + + def start_object(self, obj): + """ + Called as each object is handled. + """ + if not hasattr(obj, "_meta"): + raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj)) + + self.indent(1) + self.xml.startElement("object", { + "pk" : smart_unicode(obj._get_pk_val()), + "model" : smart_unicode(obj._meta), + }) + + def end_object(self, obj): + """ + Called after handling all fields for an object. + """ + self.indent(1) + self.xml.endElement("object") + + def handle_field(self, obj, field): + """ + Called to handle each field on an object (except for ForeignKeys and + ManyToManyFields) + """ + self.indent(2) + self.xml.startElement("field", { + "name" : field.name, + "type" : field.get_internal_type() + }) + + # Get a "string version" of the object's data (this is handled by the + # serializer base class). + if getattr(obj, field.name) is not None: + value = self.get_string_value(obj, field) + self.xml.characters(smart_unicode(value)) + else: + self.xml.addQuickElement("None") + + self.xml.endElement("field") + + def handle_fk_field(self, obj, field): + """ + Called to handle a ForeignKey (we need to treat them slightly + differently from regular fields). + """ + self._start_relational_field(field) + related = getattr(obj, field.name) + if related is not None: + if field.rel.field_name == related._meta.pk.name: + # Related to remote object via primary key + related = related._get_pk_val() + else: + # Related to remote object via other field + related = getattr(related, field.rel.field_name) + self.xml.characters(smart_unicode(related)) + else: + self.xml.addQuickElement("None") + self.xml.endElement("field") + + def handle_m2m_field(self, obj, field): + """ + Called to handle a ManyToManyField. Related objects are only + serialized as references to the object's PK (i.e. the related *data* + is not dumped, just the relation). + """ + self._start_relational_field(field) + for relobj in getattr(obj, field.name).iterator(): + self.xml.addQuickElement("object", attrs={"pk" : smart_unicode(relobj._get_pk_val())}) + self.xml.endElement("field") + + def _start_relational_field(self, field): + """ + Helper to output the element for relational fields + """ + self.indent(2) + self.xml.startElement("field", { + "name" : field.name, + "rel" : field.rel.__class__.__name__, + "to" : smart_unicode(field.rel.to._meta), + }) + +class Deserializer(base.Deserializer): + """ + Deserialize XML. + """ + + def __init__(self, stream_or_string, **options): + super(Deserializer, self).__init__(stream_or_string, **options) + self.event_stream = pulldom.parse(self.stream) + + def next(self): + for event, node in self.event_stream: + if event == "START_ELEMENT" and node.nodeName == "object": + self.event_stream.expandNode(node) + return self._handle_object(node) + raise StopIteration + + def _handle_object(self, node): + """ + Convert an node to a DeserializedObject. + """ + # Look up the model using the model loading mechanism. If this fails, + # bail. + Model = self._get_model_from_node(node, "model") + + # Start building a data dictionary from the object. If the node is + # missing the pk attribute, bail. + pk = node.getAttribute("pk") + if not pk: + raise base.DeserializationError(" node is missing the 'pk' attribute") + + data = {Model._meta.pk.attname : Model._meta.pk.to_python(pk)} + + # Also start building a dict of m2m data (this is saved as + # {m2m_accessor_attribute : [list_of_related_objects]}) + m2m_data = {} + + # Deseralize each field. + for field_node in node.getElementsByTagName("field"): + # If the field is missing the name attribute, bail (are you + # sensing a pattern here?) + field_name = field_node.getAttribute("name") + if not field_name: + raise base.DeserializationError(" node is missing the 'name' attribute") + + # Get the field from the Model. This will raise a + # FieldDoesNotExist if, well, the field doesn't exist, which will + # be propagated correctly. + field = Model._meta.get_field(field_name) + + # As is usually the case, relation fields get the special treatment. + if field.rel and isinstance(field.rel, models.ManyToManyRel): + m2m_data[field.name] = self._handle_m2m_field_node(field_node, field) + elif field.rel and isinstance(field.rel, models.ManyToOneRel): + data[field.attname] = self._handle_fk_field_node(field_node, field) + else: + if field_node.getElementsByTagName('None'): + value = None + else: + value = field.to_python(getInnerText(field_node).strip()) + data[field.name] = value + + # Return a DeserializedObject so that the m2m data has a place to live. + return base.DeserializedObject(Model(**data), m2m_data) + + def _handle_fk_field_node(self, node, field): + """ + Handle a node for a ForeignKey + """ + # Check if there is a child node named 'None', returning None if so. + if node.getElementsByTagName('None'): + return None + else: + return field.rel.to._meta.get_field(field.rel.field_name).to_python( + getInnerText(node).strip()) + + def _handle_m2m_field_node(self, node, field): + """ + Handle a node for a ManyToManyField. + """ + return [field.rel.to._meta.pk.to_python( + c.getAttribute("pk")) + for c in node.getElementsByTagName("object")] + + def _get_model_from_node(self, node, attr): + """ + Helper to look up a model from a or a node. + """ + model_identifier = node.getAttribute(attr) + if not model_identifier: + raise base.DeserializationError( + "<%s> node is missing the required '%s' attribute" \ + % (node.nodeName, attr)) + try: + Model = models.get_model(*model_identifier.split(".")) + except TypeError: + Model = None + if Model is None: + raise base.DeserializationError( + "<%s> node has invalid model identifier: '%s'" % \ + (node.nodeName, model_identifier)) + return Model + + +def getInnerText(node): + """ + Get all the inner text of a DOM node (recursively). + """ + # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html + inner_text = [] + for child in node.childNodes: + if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE: + inner_text.append(child.data) + elif child.nodeType == child.ELEMENT_NODE: + inner_text.extend(getInnerText(child)) + else: + pass + return u"".join(inner_text) +