thirdparty/google_appengine/google/appengine/api/datastore_types.py
author Todd Larsen <tlarsen@google.com>
Tue, 26 Aug 2008 21:49:54 +0000
changeset 109 620f9b141567
child 149 f2e327a7c5de
permissions -rwxr-xr-x
Load ../../google_appengine into trunk/thirdparty/google_appengine.

#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Higher-level, semantic data types for the datastore. These types
are expected to be set as attributes of Entities.  See "Supported Data Types"
in the API Guide.

Most of these types are based on XML elements from Atom and GData elements
from the atom and gd namespaces. For more information, see:

  http://www.atomenabled.org/developers/syndication/
  http://code.google.com/apis/gdata/common-elements.html

The namespace schemas are:

  http://www.w3.org/2005/Atom
  http://schemas.google.com/g/2005
"""





import base64
import calendar
import datetime
import os
import re
import string
import time
import urlparse
from xml.sax import saxutils
from google.appengine.datastore import datastore_pb
from google.appengine.api import datastore_errors
from google.appengine.api import users
from google.net.proto import ProtocolBuffer
from google.appengine.datastore import entity_pb

# Placeholder app id. Key.from_path() stores it when neither a parent nor an
# explicit _app is given, and Key.__cmp__ skips the app comparison when either
# key carries it.
_LOCAL_APP_ID = u':self'

# Default upper bound, in bytes of the UTF-8 encoding, enforced by
# ValidateString().
_MAX_STRING_LENGTH = 500

# Upper bound used for Link values instead of _MAX_STRING_LENGTH.
_MAX_LINK_PROPERTY_LENGTH = 2083

# Property names that start and end with a double underscore are reserved;
# ToPropertyPb() rejects them.
RESERVED_PROPERTY_NAME = re.compile(r'^__.*__$')

class UtcTzinfo(datetime.tzinfo):
  """A tzinfo describing UTC: zero UTC offset and zero DST adjustment."""

  def utcoffset(self, dt):
    """Returns the offset from UTC, which is always zero."""
    return datetime.timedelta(0)

  def dst(self, dt):
    """Returns the daylight saving adjustment, which is always zero."""
    return datetime.timedelta(0)

  def tzname(self, dt):
    """Returns the time zone name, 'UTC'."""
    return 'UTC'

  def __repr__(self):
    """Returns the name of the module-level singleton, 'datastore_types.UTC'."""
    return 'datastore_types.UTC'


# Shared instance; ToPropertyPb() uses it to normalize aware datetimes.
UTC = UtcTzinfo()


def typename(obj):
  """Returns the name of obj's type as a string.

  Prefers the name of obj.__class__ and falls back to the name of type(obj)
  for objects that do not expose a __class__ attribute.

  Args:
    obj: any object

  Returns:
    string
  """
  if not hasattr(obj, '__class__'):
    return type(obj).__name__
  return obj.__class__.__name__


def ValidateString(value, name='Value',
                   exception=datastore_errors.BadValueError,
                   max_len=_MAX_STRING_LENGTH):
  """Checks that value is an acceptable string, raising otherwise.

  A value is accepted when it is a str or unicode instance (or subclass) other
  than a Blob, is not the empty string, and its UTF-8 encoding is at most
  max_len bytes long.

  Args:
    value: the value to validate.
    name: the name of this value; used in the exception message.
    exception: the type of exception to raise; defaults to BadValueError.
    max_len: the maximum allowed length, in bytes; defaults to
      _MAX_STRING_LENGTH.

  Raises:
    An instance of the given exception type if any check fails.
  """
  is_plain_string = (isinstance(value, basestring) and
                     not isinstance(value, Blob))
  if not is_plain_string:
    raise exception('%s should be a string; received %s (a %s):' %
                    (name, value, typename(value)))

  if value == '':
    raise exception('%s must not be empty.' % name)

  # The limit applies to the encoded form, not to the number of characters.
  encoded_length = len(value.encode('utf-8'))
  if encoded_length > max_len:
    raise exception('%s must be under %d bytes.' % (name, max_len))


class Key(object):
  """The primary key for a datastore entity.

  A datastore GUID. A Key instance uniquely identifies an entity across all
  apps, and includes all information necessary to fetch the entity from the
  datastore with Get().

  Key implements __hash__, and key instances are immutable, so Keys may be
  used in sets and as dictionary keys.
  """
  __reference = None

  def __init__(self, encoded=None):
    """Constructor. Creates a Key from a string.

    Args:
      # a base64-encoded primary key, generated by Key.__str__
      encoded: str
    """
    if encoded is not None:
      if not isinstance(encoded, basestring):
        try:
          repr_encoded = repr(encoded)
        except:
          repr_encoded = "<couldn't encode>"
        raise datastore_errors.BadArgumentError(
          'Key() expects a string; received %s (a %s).' %
          (repr_encoded, typename(encoded)))
      try:
        modulo = len(encoded) % 4
        if modulo != 0:
          encoded += ('=' * (4 - modulo))

        encoded_pb = base64.urlsafe_b64decode(str(encoded))
        self.__reference = entity_pb.Reference(encoded_pb)
        assert self.__reference.IsInitialized()

      except (AssertionError, TypeError), e:
        raise datastore_errors.BadKeyError(
          'Invalid string key %s. Details: %s' % (encoded, e))
      except Exception, e:
        if e.__class__.__name__ == 'ProtocolBufferDecodeError':
          raise datastore_errors.BadKeyError('Invalid string key %s.' % encoded)
        else:
          raise
    else:
      self.__reference = entity_pb.Reference()

  @staticmethod
  def from_path(*args, **kwds):
    """Static method to construct a Key out of a "path" (kind, id or name, ...).

    This is useful when an application wants to use just the id or name portion
    of a key in e.g. a URL, where the rest of the URL provides enough context to
    fill in the rest, i.e. the app id (always implicit), the entity kind, and
    possibly an ancestor key. Since ids and names are usually small, they're
    more attractive for use in end-user-visible URLs than the full string
    representation of a key.

    Args:
      kind: the entity kind (a str or unicode instance)
      id_or_name: the id (an int or long) or name (a str or unicode instance)

    Additional positional arguments are allowed and should be
    alternating kind and id/name.

    Keyword args:
      parent: optional parent Key; default None.

    Returns:
      A new Key instance whose .kind() and .id() or .name() methods return
      the *last* kind and id or name positional arguments passed.

    Raises:
      BadArgumentError for invalid arguments.
      BadKeyError if the parent key is incomplete.
    """
    parent = kwds.pop('parent', None)
    _app = kwds.pop('_app', None)

    if kwds:
      raise datastore_errors.BadArgumentError(
          'Excess keyword arguments ' + repr(kwds))

    if not args or len(args) % 2:
      raise datastore_errors.BadArgumentError(
          'A non-zero even number of positional arguments is required '
          '(kind, id or name, kind, id or name, ...); received %s' % repr(args))

    if _app is not None:
      if not isinstance(_app, basestring):
        raise datastore_errors.BadArgumentError(
          'Expected a string _app; received %r (a %s).' %
          (_app, typename(_app)))

    if parent is not None:
      if not isinstance(parent, Key):
        raise datastore_errors.BadArgumentError(
            'Expected None or a Key as parent; received %r (a %s).' %
            (parent, typename(parent)))
      if not parent.has_id_or_name():
        raise datastore_errors.BadKeyError(
            'The parent Key is incomplete.')
      if _app is not None and _app != parent.app():
        raise datastore_errors.BadArgumentError(
            'The _app argument (%r) should match parent.app() (%s)' %
            (_app, parent.app()))

    key = Key()
    ref = key.__reference
    if parent is not None:
      ref.CopyFrom(parent.__reference)
    elif _app is not None:
      ref.set_app(_app)
    else:
      ref.set_app(_LOCAL_APP_ID)

    path = ref.mutable_path()
    for i in xrange(0, len(args), 2):
      kind, id_or_name = args[i:i+2]
      if isinstance(kind, basestring):
        kind = kind.encode('utf-8')
      else:
        raise datastore_errors.BadArgumentError(
            'Expected a string kind as argument %d; received %r (a %s).' %
            (i + 1, kind, typename(kind)))
      elem = path.add_element()
      elem.set_type(kind)
      if isinstance(id_or_name, (int, long)):
        elem.set_id(id_or_name)
      elif isinstance(id_or_name, basestring):
        ValidateString(id_or_name, 'name')
        if id_or_name and id_or_name[0] in string.digits:
          raise datastore_errors.BadArgumentError(
            'Names may not begin with a digit; received %s.' % id_or_name)
        elem.set_name(id_or_name.encode('utf-8'))
      else:
        raise datastore_errors.BadArgumentError(
            'Expected an integer id or string name as argument %d; '
            'received %r (a %s).' % (i + 2, id_or_name, typename(id_or_name)))

    assert ref.IsInitialized()
    return key

  def app(self):
    """Returns this entity's app id, a string."""
    if self.__reference.app():
      return self.__reference.app().decode('utf-8')
    else:
      return None

  def kind(self):
    """Returns this entity's kind, as a string."""
    if self.__reference.path().element_size() > 0:
      encoded = self.__reference.path().element_list()[-1].type()
      return unicode(encoded.decode('utf-8'))
    else:
      return None

  def id(self):
    """Returns this entity's id, or None if it doesn't have one."""
    elems = self.__reference.path().element_list()
    if elems and elems[-1].has_id() and elems[-1].id():
      return elems[-1].id()
    else:
      return None

  def name(self):
    """Returns this entity's name, or None if it doesn't have one."""
    elems = self.__reference.path().element_list()
    if elems and elems[-1].has_name() and elems[-1].name():
      return elems[-1].name().decode('utf-8')
    else:
      return None

  def id_or_name(self):
    """Returns this entity's id or name, whichever it has, or None."""
    if self.id() is not None:
      return self.id()
    else:
      return self.name()

  def has_id_or_name(self):
    """Returns True if this entity has an id or name, False otherwise.
    """
    return self.id_or_name() is not None

  def parent(self):
    """Returns this entity's parent, as a Key. If this entity has no parent,
    returns None."""
    if self.__reference.path().element_size() > 1:
      parent = Key()
      parent.__reference.CopyFrom(self.__reference)
      parent.__reference.path().element_list().pop()
      return parent
    else:
      return None

  def ToTagUri(self):
    """Returns a tag: URI for this entity for use in XML output.

    Foreign keys for entities may be represented in XML output as tag URIs.
    RFC 4151 describes the tag URI scheme. From http://taguri.org/:

      The tag algorithm lets people mint - create - identifiers that no one
      else using the same algorithm could ever mint. It is simple enough to do
      in your head, and the resulting identifiers can be easy to read, write,
      and remember. The identifiers conform to the URI (URL) Syntax.

    Tag URIs for entities use the app's auth domain and the date that the URI
     is generated. The namespace-specific part is <kind>[<key>].

    For example, here is the tag URI for a Kitten with the key "Fluffy" in the
    catsinsinks app:

      tag:catsinsinks.googleapps.com,2006-08-29:Kitten[Fluffy]

    Raises a BadKeyError if this entity's key is incomplete.
    """
    if not self.has_id_or_name():
      raise datastore_errors.BadKeyError(
        'ToTagUri() called for an entity with an incomplete key.')

    return u'tag:%s.%s,%s:%s[%s]' % (saxutils.escape(self.app()),
                                     os.environ['AUTH_DOMAIN'],
                                     datetime.date.today().isoformat(),
                                     saxutils.escape(self.kind()),
                                     saxutils.escape(str(self)))
  ToXml = ToTagUri

  def entity_group(self):
    """Returns this key's entity group as a Key.

    Note that the returned Key will be incomplete if this Key is for a root
    entity and it is incomplete.
    """
    group = Key._FromPb(self.__reference)
    del group.__reference.path().element_list()[1:]
    return group

  @staticmethod
  def _FromPb(pb):
    """Static factory method. Creates a Key from an entity_pb.Reference.

    Not intended to be used by application developers. Enforced by hiding the
    entity_pb classes.

    Args:
      pb: entity_pb.Reference
    """
    if not isinstance(pb, entity_pb.Reference):
      raise datastore_errors.BadArgumentError(
        'Key constructor takes an entity_pb.Reference; received %s (a %s).' %
        (pb, typename(pb)))

    key = Key()
    key.__reference = entity_pb.Reference()
    key.__reference.CopyFrom(pb)
    return key

  def _ToPb(self):
    """Converts this Key to its protocol buffer representation.

    Not intended to be used by application developers. Enforced by hiding the
    entity_pb classes.

    Returns:
      # the Reference PB representation of this Key
      entity_pb.Reference
    """
    pb = entity_pb.Reference()
    pb.CopyFrom(self.__reference)
    if not self.has_id_or_name():
      pb.mutable_path().element_list()[-1].set_id(0)

    pb.app().decode('utf-8')
    for pathelem in pb.path().element_list():
      pathelem.type().decode('utf-8')

    return pb

  def __str__(self):
    """Encodes this Key as an opaque string.

    Returns a string representation of this key, suitable for use in HTML,
    URLs, and other similar use cases. If the entity's key is incomplete,
    raises a BadKeyError.

    Unfortunately, this string encoding isn't particularly compact, and its
    length varies with the length of the path. If you want a shorter identifier
    and you know the kind and parent (if any) ahead of time, consider using just
    the entity's id or name.

    Returns:
      string
    """
    if (self.has_id_or_name()):
      encoded = base64.urlsafe_b64encode(self.__reference.Encode())
      return encoded.replace('=', '')
    else:
      raise datastore_errors.BadKeyError(
        'Cannot string encode an incomplete key!\n%s' % self.__reference)

  def __repr__(self):
    """Returns an eval()able string representation of this key.

    Returns a Python string of the form 'datastore_types.Key.from_path(...)'
    that can be used to recreate this key.

    Returns:
      string
    """
    args = []
    for elem in self.__reference.path().element_list():
      args.append(repr(elem.type()))
      if elem.has_name():
        args.append(repr(elem.name().decode('utf-8')))
      else:
        args.append(repr(elem.id()))

    args.append('_app=%r' % self.__reference.app().decode('utf-8'))
    return u'datastore_types.Key.from_path(%s)' % ', '.join(args)

  def __cmp__(self, other):
    """Returns negative, zero, or positive when comparing two keys.

    TODO(ryanb): for API v2, we should change this to make incomplete keys, ie
    keys without an id or name, not equal to any other keys.

    Args:
      other: Key to compare to.

    Returns:
      Negative if self is less than "other"
      Zero if "other" is equal to self
      Positive if self is greater than "other"
    """
    if not isinstance(other, Key):
      return -2

    self_args = []
    other_args = []

    if (self.app() in (_LOCAL_APP_ID, None) or
        other.app() in (_LOCAL_APP_ID, None)):
      pass
    else:
      self_args.append(self.__reference.app().decode('utf-8'))
      other_args.append(other.__reference.app().decode('utf-8'))

    for elem in self.__reference.path().element_list():
      self_args.append(repr(elem.type()))
      if elem.has_name():
        self_args.append(repr(elem.name().decode('utf-8')))
      else:
        self_args.append(elem.id())

    for elem in other.__reference.path().element_list():
      other_args.append(repr(elem.type()))
      if elem.has_name():
        other_args.append(repr(elem.name().decode('utf-8')))
      else:
        other_args.append(elem.id())

    result = cmp(self_args, other_args)
    return result

  def __hash__(self):
    """Returns a 32-bit integer hash of this key.

    Implements Python's hash protocol so that Keys may be used in sets and as
    dictionary keys.

    Returns:
      int
    """
    return hash(self.__str__())


class Category(unicode):
  """A tag, ie a descriptive word or phrase. Entities may be tagged by users,
  and later returned by queries for that tag. Tags can also be used for
  ranking results (frequency), photo captions, clustering, activity, etc.

  Here's a more in-depth description:  http://www.zeldman.com/daily/0405d.shtml

  This is the Atom "category" element. In XML output, the tag is provided as
  the label attribute; the term attribute is always Category.TERM. See:
  http://www.atomenabled.org/developers/syndication/#category

  Raises BadValueError if tag is not a valid string (see ValidateString).
  """
  TERM = 'user-tag'

  def __init__(self, tag):
    # The string value is already set by unicode.__new__; the inherited
    # initializer takes no further arguments.
    super(Category, self).__init__()
    ValidateString(tag, 'tag')

  def ToXml(self):
    """Returns this tag as an Atom <category> element."""
    return u'<category term="%s" label=%s />' % (Category.TERM,
                                                 saxutils.quoteattr(self))


class Link(unicode):
  """A fully qualified URL. Usually http: scheme, but may also be file:, ftp:,
  news:, among others.

  If you have email (mailto:) or instant messaging (aim:, xmpp:) links,
  consider using the Email or IM classes instead.

  This is the Atom "link" element. In XML output, the link is provided as the
  href attribute. See:
  http://www.atomenabled.org/developers/syndication/#link

  Raises BadValueError if link is not a valid string of at most
  _MAX_LINK_PROPERTY_LENGTH bytes, or is not a fully qualified, well-formed
  URL.
  """
  def __init__(self, link):
    # The string value is already set by unicode.__new__; the inherited
    # initializer takes no further arguments.
    super(Link, self).__init__()
    ValidateString(link, 'link', max_len=_MAX_LINK_PROPERTY_LENGTH)

    # A scheme is always required. file: URLs additionally need a path;
    # every other scheme needs a network location.
    scheme, domain, path, params, query, fragment = urlparse.urlparse(link)
    if (not scheme or (scheme != 'file' and not domain) or
                      (scheme == 'file' and not path)):
      raise datastore_errors.BadValueError('Invalid URL: %s' % link)

  def ToXml(self):
    """Returns this URL as an Atom <link> element."""
    return u'<link href=%s />' % saxutils.quoteattr(self)


class Email(unicode):
  """An RFC2822 email address. Makes no attempt at validation; apart from
  checking MX records, email address validation is a rathole.

  This is the gd:email element. In XML output, the email address is provided as
  the address attribute. See:
  http://code.google.com/apis/gdata/common-elements.html#gdEmail

  Raises BadValueError if email is not a valid string (see ValidateString);
  the address format itself is not checked.
  """
  def __init__(self, email):
    # The string value is already set by unicode.__new__; the inherited
    # initializer takes no further arguments.
    super(Email, self).__init__()
    ValidateString(email, 'email')

  def ToXml(self):
    """Returns this address as a <gd:email> element."""
    return u'<gd:email address=%s />' % saxutils.quoteattr(self)


class GeoPt(object):
  """A geographical point, specified by floating-point latitude and longitude
  coordinates. Often used to integrate with mapping sites like Google Maps.
  May also be used as ICBM coordinates.

  This is the georss:point element. In XML output, the coordinates are
  provided as the space-separated text of the element. See: http://georss.org/

  Serializes to '<lat>,<lon>'. Raises BadValueError if it's passed an invalid
  serialized string, or if lat and lon are not valid floating points in the
  ranges [-90, 90] and [-180, 180], respectively.
  """
  lat = None
  lon = None

  def __init__(self, lat, lon=None):
    """Constructor.

    Args:
      lat: the latitude, or a serialized '<lat>,<lon>' string when lon is
        omitted
      lon: the longitude; default None

    Raises:
      BadValueError if the arguments cannot be parsed or are out of range.
    """
    if lon is None:
      # Single-argument form: lat holds the serialized point.
      try:
        lat, lon = lat.split(',')
      except (AttributeError, ValueError):
        raise datastore_errors.BadValueError(
          'Expected a "lat,long" formatted string; received %s (a %s).' %
          (lat, typename(lat)))

    try:
      lat = float(lat)
      lon = float(lon)
      if abs(lat) > 90:
        raise datastore_errors.BadValueError(
          'Latitude must be between -90 and 90; received %f' % lat)
      if abs(lon) > 180:
        raise datastore_errors.BadValueError(
          'Longitude must be between -180 and 180; received %f' % lon)
    except (TypeError, ValueError):
      raise datastore_errors.BadValueError(
        'Expected floats for lat and long; received %s (a %s) and %s (a %s).' %
        (lat, typename(lat), lon, typename(lon)))

    self.lat, self.lon = lat, lon

  def __cmp__(self, other):
    """Orders points by latitude, then by longitude.

    A non-GeoPt operand is first converted with GeoPt(other); if that fails,
    NotImplemented is returned.
    """
    if not isinstance(other, GeoPt):
      try:
        other = GeoPt(other)
      except datastore_errors.BadValueError:
        return NotImplemented

    # Longitude only matters when the latitudes compare equal.
    return cmp(self.lat, other.lat) or cmp(self.lon, other.lon)

  def __hash__(self):
    """Returns a 32-bit integer hash of this point.

    Implements Python's hash protocol so that GeoPts may be used in sets and
    as dictionary keys.

    Returns:
      int
    """
    coordinates = (self.lat, self.lon)
    return hash(coordinates)

  def __repr__(self):
    """Returns an eval()able string representation of this GeoPt.

    The returned string is of the form 'datastore_types.GeoPt([lat], [lon])'.

    Returns:
      string
    """
    return 'datastore_types.GeoPt(%r, %r)' % (self.lat, self.lon)

  def __unicode__(self):
    """Returns the serialized form, u'<lat>,<lon>'."""
    return u','.join([unicode(self.lat), unicode(self.lon)])

  __str__ = __unicode__

  def ToXml(self):
    """Returns this point as a <georss:point> element."""
    return u'<georss:point>%s %s</georss:point>' % (unicode(self.lat),
                                                    unicode(self.lon))

class IM(object):
  """An instant messaging handle. Includes both an address and its protocol.
  The protocol value is either a standard IM scheme or a URL identifying the
  IM network for the protocol. Possible values include:

    Value                           Description
    sip                             SIP/SIMPLE
    unknown                         Unknown or unspecified
    xmpp                            XMPP/Jabber
    http://aim.com/                 AIM
    http://icq.com/                 ICQ
    http://talk.google.com/         Google Talk
    http://messenger.msn.com/       MSN Messenger
    http://messenger.yahoo.com/     Yahoo Messenger
    http://sametime.com/            Lotus Sametime
    http://gadu-gadu.pl/            Gadu-Gadu

  This is the gd:im element. In XML output, the address and protocol are
  provided as the address and protocol attributes, respectively. See:
  http://code.google.com/apis/gdata/common-elements.html#gdIm

  Serializes to '<protocol> <address>'. Raises BadValueError if the protocol
  is not a standard IM scheme or a URL.
  """
  # Protocols accepted as-is; any other protocol must be a valid Link.
  PROTOCOLS = [ 'sip', 'unknown', 'xmpp' ]

  protocol = None
  address = None

  def __init__(self, protocol, address=None):
    """Constructor.

    Args:
      protocol: the protocol, or a serialized '<protocol> <address>' string
        when address is omitted
      address: the IM address; default None

    Raises:
      BadValueError if the arguments cannot be parsed, the address is not a
      valid string, or the protocol is neither in PROTOCOLS nor a valid URL.
    """
    if address is None:
      try:
        # Split on the first space only. The serialized form is
        # '<protocol> <address>', so everything after the first space is the
        # address, even if it contains spaces itself.
        split = protocol.split(' ', 1)
        protocol, address = split
      except (AttributeError, ValueError):
        raise datastore_errors.BadValueError(
          'Expected string of format "protocol address"; received %s' %
          str(protocol))

    ValidateString(address, 'address')
    if protocol not in self.PROTOCOLS:
      # Constructing a Link validates the protocol as a URL; the resulting
      # object is discarded.
      Link(protocol)

    self.address = address
    self.protocol = protocol

  def __cmp__(self, other):
    """Orders IMs by address, then by protocol.

    A non-IM operand is first converted with IM(other); if that fails,
    NotImplemented is returned.
    """
    if not isinstance(other, IM):
      try:
        other = IM(other)
      except datastore_errors.BadValueError:
        return NotImplemented

    return cmp((self.address, self.protocol),
               (other.address, other.protocol))

  def __hash__(self):
    """Returns an integer hash of this IM.

    Hashes the same fields that __cmp__ compares, so that IM instances that
    compare equal also hash equal.

    Returns:
      int
    """
    return hash((self.address, self.protocol))

  def __repr__(self):
    """Returns an eval()able string representation of this IM.

    The returned string is of the form:

      datastore_types.IM('protocol', 'address')

    Returns:
      string
    """
    return 'datastore_types.IM(%r, %r)' % (self.protocol, self.address)

  def __unicode__(self):
    """Returns the serialized form, u'<protocol> <address>'."""
    return u'%s %s' % (self.protocol, self.address)

  __str__ = __unicode__

  def ToXml(self):
    """Returns this handle as a <gd:im> element."""
    return (u'<gd:im protocol=%s address=%s />' %
            (saxutils.quoteattr(self.protocol),
             saxutils.quoteattr(self.address)))

  def __len__(self):
    """Returns the length of the serialized form."""
    return len(unicode(self))

class PhoneNumber(unicode):
  """A human-readable phone number or address.

  No validation of the number format is performed. Phone numbers have many
  different formats - local, long distance, domestic, international, internal
  extension, TTY, VOIP, SMS, and alternative networks like Skype, XFire and
  Roger Wilco. They all have their own numbering and addressing formats.

  This is the gd:phoneNumber element. In XML output, the phone number is
  provided as the text of the element. See:
  http://code.google.com/apis/gdata/common-elements.html#gdPhoneNumber

  Raises BadValueError if phone is not a valid string (see ValidateString).
  """
  def __init__(self, phone):
    # The string value is already set by unicode.__new__; the inherited
    # initializer takes no further arguments.
    super(PhoneNumber, self).__init__()
    ValidateString(phone, 'phone')

  def ToXml(self):
    """Returns this number as a <gd:phoneNumber> element."""
    return u'<gd:phoneNumber>%s</gd:phoneNumber>' % saxutils.escape(self)


class PostalAddress(unicode):
  """A human-readable mailing address. Again, mailing address formats vary
  widely, so no validation of the address format is performed.

  This is the gd:postalAddress element. In XML output, the address is provided
  as the text of the element. See:
  http://code.google.com/apis/gdata/common-elements.html#gdPostalAddress

  Raises BadValueError if address is not a valid string (see ValidateString).
  """
  def __init__(self, address):
    # The string value is already set by unicode.__new__; the inherited
    # initializer takes no further arguments.
    super(PostalAddress, self).__init__()
    ValidateString(address, 'address')

  def ToXml(self):
    """Returns this address as a <gd:postalAddress> element."""
    return u'<gd:postalAddress>%s</gd:postalAddress>' % saxutils.escape(self)


class Rating(long):
  """A user-provided integer rating for a piece of content. Normalized to a
  0-100 scale.

  This is the gd:rating element. In XML output, the rating is provided as the
  value attribute, alongside the min and max of the scale. See:
  http://code.google.com/apis/gdata/common-elements.html#gdRating

  Serializes to the decimal string representation of the rating. Raises
  BadValueError if the rating is not an integer in the range [0, 100].
  """
  MIN = 0
  MAX = 100

  def __init__(self, rating):
    # The numeric value is already set by long.__new__; the inherited
    # initializer takes no further arguments.
    super(Rating, self).__init__()
    if isinstance(rating, (float, complex)):
      raise datastore_errors.BadValueError(
        'Expected int or long; received %s (a %s).' %
        (rating, typename(rating)))

    try:
      value = long(rating)
    except ValueError:
      raise datastore_errors.BadValueError(
        'Expected int or long; received %s (a %s).' %
        (rating, typename(rating)))

    if value < Rating.MIN or value > Rating.MAX:
      raise datastore_errors.BadValueError(
        'Rating must be between %d and %d; received %d.' %
        (Rating.MIN, Rating.MAX, value))

  def ToXml(self):
    """Returns this rating as a <gd:rating> element."""
    return (u'<gd:rating value="%d" min="%d" max="%d" />' %
            (self, Rating.MIN, Rating.MAX))


class Text(unicode):
  """A long string type.

  Strings of any length can be stored in the datastore using this
  type. It behaves identically to the Python unicode type, except for
  the constructor, which only accepts str and unicode arguments.
  """

  def __new__(cls, arg=None, encoding=None):
    """Constructor.

    Only unicode and str instances are accepted; a str is decoded using the
    given encoding.

    Args:
      arg: optional unicode or str instance; default u''
      encoding: optional encoding; disallowed when isinstance(arg, unicode),
                defaults to 'ascii' when isinstance(arg, str);

    Raises:
      TypeError if arg is neither str nor unicode, or if an encoding is given
      together with a unicode argument.
    """
    value = arg
    if value is None:
      value = u''

    if not isinstance(value, (unicode, str)):
      raise TypeError('Text() argument should be str or unicode, not %s' %
                      type(arg).__name__)

    make = super(Text, cls).__new__
    if isinstance(value, str):
      # Byte strings are decoded; ASCII is assumed unless told otherwise.
      codec = encoding
      if codec is None:
        codec = 'ascii'
      return make(cls, value, codec)

    if encoding is not None:
      raise TypeError('Text() with a unicode argument '
                      'should not specify an encoding')
    return make(cls, value)

class Blob(str):
  """A blob type, appropriate for storing binary data of any length.

  This behaves identically to the Python str type, except for the
  constructor, which only accepts str arguments.
  """

  def __new__(cls, arg=None):
    """Constructor.

    Only str instances are accepted.

    Args:
      arg: optional str instance (default '')

    Raises:
      TypeError if arg is neither None nor a str instance.
    """
    data = arg
    if data is None:
      data = ''

    if not isinstance(data, str):
      raise TypeError('Blob() argument should be str instance, not %s' %
                      type(arg).__name__)

    return super(Blob, cls).__new__(cls, data)


# The value types that may be stored as entity properties. ToPropertyPb()
# rejects any value whose class is not listed here.
_PROPERTY_TYPES = [
  str,
  unicode,
  bool,
  int,
  long,
  type(None),
  float,
  Key,
  datetime.datetime,
  Blob,
  Text,
  users.User,
  Category,
  Link,
  Email,
  GeoPt,
  IM,
  PhoneNumber,
  PostalAddress,
  Rating,
  ]

# Maps a property value type to the entity_pb.Property meaning constant that
# ToPropertyPb() records on the property PB. Types without an entry get no
# meaning set.
_PROPERTY_MEANINGS = {



  Blob:              entity_pb.Property.BLOB,
  Text:              entity_pb.Property.TEXT,
  datetime.datetime: entity_pb.Property.GD_WHEN,
  Category:          entity_pb.Property.ATOM_CATEGORY,
  Link:              entity_pb.Property.ATOM_LINK,
  Email:             entity_pb.Property.GD_EMAIL,
  GeoPt:             entity_pb.Property.GEORSS_POINT,
  IM:                entity_pb.Property.GD_IM,
  PhoneNumber:       entity_pb.Property.GD_PHONENUMBER,
  PostalAddress:     entity_pb.Property.GD_POSTALADDRESS,
  Rating:            entity_pb.Property.GD_RATING,
  }

# The arbitrary-length types. NOTE(review): not referenced in the visible
# part of this module; presumably consumed by other datastore code - confirm.
_RAW_PROPERTY_TYPES = (
  Blob,
  Text,
)

def ToPropertyPb(name, values):
  """Creates a type-specific onestore property PB from a property name and a
  value or list of values. Determines the type of property based on the type
  of the value(s).

  If name is invalid, Serialize throws a BadPropertyError. If values is
  an unsupported type, or an empty list, or a list with elements of different
  types, Serialize throws a BadValueError.

  Args:
    # the property name
    name: string
    # either a supported type or a list of them. if a list, all
    # of the list's elements should be of the same type
    values: string, int, long, float, datetime, Key, or list

  Returns:
    # a list of or single StringProperty, Int64Property, BoolProperty,
    # DoubleProperty, PointProperty, UserProperty, or ReferenceProperty.
    [entity_pb.*Property, ...]
  """
  ValidateString(name, 'property name', datastore_errors.BadPropertyError)
  if RESERVED_PROPERTY_NAME.match(name):
    raise datastore_errors.BadPropertyError('%s is a reserved property name.' %
                                            name)

  if isinstance(values, tuple):
    raise datastore_errors.BadValueError(
        'May not use tuple property value; property %s is %s.' %
        (name, repr(values)))

  if isinstance(values, list):
    multiple = True
  else:
    multiple = False
    values = [values]

  if not values:
    raise datastore_errors.BadValueError(
        'May not use the empty list as a property value; property %s is %s.' %
        (name, repr(values)))

  def long_if_int(val):
    if isinstance(val, int) and not isinstance(val, bool):
      return long(val)
    else:
      return val

  values = [long_if_int(v) for v in values]

  try:
    proptype = values[0].__class__
    for v in values:
      if v is not None:
        if (v.__class__ is not proptype and not
            (v.__class__ in (str, unicode) and proptype in (str, unicode))):
          raise datastore_errors.BadValueError(
              'Values for property %s have mismatched types: %s (a %s) and '
              '%s (a %s).' % (name, values[0], proptype, v, typename(v)))
        elif (isinstance(v, Key) and not v.has_id_or_name()):
          raise datastore_errors.BadValueError(
              'Incomplete key found for reference property %s.' % name)
  except (KeyError, ValueError, TypeError, IndexError, AttributeError), msg:
    raise datastore_errors.BadValueError(
      'Error type checking values for property %s: %s' % (name, msg))

  if proptype not in _PROPERTY_TYPES:
    raise datastore_errors.BadValueError(
      'Unsupported type for property %s: %s' % (name, proptype))

  pbs = []
  for v in values:
    pb = entity_pb.Property()
    pb.set_name(name.encode('utf-8'))
    pb.set_multiple(multiple)
    if _PROPERTY_MEANINGS.has_key(proptype):
      pb.set_meaning(_PROPERTY_MEANINGS[proptype])

    pbvalue = pb.mutable_value()
    if v is None:
      pass
    elif isinstance(v, Blob):
      pbvalue.set_stringvalue(v)
    elif isinstance(v, (basestring, IM)):
      if not isinstance(v, Text):
        if isinstance(v, Link):
          max_len = _MAX_LINK_PROPERTY_LENGTH
        else:
          max_len = _MAX_STRING_LENGTH
        if len(v) > max_len:
          raise datastore_errors.BadValueError(
            'Property %s is %d bytes long; it must be %d or less. '
            'Consider Text instead, which can store strings of any length.' %
            (name, len(v), max_len))
      pbvalue.set_stringvalue(unicode(v).encode('utf-8'))
    elif isinstance(v, datetime.datetime):
      if v.tzinfo:
        v = v.astimezone(UTC)
      pbvalue.set_int64value(
        long(calendar.timegm(v.timetuple()) * 1000000L) + v.microsecond)
    elif isinstance(v, GeoPt):
      pbvalue.mutable_pointvalue().set_x(v.lat)
      pbvalue.mutable_pointvalue().set_y(v.lon)
    elif isinstance(v, users.User):
      pbvalue.mutable_uservalue().set_email(v.email().encode('utf-8'))
      pbvalue.mutable_uservalue().set_auth_domain(
        v.auth_domain().encode('utf-8'))
      pbvalue.mutable_uservalue().set_gaiaid(0)
    elif isinstance(v, Key):
      ref = v._Key__reference
      pbvalue.mutable_referencevalue().set_app(ref.app())
      for elem in ref.path().element_list():
        pbvalue.mutable_referencevalue().add_pathelement().CopyFrom(elem)
    elif isinstance(v, bool):
      pbvalue.set_booleanvalue(v)
    elif isinstance(v, long):
      pbvalue.set_int64value(v)
      try:
        pbvalue.Encode()
      except ProtocolBuffer.ProtocolBufferEncodeError, e:
        pbvalue.clear_int64value()
        raise OverflowError(e)
    elif isinstance(v, float):
      pbvalue.set_doublevalue(v)
    else:
      assert False, "Shouldn't reach here; property type was validated above."

    pbs.append(pb)

  if multiple:
    return pbs
  else:
    return pbs[0]


def FromReferenceProperty(value):
  """Converts a reference PropertyValue to a Key.

  Args:
    value: entity_pb.PropertyValue with its reference value set

  Returns:
    Key, a new Key whose app and path elements are copied from the
    PropertyValue's reference value

  Raises:
    BadValueError if value is not a PropertyValue or has no reference value.
  """
  # Validate with explicit raises rather than asserts: asserts are stripped
  # under python -O, and the documented failure mode is BadValueError.
  if not isinstance(value, entity_pb.PropertyValue):
    raise datastore_errors.BadValueError(
      'Expected PropertyValue; received %s (a %s).' % (value, typename(value)))
  if not value.has_referencevalue():
    raise datastore_errors.BadValueError(
      'Expected a PropertyValue with a reference value; received %s.' %
      (value,))
  ref = value.referencevalue()

  key = Key()
  # _Key__reference is the name-mangled form of Key's private __reference
  # attribute; it is filled in directly here.
  key_ref = key._Key__reference
  key_ref.set_app(ref.app())

  for pathelem in ref.pathelement_list():
    key_ref.mutable_path().add_element().CopyFrom(pathelem)

  return key


# The epoch as a naive datetime; GD_WHEN values are offsets from it.
_EPOCH = datetime.datetime.utcfromtimestamp(0)

# Maps a property meaning to a callable that FromPropertyPb applies to the raw
# value read from the PB in order to build the matching python value.
_PROPERTY_CONVERSIONS = {
  # Integer count of microseconds since _EPOCH -> datetime.datetime.
  entity_pb.Property.GD_WHEN: (
    lambda micros: _EPOCH + datetime.timedelta(microseconds=micros)),
  entity_pb.Property.ATOM_CATEGORY: Category,
  entity_pb.Property.ATOM_LINK: Link,
  entity_pb.Property.GD_EMAIL: Email,
  # The raw point value is a two-item sequence, unpacked into GeoPt's
  # positional arguments.
  entity_pb.Property.GEORSS_POINT: lambda point: GeoPt(*point),
  entity_pb.Property.GD_IM: IM,
  entity_pb.Property.GD_PHONENUMBER: PhoneNumber,
  entity_pb.Property.GD_POSTALADDRESS: PostalAddress,
  entity_pb.Property.GD_RATING: Rating,
  entity_pb.Property.BLOB: Blob,
  entity_pb.Property.TEXT: Text,
  }

def FromPropertyPb(pb):
  """Converts a onestore property PB to a python value.

  Args:
    pb: entity_pb.Property

  Returns:
    # return type is determined by the type of the argument
    string, int, bool, double, users.User, or one of the atom or gd types
  """
  if not isinstance(pb, entity_pb.Property):
    raise datastore_errors.BadValueError(
      'Expected PropertyValue; received %s (a %s).' % (pb, typename(pb)))

  pbval = pb.value()

  if (pbval.has_stringvalue()):
    value = pbval.stringvalue()
    if pb.meaning() != entity_pb.Property.BLOB:
      value = unicode(value.decode('utf-8'))
  elif pbval.has_pointvalue():
    value = (pbval.pointvalue().x(), pbval.pointvalue().y())
  elif pbval.has_uservalue():
    email = unicode(pbval.uservalue().email().decode('utf-8'))
    auth_domain = unicode(pbval.uservalue().auth_domain().decode('utf-8'))
    value = users.User(email=email, _auth_domain=auth_domain)
  elif pbval.has_referencevalue():
    value = FromReferenceProperty(pbval)
  elif pbval.has_int64value():
    value = long(pbval.int64value())
  elif pbval.has_booleanvalue():
    value = bool(pbval.booleanvalue())
  elif pbval.has_doublevalue():
    value = float(pbval.doublevalue())
  else:
    if pb.multiple():
      raise datastore_errors.BadValueError(
          'Record indicated as multiple, but has no values.')
    else:
      value = None

  try:
    if pb.has_meaning() and pb.meaning() in _PROPERTY_CONVERSIONS:
      value = _PROPERTY_CONVERSIONS[pb.meaning()](value)
  except (KeyError, ValueError, IndexError, TypeError, AttributeError), msg:
    raise datastore_errors.BadValueError(
      'Error converting pb: %s\nException was: %s' % (pb, msg))

  return value


def PropertyTypeName(value):
  """Maps a property value to the string name of its datastore type.

  Values whose exact class has an entry in _PROPERTY_MEANINGS are named after
  the corresponding entity_pb.Property meaning, lowercased and with
  underscores turned into colons. Otherwise strings are 'string', users.User
  instances are 'user', longs are 'int', None is 'null', and anything else
  gets the lowercased result of typename().

  Args:
    value: any valid property value

  Returns:
    string
  """
  value_class = value.__class__
  if value_class in _PROPERTY_MEANINGS:
    meaning_name = entity_pb.Property._Meaning_NAMES[
        _PROPERTY_MEANINGS[value_class]]
    return meaning_name.lower().replace('_', ':')

  if isinstance(value, basestring):
    return 'string'
  if isinstance(value, users.User):
    return 'user'
  if isinstance(value, long):
    return 'int'
  if value is None:
    return 'null'

  # Fallback for every remaining class: use its lowercased type name.
  return typename(value).lower()

# Maps a datastore type name to the python type it stands for. This is the
# lookup table behind FromPropertyTypeName.
_PROPERTY_TYPE_STRINGS = {
    # Built-in and core datastore types.
    'string': unicode,
    'bool': bool,
    'int': long,
    'null': type(None),
    'float': float,
    'key': Key,
    'blob': Blob,
    'text': Text,
    'user': users.User,
    # Types named after their atom / gd / georss meaning.
    'atom:category': Category,
    'atom:link': Link,
    'gd:email': Email,
    'gd:when': datetime.datetime,
    'georss:point': GeoPt,
    'gd:im': IM,
    'gd:phonenumber': PhoneNumber,
    'gd:postaladdress': PostalAddress,
    'gd:rating': Rating,
    }


def FromPropertyTypeName(type_name):
  """Looks up the python type registered for a datastore type name.

  Args:
    type_name: A string representation of a datastore type name; must be one
      of the keys of _PROPERTY_TYPE_STRINGS.

  Returns:
    A python type.

  Raises:
    KeyError if type_name is not a key of _PROPERTY_TYPE_STRINGS.
  """
  python_type = _PROPERTY_TYPE_STRINGS[type_name]
  return python_type


def PropertyValueFromString(type_, value_string, _auth_domain=None):
  """Returns an instance of a property value given a type and string value.

  The reverse of this method is just str() and type() of the python value.

  Note that this does *not* support non-UTC offsets in ISO 8601-formatted
  datetime strings, e.g. the -08:00 suffix in '2002-12-25 00:00:00-08:00'.
  It only supports -00:00 and +00:00 suffixes, which are UTC.

  Args:
    type_: A python class.
    value_string: A string representation of the value of the property.
    _auth_domain: Passed through as the second argument to users.User when
      type_ is users.User; unused otherwise.

  Returns:
    An instance of type_. For type(None), always None; for bool, True only if
    value_string is exactly 'True'.

  Raises:
    ValueError if type_ is datetime and value_string has a non-UTC timezone
    offset or is not in '%Y-%m-%d %H:%M:%S[.fraction]' form.
  """
  if type_ == datetime.datetime:
    # Slice instead of indexing so that a string shorter than six characters
    # falls through to strptime's ValueError rather than raising IndexError.
    if value_string[-6:-5] in ('+', '-'):
      if value_string[-5:] == '00:00':
        value_string = value_string[:-6]
      else:
        raise ValueError('Non-UTC offsets in datetimes are not supported.')

    split = value_string.split('.')
    iso_date = split[0]
    microseconds = 0
    if len(split) > 1:
      fraction = split[1]
      microseconds = int(fraction)
      # The digits are a decimal fraction of a second, so '.5' is 500000
      # microseconds, not 5. Scale short fractions up to six digits.
      if len(fraction) < 6:
        microseconds *= 10 ** (6 - len(fraction))

    time_struct = time.strptime(iso_date, '%Y-%m-%d %H:%M:%S')[0:6]
    value = datetime.datetime(*(time_struct + (microseconds,)))
    return value
  elif type_ == Rating:
    return Rating(int(value_string))
  elif type_ == bool:
    return value_string == 'True'
  elif type_ == users.User:
    return users.User(value_string, _auth_domain)
  elif type_ == type(None):
    return None
  return type_(value_string)