--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/thirdparty/google_appengine/google/appengine/ext/key_range/__init__.py Sun Sep 06 23:31:53 2009 +0200
@@ -0,0 +1,570 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Key range representation and splitting."""
+
+
+import os
+
+try:
+ import simplejson
+except ImportError:
+ simplejson = None
+
+from google.appengine.api import datastore
+from google.appengine.datastore import datastore_pb
+from google.appengine.ext import db
+
+
+class Error(Exception):
+ """Base class for exceptions in this module."""
+
+
+class KeyRangeError(Error):
+ """Error while trying to generate a KeyRange."""
+
+
+class SimplejsonUnavailableError(Error):
+ """Error while using json functionality whith unavailable simplejson."""
+
+class EmptyDbQuery(db.Query):
+ """A query that returns no results."""
+
+ def get(self):
+ return None
+
+ def fetch(self, limit=1000, offset=0):
+ return []
+
+ def count(self, limit=1000):
+ return 0
+
+
+class EmptyDatastoreQuery(datastore.Query):
+ """A query that returns no results."""
+
+ def __init__(self, kind):
+ datastore.Query.__init__(self, kind)
+
+ def _Run(self, *unused_args, **unused_kwargs):
+ empty_result_pb = datastore_pb.QueryResult()
+ empty_result_pb.set_cursor(0)
+ empty_result_pb.set_more_results(False)
+ return datastore.Iterator(empty_result_pb)
+
+ def Count(self, *unused_args, **unused_kwargs):
+ return 0
+
+ def Get(self, *unused_args, **unused_kwargs):
+ return []
+
+ def Next(self, *unused_args, **unused_kwargs):
+ return []
+
+
+class KeyRange(object):
+ """Represents a range of keys in the datastore.
+
+ A KeyRange object represents a key range
+ (key_start, include_start, key_end, include_end)
+ and a scan direction (KeyRange.DESC or KeyRange.ASC).
+ """
+
+ DESC = 'DESC'
+ ASC = 'ASC'
+
+ def __init__(self,
+ key_start=None,
+ key_end=None,
+ direction=None,
+ include_start=True,
+ include_end=True):
+ """Initialize a KeyRange object.
+
+ Args:
+ key_start: The starting key for this range.
+ key_end: The ending key for this range.
+ direction: The direction of the query for this range.
+ include_start: Whether the start key should be included in the range.
+ include_end: Whether the end key should be included in the range.
+ """
+ if direction is None:
+ direction = KeyRange.ASC
+ assert direction in (KeyRange.ASC, KeyRange.DESC)
+ self.direction = direction
+ self.key_start = key_start
+ self.key_end = key_end
+ self.include_start = include_start
+ self.include_end = include_end
+
+ def __str__(self):
+ if self.include_start:
+ left_side = '['
+ else:
+ left_side = '('
+ if self.include_end:
+ right_side = ']'
+ else:
+ right_side = '('
+ return '%s%s%s-%s%s' % (self.direction, left_side, repr(self.key_start),
+ repr(self.key_end), right_side)
+
+ def __repr__(self):
+ return ('key_range.KeyRange(key_start=%s,key_end=%s,direction=%s,'
+ 'include_start=%s,include_end=%s)') % (repr(self.key_start),
+ repr(self.key_end),
+ repr(self.direction),
+ repr(self.include_start),
+ repr(self.include_end))
+
+ def filter_query(self, query):
+ """Add query filter to restrict to this key range.
+
+ Args:
+ query: A db.Query instance.
+
+ Returns:
+ The input query restricted to this key range or an empty query if
+ this key range is empty.
+ """
+ assert isinstance(query, db.Query)
+ if self.key_start == self.key_end and not (
+ self.include_start or self.include_end):
+ return EmptyDbQuery()
+ if self.include_start:
+ start_comparator = '>='
+ else:
+ start_comparator = '>'
+ if self.include_end:
+ end_comparator = '<='
+ else:
+ end_comparator = '<'
+ if self.key_start:
+ query.filter('__key__ %s' % start_comparator, self.key_start)
+ if self.key_end:
+ query.filter('__key__ %s' % end_comparator, self.key_end)
+ return query
+
+ def filter_datastore_query(self, query):
+ """Add query filter to restrict to this key range.
+
+ Args:
+ query: A datastore.Query instance.
+
+ Returns:
+ The input query restricted to this key range or an empty query if
+ this key range is empty.
+ """
+ assert isinstance(query, datastore.Query)
+ if self.key_start == self.key_end and not (
+ self.include_start or self.include_end):
+ return EmptyDatastoreQuery(query.kind)
+ if self.include_start:
+ start_comparator = '>='
+ else:
+ start_comparator = '>'
+ if self.include_end:
+ end_comparator = '<='
+ else:
+ end_comparator = '<'
+ if self.key_start:
+ query.update({'__key__ %s' % start_comparator: self.key_start})
+ if self.key_end:
+ query.update({'__key__ %s' % end_comparator: self.key_end})
+ return query
+
+ def __get_direction(self, asc, desc):
+ """Check that self.direction is in (KeyRange.ASC, KeyRange.DESC).
+
+ Args:
+ asc: Argument to return if self.direction is KeyRange.ASC
+ desc: Argument to return if self.direction is KeyRange.DESC
+
+ Returns:
+ asc or desc appropriately
+
+ Raises:
+ KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
+ """
+ if self.direction == KeyRange.ASC:
+ return asc
+ elif self.direction == KeyRange.DESC:
+ return desc
+ else:
+ raise KeyRangeError('KeyRange direction unexpected: %s', self.direction)
+
+ def make_directed_query(self, kind_class):
+ """Construct a query for this key range, including the scan direction.
+
+ Args:
+ kind_class: A kind implementation class.
+
+ Returns:
+ A db.Query instance.
+
+ Raises:
+ KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
+ """
+ direction = self.__get_direction('', '-')
+ query = db.Query(kind_class)
+ query.order('%s__key__' % direction)
+
+ query = self.filter_query(query)
+ return query
+
+ def make_directed_datastore_query(self, kind):
+ """Construct a query for this key range, including the scan direction.
+
+ Args:
+ kind: A string.
+
+ Returns:
+ A datastore.Query instance.
+
+ Raises:
+ KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
+ """
+ direction = self.__get_direction(datastore.Query.ASCENDING,
+ datastore.Query.DESCENDING)
+ query = datastore.Query(kind)
+ query.Order(('__key__', direction))
+
+ query = self.filter_datastore_query(query)
+ return query
+
+ def make_ascending_query(self, kind_class):
+ """Construct a query for this key range without setting the scan direction.
+
+ Args:
+ kind_class: A kind implementation class.
+
+ Returns:
+ A db.Query instance.
+ """
+ query = db.Query(kind_class)
+ query.order('__key__')
+
+ query = self.filter_query(query)
+ return query
+
+ def make_ascending_datastore_query(self, kind):
+ """Construct a query for this key range without setting the scan direction.
+
+ Args:
+ kind: A string.
+
+ Returns:
+ A datastore.Query instance.
+ """
+ query = datastore.Query(kind)
+ query.Order(('__key__', datastore.Query.ASCENDING))
+
+ query = self.filter_datastore_query(query)
+ return query
+
+ def split_range(self, batch_size=0):
+ """Split this key range into a list of at most two ranges.
+
+ This method attempts to split the key range approximately in half.
+ Numeric ranges are split in the middle into two equal ranges and
+ string ranges are split lexicographically in the middle. If the
+ key range is smaller than batch_size it is left unsplit.
+
+ Note that splitting is done without knowledge of the distribution
+ of actual entities in the key range, so there is no guarantee (nor
+ any particular reason to believe) that the entities of the range
+ are evenly split.
+
+ Args:
+ batch_size: The maximum size of a key range that should not be split.
+
+ Returns:
+ A list of one or two key ranges covering the same space as this range.
+ """
+ key_start = self.key_start
+ key_end = self.key_end
+ include_start = self.include_start
+ include_end = self.include_end
+
+ key_pairs = []
+ if not key_start:
+ key_pairs.append((key_start, include_start, key_end, include_end,
+ KeyRange.ASC))
+ elif not key_end:
+ key_pairs.append((key_start, include_start, key_end, include_end,
+ KeyRange.DESC))
+ else:
+ key_split = KeyRange.split_keys(key_start, key_end, batch_size)
+ first_include_end = True
+ if key_split == key_start:
+ first_include_end = first_include_end and include_start
+
+ key_pairs.append((key_start, include_start,
+ key_split, first_include_end,
+ KeyRange.DESC))
+
+ second_include_end = include_end
+ if key_split == key_end:
+ second_include_end = False
+ key_pairs.append((key_split, False,
+ key_end, second_include_end,
+ KeyRange.ASC))
+
+ ranges = [KeyRange(key_start=start,
+ include_start=include_start,
+ key_end=end,
+ include_end=include_end,
+ direction=direction)
+ for (start, include_start, end, include_end, direction)
+ in key_pairs]
+
+ return ranges
+
+ def __cmp__(self, other):
+ """Compare two key ranges.
+
+ Key ranges with a value of None for key_start or key_end, are always
+ considered to have include_start=False or include_end=False, respectively,
+ when comparing. Since None indicates an unbounded side of the range,
+ the include specifier is meaningless. The ordering generated is total
+ but somewhat arbitrary.
+
+ Args:
+ other: An object to compare to this one.
+
+ Returns:
+ -1: if this key range is less than other.
+ 0: if this key range is equal to other.
+ 1: if this key range is greater than other.
+ """
+ if not isinstance(other, KeyRange):
+ return 1
+
+ self_list = [self.key_start, self.key_end, self.direction,
+ self.include_start, self.include_end]
+ if not self.key_start:
+ self_list[3] = False
+ if not self.key_end:
+ self_list[4] = False
+
+ other_list = [other.key_start,
+ other.key_end,
+ other.direction,
+ other.include_start,
+ other.include_end]
+ if not other.key_start:
+ other_list[3] = False
+ if not other.key_end:
+ other_list[4] = False
+
+ return cmp(self_list, other_list)
+
+ @staticmethod
+ def bisect_string_range(start, end):
+ """Returns a string that is approximately in the middle of the range.
+
+ (start, end) is treated as a string range, and it is assumed
+ start <= end in the usual lexicographic string ordering. The output key
+ mid is guaranteed to satisfy start <= mid <= end.
+
+ The method proceeds by comparing initial characters of start and
+ end. When the characters are equal, they are appended to the mid
+ string. In the first place that the characters differ, the
+ difference characters are averaged and this average is appended to
+ the mid string. If averaging resulted in rounding down, and
+ additional character is added to the mid string to make up for the
+ rounding down. This extra step is necessary for correctness in
+ the case that the average of the two characters is equal to the
+ character in the start string.
+
+ This method makes the assumption that most keys are ascii and it
+ attempts to perform splitting within the ascii range when that
+ results in a valid split.
+
+ Args:
+ start: A string.
+ end: A string such that start <= end.
+
+ Returns:
+ A string mid such that start <= mid <= end.
+ """
+ if start == end:
+ return start
+ start += '\0'
+ end += '\0'
+ midpoint = []
+ expected_max = 127
+ for i in xrange(min(len(start), len(end))):
+ if start[i] == end[i]:
+ midpoint.append(start[i])
+ else:
+ ord_sum = ord(start[i]) + ord(end[i])
+ midpoint.append(unichr(ord_sum / 2))
+ if ord_sum % 2:
+ if len(start) > i + 1:
+ ord_start = ord(start[i+1])
+ else:
+ ord_start = 0
+ if ord_start < expected_max:
+ ord_split = (expected_max + ord_start) / 2
+ else:
+ ord_split = (0xFFFF + ord_start) / 2
+ midpoint.append(unichr(ord_split))
+ break
+ return ''.join(midpoint)
+
+ @staticmethod
+ def split_keys(key_start, key_end, batch_size):
+ """Return a key that is between key_start and key_end inclusive.
+
+ This method compares components of the ancestor paths of key_start
+ and key_end. The first place in the path that differs is
+ approximately split in half. If the kind components differ, a new
+ non-existent kind halfway between the two is used to split the
+ space. If the id_or_name components differ, then a new id_or_name
+ that is halfway between the two is selected. If the lower
+ id_or_name is numeric and the upper id_or_name is a string, then
+ the minumum string key u'\0' is used as the split id_or_name. The
+ key that is returned is the shared portion of the ancestor path
+ followed by the generated split component.
+
+ Args:
+ key_start: A db.Key instance for the lower end of a range.
+ key_end: A db.Key instance for the upper end of a range.
+ batch_size: The maximum size of a range that should not be split.
+
+ Returns:
+ A db.Key instance, k, such that key_start <= k <= key_end.
+ """
+ assert key_start.app() == key_end.app()
+ path1 = key_start.to_path()
+ path2 = key_end.to_path()
+ len1 = len(path1)
+ len2 = len(path2)
+ assert len1 % 2 == 0
+ assert len2 % 2 == 0
+ out_path = []
+ min_path_len = min(len1, len2) / 2
+ for i in xrange(min_path_len):
+ kind1 = path1[2*i]
+ kind2 = path2[2*i]
+
+ if kind1 != kind2:
+ split_kind = KeyRange.bisect_string_range(kind1, kind2)
+ out_path.append(split_kind)
+ out_path.append(unichr(0))
+ break
+
+ last = (len1 == len2 == 2*(i + 1))
+
+ id_or_name1 = path1[2*i + 1]
+ id_or_name2 = path2[2*i + 1]
+ id_or_name_split = KeyRange._split_id_or_name(
+ id_or_name1, id_or_name2, batch_size, last)
+ if id_or_name1 == id_or_name_split:
+ out_path.append(kind1)
+ out_path.append(id_or_name1)
+ else:
+ out_path.append(kind1)
+ out_path.append(id_or_name_split)
+ break
+
+ return db.Key.from_path(*out_path)
+
+ @staticmethod
+ def _split_id_or_name(id_or_name1, id_or_name2, batch_size, maintain_batches):
+ """Return an id_or_name that is between id_or_name1 an id_or_name2.
+
+ Attempts to split the range [id_or_name1, id_or_name2] in half,
+ unless maintain_batches is true and the size of the range
+ [id_or_name1, id_or_name2] is less than or equal to batch_size.
+
+ Args:
+ id_or_name1: A number or string or the id_or_name component of a key
+ id_or_name2: A number or string or the id_or_name component of a key
+ batch_size: The range size that will not be split if maintain_batches
+ is true.
+ maintain_batches: A boolean for whether to keep small ranges intact.
+
+ Returns:
+ An id_or_name such that id_or_name1 <= id_or_name <= id_or_name2.
+ """
+ if (isinstance(id_or_name1, (int, long)) and
+ isinstance(id_or_name2, (int, long))):
+ if not maintain_batches or id_or_name2 - id_or_name1 > batch_size:
+ return (id_or_name1 + id_or_name2) / 2
+ else:
+ return id_or_name1
+ elif (isinstance(id_or_name1, basestring) and
+ isinstance(id_or_name2, basestring)):
+ return KeyRange.bisect_string_range(id_or_name1, id_or_name2)
+ else:
+ assert (isinstance(id_or_name1, (int, long)) and
+ isinstance(id_or_name2, basestring))
+ return unichr(0)
+
+ def to_json(self):
+ """Serialize KeyRange to json.
+
+ Returns:
+ string with KeyRange json representation.
+ """
+ if simplejson is None:
+ raise SimplejsonUnavailableError(
+ "JSON functionality requires simplejson to be available")
+
+ def key_to_str(key):
+ if key:
+ return str(key)
+ else:
+ return None
+
+ return simplejson.dumps({
+ "direction": self.direction,
+ "key_start": key_to_str(self.key_start),
+ "key_end": key_to_str(self.key_end),
+ "include_start": self.include_start,
+ "include_end": self.include_end,
+ }, sort_keys=True)
+
+
+ @staticmethod
+ def from_json(json_str):
+ """Deserialize KeyRange from its json representation.
+
+ Args:
+ json_str: string with json representation created by key_range_to_json.
+
+ Returns:
+ deserialized KeyRange instance.
+ """
+ if simplejson is None:
+ raise SimplejsonUnavailableError(
+ "JSON functionality requires simplejson to be available")
+
+ def key_from_str(key_str):
+ if key_str:
+ return db.Key(key_str)
+ else:
+ return None
+
+ json = simplejson.loads(json_str)
+ return KeyRange(key_from_str(json["key_start"]),
+ key_from_str(json["key_end"]),
+ json["direction"],
+ json["include_start"],
+ json["include_end"])