thirdparty/google_appengine/google/appengine/ext/search/__init__.py
changeset 2864 2e0b0af889be
parent 2309 be1b94099f2d
equal deleted inserted replaced
2862:27971a13089f 2864:2e0b0af889be
    44   for result in query:
    44   for result in query:
    45     ...
    45     ...
    46 
    46 
    47 The full text index is stored in a property named __searchable_text_index.
    47 The full text index is stored in a property named __searchable_text_index.
    48 
    48 
       
    49 Specifying multiple indexes and properties to index
       
    50 ---------------------------------------------------
       
    51 
       
    52 By default, one index is created with all string properties. You can define
       
    53 multiple indexes and specify which properties should be indexed for each by
       
    54 overriding the SearchableProperties() method of search.SearchableModel, e.g.:
       
    55 
       
    56   class Article(search.SearchableModel):
       
    57     @classmethod
       
    58     def SearchableProperties(cls):
       
    59       return [['book', 'author'], ['book']]
       
    60 
       
    61 In this example, two indexes will be maintained: one that includes the 'book'
       
    62 and 'author' properties, and another for the 'book' property only. They will be
       
    63 stored in properties named __searchable_text_index_book_author and
       
    64 __searchable_text_index_book respectively. Note that the index that includes
       
    65 all properties will not be created unless added explicitly like this:
       
    66 
       
    67   @classmethod
       
    68   def SearchableProperties(cls):
       
    69     return [['book', 'author'], ['book'], search.ALL_PROPERTIES]
       
    70 
       
    71 The default return value of SearchableProperties() is [search.ALL_PROPERTIES]
       
    72 (one index, all properties).
       
    73 
       
    74 To search using a custom-defined index, pass its definition
       
    75 in the 'properties' parameter of search():
       
    76 
       
    77   Article.all().search('Lem', properties=['book', 'author'])
       
    78 
       
    79 Note that the order of properties in the list matters.
       
    80 
       
    81 Adding indexes to index.yaml
       
    82 ----------------------------
    49 
    83 
    50 In general, if you just want to provide full text search, you *don't* need to
    84 In general, if you just want to provide full text search, you *don't* need to
    51 add any extra indexes to your index.yaml. However, if you want to use search()
    85 add any extra indexes to your index.yaml. However, if you want to use search()
    52 in a query *in addition to* an ancestor, filter, or sort order, you'll need to
    86 in a query *in addition to* an ancestor, filter, or sort order, you'll need to
    53 create an index in index.yaml with the __searchable_text_index property. For
    87 create an index in index.yaml with the __searchable_text_index property. For
    58     - name: __searchable_text_index
    92     - name: __searchable_text_index
    59     - name: date
    93     - name: date
    60       direction: desc
    94       direction: desc
    61     ...
    95     ...
    62 
    96 
       
    97 Similarly, if you created a custom index (see above), use the name of the
       
    98 property it's stored in, e.g. __searchable_text_index_book_author.
       
    99 
    63 Note that using SearchableModel will noticeably increase the latency of save()
   100 Note that using SearchableModel will noticeably increase the latency of save()
    64 operations, since it writes an index row for each indexable word. This also
   101 operations, since it writes an index row for each indexable word. This also
    65 means that the latency of save() will increase roughly with the size of the
   102 means that the latency of save() will increase roughly with the size of the
    66 properties in a given entity. Caveat hacker!
   103 properties in a given entity. Caveat hacker!
    67 """
   104 """
    76 from google.appengine.api import datastore
   113 from google.appengine.api import datastore
    77 from google.appengine.api import datastore_errors
   114 from google.appengine.api import datastore_errors
    78 from google.appengine.api import datastore_types
   115 from google.appengine.api import datastore_types
    79 from google.appengine.ext import db
   116 from google.appengine.ext import db
    80 from google.appengine.datastore import datastore_pb
   117 from google.appengine.datastore import datastore_pb
       
   118 
       
   119 ALL_PROPERTIES = []
    81 
   120 
    82 class SearchableEntity(datastore.Entity):
   121 class SearchableEntity(datastore.Entity):
    83   """A subclass of datastore.Entity that supports full text indexing.
   122   """A subclass of datastore.Entity that supports full text indexing.
    84 
   123 
    85   Automatically indexes all string and Text properties, using the datastore's
   124   Automatically indexes all string and Text properties, using the datastore's
   122    'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
   161    'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
   123    'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
   162    'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
   124 
   163 
   125   _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']')
   164   _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']')
   126 
   165 
       
   166   _searchable_properties = [ALL_PROPERTIES]
       
   167 
   127   def __init__(self, kind_or_entity, word_delimiter_regex=None, *args,
   168   def __init__(self, kind_or_entity, word_delimiter_regex=None, *args,
   128                **kwargs):
   169                **kwargs):
   129     """Constructor. May be called as a copy constructor.
   170     """Constructor. May be called as a copy constructor.
   130 
   171 
   131     If kind_or_entity is a datastore.Entity, copies it into this Entity.
   172     If kind_or_entity is a datastore.Entity, copies it into this Entity.
   142     """
   183     """
   143     self._word_delimiter_regex = word_delimiter_regex
   184     self._word_delimiter_regex = word_delimiter_regex
   144     if isinstance(kind_or_entity, datastore.Entity):
   185     if isinstance(kind_or_entity, datastore.Entity):
   145       self._Entity__key = kind_or_entity._Entity__key
   186       self._Entity__key = kind_or_entity._Entity__key
   146       self._Entity__unindexed_properties = frozenset(kind_or_entity.unindexed_properties())
   187       self._Entity__unindexed_properties = frozenset(kind_or_entity.unindexed_properties())
       
   188       if isinstance(kind_or_entity, SearchableEntity):
       
   189         if getattr(kind_or_entity, '_searchable_properties', None) is not None:
       
   190           self._searchable_properties = kind_or_entity._searchable_properties
   147       self.update(kind_or_entity)
   191       self.update(kind_or_entity)
   148     else:
   192     else:
   149       super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs)
   193       super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs)
   150 
   194 
   151   def _ToPb(self):
   195   def _ToPb(self):
   152     """Rebuilds the full text index, then delegates to the superclass.
   196     """Rebuilds the full text index, then delegates to the superclass.
   153 
   197 
   154     Returns:
   198     Returns:
   155       entity_pb.Entity
   199       entity_pb.Entity
   156     """
   200     """
   157     if SearchableEntity._FULL_TEXT_INDEX_PROPERTY in self:
   201     for properties_to_index in self._searchable_properties:
   158       del self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY]
   202       index_property_name = SearchableEntity.IndexPropertyName(properties_to_index)
   159 
   203       if index_property_name in self:
   160     index = set()
   204         del self[index_property_name]
   161     for (name, values) in self.items():
   205 
   162       if not isinstance(values, list):
   206 
   163         values = [values]
   207       if not properties_to_index:
   164       if (isinstance(values[0], basestring) and
   208         properties_to_index = self.keys()
   165           not isinstance(values[0], datastore_types.Blob)):
   209 
   166         for value in values:
   210       index = set()
   167           index.update(SearchableEntity._FullTextIndex(
   211       for name in properties_to_index:
   168               value, self._word_delimiter_regex))
   212         if not self.has_key(name):
   169 
   213           continue
   170     index_list = list(index)
   214 
   171     if index_list:
   215         values = self[name]
   172       self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY] = index_list
   216         if not isinstance(values, list):
       
   217           values = [values]
       
   218 
       
   219         if (isinstance(values[0], basestring) and
       
   220             not isinstance(values[0], datastore_types.Blob)):
       
   221           for value in values:
       
   222             index.update(SearchableEntity._FullTextIndex(
       
   223                 value, self._word_delimiter_regex))
       
   224 
       
   225       index_list = list(index)
       
   226       if index_list:
       
   227         self[index_property_name] = index_list
   173 
   228 
   174     return super(SearchableEntity, self)._ToPb()
   229     return super(SearchableEntity, self)._ToPb()
   175 
   230 
   176   @classmethod
   231   @classmethod
   177   def _FullTextIndex(cls, text, word_delimiter_regex=None):
   232   def _FullTextIndex(cls, text, word_delimiter_regex=None):
   204     else:
   259     else:
   205       words = set()
   260       words = set()
   206 
   261 
   207     return words
   262     return words
   208 
   263 
       
   264   @classmethod
       
   265   def IndexPropertyName(cls, properties):
       
   266     """Given index definition, returns the name of the property to put it in."""
       
   267     name = SearchableEntity._FULL_TEXT_INDEX_PROPERTY
       
   268 
       
   269     if properties:
       
   270       name += '_' + '_'.join(properties)
       
   271 
       
   272     return name
       
   273 
   209 
   274 
   210 class SearchableQuery(datastore.Query):
   275 class SearchableQuery(datastore.Query):
   211   """A subclass of datastore.Query that supports full text search.
   276   """A subclass of datastore.Query that supports full text search.
   212 
   277 
   213   Only searches over entities that were created and stored using the
   278   Only searches over entities that were created and stored using the
   214   SearchableEntity or SearchableModel classes.
   279   SearchableEntity or SearchableModel classes.
   215   """
   280   """
   216 
   281 
   217   def Search(self, search_query, word_delimiter_regex=None):
   282   def Search(self, search_query, word_delimiter_regex=None,
       
   283              properties=ALL_PROPERTIES):
   218     """Add a search query. This may be combined with filters.
   284     """Add a search query. This may be combined with filters.
   219 
   285 
   220     Note that keywords in the search query will be silently dropped if they
   286     Note that keywords in the search query will be silently dropped if they
   221     are stop words or too short, ie if they wouldn't be indexed.
   287     are stop words or too short, ie if they wouldn't be indexed.
   222 
   288 
   228       SearchableQuery
   294       SearchableQuery
   229     """
   295     """
   230     datastore_types.ValidateString(search_query, 'search query')
   296     datastore_types.ValidateString(search_query, 'search query')
   231     self._search_query = search_query
   297     self._search_query = search_query
   232     self._word_delimiter_regex = word_delimiter_regex
   298     self._word_delimiter_regex = word_delimiter_regex
       
   299     self._properties = properties
   233     return self
   300     return self
   234 
   301 
   235   def _ToPb(self, limit=None, offset=None):
   302   def _ToPb(self, *args, **kwds):
   236     """Adds filters for the search query, then delegates to the superclass.
   303     """Adds filters for the search query, then delegates to the superclass.
   237 
   304 
   238     Raises BadFilterError if a filter on the index property already exists.
   305     Mimics Query._ToPb()'s signature. Raises BadFilterError if a filter on the
   239 
   306     index property already exists.
   240     Args:
       
   241       # an upper bound on the number of results returned by the query.
       
   242       limit: int
       
   243       # number of results that match the query to skip.  limit is applied
       
   244       # after the offset is fulfilled.
       
   245       offset: int
       
   246 
   307 
   247     Returns:
   308     Returns:
   248       datastore_pb.Query
   309       datastore_pb.Query
   249     """
   310     """
   250     if SearchableEntity._FULL_TEXT_INDEX_PROPERTY in self:
   311 
       
   312     properties = getattr(self, "_properties", ALL_PROPERTIES)
       
   313 
       
   314     index_property_name = SearchableEntity.IndexPropertyName(properties)
       
   315     if index_property_name in self:
   251       raise datastore_errors.BadFilterError(
   316       raise datastore_errors.BadFilterError(
   252         '%s is a reserved name.' % SearchableEntity._FULL_TEXT_INDEX_PROPERTY)
   317         '%s is a reserved name.' % index_property_name)
   253 
   318 
   254     pb = super(SearchableQuery, self)._ToPb(limit=limit, offset=offset)
   319     pb = super(SearchableQuery, self)._ToPb(*args, **kwds)
   255 
   320 
   256     if hasattr(self, '_search_query'):
   321     if hasattr(self, '_search_query'):
   257       keywords = SearchableEntity._FullTextIndex(
   322       keywords = SearchableEntity._FullTextIndex(
   258           self._search_query, self._word_delimiter_regex)
   323           self._search_query, self._word_delimiter_regex)
   259       for keyword in keywords:
   324       for keyword in keywords:
   260         filter = pb.add_filter()
   325         filter = pb.add_filter()
   261         filter.set_op(datastore_pb.Query_Filter.EQUAL)
   326         filter.set_op(datastore_pb.Query_Filter.EQUAL)
   262         prop = filter.add_property()
   327         prop = filter.add_property()
   263         prop.set_name(SearchableEntity._FULL_TEXT_INDEX_PROPERTY)
   328         prop.set_name(index_property_name)
   264         prop.set_multiple(len(keywords) > 1)
   329         prop.set_multiple(len(keywords) > 1)
   265         prop.mutable_value().set_stringvalue(unicode(keyword).encode('utf-8'))
   330         prop.mutable_value().set_stringvalue(unicode(keyword).encode('utf-8'))
   266 
   331 
   267     return pb
   332     return pb
   268 
   333 
   288 class SearchableModel(db.Model):
   353 class SearchableModel(db.Model):
   289   """A subclass of db.Model that supports full text search and indexing.
   354   """A subclass of db.Model that supports full text search and indexing.
   290 
   355 
   291   Automatically indexes all string-based properties. To search, use the all()
   356   Automatically indexes all string-based properties. To search, use the all()
   292   method to get a SearchableModel.Query, then use its search() method.
   357   method to get a SearchableModel.Query, then use its search() method.
       
   358 
       
   359   Override SearchableProperties() to define properties to index and/or multiple
       
   360   indexes (see the module docstring).
   293   """
   361   """
       
   362 
       
   363   @classmethod
       
   364   def SearchableProperties(cls):
       
   365     return [ALL_PROPERTIES]
   294 
   366 
   295   class Query(db.Query):
   367   class Query(db.Query):
   296     """A subclass of db.Query that supports full text search."""
   368     """A subclass of db.Query that supports full text search."""
   297     _search_query = None
   369     _search_query = None
   298 
   370     _properties = None
   299     def search(self, search_query):
   371 
       
   372     def search(self, search_query, properties=ALL_PROPERTIES):
   300       """Adds a full text search to this query.
   373       """Adds a full text search to this query.
   301 
   374 
   302       Args:
   375       Args:
   303         search_query, a string containing the full text search query.
   376         search_query, a string containing the full text search query.
   304 
   377 
   305       Returns:
   378       Returns:
   306         self
   379         self
   307       """
   380       """
   308       self._search_query = search_query
   381       self._search_query = search_query
       
   382       self._properties = properties
       
   383 
       
   384       if self._properties not in getattr(self, '_searchable_properties', [ALL_PROPERTIES]):
       
   385         raise datastore_errors.BadFilterError(
       
   386           '%s does not have a corresponding index. Please add it to'
       
   387           'the SEARCHABLE_PROPERTIES list' % self._properties)
       
   388 
   309       return self
   389       return self
   310 
   390 
   311     def _get_query(self):
   391     def _get_query(self):
   312       """Wraps db.Query._get_query() and injects SearchableQuery."""
   392       """Wraps db.Query._get_query() and injects SearchableQuery."""
   313       query = db.Query._get_query(self,
   393       query = db.Query._get_query(self,
   314                                   _query_class=SearchableQuery,
   394                                   _query_class=SearchableQuery,
   315                                   _multi_query_class=SearchableMultiQuery)
   395                                   _multi_query_class=SearchableMultiQuery)
   316       if self._search_query:
   396       if self._search_query:
   317         query.Search(self._search_query)
   397         query.Search(self._search_query, properties=self._properties)
   318       return query
   398       return query
   319 
   399 
   320   def _populate_internal_entity(self):
   400   def _populate_internal_entity(self):
   321     """Wraps db.Model._populate_internal_entity() and injects
   401     """Wraps db.Model._populate_internal_entity() and injects
   322     SearchableEntity."""
   402     SearchableEntity."""
   323     return db.Model._populate_internal_entity(self,
   403     entity = db.Model._populate_internal_entity(self,
   324                                               _entity_class=SearchableEntity)
   404                                                 _entity_class=SearchableEntity)
       
   405     entity._searchable_properties = self.SearchableProperties()
       
   406     return entity
   325 
   407 
   326   @classmethod
   408   @classmethod
   327   def from_entity(cls, entity):
   409   def from_entity(cls, entity):
   328     """Wraps db.Model.from_entity() and injects SearchableEntity."""
   410     """Wraps db.Model.from_entity() and injects SearchableEntity."""
   329     if not isinstance(entity, SearchableEntity):
   411     if not isinstance(entity, SearchableEntity):
   331     return super(SearchableModel, cls).from_entity(entity)
   413     return super(SearchableModel, cls).from_entity(entity)
   332 
   414 
   333   @classmethod
   415   @classmethod
   334   def all(cls):
   416   def all(cls):
   335     """Returns a SearchableModel.Query for this kind."""
   417     """Returns a SearchableModel.Query for this kind."""
   336     return SearchableModel.Query(cls)
   418     query = SearchableModel.Query(cls)
       
   419     query._searchable_properties = cls.SearchableProperties()
       
   420     return query