44 for result in query: |
44 for result in query: |
45 ... |
45 ... |
46 |
46 |
47 The full text index is stored in a property named __searchable_text_index. |
47 The full text index is stored in a property named __searchable_text_index. |
48 |
48 |
|
49 Specifying multiple indexes and properties to index |
|
50 --------------------------------------------------- |
|
51 |
|
52 By default, one index is created with all string properties. You can define |
|
53 multiple indexes and specify which properties should be indexed for each by |
|
54 overriding the SearchableProperties() method of search.SearchableModel, for example: |
|
55 |
|
56 class Article(search.SearchableModel): |
|
57 @classmethod |
|
58 def SearchableProperties(cls): |
|
59 return [['book', 'author'], ['book']] |
|
60 |
|
61 In this example, two indexes will be maintained - one that includes 'book' and |
|
62 'author' properties, and another one for 'book' property only. They will be |
|
63 stored in properties named __searchable_text_index_book_author and |
|
64 __searchable_text_index_book respectively. Note that the index that includes |
|
65 all properties will not be created unless added explicitly like this: |
|
66 |
|
67 @classmethod |
|
68 def SearchableProperties(cls): |
|
69 return [['book', 'author'], ['book'], search.ALL_PROPERTIES] |
|
70 |
|
71 The default return value of SearchableProperties() is [search.ALL_PROPERTIES] |
|
72 (one index, all properties). |
|
73 |
|
74 To search using a custom-defined index, pass its definition |
|
75 in the 'properties' parameter of search(): |
|
76 |
|
77 Article.all().search('Lem', properties=['book', 'author']) |
|
78 |
|
79 Note that the order of properties in the list matters. |
|
80 |
|
81 Adding indexes to index.yaml |
|
82 ----------------------------- |
49 |
83 |
50 In general, if you just want to provide full text search, you *don't* need to |
84 In general, if you just want to provide full text search, you *don't* need to |
51 add any extra indexes to your index.yaml. However, if you want to use search() |
85 add any extra indexes to your index.yaml. However, if you want to use search() |
52 in a query *in addition to* an ancestor, filter, or sort order, you'll need to |
86 in a query *in addition to* an ancestor, filter, or sort order, you'll need to |
53 create an index in index.yaml with the __searchable_text_index property. For |
87 create an index in index.yaml with the __searchable_text_index property. For |
58 - name: __searchable_text_index |
92 - name: __searchable_text_index |
59 - name: date |
93 - name: date |
60 direction: desc |
94 direction: desc |
61 ... |
95 ... |
62 |
96 |
|
97 Similarly, if you created a custom index (see above), use the name of the |
|
98 property it's stored in, e.g. __searchable_text_index_book_author. |
|
99 |
63 Note that using SearchableModel will noticeably increase the latency of save() |
100 Note that using SearchableModel will noticeably increase the latency of save() |
64 operations, since it writes an index row for each indexable word. This also |
101 operations, since it writes an index row for each indexable word. This also |
65 means that the latency of save() will increase roughly with the size of the |
102 means that the latency of save() will increase roughly with the size of the |
66 properties in a given entity. Caveat hacker! |
103 properties in a given entity. Caveat hacker! |
67 """ |
104 """ |
76 from google.appengine.api import datastore |
113 from google.appengine.api import datastore |
77 from google.appengine.api import datastore_errors |
114 from google.appengine.api import datastore_errors |
78 from google.appengine.api import datastore_types |
115 from google.appengine.api import datastore_types |
79 from google.appengine.ext import db |
116 from google.appengine.ext import db |
80 from google.appengine.datastore import datastore_pb |
117 from google.appengine.datastore import datastore_pb |
|
118 |
|
119 ALL_PROPERTIES = [] |
81 |
120 |
82 class SearchableEntity(datastore.Entity): |
121 class SearchableEntity(datastore.Entity): |
83 """A subclass of datastore.Entity that supports full text indexing. |
122 """A subclass of datastore.Entity that supports full text indexing. |
84 |
123 |
85 Automatically indexes all string and Text properties, using the datastore's |
124 Automatically indexes all string and Text properties, using the datastore's |
122 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely', |
161 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely', |
123 'will', 'with', 'within', 'without', 'would', 'yet', 'you']) |
162 'will', 'with', 'within', 'without', 'would', 'yet', 'you']) |
124 |
163 |
125 _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']') |
164 _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']') |
126 |
165 |
|
166 _searchable_properties = [ALL_PROPERTIES] |
|
167 |
def __init__(self, kind_or_entity, word_delimiter_regex=None, *args,
             **kwargs):
  """Constructor. May be called as a copy constructor.

  If kind_or_entity is a datastore.Entity, its key, unindexed properties and
  property values are copied into this entity, and *args / **kwargs are
  ignored. When the source is itself a SearchableEntity, its index
  definitions (_searchable_properties) are carried over as well. Otherwise
  all arguments except word_delimiter_regex are passed through to
  datastore.Entity.__init__.

  Args:
    kind_or_entity: a datastore.Entity to copy, or the first positional
      argument for datastore.Entity.__init__.
    word_delimiter_regex: stored on the instance and handed to
      _FullTextIndex() when the index is rebuilt; may be None.
  """
  self._word_delimiter_regex = word_delimiter_regex

  # Plain construction: let datastore.Entity do all the work.
  if not isinstance(kind_or_entity, datastore.Entity):
    super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs)
    return

  # Copy construction: the superclass constructor is deliberately not run,
  # so the private Entity state it would set is copied by hand.
  source = kind_or_entity
  self._Entity__key = source._Entity__key
  self._Entity__unindexed_properties = frozenset(
      source.unindexed_properties())

  if isinstance(source, SearchableEntity):
    inherited = getattr(source, '_searchable_properties', None)
    if inherited is not None:
      self._searchable_properties = inherited

  self.update(source)
150 |
194 |
def _ToPb(self):
  """Rebuilds every full text index, then delegates to the superclass.

  One index property is written for each definition in
  self._searchable_properties; its name comes from IndexPropertyName(). An
  empty definition (ALL_PROPERTIES) indexes every property of the entity
  except the reserved index properties themselves. Only properties whose
  first value is a string (and not a Blob) contribute words. An index that
  ends up with no words is not stored.

  Returns:
    entity_pb.Entity
  """
  reserved_prefix = SearchableEntity._FULL_TEXT_INDEX_PROPERTY

  index_names = [SearchableEntity.IndexPropertyName(definition)
                 for definition in self._searchable_properties]

  # Remove all stale indexes before building any new one. Deleting them one
  # at a time inside the build loop would leave the other indexes in place,
  # where an ALL_PROPERTIES index would pick them up as ordinary content.
  for index_name in index_names:
    if index_name in self:
      del self[index_name]

  # Snapshot the real content properties now, before any index property is
  # written back. The prefix filter also keeps out indexes left over from an
  # older SearchableProperties() definition.
  content_names = [name for name in self.keys()
                   if not name.startswith(reserved_prefix)]

  for definition, index_name in zip(self._searchable_properties,
                                    index_names):
    # An empty definition means "all properties".
    names = definition or content_names

    index = set()
    for name in names:
      if name not in self:
        continue

      values = self[name]
      if not isinstance(values, list):
        values = [values]
      if not values:
        # Nothing to inspect; values[0] below would raise IndexError.
        continue

      # The type of the first value decides for the whole property.
      if (isinstance(values[0], basestring) and
          not isinstance(values[0], datastore_types.Blob)):
        for value in values:
          index.update(SearchableEntity._FullTextIndex(
              value, self._word_delimiter_regex))

    if index:
      self[index_name] = list(index)

  return super(SearchableEntity, self)._ToPb()
175 |
230 |
176 @classmethod |
231 @classmethod |
177 def _FullTextIndex(cls, text, word_delimiter_regex=None): |
232 def _FullTextIndex(cls, text, word_delimiter_regex=None): |
204 else: |
259 else: |
205 words = set() |
260 words = set() |
206 |
261 |
207 return words |
262 return words |
208 |
263 |
|
@classmethod
def IndexPropertyName(cls, properties):
  """Given index definition, returns the name of the property to put it in.

  Args:
    properties: list of property names that make up the index, in order. An
      empty list (ALL_PROPERTIES) selects the default index.

  Returns:
    _FULL_TEXT_INDEX_PROPERTY for an empty definition; otherwise that name
    followed by '_' and the property names joined with '_'. The order of the
    names therefore affects the result.
  """
  base_name = SearchableEntity._FULL_TEXT_INDEX_PROPERTY
  if not properties:
    return base_name
  return base_name + '_' + '_'.join(properties)
|
273 |
209 |
274 |
210 class SearchableQuery(datastore.Query): |
275 class SearchableQuery(datastore.Query): |
211 """A subclass of datastore.Query that supports full text search. |
276 """A subclass of datastore.Query that supports full text search. |
212 |
277 |
213 Only searches over entities that were created and stored using the |
278 Only searches over entities that were created and stored using the |
214 SearchableEntity or SearchableModel classes. |
279 SearchableEntity or SearchableModel classes. |
215 """ |
280 """ |
216 |
281 |
def Search(self, search_query, word_delimiter_regex=None,
           properties=ALL_PROPERTIES):
  """Add a search query. This may be combined with filters.

  Note that keywords in the search query will be silently dropped if they
  are stop words or too short, ie if they wouldn't be indexed.

  Args:
    search_query: string containing the full text search query; checked with
      datastore_types.ValidateString.
    word_delimiter_regex: stored and later passed to
      SearchableEntity._FullTextIndex() when the query is converted to a
      protocol buffer; may be None.
    properties: index definition (list of property names) selecting which
      index to search. Defaults to ALL_PROPERTIES.

  Returns:
    SearchableQuery (this instance, to allow chaining)
  """
  # Validate before touching any state so a bad query leaves self unchanged.
  datastore_types.ValidateString(search_query, 'search query')

  self._properties = properties
  self._word_delimiter_regex = word_delimiter_regex
  self._search_query = search_query
  return self
234 |
301 |
def _ToPb(self, *args, **kwds):
  """Adds filters for the search query, then delegates to the superclass.

  Mimics Query._ToPb()'s signature: all arguments are forwarded unchanged.
  When Search() has been called, one EQUAL filter on the selected index
  property is appended per keyword extracted from the search query.

  Returns:
    datastore_pb.Query

  Raises:
    datastore_errors.BadFilterError: if the index property name is already
      present in this query.
  """
  # Search() may never have been called; fall back to the default index.
  selected = getattr(self, "_properties", ALL_PROPERTIES)
  index_property_name = SearchableEntity.IndexPropertyName(selected)

  if index_property_name in self:
    raise datastore_errors.BadFilterError(
        '%s is a reserved name.' % index_property_name)

  pb = super(SearchableQuery, self)._ToPb(*args, **kwds)

  if not hasattr(self, '_search_query'):
    return pb

  keywords = SearchableEntity._FullTextIndex(
      self._search_query, self._word_delimiter_regex)
  has_several_keywords = len(keywords) > 1

  for keyword in keywords:
    keyword_filter = pb.add_filter()
    keyword_filter.set_op(datastore_pb.Query_Filter.EQUAL)

    prop = keyword_filter.add_property()
    prop.set_name(index_property_name)
    prop.set_multiple(has_several_keywords)
    prop.mutable_value().set_stringvalue(unicode(keyword).encode('utf-8'))

  return pb
268 |
333 |
288 class SearchableModel(db.Model): |
353 class SearchableModel(db.Model): |
289 """A subclass of db.Model that supports full text search and indexing. |
354 """A subclass of db.Model that supports full text search and indexing. |
290 |
355 |
291 Automatically indexes all string-based properties. To search, use the all() |
356 Automatically indexes all string-based properties. To search, use the all() |
292 method to get a SearchableModel.Query, then use its search() method. |
357 method to get a SearchableModel.Query, then use its search() method. |
|
358 |
|
359 Override SearchableProperties() to define properties to index and/or multiple |
|
360 indexes (see the module docstring). |
293 """ |
361 """ |
|
362 |
|
@classmethod
def SearchableProperties(cls):
  """Returns the index definitions to maintain for this model.

  Each definition is a list of property names; ALL_PROPERTIES stands for
  "every property". Override to index specific properties or to keep
  several indexes.

  Returns:
    A list of index definitions. The default is a single index over all
    properties.
  """
  default_definition = ALL_PROPERTIES
  return [default_definition]
294 |
366 |
class Query(db.Query):
  """A subclass of db.Query that supports full text search."""

  # Search text set by search(); None means no full text search was requested.
  _search_query = None
  # Index definition chosen in search(); only used when _search_query is set.
  _properties = None

  def search(self, search_query, properties=ALL_PROPERTIES):
    """Adds a full text search to this query.

    Args:
      search_query, a string containing the full text search query.
      properties, the index definition (list of property names) to search.
        It must be one of this query's _searchable_properties; when that
        attribute is absent only ALL_PROPERTIES is accepted.
        NOTE(review): _searchable_properties is not set anywhere in the
        visible code - presumably the model attaches it when it creates the
        query; confirm.

    Returns:
      self

    Raises:
      datastore_errors.BadFilterError: if no index is maintained for
        'properties'.
    """
    known_indexes = getattr(self, '_searchable_properties', [ALL_PROPERTIES])

    # Validate before mutating, so a rejected search leaves the query as it
    # was instead of half-configured.
    if properties not in known_indexes:
      # The 1-tuple keeps %-formatting correct even if a tuple is passed.
      raise datastore_errors.BadFilterError(
          '%s does not have a corresponding index. Please add it to '
          'the list returned by SearchableProperties()' % (properties,))

    self._search_query = search_query
    self._properties = properties
    return self

  def _get_query(self):
    """Wraps db.Query._get_query() and injects SearchableQuery."""
    query = db.Query._get_query(self,
                                _query_class=SearchableQuery,
                                _multi_query_class=SearchableMultiQuery)
    if self._search_query:
      query.Search(self._search_query, properties=self._properties)
    return query
319 |
399 |
def _populate_internal_entity(self):
  """Wraps db.Model._populate_internal_entity() and injects
  SearchableEntity.

  The returned entity is also given this model's index definitions, taken
  from SearchableProperties(), as its _searchable_properties attribute.
  """
  populated = db.Model._populate_internal_entity(
      self, _entity_class=SearchableEntity)
  populated._searchable_properties = self.SearchableProperties()
  return populated
325 |
407 |
326 @classmethod |
408 @classmethod |
327 def from_entity(cls, entity): |
409 def from_entity(cls, entity): |
328 """Wraps db.Model.from_entity() and injects SearchableEntity.""" |
410 """Wraps db.Model.from_entity() and injects SearchableEntity.""" |
329 if not isinstance(entity, SearchableEntity): |
411 if not isinstance(entity, SearchableEntity): |