120 'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness', |
120 'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness', |
121 'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when', |
121 'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when', |
122 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely', |
122 'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely', |
123 'will', 'with', 'within', 'without', 'would', 'yet', 'you']) |
123 'will', 'with', 'within', 'without', 'would', 'yet', 'you']) |
124 |
124 |
125 _PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation) + ']') |
125 _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']') |
126 |
126 |
127 def __init__(self, kind_or_entity, *args, **kwargs): |
127 def __init__(self, kind_or_entity, word_delimiter_regex=None, *args, |
|
128 **kwargs): |
128 """Constructor. May be called as a copy constructor. |
129 """Constructor. May be called as a copy constructor. |
129 |
130 |
130 If kind_or_entity is a datastore.Entity, copies it into this Entity. |
131 If kind_or_entity is a datastore.Entity, copies it into this Entity. |
131 datastore.Get() and Query() returns instances of datastore.Entity, so this |
132 datastore.Get() and Query() returns instances of datastore.Entity, so this |
132 is useful for converting them back to SearchableEntity so that they'll be |
133 is useful for converting them back to SearchableEntity so that they'll be |
135 Otherwise, passes through the positional and keyword args to the |
136 Otherwise, passes through the positional and keyword args to the |
136 datastore.Entity constructor. |
137 datastore.Entity constructor. |
137 |
138 |
138 Args: |
139 Args: |
139 kind_or_entity: string or datastore.Entity |
140 kind_or_entity: string or datastore.Entity |
140 """ |
141 word_delimiter_regex: a regex matching characters that delimit words |
|
142 """ |
|
143 self._word_delimiter_regex = word_delimiter_regex |
141 if isinstance(kind_or_entity, datastore.Entity): |
144 if isinstance(kind_or_entity, datastore.Entity): |
142 self._Entity__key = kind_or_entity._Entity__key |
145 self._Entity__key = kind_or_entity._Entity__key |
143 self.update(kind_or_entity) |
146 self.update(kind_or_entity) |
144 else: |
147 else: |
145 super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs) |
148 super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs) |
158 if not isinstance(values, list): |
161 if not isinstance(values, list): |
159 values = [values] |
162 values = [values] |
160 if (isinstance(values[0], basestring) and |
163 if (isinstance(values[0], basestring) and |
161 not isinstance(values[0], datastore_types.Blob)): |
164 not isinstance(values[0], datastore_types.Blob)): |
162 for value in values: |
165 for value in values: |
163 index.update(SearchableEntity._FullTextIndex(value)) |
166 index.update(SearchableEntity._FullTextIndex( |
|
167 value, self._word_delimiter_regex)) |
164 |
168 |
165 index_list = list(index) |
169 index_list = list(index) |
166 if index_list: |
170 if index_list: |
167 self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY] = index_list |
171 self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY] = index_list |
168 |
172 |
169 return super(SearchableEntity, self)._ToPb() |
173 return super(SearchableEntity, self)._ToPb() |
170 |
174 |
171 @classmethod |
175 @classmethod |
172 def _FullTextIndex(cls, text): |
176 def _FullTextIndex(cls, text, word_delimiter_regex=None): |
173 """Returns a set of keywords appropriate for full text indexing. |
177 """Returns a set of keywords appropriate for full text indexing. |
174 |
178 |
175 See SearchableQuery.Search() for details. |
179 See SearchableQuery.Search() for details. |
176 |
180 |
177 Args: |
181 Args: |
179 |
183 |
180 Returns: |
184 Returns: |
181 set of strings |
185 set of strings |
182 """ |
186 """ |
183 |
187 |
|
188 if word_delimiter_regex is None: |
|
189 word_delimiter_regex = cls._word_delimiter_regex |
|
190 |
184 if text: |
191 if text: |
185 datastore_types.ValidateString(text, 'text', max_len=sys.maxint) |
192 datastore_types.ValidateString(text, 'text', max_len=sys.maxint) |
186 text = cls._PUNCTUATION_REGEX.sub(' ', text) |
193 text = word_delimiter_regex.sub(' ', text) |
187 words = text.lower().split() |
194 words = text.lower().split() |
188 |
195 |
189 words = set(unicode(w) for w in words) |
196 words = set(unicode(w) for w in words) |
190 |
197 |
191 words -= cls._FULL_TEXT_STOP_WORDS |
198 words -= cls._FULL_TEXT_STOP_WORDS |
204 |
211 |
205 Only searches over entities that were created and stored using the |
212 Only searches over entities that were created and stored using the |
206 SearchableEntity or SearchableModel classes. |
213 SearchableEntity or SearchableModel classes. |
207 """ |
214 """ |
208 |
215 |
209 def Search(self, search_query): |
216 def Search(self, search_query, word_delimiter_regex=None): |
210 """Add a search query. This may be combined with filters. |
217 """Add a search query. This may be combined with filters. |
211 |
218 |
212 Note that keywords in the search query will be silently dropped if they |
219 Note that keywords in the search query will be silently dropped if they |
213 are stop words or too short, ie if they wouldn't be indexed. |
220 are stop words or too short, ie if they wouldn't be indexed. |
214 |
221 |
219 # this query |
226 # this query |
220 SearchableQuery |
227 SearchableQuery |
221 """ |
228 """ |
222 datastore_types.ValidateString(search_query, 'search query') |
229 datastore_types.ValidateString(search_query, 'search query') |
223 self._search_query = search_query |
230 self._search_query = search_query |
|
231 self._word_delimiter_regex = word_delimiter_regex |
224 return self |
232 return self |
225 |
233 |
226 def _ToPb(self, limit=None, offset=None): |
234 def _ToPb(self, limit=None, offset=None): |
227 """Adds filters for the search query, then delegates to the superclass. |
235 """Adds filters for the search query, then delegates to the superclass. |
228 |
236 |
243 '%s is a reserved name.' % SearchableEntity._FULL_TEXT_INDEX_PROPERTY) |
251 '%s is a reserved name.' % SearchableEntity._FULL_TEXT_INDEX_PROPERTY) |
244 |
252 |
245 pb = super(SearchableQuery, self)._ToPb(limit=limit, offset=offset) |
253 pb = super(SearchableQuery, self)._ToPb(limit=limit, offset=offset) |
246 |
254 |
247 if hasattr(self, '_search_query'): |
255 if hasattr(self, '_search_query'): |
248 keywords = SearchableEntity._FullTextIndex(self._search_query) |
256 keywords = SearchableEntity._FullTextIndex( |
|
257 self._search_query, self._word_delimiter_regex) |
249 for keyword in keywords: |
258 for keyword in keywords: |
250 filter = pb.add_filter() |
259 filter = pb.add_filter() |
251 filter.set_op(datastore_pb.Query_Filter.EQUAL) |
260 filter.set_op(datastore_pb.Query_Filter.EQUAL) |
252 prop = filter.add_property() |
261 prop = filter.add_property() |
253 prop.set_name(SearchableEntity._FULL_TEXT_INDEX_PROPERTY) |
262 prop.set_name(SearchableEntity._FULL_TEXT_INDEX_PROPERTY) |