thirdparty/google_appengine/google/appengine/ext/search/__init__.py
changeset 297 35211afcd563
parent 149 f2e327a7c5de
child 1278 a7766286a7be
equal deleted inserted replaced
296:b02dd2a5f329 297:35211afcd563
   120    'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness',
   120    'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness',
   121    'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when',
   121    'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when',
   122    'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
   122    'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
   123    'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
   123    'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
   124 
   124 
   125   _PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation) + ']')
   125   _word_delimiter_regex = re.compile('[' + re.escape(string.punctuation) + ']')
   126 
   126 
   127   def __init__(self, kind_or_entity, *args, **kwargs):
   127   def __init__(self, kind_or_entity, word_delimiter_regex=None, *args,
       
   128                **kwargs):
   128     """Constructor. May be called as a copy constructor.
   129     """Constructor. May be called as a copy constructor.
   129 
   130 
   130     If kind_or_entity is a datastore.Entity, copies it into this Entity.
   131     If kind_or_entity is a datastore.Entity, copies it into this Entity.
   131     datastore.Get() and Query() returns instances of datastore.Entity, so this
   132     datastore.Get() and Query() returns instances of datastore.Entity, so this
   132     is useful for converting them back to SearchableEntity so that they'll be
   133     is useful for converting them back to SearchableEntity so that they'll be
   135     Otherwise, passes through the positional and keyword args to the
   136     Otherwise, passes through the positional and keyword args to the
   136     datastore.Entity constructor.
   137     datastore.Entity constructor.
   137 
   138 
   138     Args:
   139     Args:
   139       kind_or_entity: string or datastore.Entity
   140       kind_or_entity: string or datastore.Entity
   140     """
   141       word_delimiter_regex: a regex matching characters that delimit words
       
   142     """
       
   143     self._word_delimiter_regex = word_delimiter_regex
   141     if isinstance(kind_or_entity, datastore.Entity):
   144     if isinstance(kind_or_entity, datastore.Entity):
   142       self._Entity__key = kind_or_entity._Entity__key
   145       self._Entity__key = kind_or_entity._Entity__key
   143       self.update(kind_or_entity)
   146       self.update(kind_or_entity)
   144     else:
   147     else:
   145       super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs)
   148       super(SearchableEntity, self).__init__(kind_or_entity, *args, **kwargs)
   158       if not isinstance(values, list):
   161       if not isinstance(values, list):
   159         values = [values]
   162         values = [values]
   160       if (isinstance(values[0], basestring) and
   163       if (isinstance(values[0], basestring) and
   161           not isinstance(values[0], datastore_types.Blob)):
   164           not isinstance(values[0], datastore_types.Blob)):
   162         for value in values:
   165         for value in values:
   163           index.update(SearchableEntity._FullTextIndex(value))
   166           index.update(SearchableEntity._FullTextIndex(
       
   167               value, self._word_delimiter_regex))
   164 
   168 
   165     index_list = list(index)
   169     index_list = list(index)
   166     if index_list:
   170     if index_list:
   167       self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY] = index_list
   171       self[SearchableEntity._FULL_TEXT_INDEX_PROPERTY] = index_list
   168 
   172 
   169     return super(SearchableEntity, self)._ToPb()
   173     return super(SearchableEntity, self)._ToPb()
   170 
   174 
   171   @classmethod
   175   @classmethod
   172   def _FullTextIndex(cls, text):
   176   def _FullTextIndex(cls, text, word_delimiter_regex=None):
   173     """Returns a set of keywords appropriate for full text indexing.
   177     """Returns a set of keywords appropriate for full text indexing.
   174 
   178 
   175     See SearchableQuery.Search() for details.
   179     See SearchableQuery.Search() for details.
   176 
   180 
   177     Args:
   181     Args:
   179 
   183 
   180     Returns:
   184     Returns:
   181       set of strings
   185       set of strings
   182     """
   186     """
   183 
   187 
       
   188     if word_delimiter_regex is None:
       
   189       word_delimiter_regex = cls._word_delimiter_regex
       
   190 
   184     if text:
   191     if text:
   185       datastore_types.ValidateString(text, 'text', max_len=sys.maxint)
   192       datastore_types.ValidateString(text, 'text', max_len=sys.maxint)
   186       text = cls._PUNCTUATION_REGEX.sub(' ', text)
   193       text = word_delimiter_regex.sub(' ', text)
   187       words = text.lower().split()
   194       words = text.lower().split()
   188 
   195 
   189       words = set(unicode(w) for w in words)
   196       words = set(unicode(w) for w in words)
   190 
   197 
   191       words -= cls._FULL_TEXT_STOP_WORDS
   198       words -= cls._FULL_TEXT_STOP_WORDS
   204 
   211 
   205   Only searches over entities that were created and stored using the
   212   Only searches over entities that were created and stored using the
   206   SearchableEntity or SearchableModel classes.
   213   SearchableEntity or SearchableModel classes.
   207   """
   214   """
   208 
   215 
   209   def Search(self, search_query):
   216   def Search(self, search_query, word_delimiter_regex=None):
   210     """Add a search query. This may be combined with filters.
   217     """Add a search query. This may be combined with filters.
   211 
   218 
   212     Note that keywords in the search query will be silently dropped if they
   219     Note that keywords in the search query will be silently dropped if they
   213     are stop words or too short, ie if they wouldn't be indexed.
   220     are stop words or too short, ie if they wouldn't be indexed.
   214 
   221 
   219       # this query
   226       # this query
   220       SearchableQuery
   227       SearchableQuery
   221     """
   228     """
   222     datastore_types.ValidateString(search_query, 'search query')
   229     datastore_types.ValidateString(search_query, 'search query')
   223     self._search_query = search_query
   230     self._search_query = search_query
       
   231     self._word_delimiter_regex = word_delimiter_regex
   224     return self
   232     return self
   225 
   233 
   226   def _ToPb(self, limit=None, offset=None):
   234   def _ToPb(self, limit=None, offset=None):
   227     """Adds filters for the search query, then delegates to the superclass.
   235     """Adds filters for the search query, then delegates to the superclass.
   228 
   236 
   243         '%s is a reserved name.' % SearchableEntity._FULL_TEXT_INDEX_PROPERTY)
   251         '%s is a reserved name.' % SearchableEntity._FULL_TEXT_INDEX_PROPERTY)
   244 
   252 
   245     pb = super(SearchableQuery, self)._ToPb(limit=limit, offset=offset)
   253     pb = super(SearchableQuery, self)._ToPb(limit=limit, offset=offset)
   246 
   254 
   247     if hasattr(self, '_search_query'):
   255     if hasattr(self, '_search_query'):
   248       keywords = SearchableEntity._FullTextIndex(self._search_query)
   256       keywords = SearchableEntity._FullTextIndex(
       
   257           self._search_query, self._word_delimiter_regex)
   249       for keyword in keywords:
   258       for keyword in keywords:
   250         filter = pb.add_filter()
   259         filter = pb.add_filter()
   251         filter.set_op(datastore_pb.Query_Filter.EQUAL)
   260         filter.set_op(datastore_pb.Query_Filter.EQUAL)
   252         prop = filter.add_property()
   261         prop = filter.add_property()
   253         prop.set_name(SearchableEntity._FULL_TEXT_INDEX_PROPERTY)
   262         prop.set_name(SearchableEntity._FULL_TEXT_INDEX_PROPERTY)