app/django/utils/stopwords.py
author Sverre Rabbelier <sverre@rabbelier.nl>
Fri, 15 May 2009 23:05:13 +0200
changeset 2319 3eee2308f1dd
parent 54 03e267d67478
permissions -rw-r--r--
Do not rely on notification module being imported. This has worked so far mostly by accident, but it turned out to be brittle while writing tests. This makes sure that the notification module is always imported before use.

# Performance note: I benchmarked this code using a set instead of
# a list for the stopwords and was surprised to find that the list
# performed /better/ than the set - maybe because it's only a small
# list.

# Lowercase words that strip_stopwords() filters out of a sentence.
stopwords = [
    'i', 'a', 'an', 'are', 'as',
    'at', 'be', 'by', 'for', 'from',
    'how', 'in', 'is', 'it', 'of',
    'on', 'or', 'that', 'the', 'this',
    'to', 'was', 'what', 'when', 'where',
]

def strip_stopwords(sentence):
    """Return sentence with stopwords removed and whitespace normalized.

    The sentence is split on whitespace; every word whose lowercased form
    appears in the module-level ``stopwords`` list is dropped, and the
    remaining words (original casing kept) are joined with single spaces.
    """
    kept = [token for token in sentence.split()
            if token.lower() not in stopwords]
    return u' '.join(kept)