app/django/utils/stopwords.py
author Sverre Rabbelier <srabbelier@gmail.com>
Tue, 17 Feb 2009 16:36:49 +0000
changeset 1371 2d97dbbb4d99
parent 54 03e267d67478
permissions -rw-r--r--
Implemented simple group sorted using numerical ordering. See role.py for some example usages. Note that the reason ToS does not appear separately is due to the fact that it is defined as a regular field which does not take the group field from the one defined in the model. Patch by: Sverre Rabbelier

# Performance note: I benchmarked this code using a set instead of
# a list for the stopwords and was surprised to find that the list
# performed /better/ than the set - maybe because it's only a small
# list.

# Lower-case English words that strip_stopwords() drops from a sentence.
# Deliberately a list rather than a set.
stopwords = [
    'i', 'a', 'an', 'are', 'as',
    'at', 'be', 'by', 'for', 'from',
    'how', 'in', 'is', 'it', 'of',
    'on', 'or', 'that', 'the', 'this',
    'to', 'was', 'what', 'when', 'where',
]

def strip_stopwords(sentence, stop_list=None):
    """Remove stopwords from ``sentence`` and normalize its whitespace.

    The sentence is split on runs of whitespace, every word whose
    lower-cased form appears in the stopword list is dropped, and the
    remaining words are joined back together with single spaces.  Apart
    from lower-casing, words are compared exactly as they appear, so a
    word with punctuation attached (e.g. "the,") is kept.

    ``sentence`` is the string to filter.

    ``stop_list`` is an optional container of lower-case words to drop.
    When it is omitted (``None``) the module-level ``stopwords`` list is
    used; an explicitly passed empty container disables filtering and
    only normalizes whitespace.

    Returns a text (unicode) string holding the surviving words in their
    original order and case; it is empty when no word survives.
    """
    if stop_list is None:
        stop_list = stopwords
    kept = [
        word for word in sentence.split()
        if word.lower() not in stop_list
    ]
    return u' '.join(kept)