app/django/utils/stopwords.py
author Lennard de Rijk <ljvderijk@gmail.com>
Fri, 23 Jan 2009 09:08:26 +0000
changeset 913 db38e7680d1c
parent 54 03e267d67478
permissions -rw-r--r--
Added state property to role model. This can be used when for instance a member has been removed from a club or a when a program has been marked inactive. Certain roles would then be shown on the upcoming roles page marked as previous roles. This would give us the archiving capability that was shown in the mockup. Patch by: Lennard de Rijk Reviewd by: to-be-reviewed

# Performance note: I benchmarked this code using a set instead of
# a list for the stopwords and was surprised to find that the list
# performed /better/ than the set - maybe because it's only a small
# list.

stopwords = '''
i
a
an
are
as
at
be
by
for
from
how
in
is
it
of
on
or
that
the
this
to
was
what
when
where
'''.split()

def strip_stopwords(sentence):
    "Removes stopwords - also normalizes whitespace"
    words = sentence.split()
    sentence = []
    for word in words:
        if word.lower() not in stopwords:
            sentence.append(word)
    return u' '.join(sentence)