app/django/utils/stopwords.py
author Lennard de Rijk <ljvderijk@gmail.com>
Thu, 30 Jul 2009 09:53:22 +0200
changeset 2688 dfe0439a0711
parent 54 03e267d67478
permissions -rw-r--r--
Added list of Surveys to the Project's manage page. This list will show wether or not a Survey has been taken and will also feature links to (re)take a Survey and in the near future to see the actual SurveyRecord. Note that the take Survey link is working however Org Admins will receive an access denied message when following this link.

# Performance note: I benchmarked this code using a set instead of
# a list for the stopwords and was surprised to find that the list
# performed /better/ than the set - maybe because it's only a small
# list.

stopwords = '''
i
a
an
are
as
at
be
by
for
from
how
in
is
it
of
on
or
that
the
this
to
was
what
when
where
'''.split()

def strip_stopwords(sentence):
    "Removes stopwords - also normalizes whitespace"
    words = sentence.split()
    sentence = []
    for word in words:
        if word.lower() not in stopwords:
            sentence.append(word)
    return u' '.join(sentence)