Skip to content

Commit

Permalink
add unit tests where we don't ignore stopwords
Browse files Browse the repository at this point in the history
  • Loading branch information
greenat92 committed Nov 17, 2017
1 parent 49d76a2 commit c818bb5
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions nltk/test/unit/test_stem.py
Expand Up @@ -15,6 +15,7 @@ def test_arabic(self):
Unit test for the Snowball Arabic light stemmer.
This stemmer deals with prefixes and suffixes.
"""
# Test with ignore_stopwords=True.
ar_stemmer = SnowballStemmer("arabic", True)
assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
assert ar_stemmer.stem("العربية") == "عرب"
Expand All @@ -25,6 +26,18 @@ def test_arabic(self):
assert ar_stemmer.stem("الطالبون") == "طالب"
assert ar_stemmer.stem("اللذان") == "اللذان"
assert ar_stemmer.stem("من") == "من"
# Test with ignore_stopwords=False.
ar_stemmer = SnowballStemmer("arabic", False)
assert ar_stemmer.stem("اللذان") == "اللذ" # this is a stop word
assert ar_stemmer.stem("الطالبات") == "طالب"
assert ar_stemmer.stem("الكلمات") == "كلم"
# Test creating the Arabic stemmer without passing a value for ignore_stopwords.
ar_stemmer = SnowballStemmer("arabic")
assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
assert ar_stemmer.stem("العربية") == "عرب"
assert ar_stemmer.stem("فقالوا") == "قال"
assert ar_stemmer.stem("الطالبات") == "طالب"
assert ar_stemmer.stem("الكلمات") == "كلم"

def test_russian(self):
# Russian words both consisting of Cyrillic
Expand Down

0 comments on commit c818bb5

Please sign in to comment.