def _english_vocabulary():
    # Return the NLTK `words` corpus as a lower-cased frozenset, built once.
    # Converting the full word list to a set is expensive, so the result is
    # cached on the function object and reused by every later call.
    vocabulary = getattr(_english_vocabulary, "_cache", None)
    if vocabulary is None:
        # Lower-case the entries so the lookup is case-insensitive on both
        # sides; corpus entries that are capitalised would otherwise never
        # match the lower-cased tokens.
        vocabulary = frozenset(
            word.lower() for word in nltk.corpus.words.words()
        )
        _english_vocabulary._cache = vocabulary
    return vocabulary


def nonenglish(string):
    """Return *string* with non-English words removed.

    Useful for cleaning free text such as course syllabi. The text is split
    with ``nltk.wordpunct_tokenize`` and only tokens whose lower-cased form
    appears in the NLTK ``words`` corpus are kept. Tokens that are not in the
    word list (which includes punctuation runs) are dropped.

    Parameters:
        string: The text (e.g. concatenated descriptions) to filter.

    Returns:
        The kept tokens, in their original casing and order, joined by single
        spaces. An empty string is returned when nothing matches.

    Note:
        Requires the NLTK ``words`` corpus to be available; the vocabulary is
        loaded lazily on the first call and cached afterwards.
    """
    vocabulary = _english_vocabulary()
    kept = [
        token
        for token in nltk.wordpunct_tokenize(string)
        if token.lower() in vocabulary
    ]
    return " ".join(kept)
评论列表
文章目录