def calculate_languages_ratios(text):
    """
    Count, per NLTK stopword language, the distinct stopwords found in text.

    The text is tokenized with ``wordpunct_tokenize`` and the tokens are
    lowercased and de-duplicated. For every identifier returned by
    ``stopwords.fileids()`` the result holds the number of those unique
    tokens that also appear in that language's stopword list.

    Note that, despite the function name, the values are absolute counts
    of matching unique words, not ratios.

    Returns a dict mapping each language identifier to its count.
    """
    # Lowercase and de-duplicate once so each language only costs a set
    # intersection.
    unique_words = {token.lower() for token in wordpunct_tokenize(text)}

    return {
        lang: len(unique_words.intersection(set(stopwords.words(lang))))
        for lang in stopwords.fileids()
    }
评论列表
文章目录