def clean_terms(terms, stopwords=None, lemmatize=None, stem=None, only_N_J=None):
    """Apply optional clean-up steps to a sequence of terms.

    The enabled steps run in a fixed order: stopword removal,
    part-of-speech filtering, lemmatization, then stemming.

    Args:
        terms: Iterable of term strings to clean.
        stopwords: Collection of terms to drop. ``None`` skips the step.
        lemmatize: Truthy to lemmatize each term with ``WordNetLemmatizer``.
        stem: Truthy to stem each term with ``PorterStemmer``.
        only_N_J: Truthy to keep only terms whose POS tag (from
            ``nltk.pos_tag``) is in the module-level ``tags`` collection.

    Returns:
        The cleaned terms. When no step is enabled, ``terms`` is returned
        unchanged (the same object); otherwise a new list is returned.
    """
    if stopwords is not None:
        terms = [term for term in terms if term not in stopwords]

    # The switches below are tested by truthiness rather than ``is not None``
    # so that an explicit ``False`` disables the step instead of enabling it.
    if only_N_J:
        # Keep only terms whose POS tag appears in ``tags``.
        # NOTE(review): ``tags`` is defined outside this function; judging by
        # the parameter name it presumably holds noun/adjective tags - confirm.
        tagged = nltk.pos_tag(terms)
        terms = [term for term, pos in tagged if pos in tags]

    if lemmatize:
        lemmatizer = WordNetLemmatizer()
        terms = [lemmatizer.lemmatize(term) for term in terms]

    if stem:
        # Separate local name so the ``stem`` argument is not shadowed.
        stemmer = PorterStemmer()
        terms = [stemmer.stem(term) for term in terms]

    return terms
graph_of_words.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录