def tagged_abstracts(RS_pmids_tokenizedabstracts_dict):
    """Part-of-speech tag every tokenized abstract with ``nltk.pos_tag``.

    Args:
        RS_pmids_tokenizedabstracts_dict: Mapping of RS ID to an inner
            mapping of ``pmid -> items``. Each element of ``items`` is
            handed to ``nltk.pos_tag`` in its own call.

    Returns:
        A new dict with the same ``RS ID -> pmid`` nesting, where every
        pmid maps to a list holding the ``nltk.pos_tag`` result for each
        element of the corresponding input, in input order. The input
        mapping is not modified.
    """
    # NOTE(review): nltk.pos_tag is documented as taking a list of tokens
    # per call, so each element here is presumably a token list (e.g. one
    # sentence). If the elements are single token strings instead, this
    # probably tags individual characters -- confirm against the tokenizer
    # that produces the input.
    return {
        rs_id: {
            pmid: [nltk.pos_tag(item) for item in items]
            for pmid, items in abstracts_by_pmid.items()
        }
        for rs_id, abstracts_by_pmid in RS_pmids_tokenizedabstracts_dict.items()
    }
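# [web-page residue, not part of the program] Comment list
# [web-page residue, not part of the program] Table of contents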