def transform(self, documents):
    """
    Lazily produce one dictionary of simple text statistics per document,
    in a form suitable for feeding into a DictVectorizer.

    Each document is walked as paragraphs -> sentences -> items, and the
    first element of every item is tallied (presumably each item is a
    (token, tag) pair; confirm against the caller). Documents and their
    paragraphs must support ``len()``.

    Yields a dict with the keys 'paragraphs', 'sentences', 'words'
    (total number of items seen) and 'vocab' (number of distinct first
    elements).
    """
    for doc in documents:
        # Tally every occurrence of each item's first element
        token_freq = Counter()
        for paragraph in doc:
            for sentence in paragraph:
                for tagged in sentence:
                    token_freq[tagged[0]] += 1

        # Size measurements, taken in the same order as the output keys
        n_paragraphs = len(doc)
        n_sentences = sum(map(len, doc))
        n_words = sum(token_freq.values())
        n_vocab = len(token_freq)

        # Emit the per-document feature mapping
        yield {
            'paragraphs': n_paragraphs,
            'sentences': n_sentences,
            'words': n_words,
            'vocab': n_vocab,
        }
##########################################################################
## Model Building Functions
##########################################################################
# NOTE: web-page residue, not part of the source: "Comment list" / "Table of contents"