def getredundantComponents(sentences, window_size=4):
    """Collect word windows from the start, middle and end of each sentence.

    Every sentence is tokenized with ``WordPunctTokenizer`` (expected to be
    in scope at module level; presumably NLTK's -- confirm against the
    file's imports) and three space-joined snippets are extracted:

    * intro: the first ``window_size`` tokens,
    * mid: up to ``2 * (window_size // 2)`` tokens centred on the token at
      index ``len(tokens) // 2``,
    * end: the last ``window_size`` tokens.

    Sentences shorter than a window simply yield all the tokens that fall
    inside it; an empty sentence yields three empty strings.

    Args:
        sentences: Iterable of sentence strings.
        window_size: Number of tokens per window. Defaults to 4, the value
            that used to be hard-coded.

    Returns:
        A tuple ``(intro_list, mid_list, end_list)`` of three lists of
        strings, each with one entry per input sentence, in input order.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    # Loop invariants: build the tokenizer and the half-window once.
    tokenizer = WordPunctTokenizer()
    half_window = window_size // 2

    intro_list = []
    mid_list = []
    end_list = []
    for sent in sentences:
        words = tokenizer.tokenize(sent)
        n_words = len(words)
        m_point = n_words // 2

        # Clamp at 0: a negative slice start would wrap around to the end
        # of the list and return the wrong tokens for short sentences.
        mid_start = max(0, m_point - half_window)
        # Computed explicitly (instead of words[-window_size:]) so that a
        # window size of 0 gives an empty window rather than the whole list.
        end_start = max(0, n_words - window_size)

        intro_list.append(' '.join(words[:window_size]))
        mid_list.append(' '.join(words[mid_start:m_point + half_window]))
        end_list.append(' '.join(words[end_start:]))
    return intro_list, mid_list, end_list
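# NOTE: web-page residue from the original listing ("Comment list" /
# "Article table of contents" headings); not part of the program.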