def segByPunc(self):
    """Split the segmented comment into runs of tokens between delimiters.

    ``self.commentSentence`` is tokenised with ``jieba.cut``. Each token is
    paired with its position in the full token stream, and the stream is cut
    wherever a token equals one of the delimiter strings below. Delimiter
    tokens themselves are dropped, and empty runs are never emitted.

    Returns:
        A list of non-empty lists of ``(index, token)`` tuples, shaped like
        ``[[(0, tok0), (1, tok1)], [(3, tok3)], ...]``. The indices refer to
        positions in the complete token stream, so they skip over the
        delimiter tokens that were removed.
    """
    # NOTE(review): the repeated u'?' entries (and possibly u'/') look like
    # full-width CJK punctuation that was lost to a bad encoding round-trip.
    # The literals are kept exactly as found -- confirm the intended
    # characters against the original file.
    # A frozenset gives constant-time membership tests and absorbs the
    # duplicate entries.
    punctuation = frozenset([u'?', u'/', u'?', u'?', u'?', u' ', u'\''])

    wordSequenceList = []
    subWordSequenceList = []
    # enumerate() consumes the jieba generator directly; no intermediate
    # token list is needed because each token is visited exactly once.
    for wordTuple in enumerate(jieba.cut(self.commentSentence)):
        if wordTuple[1] in punctuation:
            # A delimiter closes the current run; consecutive delimiters
            # must not produce empty runs.
            if subWordSequenceList:
                wordSequenceList.append(subWordSequenceList)
                subWordSequenceList = []
        else:
            subWordSequenceList.append(wordTuple)

    # Flush the trailing run when the text does not end with a delimiter.
    if subWordSequenceList:
        wordSequenceList.append(subWordSequenceList)
    return wordSequenceList
#?????????????????????????
SentiAnalysis.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录