def jieba_contend_split(contend):
    """Segment ``contend`` with jieba and group the tokens into clauses.

    The text is tokenised with ``jieba.cut`` and every token is paired with
    its position in the token stream. The stream is then cut at each token
    that is a member of the delimiter set below: delimiter tokens are
    dropped, and a clause is only emitted when it holds at least one token,
    so consecutive delimiters never produce empty clauses.

    Args:
        contend: The text to segment.

    Returns:
        A list of clauses shaped ``[[(index, word), ...], ...]``. ``index``
        is the token's position in the full jieba output (delimiters are
        counted), so indices can skip values between clauses.
    """
    # NOTE(review): several entries are the same u'?' literal. This looks
    # like the original (presumably full-width CJK) punctuation was lost to
    # an encoding error -- confirm the intended characters upstream. The
    # values are kept exactly as found so behaviour is unchanged.
    punctuation = frozenset([u'?', u'/', u'?', u'?', u'?', u' ', u'\''])

    clauses = []
    current_clause = []
    # Tokenise the argument: this is a module-level function, so the former
    # ``self.commentSentence`` lookup raised NameError and ignored ``contend``.
    for indexed_word in enumerate(jieba.cut(contend)):
        if indexed_word[1] not in punctuation:
            current_clause.append(indexed_word)
        elif current_clause:
            # A delimiter closes the clause collected so far.
            clauses.append(current_clause)
            current_clause = []

    # Flush the trailing clause when the text does not end on a delimiter.
    if current_clause:
        clauses.append(current_clause)
    return clauses
ContendSplit.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录