def sentenceToIndex(sentence, word2idx, maxLen):
    """
    Convert a sentence into a fixed-length list of vocabulary indices.

    The sentence is tokenized with ``jieba.cut`` and each of the first
    ``maxLen`` tokens is mapped to an index as follows:

    * a token present in ``word2idx`` maps to its own index;
    * otherwise, a token that starts with one or more digits maps to the
      ``"NUM"`` index (``len(word2idx)`` when ``"NUM"`` is not in the mapping);
    * any other token maps to the ``"UNKNOWN"`` index (``0`` when
      ``"UNKNOWN"`` is not in the mapping).

    Sentences with fewer than ``maxLen`` tokens are right-padded with the
    ``"UNKNOWN"`` index; longer sentences are truncated.

    :param sentence: text to convert; an empty (or ``None``) sentence yields
        a list made only of padding
    :param word2idx: mapping from token to integer index
    :param maxLen: length of the returned list; values <= 0 yield ``[]``
    :return: list of ``maxLen`` indices
    """
    # Without this guard the first token would be written into an empty list
    # and raise IndexError.
    if maxLen <= 0:
        return []

    unknown = word2idx.get("UNKNOWN", 0)
    # Pre-fill with the unknown index so that padding and out-of-vocabulary
    # tokens need no explicit assignment below.
    index = [unknown] * maxLen
    if not sentence:
        return index

    num = word2idx.get("NUM", len(word2idx))
    # Prefix match on purpose: tokens such as "2019年" count as numeric.
    starts_with_digits = re.compile(r"\d+").match

    # zip() stops after maxLen tokens, which truncates long sentences.
    for i, word in zip(range(maxLen), jieba.cut(sentence)):
        if word in word2idx:
            index[i] = word2idx[word]
        elif starts_with_digits(word):
            index[i] = num
    return index
# Web-page navigation residue (not part of the code): "评论列表" (comment list), "文章目录" (table of contents)