def process(file_name):
    """Tokenize a text file with jieba and encode it as an integer array.

    The file content is obtained through the module's ``read`` helper
    (presumably returning the whole file as one string -- confirm against
    its definition), segmented with ``jieba.lcut(..., cut_all=False)``, and
    a single newline token is appended to the token list.

    Token ids are assigned by enumerating the *sorted* vocabulary, so the
    same input text always yields the same ids, independent of Python's
    per-process string hash randomization.

    Args:
        file_name: Path (or whatever identifier ``read`` accepts) of the
            text to process.

    Returns:
        A 4-tuple ``(data, word2int, int2word, vocab)``:
        ``data`` is a 1-D ``np.int32`` array with one id per token,
        ``word2int`` maps token -> id, ``int2word`` maps id -> token, and
        ``vocab`` is the set of distinct tokens.
    """
    content = read(file_name)
    words = jieba.lcut(content, cut_all=False)
    # Append a newline token so '\n' is always part of the vocabulary.
    words = words + ['\n']
    vocab = set(words)

    # Iterating a set of strings follows hash order, which changes between
    # interpreter runs. Sorting pins the id assignment so that encoded data
    # and anything trained on it stay valid when the vocabulary is rebuilt.
    ordered_vocab = sorted(vocab)
    word2int = {w: i for i, w in enumerate(ordered_vocab)}
    int2word = dict(enumerate(ordered_vocab))

    data = np.array([word2int[w] for w in words], dtype=np.int32)
    return data, word2int, int2word, vocab
评论列表
文章目录