def _lookup_word_id(word, word2index):
    """Return the id of *word* in *word2index*, or the id stored under "UNK".

    The token is first looked up exactly as read from the file. Because the
    file is read in binary mode, tokens are ``bytes`` on Python 3; if the raw
    token is missing, its decoded text form is tried before giving up, so a
    vocabulary keyed by ``str`` still matches.
    """
    if word in word2index:
        return word2index[word]
    # On Python 2 ``bytes is str``, so this branch is skipped and the lookup
    # behaves exactly as before. UTF-8 is assumed here -- confirm against the
    # corpus encoding. Undecodable bytes are replaced rather than raised so
    # that such tokens simply fall through to "UNK".
    if not isinstance(word, str):
        text = word.decode("utf-8", "replace")
        if text in word2index:
            return word2index[text]
    # Looked up lazily: a vocabulary without "UNK" only fails when an
    # out-of-vocabulary token is actually encountered.
    return word2index["UNK"]


def build_tensor(filename, numrecs, word2index, maxlen,
                 make_categorical=False):
    """Convert a file of whitespace-separated tokens into padded id sequences.

    Each line of *filename* is stripped, split on whitespace and every token
    is mapped to its id in *word2index*; tokens that are not in the mapping
    get the id stored under the key "UNK".

    Args:
        filename: Path of the file to read, one record per line.
        numrecs: Number of slots allocated for records. The file must not
            contain more lines than this.
        word2index: Mapping from token to integer id. It must contain the
            key "UNK" if any token can be missing from it.
        maxlen: Forwarded to ``sequence.pad_sequences`` as ``maxlen``.
        make_categorical: When true, each record's id list is passed through
            ``np_utils.to_categorical`` with ``num_classes=len(word2index)``
            before being stored.

    Returns:
        Whatever ``sequence.pad_sequences(data, maxlen=maxlen)`` returns
        (presumably the Keras padding helper -- confirm against the file's
        imports).

    Raises:
        IndexError: If the file has more than *numrecs* lines.
        KeyError: If an unknown token is met and "UNK" is not in
            *word2index*.
    """
    # Object array: one slot per record, each holding that record's ids.
    # Slots beyond the last line read stay ``None``.
    data = np.empty((numrecs, ), dtype=list)
    # ``with`` guarantees the handle is closed even if a record fails.
    with open(filename, "rb") as fin:
        for i, line in enumerate(fin):
            wids = [_lookup_word_id(word, word2index)
                    for word in line.strip().split()]
            if make_categorical:
                data[i] = np_utils.to_categorical(
                    wids, num_classes=len(word2index))
            else:
                data[i] = wids
    return sequence.pad_sequences(data, maxlen=maxlen)
pos-tagging-explore.py 文件源码
python
阅读 48
收藏 0
点赞 0
评论 0
评论列表
文章目录