def transfer_corpus(sents):
    """Convert tagged sentences into fixed-width integer id matrices.

    Every sentence is turned into one row of word ids and one row of label
    ids. Rows are exactly ``max_length`` (40) columns wide: shorter sentences
    are filled up with the filler ids, and longer sentences are truncated so
    that they no longer raise a shape-mismatch ``ValueError``.

    Args:
        sents: Sized sequence of sentences. Each sentence is a sized iterable
            of items where ``item[0]`` is looked up in the word mapping and
            ``item[1]`` is looked up in the module-level ``label_str``.

    Returns:
        A ``(sentence, label, lengths)`` tuple of ``int32`` arrays:
        ``sentence`` and ``label`` have shape ``(len(sents), 40)`` and
        ``lengths`` has shape ``(len(sents),)``. Each length is the number of
        real (non-filler) entries in the matching row, so it is capped at 40.

    Raises:
        Whatever the word mapping or ``label_str`` raise for an unknown key
        (presumably ``KeyError`` -- confirm against their definitions).
    """
    # NOTE(review): invert_dict presumably flips the loaded dictionary into a
    # word -> id mapping; it is defined outside this block -- confirm.
    words_dict = invert_dict(corpora.Dictionary.load('words.dict'))

    max_length = 40
    # Filler ids written past the end of each sentence.
    # NOTE(review): presumably the padding word id / padding label id --
    # confirm against the vocabulary and label_str.
    pad_word = 28782
    pad_label = 8

    count = len(sents)
    # Start from rows that are entirely filler, then overwrite the prefix.
    sentence = numpy.full((count, max_length), pad_word, dtype=numpy.int32)
    label = numpy.full((count, max_length), pad_label, dtype=numpy.int32)
    lengths = numpy.zeros(count, dtype=numpy.int32)

    for i, current_sent in enumerate(sents):
        # Truncate over-long sentences instead of failing on assignment.
        kept = list(current_sent)[:max_length]
        used = len(kept)
        lengths[i] = used
        # Ids are stored as int32 directly; a float32 round-trip would lose
        # precision for ids of 2**24 and above.
        sentence[i, :used] = numpy.asarray(
            [words_dict[item[0]] for item in kept], dtype=numpy.int32)
        label[i, :used] = numpy.asarray(
            [label_str[item[1]] for item in kept], dtype=numpy.int32)

    return sentence, label, lengths
# Dataset sizes:
#   train (train_ + valid_) = 16551
#   test = 3327
# (Web-page residue, not part of the code: 评论列表 "comment list", 文章目录 "article contents".)