def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
    """Build padded id sequences and one-hot labels from an Excel test set.

    The sheet is read without a header row. Each cell of ``col_content`` is
    segmented with ``jieba``; tokens that are stopwords, blank, or absent
    from ``dictionary.index`` are dropped, and the remaining tokens are
    mapped to ids through ``dictionary['id']``.

    Args:
        dictionary: Table indexed by UTF-8 encoded token, with an ``'id'``
            column (presumably a pandas DataFrame -- confirm with the caller).
        file_name: Path of the Excel file to load.
        tag_num: Number of classes, i.e. the width of the one-hot matrix.
        col_tag: Column holding the class label of each row.
        col_content: Column holding the raw text of each row.
        length: ``maxlen`` forwarded to ``sequence.pad_sequences``.

    Returns:
        A tuple ``(x, y)``. ``x`` holds the padded id sequences, one row per
        sample. ``y`` is a float array of shape ``(n_samples, tag_num)`` with
        a 1 in column ``j`` when the row's label equals ``j``; a row whose
        label matches no value in ``range(tag_num)`` stays all zero.
    """
    contents = pd.read_excel(file_name, header=None)

    def tokenize(text):
        # NOTE(review): tokens are encoded to UTF-8 bytes before the lookup,
        # which looks like Python 2 era string handling. On Python 3 this
        # only matches if dictionary.index really holds bytes -- confirm.
        tokens = []
        for word in jieba.cut(text):
            if word in stopwords or word.strip() == '':
                continue
            encoded = word.encode('utf-8')
            if encoded in dictionary.index:
                tokens.append(encoded)
        return tokens

    token_ids = dictionary['id']
    id_sequences = [
        list(token_ids[tokenize(text)]) for text in contents[col_content]
    ]

    print("Pad sequences (samples x time)")
    # Use the padded matrix directly instead of storing it in a DataFrame
    # column as a list of rows and rebuilding the array afterwards.
    x = np.asarray(sequence.pad_sequences(id_sequences, maxlen=length))

    # One broadcast comparison replaces the nested per-cell Python loop:
    # y[i, j] is 1.0 exactly when label i equals class index j.
    labels = np.asarray(contents[col_tag])
    y = (labels[:, None] == np.arange(tag_num)).astype(float)
    return x, y
# dictionary model ????
extract_feature.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录