extract_feature.py 文件源码-python代码片段

extract_feature.py 文件源码

python

阅读 27 收藏 0 点赞 0 评论 0

项目：Book_DeepLearning_Practice 作者: wac81 项目源码文件源码

def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
    """Build padded word-id sequences and one-hot labels from an Excel sheet.

    The sheet is read without a header row. Every cell of ``col_content`` is
    segmented with ``jieba.cut``; a word is dropped when it is in
    ``stopwords``, is whitespace-only, or its UTF-8 encoding is not found in
    ``dictionary.index``. The surviving (encoded) words are mapped through
    ``dictionary['id']`` and the resulting id lists are passed to
    ``sequence.pad_sequences`` with ``maxlen=length``.

    Args:
        dictionary: Object supporting ``in dictionary.index`` and
            ``dictionary['id'][list_of_words]`` (presumably a pandas
            DataFrame indexed by UTF-8 encoded words -- TODO confirm against
            the code that builds it).
        file_name: Path handed to ``pd.read_excel``.
        tag_num: Number of label classes; width of the one-hot matrix.
        col_tag: Column of the sheet that holds the label.
        col_content: Column of the sheet that holds the text.
        length: ``maxlen`` forwarded to ``sequence.pad_sequences``.

    Returns:
        Tuple ``(x, y)``. ``x`` is the array of padded id sequences, one row
        per sheet row. ``y`` is a float array of shape ``(rows, tag_num)``
        with ``y[i, j] == 1`` when the label of row ``i`` equals ``j``; a
        row whose label matches no ``j`` in ``range(tag_num)`` stays all zero.
    """
    contents = pd.read_excel(file_name, header=None)

    def tokenize(text):
        # Same filter order as before (stopword, blank, dictionary), but the
        # UTF-8 encoding is computed once per word instead of twice.
        kept = []
        for word in jieba.cut(text):
            if word in stopwords or word.strip() == '':
                continue
            encoded = word.encode('utf-8')
            if encoded in dictionary.index:
                kept.append(encoded)
        return kept

    # Fetch the id column once rather than once per row.
    id_lookup = dictionary['id']

    words = contents[col_content].apply(tokenize)
    # Map each token list to the list of ids stored in the dictionary.
    sents = words.apply(lambda tokens: list(id_lookup[tokens]))

    print("Pad sequences (samples x time)")
    # Use the padded result directly; no need to round-trip it through a
    # DataFrame column and a Python list.
    x = np.asarray(sequence.pad_sequences(sents, maxlen=length))

    # Materialise the label column once. read_excel without index_col yields
    # a default 0..n-1 index, so positional order matches the former
    # label-based ``contents[col_tag][i]`` lookups.
    labels = contents[col_tag].tolist()
    y = np.zeros((len(labels), tag_num))
    for row, label in enumerate(labels):
        # Keep the equality test (rather than using the label as an index)
        # so float labels such as 1.0 still match and out-of-range or
        # non-numeric labels simply leave the row all zero.
        for j in range(tag_num):
            if label == j:
                y[row, j] = 1
    return x, y


# dictionary model (the rest of this comment was lost to a character-encoding error)