def get_questions_matrix(split):
    """Build the padded matrix of word indices for one split's questions.

    Each question string is tokenized with ``word_tokenize``, every token
    is looked up in the table returned by ``ebd.load_idx()``, and the
    resulting variable-length index sequences are handed to
    ``pad_sequences``.

    Args:
        split: ``'train'`` or ``'val'``; selects which pickled frame under
            ``data/`` is read.

    Returns:
        Whatever ``pad_sequences`` returns for the list of index sequences
        (presumably a 2-D integer array with one row per question --
        confirm against the ``pad_sequences`` implementation in use).

    Raises:
        SystemExit: if ``split`` is not one of the supported values. The
            message is printed to stdout and the exit status is 1.
    """
    data_paths = {
        'train': 'data/train_qa',
        'val': 'data/val_qa',
    }
    if split not in data_paths:
        print('Invalid split!')
        # Exit with a non-zero status so that callers and shell pipelines
        # can tell that the request failed.
        sys.exit(1)

    # NOTE(review): read_pickle unpickles arbitrary objects; only point this
    # at data files produced by a trusted source.
    df = pd.read_pickle(data_paths[split])
    questions = df['question'].tolist()

    # assumes load_idx() returns a dict-like word -> index table -- TODO confirm
    word_idx = ebd.load_idx()

    # Tokens missing from the table are mapped to index 0.
    seq_list = [
        [word_idx.get(word, 0) for word in word_tokenize(question)]
        for question in questions
    ]
    return pad_sequences(seq_list)
prepare_data.py 文件源码
python
阅读 38
收藏 0
点赞 0
评论 0
评论列表
文章目录