import numpy as np
from keras.preprocessing.sequence import pad_sequences


def vectorizeData(xContext, xQuestion, xAnswerBegin, xAnswerEnd, word_index,
                  context_maxlen, question_maxlen):
    '''Map words to their vocabulary indices, pad contexts to the maximum
    context length and questions to the maximum question length.
    The answer-span vectors are padded to the maximum context length as well.
    '''
    X = []
    Xq = []
    YBegin = []
    YEnd = []
    for i in range(len(xContext)):
        # Replace each word with its index in the vocabulary.
        x = [word_index[w] for w in xContext[i]]
        xq = [word_index[w] for w in xQuestion[i]]
        # Map the first and last words of the answer span to one-hot vectors
        # over the (unpadded) context positions.
        y_Begin = np.zeros(len(xContext[i]))
        y_Begin[xAnswerBegin[i]] = 1
        y_End = np.zeros(len(xContext[i]))
        y_End[xAnswerEnd[i]] = 1
        X.append(x)
        Xq.append(xq)
        YBegin.append(y_Begin)
        YEnd.append(y_End)
    return (pad_sequences(X, maxlen=context_maxlen, padding='post'),
            pad_sequences(Xq, maxlen=question_maxlen, padding='post'),
            pad_sequences(YBegin, maxlen=context_maxlen, padding='post'),
            pad_sequences(YEnd, maxlen=context_maxlen, padding='post'))
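A minimal usage sketch follows; the toy vocabulary, the single context/question pair, and the answer-span indices are illustrative assumptions, not data from the original project.

if __name__ == "__main__":
    # Toy vocabulary; index 0 is reserved for padding.
    word_index = {'the': 1, 'cat': 2, 'sat': 3, 'where': 4, 'is': 5}
    contexts = [['the', 'cat', 'sat']]
    questions = [['where', 'is', 'the', 'cat']]
    answer_begin = [1]  # answer span starts at token 'cat'
    answer_end = [1]    # and ends at the same token
    X, Xq, YBegin, YEnd = vectorizeData(contexts, questions, answer_begin,
                                        answer_end, word_index,
                                        context_maxlen=10, question_maxlen=6)
    print(X.shape, Xq.shape, YBegin.shape, YEnd.shape)
    # -> (1, 10) (1, 6) (1, 10) (1, 10)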
# Note: the GloVe pre-trained embedding files must be downloaded and unzipped
# into the same directory as this script.
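The note above does not show how the GloVe file is consumed. Below is a hedged sketch of one common way to turn it into an embedding matrix keyed by word_index; the file name glove.6B.100d.txt, the dimension of 100, and the assumption that word indices run from 1 to len(word_index) are all illustrative, not taken from the original script.

def load_glove_embeddings(word_index, glove_path='glove.6B.100d.txt', dim=100):
    # Row 0 stays all zeros for the padding index.
    embedding_matrix = np.zeros((len(word_index) + 1, dim))
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            word, vector = parts[0], np.asarray(parts[1:], dtype='float32')
            if word in word_index:
                embedding_matrix[word_index[word]] = vector
    return embedding_matrix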