def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    """Write, for every training context, the response of its runner-up match.

    A pairwise score matrix is built from the dot products of the TF-IDF rows
    and then divided by ``mat_vector_2norm_squared(tfidf_vec)``.  For each row
    ``i`` the column holding the second-largest score is used to index
    ``train_responses_txt``.

    NOTE(review): the top score is skipped presumably because each document
    is expected to score highest against itself -- confirm.  With tied scores
    (e.g. duplicate contexts) the runner-up may still be the row's own index.
    NOTE(review): ``mat_vector_2norm_squared`` is defined outside this block;
    presumably it returns the squared L2 norms used for normalisation --
    verify its output shape against the division below.

    Side effects: prints diagnostic information (types, shapes, each context
    and its retrieved response) to stdout and overwrites ``output_file``.

    Args:
        tfidf_vec: Matrix-like object exposing ``toarray()``, one row per
            training example.
        train_contexts_txt: Sequence of context texts, indexed by row number.
        train_responses_txt: Sequence of response texts, indexed by row number.
        output_file: Path of the file the retrieved responses are written to.

    Raises:
        ValueError: If there is exactly one candidate per row, so that no
            runner-up exists.
    """
    print(type(tfidf_vec))
    dense = tfidf_vec.toarray()
    print(dense.shape)
    scores = np.dot(dense, dense.T)
    print(scores.shape)
    scores = scores / mat_vector_2norm_squared(dense)
    print(scores.shape)

    # argpartition(row, -2) needs at least two entries per row; fail early
    # with a clear message instead of numpy's "kth out of bounds" error.
    if len(scores) and scores.shape[-1] < 2:
        raise ValueError(
            "tfidf_retrieval needs at least two documents to pick a "
            "runner-up match, got %d" % scores.shape[-1]
        )

    response_list = []
    for i, row in enumerate(scores):
        # argpartition places the second-largest score at position -2 (its
        # final sorted position) and the largest after it, so the index at
        # [-2] is the runner-up without needing a full sort.
        runner_up = np.argpartition(row, -2)[-2]
        response = train_responses_txt[runner_up]
        response_list.append(response)
        print(train_contexts_txt[i])
        print(response)

    with open(output_file, 'w') as f1:
        # No separator is added: responses are written exactly as given, so
        # they are assumed to carry their own line terminators.
        f1.writelines(response_list)
# Comment list
# Table of contents