def get_score_for_question(question_answer_word_dir,question_num,k):
    """Rank every question's candidate answers with TF-IDF + LDA and report DCG.

    For each question id ``1 .. int(question_num)`` the file
    ``os.path.join(question_answer_word_dir, str(id))`` is read:

    * the first line is the tab-separated question; the tag token
      ``'question'`` is removed and the remaining fields are its words;
    * every following line is one candidate answer, tab-separated, whose
      second field is the answer's label (later converted with ``int``) and
      which contains the tag token ``'answer'``; the remaining fields are
      the answer's words.

    A gensim dictionary / TF-IDF / 2-topic LDA pipeline is fitted on that
    question's answers only, the answers are ordered by decreasing
    similarity to the question, and the resulting label sequence is scored
    with ``calu_DCG(labels, k)``.  The per-question scores are finally
    aggregated with ``calu_avg_answer_length`` and printed as ``DCG_avg``.

    Args:
        question_answer_word_dir: directory holding one file per question,
            named by its 1-based id.
        question_num: number of questions to process (anything ``int()``
            accepts).
        k: cut-off forwarded unchanged to ``calu_DCG``.

    Returns:
        None.  The aggregate score is only printed.

    Raises:
        ValueError: if a question line lacks the ``'question'`` token, an
            answer line lacks the ``'answer'`` token, or a label is not an
            integer literal.
        IndexError: if a non-blank answer line has fewer than two fields.
        IOError/OSError: if a question file cannot be opened.
    """
    DCG_score_list = []
    for question_id in range(1, int(question_num) + 1):
        # Progress marker on questions 1, 1001, 2001, ...
        if question_id % 1000 == 1:
            print('Now for line : ' + str(question_id) + '\n')

        file_read_name = os.path.join(question_answer_word_dir, str(question_id))

        answer_labels = []               # label string of the i-th answer
        answer_sentences_word_list = []  # word list of the i-th answer
        # The file is only read, so open it read-only and make sure the
        # handle is released before moving on to the next question.
        # NOTE: binary mode yields byte strings; the '\t' splitting below
        # relies on Python 2 ``str`` semantics.
        with open(file_read_name, 'rb') as file_read:
            question_words = file_read.readline().strip().split('\t')
            question_words.remove('question')

            for line in file_read.readlines():
                stripped = line.strip()
                if not stripped:
                    # A blank (e.g. trailing) line carries no answer.
                    continue
                answer_words = stripped.split('\t')
                answer_label = answer_words[1]
                answer_words.remove('answer')
                answer_words.remove(answer_label)
                answer_sentences_word_list.append(answer_words)
                answer_labels.append(answer_label)

        # Fit the vector-space pipeline on this question's answers only.
        dic = corpora.Dictionary(answer_sentences_word_list)
        corpus = [dic.doc2bow(text) for text in answer_sentences_word_list]
        tfidf = models.TfidfModel(corpus)
        corpus_tfidf = tfidf[corpus]
        # NOTE(review): no random seed is passed, so the LDA fit (and hence
        # the ranking) may differ between runs -- confirm this is acceptable.
        lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=2)
        similarity_index = similarities.MatrixSimilarity(lda[corpus_tfidf])

        # NOTE(review): the query is projected from its raw bag-of-words,
        # whereas the index above was built from TF-IDF-weighted vectors.
        # Kept as in the original; confirm whether tfidf[query_bow] was meant.
        query_bow = dic.doc2bow(question_words)
        query_lda = lda[query_bow]
        sims = similarity_index[query_lda]

        # Answer positions ordered by decreasing similarity to the question.
        sort_sims = sorted(enumerate(sims), key=lambda item: -item[1])
        answer_label_list = [int(answer_labels[position])
                             for position, _ in sort_sims]

        DCG_score_list.append(calu_DCG(answer_label_list, k))

    DCG_avg = calu_avg_answer_length(DCG_score_list)
    print('DCG_avg : \t' + str(DCG_avg))
评论列表
文章目录