# evaluation.py source file - Python code snippet

def _read_first_lines(path, limit, message):
    """Return up to ``limit`` stripped lines from the start of a text file.

    Args:
        path: Path of the text file to read.
        limit: Maximum number of lines to read from the top of the file.
        message: Progress message printed once the file has been opened.

    Returns:
        A list of at most ``limit`` strings with surrounding whitespace
        removed. The list is shorter than ``limit`` when the file has fewer
        lines; no exception is raised in that case.
    """
    from itertools import islice

    with open(path) as handle:
        print(message)
        # islice stops quietly at end-of-file, unlike repeated next() calls,
        # which raise StopIteration on a file shorter than `limit`.
        return [line.strip() for line in islice(handle, limit)]


def main():
    """Score predicted headlines against the true ones and save the best.

    Reads the first ``number_of_lines_read`` lines of the article, true
    headline and predicted headline files, passes the two headline lists to
    ``getBLEUscore``, prints the returned average score, and writes the 100
    rows with the highest BLEU score to ``BLEU.txt``.
    """
    desired_width = 600
    pd.set_option('display.width', desired_width)

    # specify sentence/true headline/predicted headline path.
    sentence_path = './dataset/test_enc.txt'
    true_headline_path = "./dataset/test_dec.txt"
    predicted_headline_path = "./output/predicted_test_headline.txt"

    # specify number of lines to read.
    number_of_lines_read = 400

    true_headline = _read_first_lines(
        true_headline_path, number_of_lines_read, "reading actual headlines...")
    predicted_headline = _read_first_lines(
        predicted_headline_path, number_of_lines_read, "reading predicted headlines...")
    sentence = _read_first_lines(
        sentence_path, number_of_lines_read, "reading sentences...")

    # The three lists are paired up by index below, so keep only the rows
    # that exist in every file when one of them is shorter than requested.
    available = min(len(true_headline), len(predicted_headline), len(sentence))
    if available < number_of_lines_read:
        print("warning: only %d aligned lines available (requested %d)"
              % (available, number_of_lines_read))
        true_headline = true_headline[:available]
        predicted_headline = predicted_headline[:available]
        sentence = sentence[:available]

    # NOTE: an empty predicted headline will cause an error while calculating
    # BLEU; print the suspicious index from both headline lists to debug.
    # For a quick manual check, substitute single-element lists such as
    # ["F1's Schumacher Slams Into Wall"] / ["Schumacher Crashes in Practice"].
    BLEUscore, avgBLEUscore = getBLEUscore(true_headline, predicted_headline)
    print("average BLEU score: %f" % avgBLEUscore)

    summary = list(zip(BLEUscore, predicted_headline, true_headline, sentence))
    df = pd.DataFrame(data=summary, columns=['BLEU score', 'Predicted headline', 'True headline', 'article'])
    df_sortBLEU = df.sort_values('BLEU score', ascending=False)

    # Store the top 100 predicted headline in terms of BLEU score
    output_file = 'BLEU.txt'
    record_separator = '\n-------------------------------------------------\n'
    top_rows = df_sortBLEU.head(100)
    try:
        # pandas >= 1.5 spells the keyword `lineterminator`; the old
        # `line_terminator` spelling was removed in pandas 2.0.
        top_rows.to_csv(output_file, sep='\n', index=False,
                        lineterminator=record_separator)
    except TypeError:
        # Older pandas releases only accept the original keyword name.
        top_rows.to_csv(output_file, sep='\n', index=False,
                        line_terminator=record_separator)
    print("Finished creating results summary in %s!" %output_file)