# utility.py source file - Python code snippet

def _ngram_presence(ngram_fn, sequence, vocabulary, orders=(1, 2, 3)):
    """Return a (1, len(vocabulary)) 0/1 row marking which n-grams occur.

    ``ngram_fn(sequence, n)`` is called once per value in ``orders``; every
    gram it yields is joined with single spaces.  The result holds 1 at
    position ``i`` when ``vocabulary[i]`` equals one of those joined strings
    and 0 otherwise, in ``vocabulary`` order.

    NOTE(review): vocabulary entries are looked up in a set, so they must be
    hashable (presumably space-joined strings) -- confirm against where the
    ``common_*_grams`` lists are built.
    """
    # Build the lookup once; a set keeps each membership test O(1) instead of
    # rescanning a list for every vocabulary entry.
    present = {' '.join(gram) for n in orders for gram in ngram_fn(sequence, n)}
    flags = [1 if entry in present else 0 for entry in vocabulary]
    return numpy.array(flags).reshape(1, len(flags))


def naive_bayes(analysis):
    """Classify a parsed text with three n-gram presence feature sets.

    The 'pos', 'tokens' and 'deps_cc' entries of every item in
    ``analysis["sentences"]`` are concatenated across sentences.  Three binary
    feature rows are then built from 1- to 3-grams of:

    1. the whitespace-split output of ``normalize_title(tags, words)``,
       checked against ``common_grams``;
    2. the POS tags, checked against ``common_pos_grams``;
    3. ``syntactic_n_gram`` over the dependencies, checked against
       ``common_sn_grams``.

    Returns a list with the first prediction of ``clf1``, ``clf2`` and
    ``clf3`` for feature rows 1, 2 and 3 respectively.
    """
    tags = []
    words = []
    deps_cc = []
    for sen in analysis["sentences"]:
        tags += sen['pos']
        words += sen['tokens']
        deps_cc += sen["deps_cc"]

    # Split the normalized title once and reuse the tokens for every order.
    norm_tokens = normalize_title(tags, words).split()

    # word ngrams
    f1 = _ngram_presence(nltk.ngrams, norm_tokens, common_grams)

    # pos ngrams
    f2 = _ngram_presence(nltk.ngrams, tags, common_pos_grams)

    # syntactic ngrams
    f3 = _ngram_presence(syntactic_n_gram, deps_cc, common_sn_grams)

    return [clf1.predict(f1)[0], clf2.predict(f2)[0], clf3.predict(f3)[0]]