def bigram_predict(testSet, PP, PN, positive_probabilities, negative_probabilities, unseen_pos_prob, unseen_neg_prob):
    """Label each review '+' or '-' by comparing summed log10 bigram scores.

    Every review is tokenized with ``word_tokenize``, wrapped in a leading
    '*' and a trailing '$' marker, and split into adjacent token pairs with
    ``bigrams``. Two running log10 scores (positive and negative) start at
    the log of ``PP`` / ``PN`` and receive one term per bigram:

    1. If the key ``"<first> <second>"`` is present in BOTH probability
       tables, the log of its probability in each table is added.
    2. Otherwise, if the first token alone is present in BOTH tables, the
       log of that entry is added instead (a back-off lookup; presumably
       the tables also hold single-token entries -- confirm with the code
       that builds them).
    3. Otherwise the log of ``unseen_pos_prob`` / ``unseen_neg_prob`` is
       added.

    Args:
        testSet: Iterable of review texts; each item is passed to
            ``word_tokenize``.
        PP: Starting probability for the positive score (presumably the
            positive class prior).
        PN: Starting probability for the negative score (presumably the
            negative class prior).
        positive_probabilities: Mapping from key string to probability used
            for the positive score.
        negative_probabilities: Mapping from key string to probability used
            for the negative score.
        unseen_pos_prob: Probability added to the positive score when neither
            the bigram nor its first token is found in both tables.
        unseen_neg_prob: Same, for the negative score.

    Returns:
        A list with one label per review, in input order: '-' when the
        negative score is strictly greater, otherwise '+' (ties go to '+').

    Raises:
        ValueError: From ``math.log10`` if ``PP``, ``PN`` or any probability
            that is actually looked up is zero or negative.
    """
    # The starting scores do not depend on the review, so take the logs once.
    log_start_neg = math.log10(PN)
    log_start_pos = math.log10(PP)

    predicted_class = []
    for review in testSet:
        negative_probab = log_start_neg
        positive_probab = log_start_pos

        # '*' and '$' mark the start and end of the review so the first and
        # last real tokens each take part in a boundary bigram.
        review_words = ['*', *word_tokenize(review), '$']

        for first, second in bigrams(review_words):
            pair_key = first + " " + second
            if pair_key in negative_probabilities and pair_key in positive_probabilities:
                negative_probab += math.log10(negative_probabilities[pair_key])
                positive_probab += math.log10(positive_probabilities[pair_key])
            elif first in negative_probabilities and first in positive_probabilities:
                # Back off to the first token when the full bigram is not
                # available in both tables.
                negative_probab += math.log10(negative_probabilities[first])
                positive_probab += math.log10(positive_probabilities[first])
            else:
                # Kept lazy on purpose: the unseen probabilities are only
                # required to be valid if this branch is actually reached.
                negative_probab += math.log10(unseen_neg_prob)
                positive_probab += math.log10(unseen_pos_prob)

        predicted_class.append('-' if negative_probab > positive_probab else '+')
    return predicted_class
# Comment list
# Table of contents