def demo_sent_subjectivity(text):
    """
    Classify a single sentence as subjective or objective using a stored
    SentimentAnalyzer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp
    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        # `load` is expected to be a module-level nltk.data.load
        sentim_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    print(sentim_analyzer.classify(tokenized_text))
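The bare ``load`` and ``demo_subjectivity`` above are module-level names; this helper matches the one in ``nltk.sentiment.util``, where ``load`` is ``nltk.data.load`` and ``demo_subjectivity`` is a sibling training helper. Assuming that context, a minimal driver looks like:

# Minimal driver sketch, assuming the module-level imports used by
# nltk/sentiment/util.py (where this helper appears to originate)
# and an installed nltk_data directory.
from nltk.data import load                          # resolves 'sa_subjectivity.pickle'
from nltk.sentiment.util import demo_subjectivity   # fallback trainer

demo_sent_subjectivity("The movie was a breathtaking, emotional ride.")
# prints 'subj' or 'obj', depending on the stored classifier's labels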
Python classify() example source code
classifiers.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    """
    try:
        self.classifier = self.nltk_class.train(self.train_features,
                                                *args, **kwargs)
        return self.classifier
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
def ment(text):
    try:
        doc = pickle.load(open("pickle/doc.pickle", "rb"))
    except FileNotFoundError:  # pickles have not been built yet
        print("Pickles missing!")
        print("Program will now construct pickles, this may take some time.")
        trainClassifier().train()
        doc = pickle.load(open("pickle/doc.pickle", "rb"))
    wordFeat = pickle.load(open("pickle/wordFeat.pickle", "rb"))
    featSet = pickle.load(open("pickle/featSet.pickle", "rb"))
    ONB = pickle.load(open("pickle/ONB.pickle", "rb"))
    MNB = pickle.load(open("pickle/MNB.pickle", "rb"))
    BNB = pickle.load(open("pickle/BNB.pickle", "rb"))
    LR = pickle.load(open("pickle/LR.pickle", "rb"))
    LSVC = pickle.load(open("pickle/LSVC.pickle", "rb"))
    SGDC = pickle.load(open("pickle/SGDC.pickle", "rb"))
    vote = sent(ONB, MNB, BNB, LR, LSVC, SGDC)
    feats = sent().featureFind(text, wordFeat)
    out = vote.conf(feats) * 100
    # out = str(out) + "%"
    return vote.classify(feats), out
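Given the project's ``sent`` vote classifier and ``trainClassifier`` (not shown here), a call returns a label plus a vote percentage:

# Hypothetical call; the labels depend on how the pickled classifiers were trained.
label, confidence = ment("This was a great film with a stunning cast")
print(label, "%.0f%%" % confidence)   # e.g. "pos 83%"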
def classify(self, features, threshold=0.8):
    business_keywords = ["business", "wi", "fi", "wifi", "internet", "wireless"]
    clean_keywords = ["clean"]
    prob_dist = self.classifier.prob_classify(features)
    most_prob_label = prob_dist.max()
    if prob_dist.prob(most_prob_label) >= threshold:
        return most_prob_label
    elif any(bkeyword in features for bkeyword in business_keywords):
        # features contain a business keyword
        return common.AspectBusiness
    elif any(ckeyword in features for ckeyword in clean_keywords):
        return common.AspectClean
    else:
        return common.AspectUnknown
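The keyword fallback only fires when the classifier's top label scores below ``threshold``; a sketch of a call, assuming ``features`` uses the same representation the wrapped classifier was trained on and that ``common`` defines the aspect constants:

# Hypothetical call: aspect_clf is an instance of the class above holding a
# trained NLTK classifier; dict membership makes `"wifi" in features` work.
features = {"wifi": True, "slow": True}
label = aspect_clf.classify(features, threshold=0.9)  # stricter cutoff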
def classify(self, features):
    # `mode` is assumed to be statistics.mode, imported at module level
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    return mode(votes)

def confidence(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    choice_votes = votes.count(mode(votes))
    conf = choice_votes / len(votes)
    return conf
def sentiment(text):
    feats = find_features(text)
    return voted_classifier.classify(feats), voted_classifier.confidence(feats)
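The ``classify``/``confidence`` fragments above (and the ``__init__`` shown below) assemble into one small ensemble class. A self-contained sketch, assuming NLTK-trained member classifiers and ``statistics.mode``:

from statistics import mode  # raises StatisticsError on ties before Python 3.8

from nltk.classify import ClassifierI

class VoteClassifier(ClassifierI):
    """Hard-voting ensemble over several trained NLTK-style classifiers."""

    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        # majority label across the wrapped classifiers
        votes = [c.classify(features) for c in self._classifiers]
        return mode(votes)

    def confidence(self, features):
        # fraction of classifiers agreeing with the winning label
        votes = [c.classify(features) for c in self._classifiers]
        return votes.count(mode(votes)) / len(votes)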
Train_Classifiers.py (project: NLP-Sentiment-Analysis-Twitter, author: aalind0)
def __init__(self, *classifiers):
    self._classifiers = classifiers

# Creating our own classify method.
# After iterating we return mode(votes), which just returns the most popular vote.
def classify(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    return mode(votes)
# Defining another parameter, confidence.
# Since we have algorithms voting, we can tally the votes for and against
# the winning vote, and call this "confidence".
def confidence(self, features):
    votes = []
    for c in self._classifiers:
        v = c.classify(features)
        votes.append(v)
    choice_votes = votes.count(mode(votes))
    conf = choice_votes / len(votes)
    return conf

def is_positive(sentence):
    sentence_features = find_features(sentence, all_features)
    return 1 if classifier.classify(sentence_features) == "pos" else 0
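``find_features`` is called throughout these snippets but never shown; in the bag-of-words setups these voting classifiers usually accompany, it is a vocabulary-membership map. A sketch under that assumption (the two-argument form used by ``is_positive``):

# Hypothetical find_features, assuming a fixed vocabulary list
# (word_features) and boolean membership features.
from nltk.tokenize import word_tokenize

def find_features(document, word_features):
    words = set(word_tokenize(document))
    return {w: (w in words) for w in word_features}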
classifiers.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def classify(self, text):
    """Classifies a string of text."""
    raise NotImplementedError('Must implement a "classify" method.')
def classify(self, text):
    """Classifies the text.

    :param str text: A string of text.
    """
    text_features = self.extract_features(text)
    return self.classifier.classify(text_features)
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:  # test_set is a list of tuples
        test_data = test_set
    test_features = [(self.extract_features(d), c) for d, c in test_data]
    return nltk.classify.accuracy(self.classifier, test_features)
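These ``classify``/``accuracy`` methods match TextBlob's ``textblob.classifiers`` module; assuming that origin, typical usage is:

# Hypothetical usage, assuming textblob.classifiers.NaiveBayesClassifier.
from textblob.classifiers import NaiveBayesClassifier

train = [("I love this sandwich.", "pos"),
         ("I do not like this restaurant.", "neg")]
test = [("The beer was good.", "pos"),
        ("I do not enjoy my job.", "neg")]

cl = NaiveBayesClassifier(train)           # train() runs lazily on first use
print(cl.classify("What an amazing day"))  # e.g. 'pos'
print(cl.accuracy(test))                   # fraction correct on the test set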
def train(self, *args, **kwargs):
    """Train the classifier with labeled and unlabeled feature sets and
    return the classifier. Takes the same arguments as the wrapped NLTK
    class. This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    self.classifier = self.nltk_class.train(self.positive_features,
                                            self.unlabeled_features,
                                            self.positive_prob_prior)
    return self.classifier
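This variant matches TextBlob's ``PositiveNaiveBayesClassifier``, which wraps ``nltk.classify.PositiveNaiveBayesClassifier`` for positive-vs-unlabeled training; assuming that origin:

# Hypothetical usage, assuming textblob.classifiers.PositiveNaiveBayesClassifier.
from textblob.classifiers import PositiveNaiveBayesClassifier

sports = ["The team won the game.", "The match went to extra time."]
various = ["I ate a sandwich.", "The election results are in."]
cl = PositiveNaiveBayesClassifier(positive_set=sports, unlabeled_set=various)
print(cl.classify("The striker scored twice."))  # True if judged in-class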
def classify(self, features):
    votes = []
    for clf in self._classifiers:
        votes.append(clf.classify(features))
    return mode(votes)

# find the confidence of results
# must be handed:
# *featured words
def conf(self, features):
    votes = []
    for clf in self._classifiers:
        votes.append(clf.classify(features))
    choice_votes = votes.count(mode(votes))
    # use a local variable here: assigning the result to self.conf would
    # shadow this method on the instance after the first call
    return choice_votes / len(votes)

# find the features of a document
# must be handed:
# *document to find features of
# *word features
def DoClassify(CurClassifier, topicResultsTxt, topicTweetsLDATxt):
    counter = 0
    topicSentiments = dict()
    topicResult = open(topicResultsTxt, 'w')
    with open(topicTweetsLDATxt) as topicFile:
        for line in topicFile:
            if counter != 100:
                tSentiment = CurClassifier.classify(extract_features(line.split()))
                if tSentiment in topicSentiments:
                    topicSentiments[tSentiment] += 1
                else:
                    topicSentiments[tSentiment] = 1
                counter += 1
            else:
                # Every 101st line flushes the tally for the previous block of
                # 100 tweets; note that this line itself is never classified.
                majorSentiment = 'Dummy'
                topicSentiments[majorSentiment] = 1
                for sentiKey in topicSentiments.keys():
                    if topicSentiments[majorSentiment] < topicSentiments[sentiKey]:
                        majorSentiment = sentiKey
                topicResult.write(majorSentiment + '\n')
                topicSentiments.clear()
                counter = 0
    topicResult.close()
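Assuming a trained NLTK-style classifier and the ``extract_features`` helper the comment below introduces, a driver call is simply:

# Hypothetical driver; writes one majority-sentiment line per 100-tweet block.
DoClassify(trained_classifier, 'topic_results.txt', 'topic_tweets_lda.txt')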
# Extracting the features of the tweet (without term frequencies) in the format needed by the classifier