def train(labeled_featuresets, C=1e5):
    """
    Train a one-vs-rest linear SVM classifier on labeled featuresets.

    :param labeled_featuresets: An iterable of classified featuresets,
        i.e., tuples ``(featureset, label)``.  It is materialised once
        up front, so a generator is accepted as well as a list.
    :param C: Value forwarded as the ``C`` argument of ``LinearSVC``.
    :return: A ``scikit_classifier`` built from the fitted feature
        vectorizer, the label-to-index mapping and the fitted classifier.
    """
    # Materialise once: the data is traversed several times below, which
    # would silently yield nothing on later passes for a one-shot iterator.
    examples = list(labeled_featuresets)
    featuresets = [featureset for featureset, _ in examples]
    labels = [label for _, label in examples]

    feature_vectorizer = MVectorizer.DictsVectorizer()
    X = feature_vectorizer.fit_transform(featuresets)
    X = Normalizer().fit_transform(X)

    # Number the labels in order of first appearance.  Enumerating a set
    # would make the label -> index mapping depend on hash ordering, which
    # can differ between interpreter runs for the same training data.
    label_vectorizer = {}
    for label in labels:
        if label not in label_vectorizer:
            label_vectorizer[label] = len(label_vectorizer)
    y = numpy.array([label_vectorizer[label] for label in labels])

    classifier = OneVsRestClassifier(
        LinearSVC(loss='squared_hinge', penalty='l2', dual=True, tol=1e-5, C=C)
    )
    classifier.fit(X, y)
    return scikit_classifier(feature_vectorizer, label_vectorizer, classifier)
scikit_classifier.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录