Python LinearSVC() usage examples (source code)

def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    # Empty (0, dim) buffers that accumulate positive/negative feature rows.
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # Use pos_weight instead of a hard-coded 2 so the argument takes effect.
    # loss='l1' is the pre-0.16 scikit-learn name for the hinge loss;
    # current versions spell it loss='hinge' (which requires dual=True).
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
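The snippet above is a method excerpt; the enclosing class and its module-level imports (np, svm, cfg) are not shown. A minimal usage sketch, assuming the class is named SVMClassifier and that cfg.RNG_SEED resolves in the surrounding project:

# Sketch only: `SVMClassifier` is a hypothetical name for the enclosing class.
import numpy as np

det = SVMClassifier(cls='car', dim=4096)             # one detector per object class
feats = np.random.rand(5, 4096).astype(np.float32)   # stand-in for CNN features
det.pos = np.vstack((det.pos, feats))                # accumulate positive examples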
def train(labeled_featuresets, C=1e5):
    """
    :param labeled_featuresets: A list of classified featuresets,
        i.e., a list of tuples ``(featureset, label)``.
    """
    feat = [featureset for featureset, label in labeled_featuresets]
    feature_vectorizer = MVectorizer.DictsVectorizer()
    X = feature_vectorizer.fit_transform(feat)
    X = Normalizer().fit_transform(X)
    label_set = set(label for featureset, label in labeled_featuresets)
    label_vectorizer = dict((label, num) for num, label in enumerate(label_set))
    y = numpy.array([label_vectorizer[label] for featureset, label in labeled_featuresets])
    # print("Training on %d examples with %d features..." % (X.shape[0], X.shape[1]))
    classifier = OneVsRestClassifier(LinearSVC(loss='squared_hinge', penalty='l2',
                                               dual=True, tol=1e-5, C=C))
    classifier.fit(X, y)
    return scikit_classifier(feature_vectorizer, label_vectorizer, classifier)
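MVectorizer.DictsVectorizer and scikit_classifier are project-specific; the vectorizer behaves roughly like sklearn's DictVectorizer. A hedged toy call, assuming those names are importable:

# Toy call, assuming the project-specific imports above resolve.
labeled = [({'contains(good)': 1.0}, 'pos'),
           ({'contains(bad)': 1.0}, 'neg'),
           ({'contains(fine)': 1.0}, 'pos')]
clf = train(labeled, C=1e5)  # returns the scikit_classifier wrapper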
def train_svms():
    if not os.path.isfile('models/fine_tune.model.index'):
        print("models/fine_tune.model doesn't exist.")
        return
    net = create_alexnet()
    model = tflearn.DNN(net)
    model.load('models/fine_tune.model')
    train_file_dir = 'svm_train/'
    flist = os.listdir(train_file_dir)
    svms = []
    for train_file in flist:
        if "pkl" in train_file:
            continue
        X, Y = generate_single_svm_train_data(train_file_dir + train_file)
        train_features = []
        for i in X:
            # Run each region through the fine-tuned AlexNet to get its feature vector.
            feats = model.predict([i])
            train_features.append(feats[0])
        print("feature dimension of fitting: {}".format(np.shape(train_features)))
        clf = svm.LinearSVC()
        clf.fit(train_features, Y)
        svms.append(clf)
    joblib.dump(svms, 'models/train_svm.model')
def article_trainers(articles: ArticleDB):
    """
    Run repeated models against the article db to predict a validity score
    for articles.
    """
    models = [(DecisionTreeClassifier, {}),
              (RandomForestClassifier, {}),
              (LogisticRegression, {'C': [0.01, 0.1, 1, 10, 100]}),
              (MultinomialNB, {'alpha': [0.1, 1.0, 10.0, 100.0]}),
              (LinearSVC, {'C': [0.01, 0.1, 1, 10, 100]})]
    trained_models = []
    for classifier, param_grid in models:
        res = train_model(articles, classifier, param_grid, probabilities=True)
        trained_models.append((str(res), res))
    # Only the first four models join the ensemble: LinearSVC has no
    # predict_proba, which voting='soft' requires.
    ensemble_learner = VotingClassifier(estimators=trained_models[:4],
                                        voting='soft')
    train_model(articles, ensemble_learner, {})
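If the LinearSVC model should participate in the soft-voting ensemble, one option (a sketch, not part of the original project) is to wrap it in scikit-learn's CalibratedClassifierCV, which adds a calibrated predict_proba:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC

# Sketch: Platt-style calibration gives the SVC a predict_proba method,
# making it eligible for VotingClassifier(voting='soft').
calibrated_svc = CalibratedClassifierCV(LinearSVC(C=1.0), cv=3)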
def test_decision_function_rocauc(self):
    """
    Test ROCAUC with classifiers that have a decision function
    """
    # Load the model and assert there is no predict_proba method.
    model = LinearSVC()
    with self.assertRaises(AttributeError):
        model.predict_proba
    # Fit model and visualizer
    visualizer = ROCAUC(model)
    visualizer.fit(X, yb)
    expected = np.asarray([
        0.204348, 0.228593, 0.219908, -0.211756, -0.26155, -0.221405
    ])
    # Get the decision-function scores and evaluate
    y_scores = visualizer._get_y_scores(X)
    npt.assert_array_almost_equal(y_scores, expected, decimal=1)
def tune_para(dataframe, i):
    # Build a (samples, features) matrix from the technical-indicator columns.
    columns = ['SMA_10', 'Momentum', 'stoch_K', 'WMA_10', 'MACD', 'A/D', 'Volume']
    X = dataframe[columns].to_numpy()   # .as_matrix() was removed in pandas 1.0
    y = dataframe['Adj Close'].to_numpy()
    X_train = X[i - 200:i]
    y_train = y[i - 200:i]
    X_test = X[i:i + 1]
    y_test = y[i:i + 1]

    # Train four kinds of SVM model
    C = 1  # SVM regularization parameter
    svc = svm.SVC(cache_size=1000, kernel='linear', C=C).fit(X_train, y_train)
    rbf_svc = svm.SVC(cache_size=1000, kernel='rbf', gamma=0.7, C=C).fit(X_train, y_train)
    poly_svc = svm.SVC(cache_size=1000, kernel='poly', degree=3, C=C).fit(X_train, y_train)
    lin_svc = svm.LinearSVC(loss='squared_hinge', penalty='l1', dual=False, C=C).fit(X_train, y_train)

    # Make the predictions; a fresh loop variable avoids shadowing the parameter `i`.
    Y_result = y_test
    for clf in (svc, lin_svc, rbf_svc, poly_svc):
        pred = clf.predict(X_test)
        Y_result = np.vstack((Y_result, np.array(pred)))  # append prediction to Y_result
    return Y_result.T
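A hedged usage sketch: rolling the 200-row training window across a dataframe that already contains the indicator columns above (the dataframe name is illustrative):

# Sketch: `quotes` is a hypothetical dataframe holding the indicator columns
# plus 'Adj Close'; each call trains on rows i-200..i-1 and predicts row i.
results = [tune_para(quotes, i) for i in range(200, len(quotes) - 1)]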
Source: new_data_mlp.py (project: NVDM-For-Document-Classification, author: cryanzpj)
def SVMbanchmark(X_train, y_train, X_test, y_test):
    # optimal c is 10.0, f1 = 0.52
    print("Training LinearSVC with l1-based feature selection")
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = OneVsRestClassifier(LinearSVC(C=c, penalty='l1', dual=False))
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.f1_score(y_valid, pred, average="macro")
        score_list.append(score)
        print("f1-score: {:f}, c is {:f}".format(score, c))
    # Refit on the full training set with the best C from validation.
    clf = OneVsRestClassifier(LinearSVC(penalty="l1", dual=False, C=CList[np.argmax(score_list)]))
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    score = metrics.f1_score(y_test, pred, average="micro")
    print("f1-score for test set: {:f}".format(score))
def SVMbanchmark(X_train, X_test, y_train, y_test):
    # optimal c is 10.0, f1 = 0.52
    print("Training LinearSVC with l1-based feature selection")
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = LinearSVC(C=c, penalty='l1', dual=False)
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.accuracy_score(y_valid, pred)
        score_list.append(score)
        print("accuracy: {:f}, c is {:f}".format(score, c))
    # Refit on the full training set with the best C, and score on the test set
    # (the original printed the stale validation score here).
    clf = LinearSVC(penalty="l1", dual=False, C=CList[np.argmax(score_list)])
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    score = metrics.accuracy_score(y_test, pred)
    print("accuracy for test set: {:f}".format(score))
Source: graphssl.py (project: graph-based-semi-supervised-learning, author: deerishi)
def compareWithSvm(self, datasetTrain, datasetTest):
    C = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
    print('\ndataset shape is', datasetTrain.shape)
    self.y_train = self.y_train.reshape(-1,)
    for c in C:
        self.Svm = svm.LinearSVC(C=c)
        self.Svm.fit(datasetTrain, self.y_train)
        labels = self.Svm.predict(datasetTest)
        print('accuracy with c =', c, 'is', self.checkAccuracy(labels, self.y_test), '%\n')

# for graph-based reasoning, replace every 0 with -1
def trainClassifier(foldername, classifierName):
    features = []
    labels = []
    os.chdir(foldername)
    for filename in glob.iglob('*.png'):
        features.append(cv2.imread(filename, -1))  # -1: load the image unchanged
        labels.append(filename[0])                 # first character of the name is the label
    list_hog_fd = []
    for feature in features:
        # HOG descriptor over the 27x35 glyph; older scikit-image spells the
        # flag 'visualise', newer versions use 'visualize'.
        fd = hog(feature.reshape((27, 35)), orientations=9, pixels_per_cell=(9, 7),
                 cells_per_block=(1, 1), visualise=False)
        list_hog_fd.append(fd)
    hog_features = np.array(list_hog_fd, 'float64')
    os.chdir("..")
    clf = LinearSVC()
    clf.fit(hog_features, labels)
    joblib.dump(clf, classifierName, compress=3)
def test_RFECV():
    '''
    test the method of RFECV
    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFECV(estimator=estimator, cv=3)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
    # grid_scores_ was deprecated in scikit-learn 1.0 and removed in 1.2.
    print("Grid Scores %s" % selector.grid_scores_)
Source: k_fold_predictor.py (project: movie-quality-profitability-predictor, author: wbowditch)
def compute_cross_fold(data):
    data_table = pd.read_csv("total_set.csv", index_col=0)
    # Standardize features to zero mean and unit variance.
    scaler = preprocessing.StandardScaler().fit(data)
    data_scaled = scaler.transform(data)
    profitability_target = data_table['Profitable']
    clf_profit = svm.LinearSVC(C=0.001, verbose=True, tol=.1)
    clf_profit.fit(data_scaled, profitability_target)
    scores = cross_val_score(clf_profit, data_scaled, profitability_target, cv=10)
    # Mean accuracy and a ~95% half-width (2 std) over the 10 folds.
    return (scores.mean(), scores.std() * 2)
def _train(self, X_matrix, y, **kwargs):
    """Train a linear SVM classifier.

    Parameters:
        X_matrix (numpy.array): training feature matrix
        y (numpy.array): training labels

    Returns:
        sklearn.model: the fitted sklearn model
    """
    from sklearn.svm import LinearSVC
    model = LinearSVC(**kwargs)
    model.fit(X_matrix, y)
    return model
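Any LinearSVC constructor argument can be forwarded through **kwargs. A small illustrative call; the enclosing class is not shown, so `clf` here is a hypothetical instance exposing _train:

import numpy as np

# Sketch: `clf` stands in for an instance of the (unshown) enclosing class.
X = np.random.rand(100, 20)
y = np.random.randint(0, 2, size=100)
model = clf._train(X, y, C=0.5, class_weight='balanced')
print(model.score(X, y))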
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)
    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())
    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)