def trained_models():
dataset = datasets.load_breast_cancer()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=12345)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
lr = LogisticRegression()
lr.fit(X_train, y_train)
svc_w_linear_kernel = SVC(kernel='linear')
svc_w_linear_kernel.fit(X_train, y_train)
svc_wo_linear_kernel = SVC()
svc_wo_linear_kernel.fit(X_train, y_train)
dummy = DummyClassifier()
dummy.fit(X_train, y_train)
return {'RF':rf, 'LR':lr, 'SVC_w_linear_kernel':svc_w_linear_kernel,
'Dummy':dummy, 'SVC_wo_linear_kernel':svc_wo_linear_kernel}
Example source code using the Python SVC class
def evaluate_svm(train_data, train_labels, test_data, test_labels, n_jobs=-1):
"""
Evaluates a representation using a Linear SVM
It uses 3-fold cross validation for selecting the C parameter
:param train_data:
:param train_labels:
:param test_data:
:param test_labels:
:param n_jobs:
:return: the test accuracy
"""
# Scale data to 0-1
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)
parameters = {'kernel': ['linear'], 'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}
model = svm.SVC(max_iter=10000)
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed in scikit-learn 0.20+
clf = GridSearchCV(model, parameters, n_jobs=n_jobs, cv=3)
clf.fit(train_data, train_labels)
lin_svm_test = clf.score(test_data, test_labels)
return lin_svm_test
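A hedged usage sketch for evaluate_svm (not part of the original source): it assumes the function's own imports (MinMaxScaler, svm, GridSearchCV) are in scope and uses scikit-learn's digits dataset as stand-in input.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
# selects C by 3-fold cross validation on the training split, then scores the held-out split
print("linear SVM test accuracy:", evaluate_svm(X_tr, y_tr, X_te, y_te, n_jobs=1))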
def get_feature_importance(self,clf, model_name ):
clfs = {'RandomForestClassifier':'feature_importances',
'ExtraTreesClassifier': 'feature_importances',
'AdaBoostClassifier': 'feature_importances',
'LogisticRegression': 'coef',
'svm.SVC': 'coef',
'GradientBoostingClassifier': 'feature_importances',
'GaussianNB': None,
'DecisionTreeClassifier': 'feature_importances',
'SGDClassifier': 'coef',
'KNeighborsClassifier': None,
'linear.SVC': 'coef'}
if clfs[model_name] == 'feature_importances':
return list(clf.feature_importances_)
elif clfs[model_name] == 'coef':
return clf.coef_.tolist()  # .tolist() already returns a (nested) Python list
else:
return None
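For context, a small sketch (not from the original) of the two attribute families that the mapping above distinguishes: tree ensembles expose feature_importances_, while linear models expose coef_.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
rf = RandomForestClassifier().fit(X, y)
lr = LogisticRegression(max_iter=10000).fit(X, y)
print(len(rf.feature_importances_))  # one importance value per feature
print(lr.coef_.tolist())             # per-class coefficient lists, as returned above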
def get_classifier_class(class_name):
name_table = {
'svm': SVC,
'k_neighbors': KNeighborsClassifier,
'gaussian_process': GaussianProcessClassifier,
'decision_tree': DecisionTreeClassifier,
'random_forest': RandomForestClassifier,
'ada_boost': AdaBoostClassifier,
'mlp': MLPClassifier,
'gaussian_naive_bayes': GaussianNB,
'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
}
if class_name not in name_table:
raise ValueError('No such classifier')
return name_table[class_name]
def define_model(self, model, parameters, n_cores = 0):
clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
'GaussianNB': GaussianNB(),
'DecisionTreeClassifier': DecisionTreeClassifier(),
'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
'linear.SVC': svm.LinearSVC() }
if model not in clfs:
raise ConfigError("Unsupported model {}".format(model))
clf = clfs[model]
clf.set_params(**parameters)
return clf
def __init__(
self,data_block, predictors=[],cv_folds=10,
scoring_metric='accuracy',additional_display_metrics=[]):
base_classification.__init__(
self, alg=SVC(), data_block=data_block, predictors=predictors,
cv_folds=cv_folds,scoring_metric=scoring_metric,
additional_display_metrics=additional_display_metrics
)
self.model_output=pd.Series(self.default_parameters)
self.model_output['Coefficients'] = "-"
#Set parameters to default values:
self.set_parameters(set_default=True)
#Check if probabilities are enabled:
if not self.alg.get_params()['probability']:
self.probabilities_available = False
def quiz15():
X, Y, N = read_file("features.train")
Y_0 = (Y == 0).astype(int)
c_l = []
w_l = []
for i in range(-6, 4, 2):
c = 10 ** i
c_l.append(c)
clf = svm.SVC(C=c, kernel='linear', shrinking=False)
clf.fit(X, Y_0)
w = clf.coef_.flatten()
norm_w = np.linalg.norm(w, ord=2)
w_l.append(norm_w)
print("C = ", c, ' norm(w) =', norm_w)
plt.semilogx(c_l, w_l)
plt.savefig("h5_q15.png", dpi=300)
def train_and_predict(self, param_dict, predict_on='val'):
"""Initializes an SVM classifier according to the desired parameter settings,
trains it, and returns the predictions on the appropriate evaluation dataset.
Args:
param_dict: A dictionary with keys representing parameter names and
values representing settings for those parameters.
predict_on: The dataset used for evaluating the model. Can set to
'Test' to get final results.
Returns: The predicted Y labels.
"""
if predict_on == 'test':
predict_X = self.data_loader.test_X
else:
predict_X = self.data_loader.val_X
self.model = SVC(C=param_dict['C'], kernel=param_dict['kernel'], gamma=param_dict['beta'])
self.model.fit(self.data_loader.train_X, self.data_loader.train_Y)
preds = self.predict_on_data(predict_X)
return preds
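A self-contained sketch (with assumed toy data) of what train_and_predict does internally; note that the 'beta' key in param_dict is forwarded to SVC as gamma.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X_train, X_val, y_train, y_val = train_test_split(*load_iris(return_X_y=True), random_state=0)
param_dict = {'C': 1.0, 'kernel': 'rbf', 'beta': 0.1}  # 'beta' plays the role of the RBF gamma
model = SVC(C=param_dict['C'], kernel=param_dict['kernel'], gamma=param_dict['beta'])
model.fit(X_train, y_train)
preds = model.predict(X_val)  # analogous to predict_on='val'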
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
# aggregate the kernel matrix to save memory
svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
num_training_samples=num_epochs_per_subj*(num_subjects-1))
predict = clf.predict()
print(predict)
print(clf.decision_function())
test_labels = labels[0:num_epochs_per_subj]
incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
logger.info(
'when aggregating the similarity matrix to save memory, '
'the accuracy is %d / %d = %.2f' %
(num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
(num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
)
# when the kernel matrix is computed in portions, the test data is already in the classifier, so X is passed as None
print(clf.score(None, test_labels))
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
# NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
# when the kernel matrix is computed in portions; also, this method only works
# for self-correlation, i.e. correlation between the same data matrix.
# no shrinking, set C=1
svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
#logit_clf = LogisticRegression()
clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
# doing leave-one-subject-out cross validation
# no shuffling in cv
skf = model_selection.StratifiedKFold(n_splits=num_subjects,
shuffle=False)
scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
y=labels,
cv=skf)
print(scores)
logger.info(
'the overall cross validation accuracy is %.2f' %
np.mean(scores)
)
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
# aggregate the kernel matrix to save memory
svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
num_training_samples=num_epochs_per_subj*(num_subjects-1)
clf.fit(list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples])),
labels[0:num_training_samples])
X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
predict = clf.predict(X)
print(predict)
print(clf.decision_function(X))
test_labels = labels[num_training_samples:]
incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
logger.info(
'when aggregating the similarity matrix to save memory, '
'the accuracy is %d / %d = %.2f' %
(num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
(num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
)
# when the kernel matrix is computed in portions, the test data is already in
print(clf.score(X, test_labels))
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
num_subjects, num_epochs_per_subj):
# aggregate the kernel matrix to save memory
svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
num_training_samples=num_epochs_per_subj*(num_subjects-1)
clf.fit(list(zip(raw_data, raw_data2)), labels,
num_training_samples=num_training_samples)
predict = clf.predict()
print(predict)
print(clf.decision_function())
test_labels = labels[num_training_samples:]
incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
logger.info(
'when aggregating the similarity matrix to save memory, '
'the accuracy is %d / %d = %.2f' %
(num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
(num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
)
# when the kernel matrix is computed in portions, the test data is already in the classifier, so X is passed as None
print(clf.score(None, test_labels))
# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
def test_build_param_grid_set_estimator():
clf1 = SVC()
clf2 = LogisticRegression()
clf3 = SVC()
clf4 = SGDClassifier()
estimator = set_grid(Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
('clf', None)]),
clf=[set_grid(clf1, kernel=['linear']),
clf2,
set_grid(clf3, kernel=['poly'], degree=[2, 3]),
clf4])
param_grid = [{'clf': [clf1], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
{'clf': [clf3], 'clf__kernel': ['poly'],
'clf__degree': [2, 3], 'sel__k': [2, 3]},
{'clf': [clf2, clf4], 'sel__k': [2, 3]}]
assert build_param_grid(estimator) == param_grid
def test_make_grid_search():
X, y = load_iris(return_X_y=True)
lr = LogisticRegression()
svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])
gs1 = make_grid_search(lr, cv=5) # empty grid
gs2 = make_grid_search(svc, cv=5)
gs3 = make_grid_search([lr, svc], cv=5)
for gs, n_results in [(gs1, 1), (gs2, 2), (gs3, 3)]:
gs.fit(X, y)
assert gs.cv == 5
assert len(gs.cv_results_['params']) == n_results
svc_mask = gs3.cv_results_['param_root'] == svc
assert svc_mask.sum() == 2
assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
def train(train_dataTables, human_marks):
global classifier
samples =[]
target = []
for nn, dataTable in enumerate(train_dataTables):
for i in range(dataTable.row):  # range() keeps this compatible with Python 3
for j in range(dataTable.col):
mention = dataTable[i][j]
if mention.cid == -1:
continue
eids = dataTable.get_eids(i, j)
words = dataTable.get_words(i, j)
entites = dataTable.get_entities(i ,j)
true_id = human_marks[nn][i][j]['id']
for ii, entity in enumerate(mention.candidates):
prior = entity.popular
SR = mention.getSR(ii, entites)
res = int(true_id == entity.id)
samples.append([prior, SR])
target.append(res)
from sklearn import svm
classifier = svm.SVC(probability=True)
classifier.fit(samples, target)
def run(self):
training_x, training_y, training_ids = self.get_training_data()
test_x, test_y, test_ids = self.get_test_data()
clf = self.define_model(self.model_name, self.model_params)
clf.fit(training_x, training_y)
res_predict = clf.predict(test_x)
if (self.model_name == "SGDClassifier" and (clf.loss =="hinge" or clf.loss == "perceptron")) or self.model_name == "linear.SVC":
res = list(clf.decision_function(test_x))
else:
res = list(clf.predict_proba(test_x)[:,1])
#fp, fn, tp, tn = self.compute_confusion_matrix(res[:,0], test_y)
result_dictionary = {'training_ids': training_ids,
'predictions_test_y': list(res_predict),
'prob_prediction_test_y': res ,
'test_y': list(test_y),
'test_ids': list(test_ids),
'model_name': self.model_name,
'model_params': self.model_params,
'label': self.label,
'feature_columns_used': self.cols_to_use,
'config': self.config,
'feature_importance': self.get_feature_importance(clf, self.model_name),
'columned_used_for_feat_importance': list(training_x.columns.values)}
return result_dictionary, clf
def svc_model(self, X, y, x_test, y_test, x_val, y_val, i, j):
X, y = shuffle(X, y, random_state=self.SEED)
clf = SVC(C=self.C, kernel='rbf', gamma=self.gamma, cache_size=self.cache_size,
verbose=0, random_state=self.SEED)
model = clf.fit(X, y)
yhat_train = model.predict(X)
yhat_val = model.predict(x_val)
yhat_test = model.predict(x_test)
train_error = (1 - accuracy_score(y, yhat_train)) * 100
val_error = (1 - accuracy_score(y_val, yhat_val)) * 100
test_error = (1 - accuracy_score(y_test, yhat_test)) * 100
self.warn_log.append([i, train_error, val_error, test_error])
return model
def __init__(self, isTrain, isOutlierRemoval=0):
"""
The linear models ``LinearSVC()`` and ``SVC(kernel='linear')`` yield slightly
different decision boundaries. This can be a consequence of the following
differences:
- ``LinearSVC`` minimizes the squared hinge loss while ``SVC`` minimizes the
regular hinge loss.
- ``LinearSVC`` uses the One-vs-All (also known as One-vs-Rest) multiclass
reduction while ``SVC`` uses the One-vs-One multiclass reduction.
:return:
"""
super(ClassificationSVM, self).__init__(isTrain, isOutlierRemoval)
# data preprocessing
self.dataPreprocessing()
self.clf = svm.SVC() # define the SVM classifier
C = 1.0 # SVM regularization parameter
self.svc = svm.SVC(kernel='linear', C=C, max_iter=100000)
self.rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C)
self.poly_svc = svm.SVC(kernel='poly', coef0=1, degree=3, C=C)
self.lin_svc = svm.LinearSVC(C=C)
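A minimal sketch (not part of the original class) that makes the docstring's point concrete on the iris data: LinearSVC (squared hinge, one-vs-rest) and SVC(kernel='linear') (hinge, one-vs-one) fit similar but not identical linear boundaries.
from sklearn import datasets, svm

X, y = datasets.load_iris(return_X_y=True)
lin_svc = svm.LinearSVC(C=1.0, max_iter=100000).fit(X, y)  # squared hinge loss, one-vs-rest
svc_lin = svm.SVC(kernel='linear', C=1.0).fit(X, y)        # hinge loss, one-vs-one
print(lin_svc.coef_)  # shape (3, 4): one row of coefficients per class (one-vs-rest)
print(svc_lin.coef_)  # shape (3, 4) as well, but each row comes from a one-vs-one subproblem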
def parameterChoosing(self):
# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf'],
'gamma': np.logspace(-4, 3, 30),
'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
{'kernel': ['poly'],
'degree': [1, 2, 3, 4],
'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
'coef0': np.logspace(-4, 3, 30)},
{'kernel': ['linear'],
'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]
clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
clf.fit(self.X_train, self.y_train.ravel())
print "Best parameters set found on development set:\n"
print clf.best_params_
print "Grid scores on development set:\n"
for params, mean_score, scores in clf.grid_scores_:
print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)
print "Detailed classification report:\n"
y_true, y_pred = self.y_test, clf.predict(self.X_test)
print classification_report(y_true, y_pred)
def svm_classify(X, label, split_ratios, C):
"""
trains a linear SVM on the data
input C specifies the penalty factor for SVM
"""
train_size = int(len(X)*split_ratios[0])
val_size = int(len(X)*split_ratios[1])
train_data, valid_data, test_data = X[0:train_size], X[train_size:train_size + val_size], X[train_size + val_size:]
train_label, valid_label, test_label = label[0:train_size], label[train_size:train_size + val_size], label[train_size + val_size:]
print('training SVM...')
clf = svm.SVC(C=C, kernel='linear')
clf.fit(train_data, train_label.ravel())
p = clf.predict(train_data)
train_acc = accuracy_score(train_label, p)
p = clf.predict(valid_data)
valid_acc = accuracy_score(valid_label, p)
p = clf.predict(test_data)
test_acc = accuracy_score(test_label, p)
return [train_acc, valid_acc, test_acc]
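A hedged usage example for svm_classify (not from the original): it assumes the function's own imports (svm, accuracy_score) are in scope; synthetic data stands in for the real features, and the 60/20/20 split ratios and C value are illustrative.
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=300, n_features=20, random_state=0)
train_acc, valid_acc, test_acc = svm_classify(X_demo, y_demo, split_ratios=[0.6, 0.2], C=0.1)
print(train_acc, valid_acc, test_acc)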
def setup(self):
"""
This function ...
:return:
"""
# Call the setup of the base class
super(Classifier, self).setup()
# Create the vector classifier
self.vector_classifier = svm.SVC(gamma=0.001, C=100.) # support vector classification
# Determine the path to the collection directory for the current mode
collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)
# Determine the paths to the 'yes' and 'no' saturation collection directories
self.yes_path = os.path.join(collection_mode_path, "yes")
self.no_path = os.path.join(collection_mode_path, "no")
# Determine the path to the classification directory for the current mode
self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)
# -----------------------------------------------------------------
def setup(self):
"""
This function ...
:return:
"""
# Call the setup of the base class
super(Classifier, self).setup()
# Create the vector classifier
self.vector_classifier = svm.SVC(gamma=0.001, C=100.) # support vector classification
# Determine the path to the collection directory for the current mode
collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)
# Determine the paths to the 'yes' and 'no' saturation collection directories
self.yes_path = os.path.join(collection_mode_path, "yes")
self.no_path = os.path.join(collection_mode_path, "no")
# Determine the path to the classification directory for the current mode
self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)
# -----------------------------------------------------------------
def test_visualize():
pytest.importorskip('graphviz')
X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
random_state=0)
clf = SVC(random_state=0)
grid = {'C': [.1, .5, .9]}
gs = dcv.GridSearchCV(clf, grid).fit(X, y)
assert hasattr(gs, 'dask_graph_')
with tmpdir() as d:
gs.visualize(filename=os.path.join(d, 'mydask'))
assert os.path.exists(os.path.join(d, 'mydask.png'))
# Doesn't work if not fitted
gs = dcv.GridSearchCV(clf, grid)
with pytest.raises(NotFittedError):
gs.visualize()
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
"""
SVM binary Classification
"""
# c = parameters[0]
# g = parameters[1]
clf = SVC()
clf.fit(X_train, y_train)
# if savemodel == True:
# fname_out = '{}-{}.pickle'.format(fout, datetime.now())
# with open(fname_out, 'wb') as f:
# cPickle.dump(clf, f, -1)
accuracy = clf.score(X_test, y_test)
return accuracy
def performSVMClass(X_train, y_train, X_test, y_test):
classifier = svm.SVC()
classifier.fit(X_train, y_train)
results = classifier.predict(X_test)
# colors = {1:'red', 0:'blue'}
# df = pd.DataFrame(dict(adj=X_test[:,5], return_=X_test[:,50], label=results))
# fig, ax = plt.subplots()
# colors = {1:'red', 0:'blue'}
# ax.scatter(df['adj'],df['return_'], c=df['label'].apply(lambda x: colors[x]))
# # ax.scatter(X_test[:,5], X_test[:,50], c=y_test_list.apply(lambda x: colors[x]))
# plt.show()
# print y_pred
# cm = confusion_matrix(y_test, results)
# print cm
# plt.figure()
# plot_confusion_matrix(cm)
# plt.show()
num_correct = (results == y_test).sum()
accuracy = num_correct / len(y_test)  # fraction of correct predictions; this is accuracy, not recall
# print("SVM model accuracy (%): ", accuracy * 100, "%")
return accuracy * 100
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
"""
SVM binary Classification
"""
# c = parameters[0]
# g = parameters[1]
clf = SVC()
clf.fit(X_train, y_train)
# if savemodel == True:
# fname_out = '{}-{}.pickle'.format(fout, datetime.now())
# with open(fname_out, 'wb') as f:
# cPickle.dump(clf, f, -1)
accuracy = clf.score(X_test, y_test)
print "SVM: ", accuracy
def support_vector_machine(self, sensors_set):
features = list(self.dataset.get_sensors_set_features(sensors_set))
print("SUPPORT VECTOR MACHINE.....")
print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
print("NUMBER OF FEATURES: ", len(features))
train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
self.dataset.get_train, self.dataset.get_test, features)
train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)
classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set], gamma=const.PAR_SVM_GAMMA[sensors_set], verbose=False)
classifier_svm.fit(train_features_scaled, train_classes)
test_prediction = classifier_svm.predict(test_features_scaled)
acc = accuracy_score(test_classes, test_prediction)
print("ACCURACY : " + str(acc))
print("END SUPPORT VECTOR MACHINE.....")
if not os.path.exists(const.DIR_RESULTS):
os.makedirs(const.DIR_RESULTS)
file_content = "acc\n" + str(acc)
with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS, 'w') as f:
f.write(file_content)
# use different algorithms changing target classes, try all combination of two target classes
def test_support_vector_classifier(self):
for dtype in self.number_data_type.keys():
scikit_model = SVC(kernel='rbf', gamma=1.2, C=1)
data = self.scikit_data['data'].astype(dtype)
target = self.scikit_data['target'].astype(dtype) > self.scikit_data['target'].astype(dtype).mean()
scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
coreml_model = create_model(spec)
for idx in range(0, 10):
test_data = data[idx].reshape(1, -1)
try:
self.assertEqual(scikit_model.predict(test_data)[0],
bool(int(coreml_model.predict({'data': test_data})['target'])),
msg="{} != {} for Dtype: {}".format(
scikit_model.predict(test_data)[0],
bool(int(coreml_model.predict({'data': test_data})['target'])),
dtype
)
)
except RuntimeError:
print("{} not supported. ".format(dtype))
def learn(training_data, training_labels, show_score=False, store=False):
print ("Start Learning....")
clf = SVC(kernel='linear', probability=True, C=1)
clf.fit(training_data, training_labels)
print ("Done Learning.")
if store:
print ("Pickling classifier...")
pickle.dump(clf, open(path_config.CLASSIFIER_PICKLING_FILE, 'wb'))
print ("Done Pickling.")
if show_score:
print ("Scoring classifier ...")
print ("Data-Level Training Set Prediction Accuracy: %s" % clf.score(training_data, training_labels))
def classification_linear_svm(self):
self.signals.PrintInfo.emit("Linear SVM classification")
output_dir = self.output_dir + 'linear_svm_out/'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
vectorizer = HashingVectorizer()
fdata = vectorizer.fit_transform(self.fdata)
trainingSet = fdata[:self.split]
testSet = fdata[self.split:]
classificator = SVC(kernel="linear", probability=True, C=self.linear_svm_c)
classificator.fit(trainingSet, self.trainingClass)
results = classificator.predict(testSet)
proba = classificator.predict_proba(testSet)
self.write_results_to_file(output_dir + 'results.csv', results, proba, classificator.classes_, self.test_filenames)
out_text = self.compile_result_string(results, proba, classificator.classes_, self.test_filenames)
self.signals.PrintInfo.emit(out_text)