def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Fit each configured classifier and return its test-set predictions.

    Args:
        train: path to a training file (used only when *data* is None).
        test: path to a test file (used only when *data* is None).
        data: optional dict with keys "train_features", "train_groundtruths",
            "test_features", "test_groundtruths"; takes precedence over files.
        res_dir: result directory; created if missing, else resolved.
        disp: unused here; kept for interface compatibility.
        outfilename: unused here; kept for interface compatibility.

    Returns:
        Predictions of the last classifier fitted (only one is enabled),
        or an empty list if no classifier is configured.
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    # Additional estimators (KNN, SVM, AdaBoost, GradientBoosting, ...) can
    # be registered here to compare them in the same loop.
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1),
    }
    predictions = []  # BUG FIX: defined even if the dict is ever emptied
    for name, clf in classifiers.items():
        utils.print_success(name)
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
Example source snippets for the Python class KNeighborsClassifier()
def __init__(self, config = conf, split = 0.3, clf = None, auto_rebuild = False, debug = False):
    """Initialize the model wrapper.

    Args:
        config: configuration object (defaults to the module-level ``conf``).
        split: held-out fraction used when splitting the data.
        clf: scikit-learn estimator; a fresh KNeighborsClassifier when None.
            (A ``KNeighborsClassifier()`` default argument would be built once
            at definition time and shared by every instance.)
        auto_rebuild: whether to rebuild state automatically.
        debug: enable debug output.
    """
    self.clf = KNeighborsClassifier() if clf is None else clf
    # BUG FIX: the original stored the module-level ``conf`` and silently
    # ignored the ``config`` parameter.
    self.conf = config
    self.split = split
    self.debug = debug
    self.auto_rebuild = auto_rebuild
    self.init()
def __init__(self, conf = conf, clf = None, debug = False):
    """Initialize the recognizer state and bookkeeping databases.

    Args:
        conf: configuration object (defaults to the module-level ``conf``).
        clf: scikit-learn estimator; a fresh KNeighborsClassifier when None.
            (BUG FIX: a ``KNeighborsClassifier()`` default argument is
            evaluated once and shared by every instance of the class.)
        debug: enable debug output.
    """
    self.clf = KNeighborsClassifier() if clf is None else clf
    self.conf = conf
    self.debug = debug
    # Directory containing this source file.
    self.base = os.path.dirname(os.path.realpath(__file__))
    self.vote_db = {}
    self.letter_db = {}
    self.writer_db = {}
    # Counters for overall accuracy bookkeeping.
    self.total = self.right = 0
def knn(train, test, smoteit=True):
    """k-nearest-neighbours: fit on *train*, return predictions for *test*.

    When *smoteit* is true the training set is first rebalanced with SMOTE.
    The second-to-last column of the formatted frame is the class label.
    """
    if smoteit:
        train = SMOTE(train)
    model = KNeighborsClassifier()
    train_df = formatData(train)
    test_df = formatData(test)
    feature_cols = train_df.columns[:-2]
    target = train_df[train_df.columns[-2]]
    model.fit(train_df[feature_cols], target)
    return model.predict(test_df[test_df.columns[:-2]]).tolist()
def setClf(self):
    """Build the estimator: min-max scaling followed by a 33-NN classifier."""
    scaler = preprocessing.MinMaxScaler()
    knn = KNeighborsClassifier(n_neighbors=33)
    self.clf = Pipeline([('scaler', scaler), ('estimator', knn)])
    return
def lession_4():
    """Iris demo: 70/30 split, fit a default KNN, print predictions vs truth."""
    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3)
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # BUG FIX: the original used Python 2 ``print`` statements while the rest
    # of this file uses the Python 3 print() function.
    print(knn.predict(X_test))
    print(y_test)
# dataset usage
Source file: classification.py
Project: oss-github-analysis-project (author: itu-oss-project-team)
Views: 27 · Favorites: 0 · Likes: 0 · Comments: 0
def knn_classify(self, out_folder_path, training_set, test_set, training_labels, test_labels, k=1, msg=""):
    """Fit a distance-weighted k-NN, predict the test set, and report results.

    Returns:
        (confusion_matrix, success_count) where success_count is the number
        of correctly classified test samples (non-normalized accuracy).
    """
    print("message: " + msg)
    # Any output file should extend this path.
    out_file_pre_path = os.path.join(out_folder_path, "knn" + str(k) + msg)
    model = neighbors.KNeighborsClassifier(k, weights='distance')
    model.fit(training_set, training_labels)
    predictions = model.predict(test_set)
    correct = accuracy_score(test_labels, predictions, normalize=False)
    matrix = self.__retrieve_confusion_matrix(test_labels, predictions, out_file_pre_path)
    return matrix, correct
def __init__(self):
    """Use a default scikit-learn k-nearest-neighbours classifier."""
    from sklearn import neighbors  # local import, as in the original
    self.clf = neighbors.KNeighborsClassifier()
def get_models(test):
    """Return (estimator class, parameter grid) pairs for model search.

    In test mode only the fast LinearSVC grid is returned; otherwise the
    slower KNN / SVC / random-forest grids are appended.
    """
    models = [
        (LinearSVC, {
            'C': [0.01, 0.1, 1.0, 10.0],
            'multi_class': ['ovr', 'crammer_singer'],
        }),
    ]
    if not test:
        models += [
            (KNeighborsClassifier, {
                'weights': ['uniform', 'distance'],
            }),
            (SVC, {
                'C': [0.01, 0.1, 1.0, 10.0, 100.0],
                'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'decision_function_shape': ['ovr', 'ovo'],
            }),
            (RandomForestClassifier, {
                'criterion': ['gini', 'entropy'],
                'min_samples_split': [5, 10, 25],
                'min_samples_leaf': [5, 10, 25],
                'n_estimators': [5, 10, 50, 100],
            }),
        ]
    return models
def n4_non_linearity_of_nearest_neighbor_classifier( data, random_seed = 42, iterations = 20 ):
    """N4 complexity measure: 1-NN error on linearly interpolated points.

    For every class, synthetic points are generated by linear interpolation
    between random same-class pairs; a 1-NN trained on the original data is
    then scored on ALL interpolated points.  Averaged over *iterations* runs.

    Args:
        data: DataFrame whose last column holds the class labels.
        random_seed: seed for the ``random`` module.
        iterations: number of interpolation rounds to average.

    Returns:
        Mean 1-NN error rate on the interpolated data (float).
    """
    def generate_interpolated_data_cl(data, cl, features, labels):
        # Synthetic points between random pairs of same-class examples.
        points_in_class = data[data[labels] == cl].index.tolist()
        rows = []
        for a, b in random_combinations(points_in_class):
            new_point = linear_interpolation(data.iloc[a, :-1], data.iloc[b, :-1])
            rows.append(pd.DataFrame([new_point + [cl]], columns=features + [labels]))
        # pd.concat replaces DataFrame.append (removed in pandas 2.x).
        if rows:
            return pd.concat(rows, ignore_index=True)
        return pd.DataFrame(columns=features + [labels])
    def get_n4_for_iteration(data):
        labels = data.columns[-1]
        features = data.columns[:-1].tolist()
        classes = data.iloc[:, -1].unique()
        data_to_interpolate = data.copy()
        knn = KNeighborsClassifier(n_neighbors=1)
        knn.fit(data[features], data[labels])
        # BUG FIX: the original overwrote ``mistakes`` on every loop pass and
        # effectively scored only the LAST class; score the union instead.
        interpolated = pd.concat(
            [generate_interpolated_data_cl(data_to_interpolate, cl, features, labels)
             for cl in classes],
            ignore_index=True,
        )
        return 1 - knn.score(interpolated[features], interpolated[labels])
    random.seed( random_seed )
    n4 = [get_n4_for_iteration(data) for _ in range(iterations)]
    return np.mean(n4)
def __init__(self,data_file):
    """Load a CSV, split 80/20, and fit a default k-NN classifier.

    Args:
        data_file: path to a CSV file with a 'class' column as the target.
    """
    self.file = data_file
    df = pd.read_csv(data_file)
    X = np.array(df.drop(['class'], 1))
    y = np.array(df['class'])
    # BUG FIX: the original left the file handle open; use a context manager
    # so the descriptor is always released after counting lines.
    with open(data_file) as handle:
        self.size = sum(1 for line in handle)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)
    clf = neighbors.KNeighborsClassifier()
    # NOTE(review): fit() returns the estimator itself, not predictions —
    # ``self.prediction`` actually holds the fitted classifier.
    self.prediction = clf.fit(X_train, y_train)
def KNN(X, y):
    """Fit a KNeighborsClassifier on (X, y), print a score, return the model.

    NOTE(review): the score is computed on the first 10000 TRAINING rows,
    so it is not a held-out estimate of generalization — confirm intent.
    """
    print("Iniciando treinamento do KNN")
    clf = KNeighborsClassifier(n_jobs=6, leaf_size=15)
    # BUG FIX: removed an unused KFold object the original built and ignored.
    clf.fit(X, y)
    X_score = X[:10000]
    y_score = y[:10000]
    score = clf.score(X_score, y_score)
    print("KNN score: ", score)
    return clf
def knn_model(X, y):
    """Return a 3-nearest-neighbours classifier fitted on (X, y)."""
    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(X, y)
    return classifier
def classification(lead):
    """Classify a lead as qualified/unqualified with a 7-NN model.

    Fetches the dataset from the database, trains on 70% of it, prints
    diagnostics, and returns ``{'value': <status str>, 'proba': <str>}``.

    Args:
        lead: dict of lead attributes; keys select the dataset columns.
    """
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))
    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))
    knn = KNeighborsClassifier(n_neighbors=7, p=2)  # p=2 -> Euclidean distance
    knn.fit(inputs_training, np.ravel(outputs_training))
    print('[K=7] The probability of the algorithm to be right is: %f%%' % (knn.score(inputs_test, outputs_test) * 100))
    print('Lead data:')
    print(lead)
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)
    lead_status = knn.predict(data_to_predict)
    lead_status_value = lead_status[0]
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')
    proba = knn.predict_proba(data_to_predict)
    max_proba = max(proba[0])
    print('Proba is: %d%%' %(max_proba*100))
    # IDIOM FIX: build the result directly instead of dict.update(d, ...).
    return {'value': str(lead_status_value), 'proba': str(max_proba)}
Source file: test.py
Project: Audio-classification-using-Bag-of-Frames-approach (author: amogh3892)
Views: 20 · Favorites: 0 · Likes: 0 · Comments: 0
def knn_predict(training_samples, training_labels, test_samples, test_lables,k_neighbours = 5,weights = "uniform",algorithm = "auto"):
    """Train and time a k-NN classifier, then dump metrics to Temp files.

    Writes a human-readable summary to Temp\\results.txt and the predicted
    labels to Temp\\result_labels.csv (Windows-style relative paths, kept
    for compatibility with the rest of this project).
    """
    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier(n_neighbors=k_neighbours, weights=weights, algorithm=algorithm)
    t0 = time()
    clf.fit(training_samples, training_labels)
    training_time = round(time() - t0, 3)
    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time() - t0, 3)
    from sklearn.metrics import accuracy_score
    acc = accuracy_score(pred, test_lables)
    no_features = np.array(training_samples).shape[1]
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]
    with open("Temp\\results.txt", "w") as outfile:
        # BUG FIX: corrected the misspelled "Alogirthm" label in the report.
        outfile.write("Algorithm : {}\n".format("KNN"))
        outfile.write("K = {}\n".format(k_neighbours))
        outfile.write("weight = {}\n".format(weights))
        outfile.write("algorithm = {}\n".format(algorithm))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))
    with open("Temp\\result_labels.csv", "wb") as outfile:
        np.savetxt(outfile, pred)
def train_random_forest():
    """Return default model properties and a 100-tree random forest."""
    properties = mp.ModelProperties()
    # 100 estimators instead of scikit-learn's historical default of 10.
    model = RandomForestClassifier(n_estimators=100)
    return properties, model
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
def train_knn():
    """Return default model properties and a default (5-neighbour) k-NN."""
    properties = mp.ModelProperties()
    model = neighbors.KNeighborsClassifier()
    return properties, model
# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn-svm-svc
def __init__(self, isTrain, isOutlierRemoval=0):
    """Set up a 2-NN classifier after the base-class preprocessing step."""
    super(ClassificationKNN, self).__init__(isTrain, isOutlierRemoval)
    self.dataPreprocessing()
    # First positional argument is K; 'uniform' weights each neighbor
    # equally ('distance' would weight by inverse distance).  The metric
    # defaults to Euclidean distance.
    self.clf = neighbors.KNeighborsClassifier(2, weights='uniform')
def build_classifier(self):
    """Fit a 1-nearest-neighbour classifier on the stored coordinates/labels."""
    model = KNeighborsClassifier(n_neighbors=1)
    self.classifier = model
    model.fit(self.coordinates, self.labels)