def benchmark(clf, name=""):
    """Fit ``clf``, score it on the test split and print a short report.

    The function reads module-level state that is defined outside this
    block: ``X_train``, ``y_train``, ``X_test``, ``y_test``,
    ``average_option``, ``opts``, ``feature_names`` and ``categories``,
    plus the helpers ``time``, ``metrics``, ``density``, ``np`` and ``trim``.

    Args:
        clf: Estimator exposing ``fit`` and ``predict``.
        name: Label for the classifier. When it equals ``"kNN"`` only the
            first 200 test rows are predicted and scored.

    Returns:
        Tuple ``(clf_descr, score, train_time, test_time)`` where ``score``
        is the F1 score and both times are in seconds.
    """
    # Number of leading test rows used when name == "kNN".
    knn_eval_rows = 200

    print('_' * 80)
    print("Training: ")
    print(clf)
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    if name == "kNN":
        X_eval = X_test[:knn_eval_rows]
        y_eval = y_test[:knn_eval_rows]
    else:
        X_eval = X_test
        y_eval = y_test
    pred = clf.predict(X_eval)
    # The reported test time covers prediction and scoring.
    score = metrics.f1_score(y_eval, pred, average=average_option)
    test_time = time() - t0
    print("test time: %0.3fs" % test_time)
    print("f1-score: %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))
        if opts.print_top10 and feature_names is not None:
            print("top 10 keywords per class:")
            for i, category in enumerate(categories):
                # argsort is ascending, so the last ten indices are the
                # ten largest coefficients for this class.
                top10 = np.argsort(clf.coef_[i])[-10:]
                print(trim("%s: %s"
                           % (category, " ".join(feature_names[top10]))))
        print()

    if opts.print_cm:
        print("confusion matrix:")
        # Compare against the labels that were actually predicted; using the
        # full y_test here would mismatch pred's length in the kNN case.
        print(metrics.confusion_matrix(y_eval, pred))
    print()

    # Text between the first and second "(" of the repr, reduced to what
    # follows its last "=". Falls back to the whole repr when there is no
    # "(" at all instead of raising IndexError.
    repr_parts = str(clf).split('(')
    inner = repr_parts[1] if len(repr_parts) > 1 else repr_parts[0]
    clf_descr = inner.split("=")[-1]
    return clf_descr, score, train_time, test_time
python类density()的实例源码
baseline_rcv1.py 文件源码
项目:NVDM-For-Document-Classification
作者: cryanzpj
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
document_classification_20newsgroups.py 文件源码
项目:Parallel-SGD
作者: angadgill
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def benchmark(clf):
    """Train ``clf``, measure accuracy on the test split and print a report.

    Uses module-level names defined outside this block: ``X_train``,
    ``y_train``, ``X_test``, ``y_test``, ``opts``, ``feature_names``,
    ``categories``, ``time``, ``metrics``, ``density``, ``np`` and ``trim``.

    Args:
        clf: Estimator exposing ``fit`` and ``predict``.

    Returns:
        Tuple ``(clf_descr, score, train_time, test_time)``; ``clf_descr`` is
        the part of ``str(clf)`` before the first ``"("``.
    """
    print('_' * 80)
    print("Training: ")
    print(clf)

    fit_started = time()
    clf.fit(X_train, y_train)
    train_time = time() - fit_started
    print("train time: %0.3fs" % train_time)

    predict_started = time()
    pred = clf.predict(X_test)
    test_time = time() - predict_started
    print("test time: %0.3fs" % test_time)

    score = metrics.accuracy_score(y_test, pred)
    print("accuracy: %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))

        show_keywords = opts.print_top10 and feature_names is not None
        if show_keywords:
            print("top 10 keywords per class:")
            for idx, label in enumerate(categories):
                # Ascending argsort: the tail holds the largest weights.
                strongest = np.argsort(clf.coef_[idx])[-10:]
                keywords = " ".join(feature_names[strongest])
                print(trim("%s: %s" % (label, keywords)))
        print()

    if opts.print_report:
        print("classification report:")
        report = metrics.classification_report(y_test, pred,
                                               target_names=categories)
        print(report)

    if opts.print_cm:
        print("confusion matrix:")
        print(metrics.confusion_matrix(y_test, pred))

    print()
    return str(clf).split('(')[0], score, train_time, test_time
def test_density():
    """Check that ``density`` agrees between a dense array and sparse copies.

    Builds a seeded random 10x5 integer array, forces two entries to zero,
    and asserts that the CSR, CSC, COO and LIL conversions all report the
    same density as the dense original.
    """
    dense = np.random.RandomState(0).randint(10, size=(10, 5))
    for row, col in ((1, 2), (5, 3)):
        dense[row, col] = 0

    sparse_variants = [
        to_sparse(dense)
        for to_sparse in (sparse.csr_matrix, sparse.csc_matrix,
                          sparse.coo_matrix, sparse.lil_matrix)
    ]
    for variant in sparse_variants:
        assert_equal(density(variant), density(dense))
document_classification_20newsgroups.py 文件源码
项目:ShallowLearn
作者: giacbrd
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def benchmark(clf):
    """Train ``clf``, compute macro F1 on the test split and print a report.

    ``GensimFastText`` / ``FastText`` instances are fitted and evaluated on
    ``train_text`` / ``test_text``; every other classifier uses ``X_train`` /
    ``X_test`` and has the module-level ``train_duration`` /
    ``test_duration`` added to its measured times (presumably the feature
    extraction cost -- confirm where those globals are set).

    Other module-level names used: ``y_train``, ``y_test``, ``most_freq``,
    ``opts``, ``feature_names``, ``categories``, ``time``, ``metrics``,
    ``density``, ``np`` and ``trim``.

    Args:
        clf: Estimator exposing ``fit`` and ``predict``.

    Returns:
        Tuple ``(clf_descr, score, train_time, test_time)``.
    """
    global train_duration, test_duration

    print('_' * 80)
    print("Training: ")
    print(clf)

    started = time()
    works_on_raw_text = isinstance(clf, (GensimFastText, FastText))
    if works_on_raw_text:
        clf.fit(train_text, y_train)
        train_time = time() - started
    else:
        clf.fit(X_train, y_train)
        train_time = train_duration + (time() - started)
    print("train time: %0.3fs" % train_time)

    started = time()
    if works_on_raw_text:
        raw_pred = clf.predict(test_text)
        test_time = time() - started
        # Replace missing (None) predictions with the fallback label.
        pred = [most_freq if guess is None else guess for guess in raw_pred]
    else:
        pred = clf.predict(X_test)
        test_time = test_duration + (time() - started)
    print("test time: %0.3fs" % test_time)

    score = metrics.f1_score(y_test, pred, average='macro')
    print("macro F1: %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))

        if opts.print_top10 and feature_names is not None:
            print("top 10 keywords per class:")
            for idx, label in enumerate(categories):
                # Ascending argsort: the tail holds the largest weights.
                strongest = np.argsort(clf.coef_[idx])[-10:]
                keywords = " ".join(feature_names[strongest])
                print(trim("%s: %s" % (label, keywords)))
        print()

    if opts.print_report:
        print("classification report:")
        report = metrics.classification_report(y_test, pred,
                                               target_names=categories)
        print(report)

    if opts.print_cm:
        print("confusion matrix:")
        print(metrics.confusion_matrix(y_test, pred))

    print()
    return str(clf).split('(')[0], score, train_time, test_time
def benchmark(clf):
    """Train ``clf``, time it, and print a preview of its predictions.

    For up to the first nine test samples, prints the directory listing of
    the testing folder named after the true label next to the predicted
    category name.

    Uses module-level names defined outside this block: ``X_train``,
    ``y_train``, ``X_test``, ``y_test``, ``categories``, ``time``,
    ``metrics`` and ``listdir``.

    Args:
        clf: Estimator exposing ``fit`` and ``predict``.

    Returns:
        Tuple ``(clf_descr, score, train_time, test_time)`` where ``score``
        is ``metrics.f1_score(y_test, pred)`` with its default averaging.
    """
    # Upper bound on how many predictions are previewed.
    preview_count = 9
    testing_root = "/home/shrinidhi/tweeot/twitter_trials/twitter/testing/"

    print('_' * 80)
    print("Training: ")
    print(clf)
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    pred = clf.predict(X_test)
    test_time = time() - t0
    print(clf)
    print("test time: %0.3fs" % test_time)

    # NOTE(review): default averaging is used here; confirm it suits the
    # number of classes in y_test.
    score = metrics.f1_score(y_test, pred)

    print("Predicted classes:-")
    # Bounded by the number of predictions so a test split with fewer than
    # `preview_count` samples no longer raises IndexError.
    for element in range(min(preview_count, len(pred))):
        print(listdir(testing_root + str(y_test[element])),
              ": ", categories[pred[element]])

    clf_descr = str(clf).split('(')[0]
    return clf_descr, score, train_time, test_time