import sys

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score


def get_best_C(dataset):
    """
    Grid-search the regularization parameter C of a logistic regression
    classifier on the dev set and return the best C and its F1 score.
    """
    best_f1 = 0
    best_c = 0
    labels = sorted(set(dataset._ytrain))
    test_cs = [0.001, 0.003, 0.006, 0.009,
               0.01, 0.03, 0.06, 0.09,
               0.1, 0.3, 0.6, 0.9,
               1, 3, 6, 9,
               10, 30, 60, 90]
    for i, c in enumerate(test_cs):
        sys.stdout.write('\rRunning dev-set grid search: {0} of {1}'.format(i + 1, len(test_cs)))
        sys.stdout.flush()
        # Train on the training split with the candidate C, evaluate on dev.
        clf = LogisticRegression(C=c)
        clf.fit(dataset._Xtrain, dataset._ytrain)
        pred = clf.predict(dataset._Xdev)
        if len(labels) == 2:
            # Binary task: F1 of the positive class.
            dev_f1 = f1_score(dataset._ydev, pred, pos_label=1)
        else:
            # Multi-class task: micro-averaged F1 over all labels.
            dev_f1 = f1_score(dataset._ydev, pred, labels=labels, average='micro')
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            best_c = c
    print()
    print('Best F1 on dev data: {0:.3f}'.format(best_f1))
    print('Best C on dev data: {0}'.format(best_c))
    return best_c, best_f1
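

# A minimal usage sketch: get_best_C only assumes a dataset object exposing
# _Xtrain, _ytrain, _Xdev and _ydev. The SimpleNamespace and random arrays
# below are placeholders standing in for a real dataset class, not part of
# the original code.
import numpy as np
from types import SimpleNamespace

rng = np.random.RandomState(0)
dataset = SimpleNamespace(
    _Xtrain=rng.rand(200, 10),
    _ytrain=rng.randint(0, 2, size=200),
    _Xdev=rng.rand(50, 10),
    _ydev=rng.randint(0, 2, size=50),
)

best_c, best_f1 = get_best_C(dataset)
# The selected C would then typically be used to refit a final classifier
# before evaluating on the held-out test split.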