def _sample_balanced_items(user_row, n_items):
    """Draw equally many interacted and non-interacted item indices for a user.

    Args:
        user_row: One row of the testing matrix. It must stay 2-D so that
            ``nonzero()`` yields a (rows, cols) pair -- presumably a
            scipy.sparse row; confirm against the caller.
        n_items: Total number of item columns.

    Returns:
        A ``(indices, labels)`` pair of equal-length lists. The sampled
        interacted items come first (label 1), followed by the sampled
        non-interacted items (label 0).
    """
    _, interaction_indices = user_row.nonzero()
    interacted = set(interaction_indices)
    non_interacted = [item for item in range(n_items)
                      if item not in interacted]
    n_samples = min(len(non_interacted), len(interacted))
    # random.sample() requires a sequence: sampling directly from a set is
    # deprecated since Python 3.9 and raises TypeError from 3.11 on.
    indices = random.sample(sorted(interacted), n_samples)
    indices.extend(random.sample(non_interacted, n_samples))
    labels = [1] * n_samples + [0] * n_samples
    return indices, labels


def train_and_score(metric, training, testing, ks):
    """Fit a brute-force nearest-neighbour model and report AUC for each k.

    For every test user, a balanced sample of interacted / non-interacted
    items is drawn once. For each ``k`` the user's ``k`` nearest training
    rows are averaged column-wise, and the averaged values at the sampled
    items are used as predicted scores. The AUC over all users' samples is
    printed and collected.

    The balanced samples are shared by all values of ``k`` so that the
    resulting AUCs are measured on the same items and are comparable.

    NOTE(review): the indexing used here (two-array ``nonzero()`` on a row,
    ``[0, idx]`` on the column mean) needs 2-D rows, as scipy.sparse
    matrices provide -- presumably that is what callers pass; confirm.
    ``training`` and ``testing`` must share the same item columns.

    Args:
        metric: Distance metric forwarded to ``NearestNeighbors``.
        training: Matrix the neighbour model is fitted on (rows x items).
        testing: Matrix of users to evaluate (users x items).
        ks: Iterable of neighbour counts to evaluate.

    Returns:
        List with one AUC value per entry of ``ks``, in the same order.
        (Earlier versions only printed the values; callers that ignore the
        return value are unaffected.)
    """
    print("Training and scoring")
    knn = NearestNeighbors(metric=metric, algorithm="brute")
    knn.fit(training)

    n_users, n_items = testing.shape
    # Sampling does not depend on k, so do it once up front.
    samples = [_sample_balanced_items(testing[user_id, :], n_items)
               for user_id in range(n_users)]

    scores = []
    for k in ks:
        print("Evaluating for %s neighbors" % (k,))
        neighbor_indices = knn.kneighbors(testing,
                                          n_neighbors=k,
                                          return_distance=False)
        all_predicted_scores = []
        all_labels = []
        for user_id, (indices, labels) in enumerate(samples):
            if not indices:
                # Nothing was sampled for this user; skip the averaging.
                continue
            neighbors = training[neighbor_indices[user_id, :], :]
            # Column-wise mean over the neighbours: one score per item.
            predicted_scores = neighbors.mean(axis=0)
            all_predicted_scores.extend(predicted_scores[0, idx]
                                        for idx in indices)
            all_labels.extend(labels)
        print("%d %d" % (len(all_labels), len(all_predicted_scores)))
        auc = roc_auc_score(all_labels, all_predicted_scores)
        print("k %s AUC %s" % (k, auc))
        scores.append(auc)
    return scores
评论列表
文章目录