def predict(self):
    """Predict a value for every query vector, processing them in batches.

    When ``DATA_QUERIES_VECTOR_NPZ`` exists on disk and ``FORCE_LOAD`` is
    falsy, ``load_precomputed_data()`` is called; otherwise
    ``precomputed_similarity()`` is called. The rows of
    ``self.queries_vector`` are then split into chunks of roughly 100 rows
    and handed to ``batch_calculation()``.

    Returns:
        A ``(self.y_train, cleaned_predictions)`` tuple. Every prediction
        whose string form is ``'nan'`` is replaced by the arithmetic mean
        of ``self.y_train``.
    """
    use_cached = os.path.exists(DATA_QUERIES_VECTOR_NPZ) and not FORCE_LOAD
    if not use_cached:
        self.precomputed_similarity()
    else:
        print('{}: loading precomputed data'.format(self.__class__.__name__))
        self.load_precomputed_data()

    # Number of chunks needed so that each holds at most `rows_per_chunk` rows.
    rows_per_chunk = 100
    chunk_count = math.ceil(self.queries_vector.shape[0] / rows_per_chunk)
    # NOTE(review): `.A` presumably densifies a scipy sparse matrix before
    # splitting -- confirm the type of `queries_vector`.
    dense_queries = self.queries_vector.A
    chunks = np.array_split(dense_queries, chunk_count)

    print("starting batch computation of Similarity and KNN calculation")
    # The original kept several alternative strategies commented out here
    # (multiprocessor_batch_calc, individual_calculation, cosine_knn_calc,
    # custom_knn_calculation), noting that some are faster and some use more
    # memory. Only the batch variant is active.
    prediction = self.batch_calculation(chunks)

    # NaN predictions fall back to the mean of the training targets.
    mean_train_salary = sum(self.y_train) / len(self.y_train)
    cleaned_predictions = [
        mean_train_salary if str(value) == 'nan' else value
        for value in prediction
    ]
    return self.y_train, cleaned_predictions
StandaloneSimilarity.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录