StandaloneSimilarity.py 文件源码-python代码片段

StandaloneSimilarity.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

项目：job-salary-prediction 作者: soton-data-mining 项目源码文件源码

def predict(self):
        """Predict a salary for every query row.

        Precomputed data is loaded when the file named by
        DATA_QUERIES_VECTOR_NPZ exists and FORCE_LOAD is false; otherwise
        ``precomputed_similarity()`` is called.  The query matrix is then
        converted to a dense array, split into batches of at most 100 rows
        and handed to ``batch_calculation``.  Predictions that come back as
        NaN are replaced with the mean of ``self.y_train``.

        Returns:
            A ``(self.y_train, cleaned_predictions)`` tuple, where
            ``cleaned_predictions`` is a list with one entry per element
            yielded by ``batch_calculation``.  When there are no query rows
            the list is empty.

        Raises:
            ZeroDivisionError: if there is at least one query row and
                ``self.y_train`` is empty, because the fallback mean
                cannot be computed.
        """
        if os.path.exists(DATA_QUERIES_VECTOR_NPZ) and not FORCE_LOAD:
            print('{}: loading precomputed data'.format(self.__class__.__name__))
            self.load_precomputed_data()
        else:
            self.precomputed_similarity()

        # NOTE(review): both branches above presumably populate
        # self.queries_vector -- confirm against their definitions.
        query_count = self.queries_vector.shape[0]
        if query_count == 0:
            # np.array_split raises ValueError for a section count of 0, so
            # an empty query set is answered directly with no predictions.
            return self.y_train, []

        batch_size = 100
        # Number of batches (not rows per batch): array_split spreads the rows
        # evenly, so no batch ever holds more than batch_size rows.
        batch_count = math.ceil(query_count / batch_size)
        # `.A` materialises the whole matrix densely before it is split.
        batch_queue = np.array_split(self.queries_vector.A, batch_count)
        print("starting batch computation of Similarity and KNN calculation")

        # The original author's notes list alternative strategies with
        # different speed/memory trade-offs (multiprocessor_batch_calc,
        # individual_calculation, cosine_knn_calc, custom_knn_calculation);
        # batch_calculation is the one in use.
        prediction = self.batch_calculation(batch_queue)

        train_avg_salary = sum(self.y_train) / len(self.y_train)
        cleaned_predictions = []
        for value in prediction:
            try:
                is_missing = math.isnan(value)
            except (TypeError, OverflowError):
                # Not something math.isnan can interpret as a real number;
                # keep the value untouched, as the original code did.
                is_missing = False
            cleaned_predictions.append(train_avg_salary if is_missing else value)

        return self.y_train, cleaned_predictions