def compute_clients_dist(self, client_data):
    """Score every cached row against a single client record.

    The client's values are read from ``client_data`` in the order given
    by ``CATEGORICAL_FEATURES`` and ``CONTINUOUS_FEATURES``. Two distance
    columns are then computed against the cached feature matrices held on
    ``self`` (Canberra for the continuous features, Hamming for the
    categorical ones) and merged into a single score per cached row.

    Args:
        client_data: Record exposing ``.get(key)`` for each feature name.
            ``.get`` is called without a default, so with a dict-like
            record a missing key contributes ``None``.

    Returns:
        The element-wise product ``(continuous + 0.001) * categorical``
        of the two |cdist| results, i.e. one value per cached row in a
        single column (the query side always has exactly one row).
    """
    categorical_row = [client_data.get(name) for name in CATEGORICAL_FEATURES]
    continuous_row = [client_data.get(name) for name in CONTINUOUS_FEATURES]

    # The extra list level turns each feature row into a one-row 2-D
    # array, so |cdist| compares every cached row with this one client.
    continuous_query = np.array([continuous_row])
    continuous_dist = distance.cdist(
        self.continuous_features, continuous_query, 'canberra'
    )

    # Hamming is deliberately passed through a lambda rather than by name:
    # this keeps |cdist| from force-casting the string-valued categorical
    # features to double.
    categorical_query = np.array([categorical_row])
    categorical_dist = distance.cdist(
        self.categorical_features,
        categorical_query,
        lambda u, v: distance.hamming(u, v),
    )

    # Merge both measures into one score by multiplication. Shifting the
    # continuous term by 0.001 keeps that factor from being exactly zero,
    # so a perfect continuous match cannot wipe out the product and the
    # categorical term always carries through to the result.
    return (continuous_dist + 0.001) * categorical_dist
评论列表
文章目录