tasks.py 文件源码-python代码片段

tasks.py 文件源码

python

阅读 35 收藏 0 点赞 0 评论 0

项目：fexum 作者: KDD-OpenSource 项目源码文件源码

def calculate_densities(target_feature_id, feature_id):
    """Estimate a feature's density separately for every target category.

    Both features are looked up by primary key, and the dataframe of the
    dataset that ``feature`` belongs to is loaded. For each entry in
    ``target_feature.categories`` the rows whose target column equals that
    category are selected, a Gaussian kernel density estimator
    (bandwidth 0.75) is fitted on the feature's values in those rows, and
    the fitted estimator is evaluated at 100 evenly spaced points between
    ``feature.min`` and ``feature.max``.

    Args:
        target_feature_id: Primary key of the feature whose categories
            partition the rows.
        feature_id: Primary key of the feature whose density is estimated.

    Returns:
        A list with one dict per category, in the order of
        ``target_feature.categories``. Each dict has the keys
        ``'target_class'`` (the category) and ``'density_values'`` (a list
        of 100 floats).
    """
    feature = Feature.objects.get(pk=feature_id)
    target_feature = Feature.objects.get(pk=target_feature_id)

    dataframe = _get_dataframe(feature.dataset.id)
    target_values = dataframe[target_feature.name]

    def density_for(category):
        """Return the 100 density values for the rows of one category."""
        estimator = KernelDensity(kernel='gaussian', bandwidth=0.75)
        samples = dataframe[target_values == category][feature.name]
        # The estimator is fitted on a 2-D array, so append a trailing axis.
        estimator.fit(np.expand_dims(samples, axis=1))
        # Evaluation grid: 100 points across the feature's value range,
        # given the same trailing axis as the training samples.
        grid = np.linspace(feature.min, feature.max, 100)
        log_density = estimator.score_samples(grid[:, np.newaxis])
        # Exponentiate the scores (treated as log-densities) before
        # converting them to a plain list.
        return np.exp(log_density).tolist()

    densities = []
    for category in target_feature.categories:
        densities.append({
            'target_class': category,
            'density_values': density_for(category),
        })
    return densities