def calculate_feature_statistics(feature_id):
    """Compute summary statistics for one feature column and persist them.

    Looks up the ``Feature`` whose primary key is ``feature_id``, reads the
    column named ``feature.name`` from the dataframe of its dataset, and
    stores the minimum, maximum, mean and variance on the model. The feature
    is flagged as categorical when every distinct value is a whole number
    and there are fewer than 10 distinct values; in that case the distinct
    values are stored as its categories. Only the listed statistic fields
    are written back via ``save(update_fields=...)``.

    Args:
        feature_id: Primary key of the ``Feature`` to update.

    Returns:
        None. The result is the side effect of saving the ``Feature``.

    Raises:
        Any exception raised by ``Feature.objects.get`` (no matching row),
        by the column lookup (unknown column name), or by NumPy (e.g. an
        empty column) propagates to the caller unchanged.
    """
    feature = Feature.objects.get(pk=feature_id)
    dataframe = _get_dataframe(feature.dataset.id)
    feature_col = dataframe[feature.name]

    # ``.item()`` turns NumPy scalars into plain Python numbers so the model
    # fields never hold NumPy types.
    # NOTE(review): amin/amax/mean propagate NaN while nanvar ignores it, so a
    # column containing NaN gets NaN min/max/mean but a finite variance.
    # Confirm which NaN policy is intended before unifying these calls.
    feature.min = np.amin(feature_col).item()
    feature.max = np.amax(feature_col).item()
    feature.mean = np.mean(feature_col).item()
    feature.variance = np.nanvar(feature_col).item()

    unique_values = np.unique(feature_col)
    # Whole-number test: a value is integral when its remainder mod 1 is 0.
    integer_check = (np.mod(unique_values, 1) == 0).all()
    # bool() keeps a NumPy ``np.bool_`` from leaking into the model field
    # (``np.bool_(False) and x`` evaluates to ``np.bool_(False)``).
    feature.is_categorical = bool(integer_check and unique_values.size < 10)
    if feature.is_categorical:
        # tolist() yields native Python numbers, matching the ``.item()``
        # conversions above; list() would keep NumPy scalar elements.
        feature.categories = unique_values.tolist()
    # NOTE(review): when the feature is not categorical, ``categories`` is
    # left as loaded and re-saved as-is; confirm stale categories from an
    # earlier run are acceptable.

    feature.save(
        update_fields=[
            'min', 'max', 'variance', 'mean', 'is_categorical', 'categories',
        ]
    )
评论列表
文章目录