def run_data_composition_analyses_for_rsmeval(df_test_metadata,
                                              df_test_excluded,
                                              subgroups,
                                              candidate_column,
                                              exclude_zero_scores=True,
                                              exclude_listwise=False):
    """
    Run the data composition analyses for RSMEval.

    This is the RSMEval counterpart of
    `run_data_composition_analyses_for_rsmtool()`.

    Parameters
    ----------
    df_test_metadata : pandas DataFrame
        Metadata frame. It must contain an `spkitemid` column and
        one column for every entry in `subgroups`, since those are
        used to build the per-group response counts. It is also
        forwarded to `analyze_used_predictions()`.
    df_test_excluded : pandas DataFrame
        Forwarded unchanged to `analyze_excluded_responses()`
        (presumably the excluded responses; confirm against that
        helper).
    subgroups : list of str
        Names of the grouping columns. One count table is produced
        per entry; the list is also forwarded to
        `analyze_used_predictions()`.
    candidate_column : str
        Forwarded unchanged to `analyze_used_predictions()`.
    exclude_zero_scores : bool, optional
        Forwarded to `analyze_excluded_responses()`.
        Defaults to True.
    exclude_listwise : bool, optional
        Forwarded to `analyze_excluded_responses()`.
        Defaults to False.

    Returns
    -------
    df_test_excluded_analysis
        Result of `analyze_excluded_responses()` with the columns
        'all features numeric' and 'non-numeric feature values'
        relabelled as 'numeric system score' and
        'non-numeric system score'.
    df_data_composition
        Result of `analyze_used_predictions()`, returned as is.
    data_composition_by_group_dict : dict
        Maps each subgroup name to a data frame whose first column
        repeats the group values and whose 'N responses' column
        holds the number of rows per group value.
    """
    # Summarise the excluded responses, then relabel two of the
    # resulting columns so that they refer to the system score.
    excluded_summary = analyze_excluded_responses(
        df_test_excluded,
        ['raw'],
        'Human/System',
        exclude_zero_scores=exclude_zero_scores,
        exclude_listwise=exclude_listwise)
    score_column_labels = {
        'all features numeric': 'numeric system score',
        'non-numeric feature values': 'non-numeric system score',
    }
    excluded_summary.rename(columns=score_column_labels, inplace=True)

    composition = analyze_used_predictions(df_test_metadata,
                                           subgroups,
                                           candidate_column)

    def _responses_per_group(group_column):
        # Count the `spkitemid` entries for each value of `group_column`.
        counts = pd.DataFrame(pd.pivot_table(df_test_metadata,
                                             values='spkitemid',
                                             index=[group_column],
                                             aggfunc=len))
        # Expose the group values as a regular leading column in
        # addition to the index.
        counts.insert(0, group_column, counts.index)
        counts.rename(columns={'spkitemid': 'N responses'}, inplace=True)
        return counts

    # One contingency table per grouping variable.
    composition_by_group = {group_column: _responses_per_group(group_column)
                            for group_column in subgroups}

    return excluded_summary, composition, composition_by_group
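# NOTE(review): web-page navigation residue left over from extraction
# ("Comment list", "Table of contents"); not part of this module.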