def create_answer_matrix(data, user_column, item_column, value_column, aggfunc=np.mean, time_column=None):
    """Pivot long-format answer records into a user-by-item matrix.

    Args:
        data: DataFrame with one row per answer.
        user_column: Label of the column identifying the user; its values
            become the row index of the result.
        item_column: Label of the column identifying the item; its values
            become the columns of the result.
        value_column: Label of the column holding the answer value.
        aggfunc: Aggregation handed to ``pd.pivot_table``; it combines
            multiple answers of one user to one item into a single cell.
        time_column: Optional label of the column that orders the answers.
            When it is not ``None``, only the earliest answer of every
            user/item pair is kept before pivoting. Rows with a missing
            time sort last, so they are used only when the pair has no
            timestamped answer at all.

    Returns:
        DataFrame indexed by user with one column per item. If the pivot
        table is empty it is returned unchanged.
    """
    # Compare against None instead of relying on truthiness, so that a valid
    # but falsy column label (e.g. the integer 0) is still honoured.
    if time_column is not None:
        # Select only the first response. A stable sort followed by
        # drop_duplicates (which keeps the first occurrence) picks the
        # earliest row by position, so it stays correct when the index of
        # `data` contains repeated labels; ties keep the original row order.
        data = data.sort_values(time_column, kind="mergesort").drop_duplicates(
            subset=[item_column, user_column]
        )

    answers = pd.pivot_table(
        data,
        values=[value_column],
        index=[user_column],
        columns=[item_column],
        aggfunc=aggfunc,
    )

    if not answers.empty:
        # `values` was passed as a list, so the columns are keyed by
        # (value_column, item); select the value level to leave plain items.
        answers = answers[value_column]
    return answers
# TODO: add Cronbach's alpha to item statistics
# see http://stackoverflow.com/questions/20799403/improving-performance-of-cronbach-alpha-code-python-numpy
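# NOTE: the two labels below are page-navigation residue from the web page
# this snippet was copied from ("Comment list", "Table of contents"); not code.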