def make_subset(coocc_features, x_axis, y_axis, top_n=25):
    """Build a long-format table of the strongest cells of a co-occurrence matrix.

    The ``x_axis`` rows and ``y_axis`` columns are selected from
    ``coocc_features`` and transformed with ``log(1 + value)``.  Rows are then
    ordered by descending row sum, columns by descending column sum, and only
    the first ``top_n`` rows and columns are kept.

    Args:
        coocc_features: DataFrame of co-occurrence values.
        x_axis: Row labels to select from ``coocc_features``.
        y_axis: Column labels to select from ``coocc_features``.
        top_n: Maximum number of rows and of columns to keep (default 25).

    Returns:
        A tuple ``(df, x_factors, y_factors)``.  ``df`` has one record per
        retained cell with the columns ``x`` (row label), ``y`` (column
        label), ``counts`` (log-scaled value) and ``raw`` (value mapped back
        from the log scale), sorted by ``counts`` in descending order.
        ``x_factors`` and ``y_factors`` are both the list of retained row
        labels.
    """
    # ``.ix`` was removed in pandas 1.0; the selection here is label-based.
    logsource = np.log1p(coocc_features.loc[x_axis, y_axis])

    # Order rows by their totals, then columns by theirs (largest first).
    row_order = logsource.sum(axis=1).sort_values(ascending=False).index
    logsource = logsource.loc[row_order]
    col_order = logsource.sum(axis=0).sort_values(ascending=False).index
    logsource = logsource.loc[:, col_order]

    # The cut is positional so it means "first top_n" for any index dtype.
    logsource = logsource.iloc[:top_n, :top_n]
    n_rows, n_cols = logsource.shape

    # Flatten row-major: each row label is repeated once per column while the
    # column labels cycle, matching the order of the ravelled values.
    df = pd.DataFrame()
    df["x"] = np.repeat(logsource.index.values, n_cols)
    df["y"] = np.tile(logsource.columns.values, n_rows)
    df["counts"] = logsource.values.ravel()
    df["raw"] = np.expm1(df["counts"])
    df = df.sort_values("counts", ascending=False)

    # NOTE(review): the row labels are returned for both axes, as in the
    # original code. If x_axis and y_axis can differ, the second value should
    # probably be the column labels -- confirm against callers.
    new_axis_factors = logsource.index.values.tolist()
    return df, new_axis_factors, new_axis_factors
评论列表
文章目录