def test_joint(knn_xz):
    """Check joint simulation of (x, z) against the observed data.

    Simulates from the joint distribution of x,z (see
    generate_real_nominal_data) and performs a two-sample KS test on x
    within each subpopulation defined by a level of z (six levels,
    according to the original note). Observed and simulated points are
    also scattered on a shared axis for visual inspection.

    Parameters
    ----------
    knn_xz
        Model under test. It must expose ``data`` (a mapping whose values
        are rows with x in column 0 and z in column 1) and
        ``simulate(rowid, query, N=...)`` returning samples indexable by
        0 (x) and 1 (z).
        NOTE(review): presumably a pytest fixture -- confirm in conftest.

    Raises
    ------
    AssertionError
        If the KS p-value for any level of z is not above 0.05.
    """
    # Materialize the values before handing them to NumPy: on Python 3
    # dict.values() is a view, which np.asarray would wrap as a 0-d object
    # array, breaking the column indexing and len() below.
    data = np.asarray(list(knn_xz.data.values()))
    indicators = sorted(set(data[:, 1].astype(int)))
    # Draw as many joint samples as there are observed rows.
    joint_samples = knn_xz.simulate(-1, [0, 1], N=len(data))

    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:, 1] == t]
        ax.scatter(data_subpop[:, 1], data_subpop[:, 0], color=gu.colors[t])
        # Plot simulated data for indicator t, shifted right by .25 so the
        # simulated column sits next to the observed one instead of on it.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25), samples_subpop,
            color=gu.colors[t])
        # KS test: observed x versus simulated x within this level of z.
        pvalue = ks_2samp(data_subpop[:, 0], samples_subpop)[1]
        assert .05 < pvalue, (
            'KS test rejected simulated x for z=%d (p=%g)' % (t, pvalue))
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
评论列表
文章目录