def generate_data(sample_size=200, pd=[[0.4, 0.4], [0.1, 0.1]]):
    """Draw labelled 2-D points from a grid of square cells.

    Each entry ``pd[i][j]`` is the relative weight of the cell whose x range
    is ``[j * 50, (j + 1) * 50)`` and whose y range is
    ``[-i * 50, -i * 50 + 50)``. Points are uniform within their cell.
    Cells on the diagonal (``i == j``) are labelled ``1``, all others ``-1``.

    Parameters
    ----------
    sample_size : int
        Total number of points to generate.
    pd : 2-D array-like of non-negative numbers
        Cell weights; they are normalised to sum to 1, so integer counts are
        accepted as well as probabilities. The argument is copied and never
        modified.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(sample_size, 3)`` with columns
        ``x, y, label``. Rows are grouped by cell in row-major order of
        ``pd``.

    Raises
    ------
    ValueError
        If ``pd`` is not 2-D, contains a negative entry, or does not have a
        positive finite sum.
    """
    # Float copy: an integer array cannot be divided in place, and copying
    # keeps the caller's object (and the shared default list) untouched.
    probs = np.array(pd, dtype=float)
    if probs.ndim != 2:
        raise ValueError("pd must be a 2-D array of cell weights")
    total = probs.sum()
    if (probs < 0).any() or not (0 < total < np.inf):
        raise ValueError("pd must be non-negative with a positive finite sum")
    probs /= total

    offset = 50

    # Cumulative weights are the bin edges used to split uniform draws among
    # the cells (row-major order, matching np.ndenumerate below).
    bins = np.r_[0.0, np.cumsum(probs)]
    # Rounding can leave the last edge just below 1.0, which would silently
    # drop draws that land above it; make sure [0, 1) is fully covered.
    bins[-1] = max(bins[-1], 1.0)
    bin_counts = np.histogram(np.random.rand(sample_size), bins)[0]

    blocks = []
    for ((i, j), _), count in zip(np.ndenumerate(probs), bin_counts):
        xs = np.random.uniform(low=0.0, high=50.0, size=count) + j * offset
        ys = np.random.uniform(low=0.0, high=50.0, size=count) - i * offset
        label = 1 if i == j else -1
        blocks.append(np.c_[xs, ys, np.full(count, label)])

    # Stack once instead of re-copying the accumulated array per cell.
    return np.vstack(blocks)
评论列表
文章目录