def n1_fraction_borderline(data):
    """Return the N1 complexity measure: the fraction of borderline rows.

    A minimum spanning tree (MST) is built over the pairwise Euclidean
    distances between rows, using every column except the last as features
    and the last column as the class label. A row is "borderline" when at
    least one MST edge joins it to a row carrying a different label.

    Args:
        data: pandas DataFrame whose last column holds the class labels and
            whose remaining columns hold the numeric features.

    Returns:
        float: number of borderline rows divided by ``len(data)``.
    """
    # Select by position: MST node ids are row positions, so the labels must
    # be a plain positional array. Indexing a Series with those ids would be
    # label-based and break for any non-default row index.
    features = data.iloc[:, :-1]
    y = data.iloc[:, -1].to_numpy()

    # NOTE: zero distances (duplicate rows) are not stored in the sparse
    # matrix, so the graph routine sees no edge between such rows.
    distances = squareform(pdist(features, 'euclidean'))
    mst = minimum_spanning_tree(csr_matrix(distances))

    # Endpoints of every MST edge; keep those whose labels disagree.
    rows, cols = mst.nonzero()
    crossing = y[rows] != y[cols]
    borderline = set(rows[crossing]).union(cols[crossing])

    # The tree is deterministic for a fixed matrix, so it is computed once;
    # averaging several identical rounds would yield this same value.
    return float(len(borderline)) / len(data)
# (web page navigation residue) Comment list
# (web page navigation residue) Table of contents