def findMinPts(ssearch, eps):
    """
    Find a good value for MinPts.

    Computes the pair-wise cosine distance between all rows of
    `ssearch.index.index`, counts for every row how many distances are
    strictly below `eps`, builds a 60-bin matplotlib histogram of those
    counts, and prints timing information and the histogram's bin edges.
    The histogram is only built here; this code neither displays nor saves
    the figure, and it returns nothing.

    Parameters:
      ssearch - Object whose `index.index` attribute is handed directly to
                sklearn's `cosine_distances` (presumably a document/feature
                matrix -- confirm against the caller).
      eps     - Distance threshold. An entry counts as a neighbor when its
                cosine distance is strictly less than `eps`.
    """
    ###########################################################################
    # Count neighbors within threshold
    ###########################################################################
    print('Calculating pair-wise distances...')

    # Calculate pair-wise cosine distance for all documents.
    t0 = time.time()
    DD = sklearn.metrics.pairwise.cosine_distances(ssearch.index.index)
    elapsed = time.time() - t0
    print(' Took %.2f seconds' % elapsed)

    print('Counting number of neighbors...')
    t0 = time.time()

    # For each row of the square distance matrix, count the entries that are
    # strictly below eps. The comparison and row-sum run inside NumPy instead
    # of an n*n Python-level loop. Each row also holds the point's own entry,
    # so the count generally includes the point itself (NOTE(review): the
    # self-distance should be ~0 -- confirm if MinPts must exclude it).
    # tolist() keeps numNeighbors a plain list of Python ints.
    numNeighbors = (DD < eps).sum(axis=1).tolist()

    elapsed = time.time() - t0
    print(' Took %.2f seconds' % elapsed)

    ###########################################################################
    # Histogram the neighbor counts.
    ###########################################################################
    import matplotlib.pyplot as plt

    counts, bins, patches = plt.hist(numNeighbors, bins=60)
    plt.title("Number of neighbors")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Frequency")

    # `bins` holds the bin edges, so there is one more edge than bins.
    print('\n%d bins:' % (len(bins) - 1))

    # Print every bin edge on a single line.
    binsStr = ''.join(' %0.2f' % b for b in bins)
    print(binsStr)
评论列表
文章目录