runDBSCAN.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:simsearch 作者: chrisjmccormick 项目源码 文件源码
def findMinPts(ssearch, eps):
    """
    Help choose a DBSCAN MinPts value by histogramming neighbor counts.

    Computes the pair-wise cosine distance between all rows of
    `ssearch.index.index`, counts for every row how many rows lie at a
    distance strictly less than `eps`, and histograms those counts into
    60 bins on the current matplotlib figure.

    Parameters:
      ssearch - Object exposing `index.index`, the matrix handed to
                `sklearn.metrics.pairwise.cosine_distances` (presumably one
                row vector per document -- confirm against the caller).
      eps     - Distance threshold; a pair counts as neighbors when its
                cosine distance is strictly below this value.

    Returns:
      None. Timing information and the histogram bin edges are printed to
      stdout. The histogram is drawn but neither shown nor saved here.
    """

    ###########################################################################
    # Count neighbors within threshold
    ###########################################################################

    # NOTE: single-argument print(...) calls are used throughout so that the
    # output is identical under Python 2 and the code also runs on Python 3.
    print('Calculating pair-wise distances...')
    # Calculate pair-wise cosine distance for all documents.
    t0 = time.time()

    DD = sklearn.metrics.pairwise.cosine_distances(ssearch.index.index)

    elapsed = time.time() - t0

    print('    Took %.2f seconds' % elapsed)

    print('Counting number of neighbors...')

    t0 = time.time()

    # Count, for each point, the entries of its distance row below `eps`.
    # The comparison is vectorized one row at a time: this avoids the
    # pure-Python inner loop without allocating a second full NxN matrix.
    # Each row also holds the point's distance to itself, so for eps > 0 the
    # count presumably includes the point itself.
    numNeighbors = [int((dists < eps).sum()) for dists in DD]

    elapsed = time.time() - t0

    print('    Took %.2f seconds' % elapsed)

    ###########################################################################
    # Histogram the neighbor counts.
    ###########################################################################

    # Imported locally so matplotlib is only required when this is called.
    import matplotlib.pyplot as plt

    # Only the bin edges are needed below; counts and patches are discarded.
    _, bins, _ = plt.hist(numNeighbors, bins=60)
    plt.title("Number of neighbors")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Frequency")

    # There is one more bin edge than there are bins.
    print('\n%d bins:' % (len(bins) - 1))
    print(''.join('  %0.2f' % b for b in bins))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号