def main(argv=None):
    """Cluster word vectors with DBSCAN and write out the cluster ids.

    Options, words and vectors are obtained from ``process_options``; the
    vectors are clustered with ``sklearn.cluster.DBSCAN`` using the ``eps``
    and ``metric`` options, summary statistics are logged, and the
    resulting labels are handed to ``write_cluster_ids``.

    Args:
        argv: Full argument list with the program name in position 0.
            Defaults to ``sys.argv`` when None.

    Returns:
        0 on success, 1 if option processing raised an exception that
        carries a message (the message is printed to stderr).

    Raises:
        Exception: re-raised unchanged when option processing fails with
            an exception whose message is empty.
    """
    if argv is None:
        argv = sys.argv

    try:
        words, vectors, options = process_options(argv[1:])
    except Exception as e:
        message = str(e)
        if not message:
            # Nothing useful to show the user; let the traceback surface.
            raise
        sys.stderr.write('Error: %s\n' % message)
        return 1

    dbscan = sklearn.cluster.DBSCAN(eps=options.eps, metric=options.metric)
    dbscan.fit(numpy.array(vectors))

    labels = dbscan.labels_
    total = len(vectors)
    noisy = sum(1 for label in labels if label == -1)
    # DBSCAN labels noise points -1; that label is not a cluster, so it
    # must not be included in the cluster count.
    clusters = len(set(int(label) for label in labels if label != -1))

    logging.info('%d clusters, %d noisy, %d vectors', clusters, noisy, total)

    # Compare without division so the thresholds do not depend on
    # Python 2 floor division (which made the noise threshold 0 for
    # fewer than four vectors, warning even when nothing was noisy).
    if 4 * noisy >= total:
        logging.warning('%d/%d noisy (-1) labels (try higher eps?)',
                        noisy, total)
    elif clusters < (total / 2.0) ** 0.5:
        logging.warning('only %d clusters (try lower eps?)', clusters)

    write_cluster_ids(words, dbscan.labels_)
    return 0
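# Scraped web-page residue (not code): "评论列表" (comment list)
# Scraped web-page residue (not code): "文章目录" (table of contents)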