def sdbscanTrain(self, settings, mname, data):
    '''
    Standardize the data, fit a DBSCAN clusterer on it and serialize the result.

    Every entry of ``settings`` is logged and printed, the data is scaled with
    ``StandardScaler().fit_transform``, DBSCAN is fitted on the scaled data, the
    resulting labels and the estimated number of clusters are printed, and the
    fitted clusterer is handed to ``self.__serializemodel`` under the model
    type ``'sdbscan'``.

    :param data: -> dataframe with data
    :param settings: -> settings dictionary; must contain the keys eps,
        min_samples, metric, algorithm, leaf_size, p and n_jobs
    :param mname: -> name of serialized clusterer
    :return: -> fitted clusterer
    :raises SystemExit: -> if DBSCAN cannot be instantiated or fitted with the
        given settings (the error is logged and printed, then ``sys.exit(1)``)
    :example settings: -> {eps:0.9, min_samples:10, metric:'euclidean',
        algorithm:'auto', leaf_size:30, p:0.2, n_jobs:1}
    '''
    # ``items()`` (rather than the Python 2 only ``iteritems()``) keeps this
    # loop working on both Python 2 and Python 3.
    for k, v in settings.items():
        logger.info('[%s] : [INFO] SDBSCAN %s set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v)
        print("SDBSCAN %s set to %s" % (k, v))

    # Scaling happens outside the try block, so scaling errors propagate to the
    # caller instead of being turned into a process exit.
    sdata = StandardScaler().fit_transform(data)
    try:
        # Settings values are coerced explicitly, so they may arrive as strings
        # (NOTE(review): presumably read from a config file -- confirm).
        db = DBSCAN(eps=float(settings['eps']),
                    min_samples=int(settings['min_samples']),
                    metric=settings['metric'],
                    algorithm=settings['algorithm'],
                    leaf_size=int(settings['leaf_size']),
                    p=float(settings['p']),
                    n_jobs=int(settings['n_jobs'])).fit(sdata)
    except Exception as inst:
        # A missing key or an invalid value also lands here, not only errors
        # raised by the clusterer itself.
        logger.error('[%s] : [ERROR] Cannot instanciate sDBSCAN with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                     type(inst), inst.args)
        print("Error while instanciating sDBSCAN with %s and %s" % (type(inst), inst.args))
        sys.exit(1)

    labels = db.labels_
    print(labels)
    # The label -1 is not counted as a cluster (assuming scikit-learn's DBSCAN,
    # which assigns -1 to noise points).
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    self.__serializemodel(db, 'sdbscan', mname)
    return db
评论列表
文章目录