def predict_dt():
    """Grid-search decision-tree settings and print the ranked results.

    Reads the module-level ``data_dt`` (presumably a PySpark RDD of training
    records -- confirm against the code that builds it), splits it into a
    test sample and the remaining train set, and calls ``evaluate_dt`` once
    per (maxDepth, maxBins) combination. Each result is printed as it is
    produced; afterwards all combinations are printed sorted by
    ``metrics[2]`` in descending order.

    Returns:
        None. All results are written to stdout.
    """
    # Key every record by its index so the sampled test rows can be removed
    # from the full set by key. ``map`` hands the function ONE element (an
    # (item, index) pair), so the pair is indexed rather than unpacked into
    # two lambda parameters.
    data_with_idx = data_dt.zipWithIndex().map(lambda pair: (pair[1], pair[0]))

    # sample(False, 0.2, 42): no replacement, fraction 0.2, fixed seed 42.
    test = data_with_idx.sample(False, 0.2, 42)
    train = data_with_idx.subtractByKey(test)

    # Drop the index key again, keeping only the original records.
    test_data = test.map(lambda pair: pair[1])
    train_data = train.map(lambda pair: pair[1])

    max_depths = [1, 2, 3, 4, 5, 10, 20]
    max_bins_options = [2, 4, 8, 16, 32, 64, 100]

    scores = {}
    for max_depth in max_depths:
        for max_bins in max_bins_options:
            metrics = evaluate_dt(train_data, test_data, max_depth, max_bins)
            print("metrics in maxDepth: %d; maxBins: %d" % (max_depth, max_bins))
            print(metrics)
            # NOTE(review): metrics[2] is used as the ranking score; which
            # metric it holds depends on evaluate_dt -- confirm there.
            scores["maxDepth:%d;maxBins:%d" % (max_depth, max_bins)] = metrics[2]

    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    print(ranked)
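# Comment list (page navigation residue from the source article)
# Table of contents (page navigation residue from the source article)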