def makePrediction(para,rawData,totalNumRows,labels):
    """Train a decision tree on the leading rows and score it on the rest.

    The first ``floor(totalNumRows * para['trainingSetPercent'])`` entries of
    ``rawData``/``labels`` are used to fit a ``tree.DecisionTreeClassifier``;
    the remaining entries form the test set. Accuracy, precision, recall and
    F-measure are printed, where label ``1`` is counted as a failure and
    label ``0`` as a success.

    Args:
        para: mapping that provides the key ``'trainingSetPercent'``.
        rawData: sequence of feature rows, sliceable by position.
        totalNumRows: row count used to derive the training-set size.
        labels: sequence of labels aligned with ``rawData``.

    Returns:
        Tuple ``(predictFailure, testFailure, sameFailureNum, precision,
        recall, F_measure)``. When no failure is predicted correctly,
        ``precision``, ``recall`` and ``F_measure`` are all ``0.0``.
    """
    traingSetSize=int(math.floor(totalNumRows*para['trainingSetPercent']))
    print('%d instances are selected as training dataset!'%traingSetSize)
    trainX=np.array(rawData[0:traingSetSize])
    trainY=np.array(labels[0:traingSetSize])
    clf=tree.DecisionTreeClassifier()
    clf=clf.fit(trainX,trainY)
    # To visualise the fitted tree, tree.export_graphviz can be applied to clf.

    testingX=rawData[traingSetSize:]
    testingY=labels[traingSetSize:]
    prediction=list(clf.predict(testingX))
    if len(prediction)!=len(testingY):
        print ('prediction and testingY have different length and SOMEWHERE WRONG!')

    # sameLabelNum: predictions that match the true label.
    # sameFailureNum: matches whose label is 1 (correctly predicted failures).
    sameLabelNum=0
    sameFailureNum=0
    for predicted,actual in zip(prediction,testingY):
        if predicted==actual:
            sameLabelNum+=1
            if predicted==1:
                sameFailureNum+=1
    accuracy=float(sameLabelNum)/len(testingY)
    print ('accuracy is %.5f:'%accuracy)

    predictSuccess=sum(1 for item in prediction if item==0)
    predictFailure=sum(1 for item in prediction if item==1)
    testSuccess=sum(1 for tt in testingY if tt==0)
    testFailure=sum(1 for tt in testingY if tt==1)
    print(predictSuccess,predictFailure,testSuccess,testFailure,sameFailureNum)

    if sameFailureNum==0:
        # Without any correctly predicted failure the metrics are reported as
        # zero; define them explicitly so the return below is always valid.
        print ('precision is 0 and recall is 0')
        precision=0.0
        recall=0.0
        F_measure=0.0
    else:
        # sameFailureNum > 0 implies predictFailure > 0 and testFailure > 0,
        # so none of these divisions can hit a zero denominator.
        precision=float(sameFailureNum)/(predictFailure)
        print('precision is %.5f'%precision)
        recall=float(sameFailureNum)/(testFailure)
        print('recall is %.5f'%recall)
        F_measure=2*precision*recall/(precision+recall)
        print('F_measure is %.5f'%F_measure)
    return predictFailure,testFailure,sameFailureNum,precision,recall,F_measure
评论列表
文章目录