def chooseAttr(data, class_values):
    """Select the column of *data* whose split yields the lowest entropy.

    Every column reported by ``data.dtypes`` is scored: columns whose dtype
    is boolean, object, bytes, string or void are scored with
    ``subEntropyChar``; all other columns are scored with
    ``subEntropyFloat``.  The column with the strictly smallest
    ``"split_entropy"`` wins (on ties the first one encountered is kept).

    Args:
        data: Object exposing ``dtypes.to_dict()`` (e.g. a pandas DataFrame);
            passed through unchanged to the scoring helpers.
        class_values: Class labels; passed to the scoring helpers and to
            ``entropy``.

    Returns:
        A dict with the keys ``"name"``, ``"type"``, whatever the scoring
        helper contributed (``"split_entropy"``, plus ``"split_value"`` for
        numeric columns according to the helper's documented contract),
        ``"tree_entropy"`` (``entropy(class_values)``) and ``"gain"``
        (``tree_entropy - split_entropy``).  If *data* has no columns the
        placeholder entry named ``"temp"`` is returned, without ``"type"``.
    """
    # dtype.kind codes treated as non-numeric: bool, object, bytes, str, void.
    # These are the same families the removed ``np.sctypes["others"]`` listed,
    # but a kind check also matches sized string dtypes and pandas extension
    # dtypes (whose kind defaults to "O").
    non_numeric_kinds = ("b", "O", "S", "U", "V")

    # Placeholder whose entropy is large enough that any real column beats it.
    best = {
        "name": "temp",
        "split_entropy": 999999,
    }

    # dtypes.to_dict() maps each attribute name to its dtype.
    # ``items()`` works on both Python 2 and 3 (``iteritems()`` is 2-only).
    for name, dtype in data.dtypes.to_dict().items():
        attr = {"name": name, "type": dtype}
        if dtype.kind in non_numeric_kinds:
            # Non-numeric column: helper contributes "split_entropy".
            attr.update(subEntropyChar(data, class_values, name))
        else:
            # Numeric column: helper contributes "split_entropy" and
            # "split_value".
            attr.update(subEntropyFloat(data, class_values, name))
        if attr["split_entropy"] < best["split_entropy"]:
            best = attr

    best["tree_entropy"] = entropy(class_values)
    best["gain"] = best["tree_entropy"] - best["split_entropy"]
    return best
# 评论列表 (comment list)
# 文章目录 (table of contents)