makeTree.py 文件源码-python代码片段

makeTree.py 文件源码

python

阅读 42 收藏 0 点赞 0 评论 0

项目：Decision-Tree-Classifier 作者: boaloysius 项目源码文件源码

def chooseAttr(data,class_values):
    """Pick the column of ``data`` whose split gives the lowest entropy.

    Every column of ``data`` is scored with ``subEntropyChar`` (non-numeric
    columns) or ``subEntropyFloat`` (numeric columns); the column with the
    smallest ``"split_entropy"`` wins.

    Parameters
    ----------
    data : pandas.DataFrame
        Candidate attributes, one per column.
    class_values
        Class labels, passed through unchanged to ``subEntropyChar``,
        ``subEntropyFloat`` and ``entropy``.

    Returns
    -------
    dict
        The winning attribute description with keys ``"name"``, ``"type"``,
        ``"split_entropy"``, whatever else the scoring helper returned
        (``"split_value"`` for numeric columns, per the helper's documented
        keys), plus ``"tree_entropy"`` and ``"gain"``.  If no column scores
        below the initial sentinel (e.g. ``data`` has no columns), the
        placeholder entry named ``"temp"`` is returned instead.
    """
    # NumPy dtype kind codes of the non-numeric family: bool, object, bytes,
    # unicode and void.  This is the same set the removed (NumPy 2.0)
    # ``np.sctypes["others"]`` list described, and it also classifies pandas
    # extension dtypes that report kind "O" (category, string) as non-numeric.
    non_numeric_kinds = "bOSUV"

    # Sentinel placeholder: any real split entropy is far below this value.
    best={
            "name":"temp",
            "split_entropy":999999
         }

    # DataFrame.dtypes.to_dict() maps attribute name -> attribute dtype.
    # ``items()`` (not ``iteritems()``) so this runs on Python 2 and 3.
    for name,dtype in data.dtypes.to_dict().items():
        attr={"name":name,"type":dtype}
        if dtype.kind in non_numeric_kinds:
            # Non-numeric column; helper is documented to return
            # ["split_entropy"].
            attr.update(subEntropyChar(data,class_values, name))
        else:
            # Numeric column; helper is documented to return
            # ["split_entropy","split_value"].
            attr.update(subEntropyFloat(data,class_values, name))

        # Strict "<" keeps the first-seen column on ties.
        if attr["split_entropy"] < best["split_entropy"]:
            best = attr

    # Information gain = entropy before the split minus entropy after it.
    best["tree_entropy"] = entropy(class_values)
    best["gain"] = best["tree_entropy"] - best["split_entropy"]

    return best