thresholds.py 文件源码-python代码片段

thresholds.py 文件源码

python

阅读 26 收藏 0 点赞 0 评论 0

def thresholds():
  for name in ['ant', 'ivy', 'jedit', 'lucene', 'poi']:
    print("##", name)
    train, test = explore(dir='../Data/Jureczko/', name=name)
    data_DF=csv2DF(train, toBin=True)
    metrics=[str[1:] for str in data_DF[data_DF.columns[:-1]]]
    ubr = LogisticRegression()
    X = data_DF[data_DF.columns[:-1]].values
    y = data_DF[data_DF.columns[-1]].values
    ubr.fit(X,y)
    inter, coef, pVal = ubr.intercept_[0], ubr.coef_[0], f_classif(X,y)[1]

    table= texttable.Texttable()
    table.set_cols_align(["l","l","l"])
    table.set_cols_valign(["m","m","m"])
    table.set_cols_dtype(['t', 't', 't'])
    table_rows=[["Metric", "Threshold", "P-Value"]]

    for i in xrange(len(metrics)):
      if VARL(coef[i], inter, p0=0.05)>0 and pVal[i]<0.05:
        thresh="%0.2f"%VARL(coef[i], inter, p0=0.1)
        table_rows.append([metrics[i], thresh, "%0.3f"%pVal[i]])

    table.add_rows(table_rows)
    print(table.draw())

  # === DEBUG ===
  set_trace()
  return None