factor_regression.py 文件源码-python代码片段

def Regress(self, print_data):
    """Fit an OLS regression on the valid rows and print a per-row report.

    Builds a design matrix from ``self._rows`` (one matrix row per entry
    that passes ``self._IsValidInputData``; one matrix column per name in
    ``self._REGRESS_COLUMNS``, in sorted order) and a response vector from
    ``self._values``, fits ``sm.OLS`` and prints the model summary.  It then
    prints, for every entry of ``self._rows`` in sorted key order, the
    exponentiated prediction, the exponentiated observed value, the relative
    difference and the absolute difference.

    Args:
        print_data: When truthy, also print the row count, the column names,
            each design-matrix row and the response vector.

    Returns:
        The fitted results object returned by ``sm.OLS(...).fit()``.
    """
    # Sort once so the design matrix, the summary labels and the prediction
    # vectors all use the same column order.
    columns = sorted(self._REGRESS_COLUMNS)
    num_rows = len(self._rows)

    # Allocated for the worst case (every row valid); trimmed after filling.
    a = np.empty(shape=(num_rows, len(columns)))
    b = np.zeros(shape=(num_rows))

    if print_data:
        print('Found %d rows' % (num_rows))
        print('Columns')
        print('\t'.join(columns))

    rn = 0
    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for juris, cols in self._rows.items():
        if not self._IsValidInputData(cols):
            continue
        b[rn] = self._values[juris]
        for cn, c in enumerate(columns):
            a[rn, cn] = cols[c]
        if print_data:
            print('\t'.join(str(x) for x in a[rn].tolist()))
        rn += 1

    # Keep only the rows that were filled.  Slicing is used instead of
    # ndarray.resize(), which raises if the array is referenced elsewhere.
    a = a[:rn]
    b = b[:rn]

    if print_data:
        print('LogValue')
        print('\n'.join(str(x) for x in b.tolist()))

    results = sm.OLS(b, a).fit()
    print(results.summary(yname='log(Votes)', xname=columns, alpha=0.01))

    # NOTE(review): this report covers every row, including rows that were
    # excluded from the fit by _IsValidInputData -- confirm that is intended.
    for juris in sorted(self._rows):
        cols = self._rows[juris]
        p = [cols[c] for c in columns]
        # predict() returns an array for a single observation; take the
        # scalar explicitly rather than relying on implicit array-to-number
        # conversion inside the '%' formatting below.
        log_pred = float(np.ravel(results.predict(p))[0])
        # Values are exponentiated before reporting (the response is
        # labelled 'log(Votes)' in the summary above).
        predicted = math.exp(log_pred)
        actual = math.exp(self._values[juris])
        diff = actual - predicted
        print('%-20s\t%7d\t%7d\t%7.4f\t%7d' % (juris, predicted,
                                               actual, diff / predicted, diff))
    return results