def optimize_logisticRegression():
    """Fit a logistic-regression model on the training CSV and write test predictions.

    Steps performed:
      1. Load ``data/train.csv`` and print its ``info()`` and ``describe()``
         summaries.
      2. Pass it through ``fe_preprocessData`` and keep only the columns whose
         names match the feature regex (plus ``Survived``).
      3. Fit ``linear_model.LogisticRegression(C=1.0, tol=1e-6)`` on every
         training row (no hold-out split) and print one coefficient per
         feature column.
      4. Apply the same preprocessing and column filter to ``data/test.csv``,
         predict, and write ``PassengerId`` / ``Survived`` to
         ``optimize_logisticRegression_result/prediction.csv``.

    Takes no arguments and returns ``None``; all results are printed or
    written to disk. The output directory is not created here.
    """
    train_data = pd.read_csv(r"data/train.csv")

    # DataFrame.info() writes its report itself and returns None, so it is
    # called on its own line; passing it to print emitted a stray "None".
    # The header text and its trailing newline are emitted exactly as before.
    print(u"?????")
    train_data.info()
    print(u'?????')
    print(train_data.describe())

    # NOTE(review): fe_preprocessData is defined elsewhere; the second argument
    # looks like a label/output name for the processed frame -- confirm.
    process_data = fe_preprocessData(train_data, 'process_train_data')
    # Keep the label column plus every column matching the feature patterns.
    train_data = process_data.filter(
        regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    # .values replaces DataFrame.as_matrix(), which was deprecated in
    # pandas 0.23 and removed in pandas 1.0.
    train_np = train_data.values

    # Column 0 is used as the label and the remaining columns as features.
    # Assumes 'Survived' is the first column that survives the filter above
    # -- TODO confirm against fe_preprocessData's output order.
    X = train_np[:, 1:]
    y = train_np[:, 0]

    # The model is fit on the full training set.
    model = linear_model.LogisticRegression(C=1.0, tol=1e-6).fit(X, y)
    # Pair each feature column name with its fitted coefficient.
    print(pd.DataFrame({"columns": list(train_data.columns)[1:],
                        "coef_": list(model.coef_.T)}))

    # Predict on the test set using the same preprocessing and feature filter
    # (without the 'Survived' label).
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = fe_preprocessData(test_data, 'process_test_data')
    test_data = process_test_data.filter(
        regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.values
    predict = model.predict(test_np)

    result = pd.DataFrame(data={
        'PassengerId': process_test_data['PassengerId'].values,
        'Survived': predict.astype(np.int32),
    })
    result.to_csv(r'optimize_logisticRegression_result/prediction.csv', index=False)
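# [web page residue, not part of the program] Comment list
# [web page residue, not part of the program] Article table of contents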