from sklearn.linear_model import LogisticRegression # 逻辑回归
from sklearn.neighbors import KNeighborsClassifier # K近邻
from sklearn.svm import SVC # 支持向量机
from sklearn.tree import DecisionTreeClassifier # 决策树
from sklearn.ensemble import RandomForestClassifier # 随机森林
from sklearn.ensemble import AdaBoostClassifier # AdaBoost
from xgboost.sklearn import XGBClassifier # Xgboost
from lightgbm.sklearn import LGBMClassifier # lightgbm
def grid_search(pipeline, param_grid, x_train, y_train, x_test, y_test, num_folds=5, metrics='accuracy'):
    """Tune ``pipeline`` with a cross-validated grid search and score it on the test split.

    Args:
        pipeline: Estimator handed to ``GridSearchCV`` as ``estimator``.
        param_grid: Parameter grid handed to ``GridSearchCV``.
        x_train: Training features the search is fitted on.
        y_train: Training labels the search is fitted on.
        x_test: Held-out features used for the final prediction.
        y_test: Held-out labels the final prediction is compared against.
        num_folds: Value passed to ``GridSearchCV`` as ``cv``.
        metrics: Scoring name. It is used both as the ``scoring`` argument of
            ``GridSearchCV`` and to select ``sklearn.metrics.<metrics>_score``
            for the held-out evaluation.

    Returns:
        dict: ``{'y_pred': <test-set predictions>, <metrics>: <test-set score>}``.

    Raises:
        ValueError: If ``sklearn.metrics`` has no callable named
            ``<metrics>_score``.
    """
    # Function-scope imports keep this helper usable even when the surrounding
    # module has not imported these names itself.
    from sklearn import metrics as sk_metrics
    from sklearn.model_selection import GridSearchCV

    # Resolve the metric function by attribute lookup instead of eval(), and do
    # it before the (potentially slow) search so a bad name fails immediately.
    score_fn = getattr(sk_metrics, '{}_score'.format(metrics), None)
    if not callable(score_fn):
        raise ValueError(
            "no sklearn.metrics function named '{}_score'".format(metrics)
        )

    gs = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        cv=num_folds,
        scoring=metrics,
    )
    gs.fit(x_train, y_train)
    print('GridSearch 最优参数:{},最优分数:{}'.format(gs.best_params_, gs.best_score_))

    y_pred = gs.predict(x_test)
    if metrics == 'roc_auc':
        # AUC is computed from scores rather than hard labels. This branch
        # requires the tuned estimator to expose predict_proba; column 1 is
        # taken as the positive class (assumes a binary problem -- TODO confirm).
        y_prob = gs.predict_proba(x_test)[:, 1]
        # Fixed: the first argument must be the true labels, not the features.
        score = score_fn(y_test, y_prob)
        print('auc得分', score)
    else:
        score = score_fn(y_test, y_pred)
        print('{}得分:{:.3f}'.format(metrics, score))

    return {'y_pred': y_pred, metrics: score}
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
# Candidate estimators, one per model family. The order matters: this list is
# zipped positionally with `classifier_name` and `classifier_param_grid`.
classifiers = [
    LogisticRegression(solver='liblinear'),   # logistic regression
    KNeighborsClassifier(metric='minkowski'),  # k-nearest neighbours
    SVC(kernel='rbf'),                         # support vector machine
    DecisionTreeClassifier(criterion='gini'),  # decision tree
    RandomForestClassifier(criterion='gini'),  # random forest
    AdaBoostClassifier(),                      # AdaBoost
    XGBClassifier(),                           # XGBoost
    LGBMClassifier(is_unbalance=True),         # LightGBM
]
# Pipeline step name for each estimator, in the same order as `classifiers`.
# Each name is also the prefix ("<name>__<param>") of the keys used in the
# matching entry of `classifier_param_grid`.
classifier_name = [
    'lr',
    'kneighborsclassifier',
    'svc',
    'decisiontreeclassifier',
    'randomforestclassifier',
    'adaboostclassifier',
    'xgboost',
    'lightgbm',
]
# Hyper-parameter grid for each estimator, in the same order as `classifiers`.
# Keys follow the "<pipeline step name>__<parameter>" form so they address the
# named step inside the pipeline built for that model.
classifier_param_grid = [
    {'lr__C': [0.001, 0.01, 0.1, 1]},
    {'kneighborsclassifier__n_neighbors': [4, 6, 8]},
    {
        'svc__C': [0.01, 0.1, 1],
        'svc__gamma': [0.01, 0.1, 1],
    },
    {'decisiontreeclassifier__max_depth': range(5, 11)},
    {'randomforestclassifier__n_estimators': range(1, 11)},
    {'adaboostclassifier__n_estimators': range(70, 121, 10)},
    {'xgboost__max_depth': [5, 7, 9]},
    {'lightgbm__max_depth': range(4, 10)},
]
# Tune and evaluate every candidate model. Each estimator is placed behind a
# MinMaxScaler step, and the estimator's step is named `model_name` so the
# "<name>__<param>" keys in its parameter grid resolve to it.
# Results are collected per model name; previously `res` was overwritten on
# every iteration, so only the last model's result remained available.
grid_search_results = {}
for model, model_name, model_param_grid in zip(classifiers, classifier_name, classifier_param_grid):
    pipeline = Pipeline([("scaler", MinMaxScaler()), (model_name, model)])
    res = grid_search(pipeline, model_param_grid, X_train, y_train, X_test, y_test)
    grid_search_results[model_name] = res