bagging
1. Combining different algorithms
① Classification
Use sklearn's VotingClassifier directly:
from sklearn.ensemble import VotingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier

xgboost = XGBClassifier(n_estimators=200, learning_rate=0.2, max_depth=2, min_child_weight=0.8, gamma=0.009,
                        colsample_bytree=0.7, subsample=0.9)  # accuracy: 0.858 +/- 0.031
gbdt = GradientBoostingClassifier(n_estimators=300, learning_rate=0.1, max_depth=2, min_samples_split=200,
                                  min_samples_leaf=6)  # 0.847
rf = RandomForestClassifier(n_estimators=500, min_samples_split=4, min_samples_leaf=2, n_jobs=-1)  # 0.83
lr = LogisticRegression(penalty='l2', C=0.1)  # 0.83
svm = SVC(C=10, gamma=0.01, probability=True)  # 0.83
# Vote over the models, assigning each one its own weight
clf = VotingClassifier(estimators=[('xgboost', xgboost), ('gbdt', gbdt), ('rf', rf), ('lr', lr), ('svm', svm)],
                       voting='soft', weights=[0.50, 0.05, 0.05, 0.2, 0.2])
Parameters:
voting: either 'soft' or 'hard'. With 'soft' the ensemble averages the base models' predict_proba outputs; with 'hard' it takes a majority vote over their predict outputs. 'soft' is usually the better choice.
weights: the weight given to each base model. If omitted, all models are weighted equally; in practice the weights usually need tuning.
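The weighted ensemble behaves like any single sklearn estimator, so it can be scored with cross-validation. A minimal sketch, assuming the real training set is available (make_classification only stands in for it here; X_train and y_train are placeholder names, not from the original):
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
# Hypothetical synthetic data standing in for the real training set
X_train, y_train = make_classification(n_samples=500, n_features=20, random_state=42)
# Cross-validate the soft-voting ensemble exactly like a single model
scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
print('accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))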
② Regression
Implement it yourself:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone

# Combine several regressors by weighted averaging: the class AveragingModels
class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    def __init__(self, models, weights):
        self.models = models
        self.weights = np.array(weights)

    def fit(self, X, y):
        # Clone the base models so the originals stay untouched, then fit every clone
        self.models_ = [clone(x) for x in self.models]
        for model in self.models_:
            model.fit(X, y)
        return self

    def predict(self, X):
        # Stack each model's predictions as columns, then take the weighted sum per sample
        predictions = np.column_stack([model.predict(X) for model in self.models_])
        return np.sum(self.weights * predictions, axis=1)
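For example, if the three base models predict 10, 12, and 11 for one sample and the weights are (0.35, 0.45, 0.2), the averaged prediction is 0.35*10 + 0.45*12 + 0.2*11 = 11.1; the weights should sum to 1 so the output stays on the same scale as y.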
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor

gbr = GradientBoostingRegressor(n_estimators=250, learning_rate=0.1, max_depth=2, min_samples_split=10, min_samples_leaf=7)
xgb = XGBRegressor(n_estimators=700, learning_rate=0.07, max_depth=2, subsample=0.7, colsample_bytree=0.7, n_jobs=-1)
enet = ElasticNet(alpha=0.0035, l1_ratio=0.5)
# Pass in the models and their weights, then call it like any other estimator
model_aver = AveragingModels(models=(xgb, enet, gbr), weights=(0.35, 0.45, 0.2))
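A minimal usage sketch under assumed data: make_regression, the train/test split, and the RMSE check below are hypothetical stand-ins for the real data set, not from the original.
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Hypothetical synthetic data standing in for the real training set
X, y = make_regression(n_samples=500, n_features=20, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Fit the averaged ensemble and evaluate it on the held-out split
model_aver.fit(X_train, y_train)
preds = model_aver.predict(X_test)
print('RMSE: %.3f' % np.sqrt(mean_squared_error(y_test, preds)))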