Bagging

1. Combining different algorithms

① Classification

Implemented directly with sklearn's VotingClassifier:

from sklearn.ensemble import VotingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier

xgboost = XGBClassifier(n_estimators=200, learning_rate=0.2, max_depth=2, min_child_weight=0.8, gamma=0.009,
                        colsample_bytree=0.7, subsample=0.9)  # accuracy: 0.858 +/- 0.031
gbdt = GradientBoostingClassifier(n_estimators=300, learning_rate=0.1, max_depth=2, min_samples_split=200,
                                  min_samples_leaf=6)  # 0.847
rf = RandomForestClassifier(n_estimators=500, min_samples_split=4, min_samples_leaf=2, n_jobs=-1)  # 0.83
lr = LogisticRegression(penalty='l2', C=0.1)  # 0.83
svm = SVC(C=10, gamma=0.01, probability=True)  # 0.83
# Combine the models by voting, assigning each model its own weight
clf = VotingClassifier(estimators=[('xgboost', xgboost), ('gbdt', gbdt), ('rf', rf), ('lr', lr), ('svm', svm)],
                       voting='soft', weights=[0.50, 0.05, 0.05, 0.2, 0.2])

Parameters:

voting: either 'soft' or 'hard'. With 'soft', predictions are made from the averaged class probabilities (predict_proba); with 'hard', by majority vote on the predicted labels (predict). 'soft' is usually the better choice.

weights: the weight assigned to each base model. If omitted, all models are weighted equally; in practice the weights usually need tuning.
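Once constructed, the voting classifier behaves like any other sklearn estimator, so the combined accuracy can be checked with cross-validation. A minimal sketch, assuming X and y are placeholders for an already prepared feature matrix and label vector:

from sklearn.model_selection import cross_val_score

# X, y are assumed placeholders for the prepared training data
scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
print('accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))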

② Regression

Implemented by hand:

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone

# Combine several algorithms via a weighted average: class AveragingModels
class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    def __init__(self, models, weights):
        self.models = models
        self.weights = np.array(weights)

    def fit(self, X, y):
        # Fit a fresh clone of every base model so the originals stay untouched
        self.models_ = [clone(x) for x in self.models]
        for model in self.models_:
            model.fit(X, y)
        return self

    def predict(self, X):
        # Stack the base predictions column-wise, then take the weighted sum
        predictions = np.column_stack([model.predict(X) for model in self.models_])
        return np.sum(self.weights * predictions, axis=1)

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor

gbr = GradientBoostingRegressor(n_estimators=250, learning_rate=0.1, max_depth=2, min_samples_split=10,
                                min_samples_leaf=7)
xgb = XGBRegressor(n_estimators=700, learning_rate=0.07, max_depth=2, subsample=0.7, colsample_bytree=0.7, n_jobs=-1)
enet = ElasticNet(alpha=0.0035, l1_ratio=0.5)
# Pass in the models and their weights, then use the result like any estimator
# (gbr replaces the original svm here: svm was the SVC classifier above, not a regressor)
model_aver = AveragingModels(models=(xgb, enet, gbr), weights=(0.35, 0.45, 0.2))
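AveragingModels follows the standard sklearn estimator interface, so fitting and predicting work as with a single model. A minimal usage sketch, where X_train, y_train, and X_test are hypothetical placeholders for prepared data:

# X_train, y_train, X_test are hypothetical placeholders for prepared data
model_aver.fit(X_train, y_train)
y_pred = model_aver.predict(X_test)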
