fix behavior when sample weight is None
kota7 committed Sep 24, 2018
1 parent b21be33 commit b46079a
Showing 4 changed files with 74 additions and 23 deletions.
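Background: several scikit-learn estimators define fit(X, y) without a sample_weight parameter, so calling fit(X, y, sample_weight=None) raises a TypeError even though no weights were actually supplied. The changes below therefore forward sample_weight to the base and meta regressors only when it is not None. A minimal sketch of the failure mode (illustrative only, not part of this commit; MLPRegressor is one estimator whose fit() takes no sample_weight keyword):

    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.neural_network import MLPRegressor

    X = np.arange(20, dtype=float).reshape(10, 2)
    y = np.arange(10, dtype=float)

    Ridge().fit(X, y, sample_weight=None)  # fine: Ridge.fit accepts sample_weight
    try:
        MLPRegressor().fit(X, y, sample_weight=None)
    except TypeError as err:
        print(err)  # fit() got an unexpected keyword argument 'sample_weight'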
11 changes: 9 additions & 2 deletions mlxtend/regressor/stacking_cv_regression.py
@@ -195,11 +195,18 @@ def fit(self, X, y, groups=None, sample_weight=None):
                 meta_features = sparse.hstack((X, meta_features))
             else:
                 meta_features = np.hstack((X, meta_features))
-        self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight)
+
+        if sample_weight is None:
+            self.meta_regr_.fit(meta_features, y)
+        else:
+            self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight)
 
         # Retrain base models on all data
         for regr in self.regr_:
-            regr.fit(X, y, sample_weight=sample_weight)
+            if sample_weight is None:
+                regr.fit(X, y)
+            else:
+                regr.fit(X, y, sample_weight=sample_weight)
 
         return self
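The same guard could also be written by building the fit keyword arguments conditionally; a sketch of that equivalent formulation (not what this commit does, shown only to illustrate the design choice; it assumes the regr, X, y, and sample_weight names from the code above):

    # Equivalent form of the guard above (illustrative sketch only)
    fit_params = {} if sample_weight is None else {'sample_weight': sample_weight}
    regr.fit(X, y, **fit_params)

The explicit if/else used in the commit keeps each call site easy to read at the cost of a little repetition.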

11 changes: 9 additions & 2 deletions mlxtend/regressor/stacking_regression.py
@@ -146,7 +146,10 @@ def fit(self, X, y, sample_weight=None):
             if self.verbose > 1:
                 print(_name_estimators((regr,))[0][1])
 
-            regr.fit(X, y, sample_weight=sample_weight)
+            if sample_weight is None:
+                regr.fit(X, y)
+            else:
+                regr.fit(X, y, sample_weight=sample_weight)
 
         meta_features = self.predict_meta_features(X)
 
@@ -157,7 +160,11 @@
                 meta_features = sparse.hstack((X, meta_features))
             else:
                 meta_features = np.hstack((X, meta_features))
-        self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight)
+
+        if sample_weight is None:
+            self.meta_regr_.fit(meta_features, y)
+        else:
+            self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight)
 
         # save meta-features for training data
         if self.store_train_meta_features:
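With both classes patched, fitting without weights works even when a base or meta regressor has no sample_weight support, while passing weights to such a regressor still fails with a TypeError. A hedged usage sketch (illustrative only; the data are made up, and MLPRegressor stands in for any estimator whose fit() lacks sample_weight):

    import numpy as np
    from mlxtend.regressor import StackingRegressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.neural_network import MLPRegressor

    rng = np.random.RandomState(0)
    X = rng.rand(50, 3)
    y = X.sum(axis=1) + 0.1 * rng.randn(50)

    stregr = StackingRegressor(regressors=[LinearRegression(), Ridge()],
                               meta_regressor=MLPRegressor(max_iter=2000))
    stregr.fit(X, y)  # OK after this fix: sample_weight is None and is not forwarded
    # stregr.fit(X, y, sample_weight=np.ones(50))  # would still raise TypeError at the meta regressor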
27 changes: 22 additions & 5 deletions mlxtend/regressor/tests/test_stacking_cv_regression.py
@@ -15,7 +15,6 @@
 from mlxtend.utils import assert_raises
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge, Lasso
-from sklearn.neural_network import MLPRegressor
 from sklearn.svm import SVR
 from sklearn.model_selection import GridSearchCV, train_test_split, KFold
 from sklearn.base import clone
@@ -268,10 +267,13 @@ def test_sample_weight():
     stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                 meta_regressor=svr_rbf,
                                 cv=KFold(4, shuffle=True, random_state=7))
-    stack.fit(X1, y, sample_weight=w).predict(X1)
+    pred1 = stack.fit(X1, y, sample_weight=w).predict(X1)
     mse = 0.21 # 0.20770
     got = np.mean((stack.predict(X1) - y) ** 2)
     assert round(got, 2) == mse, "Expected %.2f, but got %.5f" % (mse, got)
+    pred2 = stack.fit(X1, y).predict(X1)
+    maxdiff = np.max(np.abs(pred1 - pred2))
+    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
 
 
 def test_weight_ones():
Expand Down Expand Up @@ -299,15 +301,30 @@ def test_unsupported_regressor():
     svr_rbf = SVR(kernel='rbf')
     stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge, lasso],
                                 meta_regressor=svr_rbf)
-    stack.fit(X1, y).predict(X1)
+    stack.fit(X1, y, sample_weight=w).predict(X1)
 
 
 @raises(TypeError)
 def test_unsupported_meta_regressor():
     lr = LinearRegression()
     svr_lin = SVR(kernel='linear')
     ridge = Ridge(random_state=1)
-    mlp = MLPRegressor()
+    lasso = Lasso()
+    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
+                                meta_regressor=lasso)
+    stack.fit(X1, y, sample_weight=w).predict(X1)
+
+
+def test_weight_unsupported_with_no_weight():
+    # should be okay since we do not pass weight
+    lr = LinearRegression()
+    svr_lin = SVR(kernel='linear')
+    ridge = Ridge(random_state=1)
+    lasso = Lasso()
+    stack = StackingCVRegressor(regressors=[svr_lin, lr, lasso],
+                                meta_regressor=ridge)
+    stack.fit(X1, y).predict(X1)
+
     stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
-                                meta_regressor=mlp)
+                                meta_regressor=lasso)
     stack.fit(X1, y).predict(X1)
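A note on the estimator choices in these tests: whether an estimator's fit() accepts sample_weight differs between estimators and has changed across scikit-learn releases, which is why the unsupported-weight cases above pick their regressors the way they do. A small illustrative sketch (not part of this commit) of checking support at runtime:

    import inspect
    from sklearn.linear_model import Lasso
    from sklearn.neural_network import MLPRegressor

    def accepts_sample_weight(estimator):
        # True if the estimator's fit() signature declares a sample_weight parameter
        return 'sample_weight' in inspect.signature(estimator.fit).parameters

    print(accepts_sample_weight(Lasso()), accepts_sample_weight(MLPRegressor()))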
48 changes: 34 additions & 14 deletions mlxtend/regressor/tests/test_stacking_regression.py
@@ -11,7 +11,6 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge
 from sklearn.linear_model import Lasso
-from sklearn.neural_network import MLPRegressor
 from sklearn.svm import SVR
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import GridSearchCV
@@ -79,10 +78,14 @@ def test_sample_weight():
     svr_rbf = SVR(kernel='rbf')
     stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf)
-    stregr.fit(X1, y, sample_weight=w).predict(X1)
+    pred1 = stregr.fit(X1, y, sample_weight=w).predict(X1)
     mse = 0.22
     got = np.mean((stregr.predict(X1) - y) ** 2)
     assert round(got, 2) == mse
+    # make sure that this is not equivalent to the model with no weight
+    pred2 = stregr.fit(X1, y).predict(X1)
+    maxdiff = np.max(np.abs(pred1 - pred2))
+    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
 
 
 def test_weight_ones():
@@ -93,15 +96,14 @@ def test_weight_ones():
svr_rbf = SVR(kernel='rbf')
stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
meta_regressor=svr_rbf)

stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
mse = 0.21
got = np.mean((stregr.predict(X1) - y) ** 2)
assert round(got, 2) == mse
pred1 = stregr.fit(X1, y).predict(X1)
pred2 = stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)
maxdiff = np.max(np.abs(pred1 - pred2))
assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff


@raises(TypeError)
def test_unsupported_regressor():
def test_weight_unsupported_regressor():
# including regressor that does not support
# sample_weight should raise error
lr = LinearRegression()
@@ -111,19 +113,37 @@ def test_unsupported_regressor():
     lasso = Lasso(random_state=1)
     stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                                meta_regressor=svr_rbf)
-    stregr.fit(X1, y, w).predict(X1)
+    stregr.fit(X1, y, sample_weight=w).predict(X1)
 
 
 @raises(TypeError)
-def test_unsupported_meta_regressor():
-    # meta regressor that does not support sample_weight should raise error
+def test_weight_unsupported_meta():
+    # meta regressor with no support for
+    # sample_weight should raise error
     lr = LinearRegression()
     svr_lin = SVR(kernel='linear')
     ridge = Ridge(random_state=1)
+    lasso = Lasso(random_state=1)
+    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
+                               meta_regressor=lasso)
+    stregr.fit(X1, y, sample_weight=w).predict(X1)
+
+
+def test_weight_unsupported_with_no_weight():
+    # pass no weight to regressors with no weight support
+    # should not be a problem
+    lr = LinearRegression()
+    svr_lin = SVR(kernel='linear')
+    ridge = Ridge(random_state=1)
-    mlp = MLPRegressor()
+    svr_rbf = SVR(kernel='rbf')
+    lasso = Lasso(random_state=1)
+    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
+                               meta_regressor=svr_rbf)
+    stregr.fit(X1, y).predict(X1)
+
     stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
-                               meta_regressor=mlp)
-    stregr.fit(X1, y, w).predict(X1)
+                               meta_regressor=lasso)
+    stregr.fit(X1, y).predict(X1)
 
 
 def test_gridsearch():
