Commit 4a213cd
Alexander März committed Aug 8, 2023
1 parent: 0f23016

Showing 1 changed file with 251 additions and 0 deletions.
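This commit adds a pytest suite for the XGBoostLSS model class: training with univariate Gaussian, Expectile, and three-dimensional multivariate Gaussian (MVN) distributions, hyper-parameter optimization, prediction, and plotting. A minimal way to run the new module from Python, assuming it lives at tests/test_model.py (the diff view does not show the file path):

# Hypothetical runner: the test-file path is an assumption, since the
# diff view does not show the changed file's name.
import pytest
raise SystemExit(pytest.main(["tests/test_model.py", "-v"]))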
@@ -0,0 +1,251 @@
# Explicit imports for the libraries used below; the xgboostlss star
# imports presumably re-export xgb, np, and pd, but naming them here
# keeps the dependencies visible.
import xgboost as xgb
import numpy as np
import pandas as pd
import pytest
from pytest import approx

from xgboostlss.model import *
from xgboostlss.distributions.Gaussian import *
from xgboostlss.distributions.Expectile import *
from xgboostlss.distributions.MVN import *
from xgboostlss.datasets.data_loader import load_simulated_gaussian_data, load_simulated_multivariate_gaussian_data

@pytest.fixture
def univariate_data():
    train, test = load_simulated_gaussian_data()
    X_train, y_train = train.filter(regex="x"), train["y"].values
    X_test, y_test = test.filter(regex="x"), test["y"].values
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test)
    deval = xgb.DMatrix(X_test, label=y_test)

    return dtrain, dtest, deval, X_test

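# Note: dtest carries features only (for predict), while deval pairs the
# same features with labels so it can serve as an evaluation set during
# training.
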
@pytest.fixture
def univariate_xgblss():
    return XGBoostLSS(Gaussian())


@pytest.fixture
def expectile_xgblss():
    return XGBoostLSS(Expectile())

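# The fixed settings below look like pre-tuned hyper-parameters
# (plausibly from an earlier hyper_opt run), pinned so the tests are
# deterministic; their origin is an inference, not stated in the commit.
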
@pytest.fixture
def univariate_params():
    opt_params = {
        "eta": 0.10015347345470738,
        "max_depth": 8,
        "gamma": 24.75078796889987,
        "subsample": 0.6161756203438147,
        "colsample_bytree": 0.851057889242629,
        "min_child_weight": 147.09687376037445,
        "booster": "gbtree",
    }
    n_rounds = 98

    return opt_params, n_rounds

@pytest.fixture
def expectile_params():
    opt_params = {
        "eta": 0.7298897353706068,
        "max_depth": 2,
        "gamma": 5.90940257278992e-06,
        "subsample": 0.9810129322454306,
        "colsample_bytree": 0.9546244491014185,
        "min_child_weight": 113.32324947486019,
        "booster": "gbtree",
    }
    n_rounds = 3

    return opt_params, n_rounds

@pytest.fixture
def multivariate_data():
    data_sim = load_simulated_multivariate_gaussian_data()

    # Create 60%, 20%, 20% split for train, validation and test
    train, validate, test = np.split(
        data_sim.sample(frac=1, random_state=123),
        [int(0.6 * len(data_sim)), int(0.8 * len(data_sim))],
    )

    # Train
    x_train = train.filter(regex="x")
    y_train = train.filter(regex="y").values
    dtrain_mvn = xgb.DMatrix(x_train, label=y_train)

    # Validation
    x_eval = validate.filter(regex="x")
    y_eval = validate.filter(regex="y").values
    deval_mvn = xgb.DMatrix(x_eval, label=y_eval)

    # Test
    x_test = test.filter(regex="x")
    dtest_mvn = xgb.DMatrix(x_test)

    return dtrain_mvn, deval_mvn, dtest_mvn, x_test

@pytest.fixture
def multivariate_xgblss():
    return XGBoostLSS(MVN(D=3))

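# D=3 must match the number of response columns selected by the
# regex "y" filter in multivariate_data above.
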
@pytest.fixture
def multivariate_params():
    opt_params = {
        "eta": 0.06058920928573687,
        "max_depth": 2,
        "gamma": 2.8407158704437237e-05,
        "subsample": 0.5214068113552733,
        "colsample_bytree": 0.8185136492497096,
        "min_child_weight": 8.847572679915343,
        "booster": "gbtree",
    }
    n_rounds = 100

    return opt_params, n_rounds


class TestClass:
    def test_model_univ_train(self, univariate_data, univariate_xgblss, univariate_params):
        # Unpack
        dtrain, _, _, _ = univariate_data
        opt_params, n_rounds = univariate_params
        xgblss = univariate_xgblss

        # Train the model
        xgblss.train(opt_params, dtrain, n_rounds)

        # Assertions
        assert isinstance(xgblss.booster, xgb.Booster)

    def test_model_univ_train_eval(self, univariate_data, univariate_xgblss, univariate_params):
        # Unpack
        dtrain, dtest, deval, _ = univariate_data
        opt_params, n_rounds = univariate_params
        xgblss = univariate_xgblss

        # Add evaluation set
        eval_set = [(dtrain, "train"), (deval, "evaluation")]
        eval_result = {}

        # Train the model
        xgblss.train(opt_params, dtrain, n_rounds, evals=eval_set, evals_result=eval_result)

        # Assertions
        assert isinstance(xgblss.booster, xgb.Booster)

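    # Note: train() forwards evals/evals_result much like native
    # xgb.train, so per-iteration metrics land in eval_result keyed by
    # the evaluation set names ("train", "evaluation").
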
    def test_model_mvn_train(self, multivariate_data, multivariate_xgblss, multivariate_params):
        # Unpack
        dtrain, deval, _, _ = multivariate_data
        opt_params, n_rounds = multivariate_params
        xgblss_mvn = multivariate_xgblss

        # Add evaluation set
        eval_set = [(dtrain, "train"), (deval, "evaluation")]
        eval_result = {}

        # Train the model
        xgblss_mvn.train(opt_params,
                         dtrain,
                         n_rounds,
                         evals=eval_set,
                         evals_result=eval_result)

        # Assertions
        assert isinstance(xgblss_mvn.booster, xgb.Booster)

    def test_model_hpo(self, univariate_data, univariate_xgblss):
        # Unpack
        dtrain, _, _, _ = univariate_data
        xgblss = univariate_xgblss

        # Create hyperparameter dictionary
        param_dict = {
            "eta": ["float", {"low": 1e-5, "high": 1, "log": True}],
            "max_depth": ["int", {"low": 1, "high": 2, "log": False}],
            "tree_method": ["categorical", ["auto"]],
        }

        # Run the hyper-parameter search
        np.random.seed(123)
        opt_param = xgblss.hyper_opt(
            param_dict,
            dtrain,
            num_boost_round=10,
            nfold=5,
            early_stopping_rounds=20,
            max_minutes=10,
            n_trials=5,
            silence=True,
            seed=123,
            hp_seed=123,
        )

        # Assertions
        assert isinstance(opt_param, dict)

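    # Search-space format consumed by hyper_opt above: each key maps to
    # [type, spec], where "float"/"int" take {"low", "high", "log"}
    # bounds and "categorical" takes a list of candidate values.
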
    def test_model_predict(self, univariate_data, univariate_xgblss, univariate_params):
        # Unpack
        dtrain, dtest, _, _ = univariate_data
        opt_params, n_rounds = univariate_params
        xgblss = univariate_xgblss

        # Train the model
        xgblss.train(opt_params, dtrain, n_rounds)

        # Call the predict method
        n_samples = 100
        quantiles = [0.1, 0.5, 0.9]

        pred_params = xgblss.predict(dtest, pred_type="parameters")
        pred_samples = xgblss.predict(dtest, pred_type="samples", n_samples=n_samples)
        pred_quantiles = xgblss.predict(dtest, pred_type="quantiles", quantiles=quantiles)

        # Assertions
        assert isinstance(pred_params, (pd.DataFrame, type(None)))
        assert not pred_params.isna().any().any()
        assert not np.isinf(pred_params).any().any()
        assert pred_params.shape[1] == xgblss.dist.n_dist_param
        assert approx(pred_params["loc"].mean(), abs=0.2) == 10.0

        assert isinstance(pred_samples, (pd.DataFrame, type(None)))
        assert not pred_samples.isna().any().any()
        assert not np.isinf(pred_samples).any().any()
        assert pred_samples.shape[1] == n_samples

        assert isinstance(pred_quantiles, (pd.DataFrame, type(None)))
        assert not pred_quantiles.isna().any().any()
        assert not np.isinf(pred_quantiles).any().any()
        assert pred_quantiles.shape[1] == len(quantiles)

    def test_model_plot(self, univariate_data, univariate_xgblss, univariate_params):
        # Unpack
        dtrain, dtest, _, X_test = univariate_data
        opt_params, n_rounds = univariate_params
        xgblss = univariate_xgblss

        # Train the model
        xgblss.train(opt_params, dtrain, n_rounds)

        # Call the function
        xgblss.plot(X_test, parameter="scale", feature="x_true", plot_type="Partial_Dependence")
        xgblss.plot(X_test, parameter="scale", feature="x_true", plot_type="Feature_Importance")

    def test_model_expectile_plot(self, univariate_data, expectile_xgblss, expectile_params):
        # Unpack
        dtrain, dtest, _, X_test = univariate_data
        opt_params, n_rounds = expectile_params
        xgblss_expectile = expectile_xgblss

        # Train the model
        xgblss_expectile.train(opt_params, dtrain, n_rounds)

        # Call the function
        xgblss_expectile.expectile_plot(X_test,
                                        expectile="expectile_0.9",
                                        feature="x_true",
                                        plot_type="Partial_Dependence")

        xgblss_expectile.expectile_plot(X_test,
                                        expectile="expectile_0.9",
                                        feature="x_true",
                                        plot_type="Feature_Importance")
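
For reference, a minimal sketch of the workflow these tests exercise, using only calls that appear above; the hyper-parameter values are placeholders rather than tuned settings from the repository:

import xgboost as xgb
from xgboostlss.model import *
from xgboostlss.distributions.Gaussian import *
from xgboostlss.datasets.data_loader import load_simulated_gaussian_data

# Load the simulated Gaussian data and wrap it in DMatrix objects.
train, test = load_simulated_gaussian_data()
dtrain = xgb.DMatrix(train.filter(regex="x"), label=train["y"].values)
dtest = xgb.DMatrix(test.filter(regex="x"))

# Fit a distributional model and predict the 10%/50%/90% quantiles.
xgblss = XGBoostLSS(Gaussian())
xgblss.train({"eta": 0.1, "max_depth": 2}, dtrain, 50)
pred_quantiles = xgblss.predict(dtest, pred_type="quantiles", quantiles=[0.1, 0.5, 0.9])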