Skip to content

Commit

Permalink
update to bambi 0.14 (#63)
Browse files Browse the repository at this point in the history
* update to bambi 0.14

* drop python 3.9
  • Loading branch information
aloctavodia authored Jul 20, 2024
1 parent 124ab22 commit d3118e9
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 88 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]

name: Set up Python ${{ matrix.python-version }}
steps:
Expand Down
7 changes: 5 additions & 2 deletions kulprit/plots/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,15 @@ def plot_densities(

# set default variable names to the reference model terms
if not var_names:
var_names = list(set(model.response_component.terms.keys()) - set([model.response_name]))
var_names = list(
set(model.components[model.family.likelihood.parent].common_terms.keys())
- set([model.response_component.term.name])
)

if include_reference:
data = [idata]
l_labels = ["Reference"]
var_names.append(f"~{model.response_name}_mean")
var_names.append(f"~{model.family.likelihood.parent}")
else:
data = []
l_labels = []
Expand Down
33 changes: 20 additions & 13 deletions kulprit/projection/projector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(
self.idata = idata

# log properties of the reference model
self.response_name = self.model.response_name
self.response_name = model.response_component.term.name
self.ref_family = self.model.family.name
self.priors = self.model.constant_components

Expand Down Expand Up @@ -80,7 +80,10 @@ def project(
terms = list(terms)

# test `terms` input
if not set(terms).issubset(set(self.model.response_component.common_terms)):
ref_terms = list(
self.model.components[self.model.family.likelihood.parent].common_terms.keys()
)
if not set(terms).issubset(set(ref_terms)):
raise UserWarning(
"Please ensure that all terms selected for projection exist in"
+ " the reference model."
Expand Down Expand Up @@ -128,15 +131,17 @@ def project_names(self, term_names: Sequence[str]) -> SubModel:
new_model = self._build_restricted_model(term_names=term_names_)

# extract the design matrix from the model
if new_model.response_component.design.common:
X = new_model.response_component.design.common.design_matrix
slices = new_model.response_component.design.common.slices
d_component = new_model.distributional_components[new_model.family.likelihood.parent]

if d_component:
X = d_component.design.common.design_matrix
slices = d_component.design.common.slices

# Add offset columns to their own design matrix
# Remove them from the common design matrix.
if hasattr(new_model, "offset_terms"): # pragma: no cover
for term in new_model.offset_terms:
term_slice = new_model.response_component.design.common.slices[term]
term_slice = slices[term]
X = np.delete(X, term_slice, axis=1)

# build new term_names (add dispersion parameter if included)
Expand Down Expand Up @@ -174,26 +179,28 @@ def project_names(self, term_names: Sequence[str]) -> SubModel:
model=new_model,
idata=new_idata,
loss=loss,
size=len(new_model.response_component.common_terms),
size=len(new_model.components[new_model.family.likelihood.parent].common_terms),
term_names=term_names,
)
return sub_model

def compute_model_log_likelihood(self, model, idata):
# extract observed data
obs_array = self.idata.observed_data[model.response_name]
obs_array = self.idata.observed_data[model.response_component.term.name]
obs_array = obs_array.expand_dims(
chain=idata.posterior.dims["chain"],
draw=idata.posterior.dims["draw"],
)

preds = model.predict(idata, kind="mean", inplace=False).posterior[
f"{model.response_name}_mean"
preds = model.predict(idata, kind="response_params", inplace=False).posterior[
f"{model.family.likelihood.parent}"
]
if model.family.name == "gaussian":
# initialise probability distribution object
dist = XrContinuousRV(
stats.norm, preds.values, idata.posterior[f"{model.response_name}_sigma"]
stats.norm,
preds.values,
idata.posterior["sigma"],
)
elif model.family.name == "binomial":
# initialise probability distribution object
Expand Down Expand Up @@ -254,12 +261,12 @@ def _extend_term_names(
"""Extend the model term names to include dispersion terms."""

# add intercept term if present
if new_model.response_component.intercept_term:
if bmb.formula.formula_has_intercept(new_model.formula.main):
term_names.insert(0, "Intercept")

# add the auxiliary parameters
if self.priors:
aux_params = [f"{self.response_name}_{str(k)}" for k in self.priors]
aux_params = [f"{str(k)}" for k in self.priors]
term_names += aux_params
# TODO generalize #pylint: disable=fixme
slices[aux_params[0]] = slice(-1, None, None)
Expand Down
4 changes: 2 additions & 2 deletions kulprit/projection/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(
self.ref_idata = idata

# log the reference model's response name and family
self.response_name = self.ref_model.response_name
self.response_name = self.ref_model.response_component.term.name
self.ref_family = self.ref_model.family.name

# define sampling options
Expand All @@ -44,7 +44,7 @@ def __init__(
def pps(self):
# make in-sample predictions with the reference model if not available
if "posterior_predictive" not in self.ref_idata.groups():
self.ref_model.predict(self.ref_idata, kind="pps", inplace=True)
self.ref_model.predict(self.ref_idata, kind="response", inplace=True)

pps = az.extract(
self.ref_idata,
Expand Down
29 changes: 6 additions & 23 deletions kulprit/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ def __init__(
The ArviZ InferenceData object of the fitted reference model
"""
# test that the reference model has an intercept term
if model.response_component.intercept_term is None:
if not bmb.formula.formula_has_intercept(model.formula.main):
raise UserWarning(
"The procedure currently requires reference models to have an intercept term."
)

# test that the reference model does not admit any hierarchical structure
if model.response_component.group_specific_terms:
if any(val.group_specific_groups for val in model.distributional_components.values()):
raise NotImplementedError("Hierarchical models currently not supported.")

# build posterior if not provided
Expand Down Expand Up @@ -137,11 +137,11 @@ def search(
"""

# set default `max_terms` value
n_terms = len(self.model.components[self.model.family.likelihood.parent].common_terms)
if max_terms is None:
max_terms = len(self.model.response_component.common_terms)

max_terms = n_terms
# test `max_terms` input
if max_terms > len(self.model.response_component.common_terms):
elif max_terms > n_terms:
raise UserWarning(
"Please ensure that the maximum number to consider in the "
+ "submodel search is between 1 and the number of terms in the "
Expand Down Expand Up @@ -283,9 +283,6 @@ def plot_densities(
def check_model_idata_compatability(model, idata):
"""Check that the Bambi model and idata are compatible with vanilla procedure.
In the future, this will be extended to allow for different structures and
covariates, checking instead only that the observation data are the same.
Parameters:
----------
model : Bambi model
Expand All @@ -297,22 +294,8 @@ def check_model_idata_compatability(model, idata):
-------
bool : Indicator of whether the two objects are compatible
"""

# test that the variate's name is the same in reference model and idata
if not model.response_name == list(idata.observed_data.data_vars.variables)[0]:
return False

# test that the variate has the same dimensions in reference model and idata
if model.response_component.design.response.kind != "proportion" and (
idata.observed_data[model.response_name].to_numpy().shape
!= model.data[model.response_name].shape
):
return False

if model.response_component.design.response.kind == "proportion" and (
idata.observed_data[model.response_name].to_numpy().shape
!= model.response_component.design.response.evaluate_new_data(model.data).shape
):
if not model.response_component.term.name == list(idata.observed_data.data_vars.variables)[0]:
return False

# return default truth
Expand Down
13 changes: 11 additions & 2 deletions kulprit/search/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def __init__(self, projector: Projector) -> None:
self.projector = projector

# log the names of the terms in the reference model
self.ref_terms = list(self.projector.model.response_component.common_terms)

model = self.projector.model
self.ref_terms = list(model.components[model.family.likelihood.parent].common_terms.keys())

# initialise search
self.k_term_idx = {}
Expand All @@ -29,7 +31,14 @@ def __repr__(self) -> str:
"""String representation of the search path."""

path_dict = {
k: [list(submodel.model.response_component.terms.keys()), submodel.loss]
k: [
list(
submodel.model.components[
submodel.model.family.likelihood.parent
].common_terms.keys()
),
submodel.loss,
]
for k, submodel in self.k_submodel.items()
}
df = pd.DataFrame.from_dict(
Expand Down
35 changes: 15 additions & 20 deletions kulprit/search/l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,10 @@ def __init__(self, projector: Projector) -> None:

# log the projector object
self.projector = projector
terms = self.projector.model.components[self.projector.model.family.likelihood.parent].terms

# test whether the model includes categorical terms, and if so raise error
if (
sum(
(
self.projector.model.response_component.terms[term].categorical
for term in self.projector.model.response_component.terms.keys()
)
)
> 0
):
if sum(terms[term].categorical for term in terms.keys()) > 0:
raise NotImplementedError("Group-lasso not yet implemented")

# initialise search
Expand All @@ -39,7 +32,14 @@ def __repr__(self) -> str:
"""String representation of the search path."""

path_dict = {
k: [list(submodel.model.response_component.terms.keys()), submodel.loss]
k: [
list(
submodel.model.components[
submodel.model.family.likelihood.parent
].common_terms.keys()
),
submodel.loss,
]
for k, submodel in self.k_submodel.items()
}
df = pd.DataFrame.from_dict(
Expand Down Expand Up @@ -92,23 +92,18 @@ def compute_path(self) -> None:
-------
np.ndarray: The coefficients for each model size
"""

model = self.projector.model
# extract reference model data and latent predictor
self.common_terms = list( # pylint: disable=attribute-defined-outside-init
self.projector.model.response_component.common_terms
)
X = np.column_stack(
[
self.projector.model.response_component.design.common[term]
for term in self.common_terms
]
model.components[model.family.likelihood.parent].common_terms
)
d_component = model.distributional_components[model.family.likelihood.parent]
X = np.column_stack([d_component.design.common[term] for term in self.common_terms])
# XXX we need to make this more general # pylint: disable=fixme
mean_param_name = list(self.projector.model.family.link.keys())[0]
eta = self.projector.model.family.link[mean_param_name].link(
np.array(self.projector.model.response_component.design.response)
model.components[model.family.likelihood.parent].design.response.design_matrix
)

# compute L1 path in the latent space
_, coef_path, _ = lasso_path(X, eta)
cov_order = self.first_non_zero_idx(coef_path)
Expand Down
32 changes: 7 additions & 25 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,26 +84,6 @@ def test_different_variate_name(self, bambi_model_idata):
# build a bad reference model object
kpt.ProjectionPredictive(bad_model, bambi_model_idata)

def test_different_variate_dim(self, bambi_model_idata):
"""Test that an error is raised when model and idata aren't compatible."""

# define model data
data = pd.DataFrame(
{
"z": np.array([1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839]),
"x": np.array([59, 60, 62, 56, 63, 59, 62, 60]),
"y": np.array([6, 13, 18, 28, 52, 53, 61, 60]),
}
)

# define model
formula = "z ~ x + y"
bad_model = bmb.Model(formula, data, family="gaussian")

with pytest.raises(UserWarning):
# build a bad reference model object
kpt.ProjectionPredictive(bad_model, bambi_model_idata)

def test_no_term_names_error(self, ref_model):
"""Test that an error is raised when no term names are provided."""

Expand Down Expand Up @@ -196,9 +176,11 @@ def test_build_restricted_model(self, bambi_model, bambi_model_idata):
)
assert new_model.family.name == bambi_model.family.name
assert (
new_model.response_component.terms.keys() == bambi_model.response_component.terms.keys()
new_model.components[new_model.family.likelihood.parent].common_terms.keys()
== bambi_model.components[bambi_model.family.likelihood.parent].common_terms.keys()
)
assert set(new_model.response_component.common_terms.keys()) == set(
bambi_model.response_component.common_terms.keys()
)
assert new_model.response_name == bambi_model.response_name

assert set(
new_model.components[new_model.family.likelihood.parent].common_terms.keys()
) == set(bambi_model.components[bambi_model.family.likelihood.parent].common_terms.keys())
assert new_model.response_component.term.name == bambi_model.response_component.term.name

0 comments on commit d3118e9

Please sign in to comment.