update to bambi 0.14 (#63)

* update to bambi 0.14 * drop python 3.9
bambinos · Jul 20, 2024 · d3118e9 · d3118e9
1 parent 124ab22
commit d3118e9
Show file tree

Hide file tree

Showing 8 changed files with 67 additions and 88 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     name: Set up Python ${{ matrix.python-version }}
     steps:

diff --git a/kulprit/plots/plots.py b/kulprit/plots/plots.py
@@ -121,12 +121,15 @@ def plot_densities(
 
     # set default variable names to the reference model terms
     if not var_names:
-        var_names = list(set(model.response_component.terms.keys()) - set([model.response_name]))
+        var_names = list(
+            set(model.components[model.family.likelihood.parent].common_terms.keys())
+            - set([model.response_component.term.name])
+        )
 
     if include_reference:
         data = [idata]
         l_labels = ["Reference"]
-        var_names.append(f"~{model.response_name}_mean")
+        var_names.append(f"~{model.family.likelihood.parent}")
     else:
         data = []
         l_labels = []

diff --git a/kulprit/projection/projector.py b/kulprit/projection/projector.py
@@ -46,7 +46,7 @@ def __init__(
         self.idata = idata
 
         # log properties of the reference model
-        self.response_name = self.model.response_name
+        self.response_name = model.response_component.term.name
         self.ref_family = self.model.family.name
         self.priors = self.model.constant_components
 
@@ -80,7 +80,10 @@ def project(
                 terms = list(terms)
 
             # test `terms` input
-            if not set(terms).issubset(set(self.model.response_component.common_terms)):
+            ref_terms = list(
+                self.model.components[self.model.family.likelihood.parent].common_terms.keys()
+            )
+            if not set(terms).issubset(set(ref_terms)):
                 raise UserWarning(
                     "Please ensure that all terms selected for projection exist in"
                     + " the reference model."
@@ -128,15 +131,17 @@ def project_names(self, term_names: Sequence[str]) -> SubModel:
         new_model = self._build_restricted_model(term_names=term_names_)
 
         # extract the design matrix from the model
-        if new_model.response_component.design.common:
-            X = new_model.response_component.design.common.design_matrix
-            slices = new_model.response_component.design.common.slices
+        d_component = new_model.distributional_components[new_model.family.likelihood.parent]
+
+        if d_component:
+            X = d_component.design.common.design_matrix
+            slices = d_component.design.common.slices
 
             # Add offset columns to their own design matrix
             # Remove them from the common design matrix.
             if hasattr(new_model, "offset_terms"):  # pragma: no cover
                 for term in new_model.offset_terms:
-                    term_slice = new_model.response_component.design.common.slices[term]
+                    term_slice = slices[term]
                     X = np.delete(X, term_slice, axis=1)
 
         # build new term_names (add dispersion parameter if included)
@@ -174,26 +179,28 @@ def project_names(self, term_names: Sequence[str]) -> SubModel:
             model=new_model,
             idata=new_idata,
             loss=loss,
-            size=len(new_model.response_component.common_terms),
+            size=len(new_model.components[new_model.family.likelihood.parent].common_terms),
             term_names=term_names,
         )
         return sub_model
 
     def compute_model_log_likelihood(self, model, idata):
         # extract observed data
-        obs_array = self.idata.observed_data[model.response_name]
+        obs_array = self.idata.observed_data[model.response_component.term.name]
         obs_array = obs_array.expand_dims(
             chain=idata.posterior.dims["chain"],
             draw=idata.posterior.dims["draw"],
         )
 
-        preds = model.predict(idata, kind="mean", inplace=False).posterior[
-            f"{model.response_name}_mean"
+        preds = model.predict(idata, kind="response_params", inplace=False).posterior[
+            f"{model.family.likelihood.parent}"
         ]
         if model.family.name == "gaussian":
             # initialise probability distribution object
             dist = XrContinuousRV(
-                stats.norm, preds.values, idata.posterior[f"{model.response_name}_sigma"]
+                stats.norm,
+                preds.values,
+                idata.posterior["sigma"],
             )
         elif model.family.name == "binomial":
             # initialise probability distribution object
@@ -254,12 +261,12 @@ def _extend_term_names(
         """Extend the model term names to include dispersion terms."""
 
         # add intercept term if present
-        if new_model.response_component.intercept_term:
+        if bmb.formula.formula_has_intercept(new_model.formula.main):
             term_names.insert(0, "Intercept")
 
         # add the auxiliary parameters
         if self.priors:
-            aux_params = [f"{self.response_name}_{str(k)}" for k in self.priors]
+            aux_params = [f"{str(k)}" for k in self.priors]
             term_names += aux_params
             # TODO generalize  #pylint: disable=fixme
             slices[aux_params[0]] = slice(-1, None, None)

diff --git a/kulprit/projection/solver.py b/kulprit/projection/solver.py
@@ -30,7 +30,7 @@ def __init__(
         self.ref_idata = idata
 
         # log the reference model's response name and family
-        self.response_name = self.ref_model.response_name
+        self.response_name = self.ref_model.response_component.term.name
         self.ref_family = self.ref_model.family.name
 
         # define sampling options
@@ -44,7 +44,7 @@ def __init__(
     def pps(self):
         # make in-sample predictions with the reference model if not available
         if "posterior_predictive" not in self.ref_idata.groups():
-            self.ref_model.predict(self.ref_idata, kind="pps", inplace=True)
+            self.ref_model.predict(self.ref_idata, kind="response", inplace=True)
 
         pps = az.extract(
             self.ref_idata,

diff --git a/kulprit/reference.py b/kulprit/reference.py
@@ -39,13 +39,13 @@ def __init__(
             The ArviZ InferenceData object of the fitted reference model
         """
         # test that the reference model has an intercept term
-        if model.response_component.intercept_term is None:
+        if not bmb.formula.formula_has_intercept(model.formula.main):
             raise UserWarning(
                 "The procedure currently requires reference models to have an intercept term."
             )
 
         # test that the reference model does not admit any hierarchical structure
-        if model.response_component.group_specific_terms:
+        if any(val.group_specific_groups for val in model.distributional_components.values()):
             raise NotImplementedError("Hierarchical models currently not supported.")
 
         # build posterior if not provided
@@ -137,11 +137,11 @@ def search(
         """
 
         # set default `max_terms` value
+        n_terms = len(self.model.components[self.model.family.likelihood.parent].common_terms)
         if max_terms is None:
-            max_terms = len(self.model.response_component.common_terms)
-
+            max_terms = n_terms
         # test `max_terms` input
-        if max_terms > len(self.model.response_component.common_terms):
+        elif max_terms > n_terms:
             raise UserWarning(
                 "Please ensure that the maximum number to consider in the "
                 + "submodel search is between 1 and the number of terms in the "
@@ -283,9 +283,6 @@ def plot_densities(
 def check_model_idata_compatability(model, idata):
     """Check that the Bambi model and idata are compatible with vanilla procedure.
 
-    In the future, this will be extended to allow for different structures and
-    covariates, checking instead only that the observation data are the same.
-
     Parameters:
     ----------
     model : Bambi model
@@ -297,22 +294,8 @@ def check_model_idata_compatability(model, idata):
     -------
         bool : Indicator of whether the two objects are compatible
     """
-
     # test that the variate's name is the same in reference model and idata
-    if not model.response_name == list(idata.observed_data.data_vars.variables)[0]:
-        return False
-
-    # test that the variate has the same dimensions in reference model and idata
-    if model.response_component.design.response.kind != "proportion" and (
-        idata.observed_data[model.response_name].to_numpy().shape
-        != model.data[model.response_name].shape
-    ):
-        return False
-
-    if model.response_component.design.response.kind == "proportion" and (
-        idata.observed_data[model.response_name].to_numpy().shape
-        != model.response_component.design.response.evaluate_new_data(model.data).shape
-    ):
+    if not model.response_component.term.name == list(idata.observed_data.data_vars.variables)[0]:
         return False
 
     # return default truth

diff --git a/kulprit/search/forward.py b/kulprit/search/forward.py
@@ -16,7 +16,9 @@ def __init__(self, projector: Projector) -> None:
         self.projector = projector
 
         # log the names of the terms in the reference model
-        self.ref_terms = list(self.projector.model.response_component.common_terms)
+
+        model = self.projector.model
+        self.ref_terms = list(model.components[model.family.likelihood.parent].common_terms.keys())
 
         # initialise search
         self.k_term_idx = {}
@@ -29,7 +31,14 @@ def __repr__(self) -> str:
         """String representation of the search path."""
 
         path_dict = {
-            k: [list(submodel.model.response_component.terms.keys()), submodel.loss]
+            k: [
+                list(
+                    submodel.model.components[
+                        submodel.model.family.likelihood.parent
+                    ].common_terms.keys()
+                ),
+                submodel.loss,
+            ]
             for k, submodel in self.k_submodel.items()
         }
         df = pd.DataFrame.from_dict(

diff --git a/kulprit/search/l1.py b/kulprit/search/l1.py
@@ -16,17 +16,10 @@ def __init__(self, projector: Projector) -> None:
 
         # log the projector object
         self.projector = projector
+        terms = self.projector.model.components[self.projector.model.family.likelihood.parent].terms
 
         # test whether the model includes categorical terms, and if so raise error
-        if (
-            sum(
-                (
-                    self.projector.model.response_component.terms[term].categorical
-                    for term in self.projector.model.response_component.terms.keys()
-                )
-            )
-            > 0
-        ):
+        if sum(terms[term].categorical for term in terms.keys()) > 0:
             raise NotImplementedError("Group-lasso not yet implemented")
 
         # initialise search
@@ -39,7 +32,14 @@ def __repr__(self) -> str:
         """String representation of the search path."""
 
         path_dict = {
-            k: [list(submodel.model.response_component.terms.keys()), submodel.loss]
+            k: [
+                list(
+                    submodel.model.components[
+                        submodel.model.family.likelihood.parent
+                    ].common_terms.keys()
+                ),
+                submodel.loss,
+            ]
             for k, submodel in self.k_submodel.items()
         }
         df = pd.DataFrame.from_dict(
@@ -92,23 +92,18 @@ def compute_path(self) -> None:
         -------
         np.ndarray: The coefficients for each model size
         """
-
+        model = self.projector.model
         # extract reference model data and latent predictor
         self.common_terms = list(  # pylint: disable=attribute-defined-outside-init
-            self.projector.model.response_component.common_terms
-        )
-        X = np.column_stack(
-            [
-                self.projector.model.response_component.design.common[term]
-                for term in self.common_terms
-            ]
+            model.components[model.family.likelihood.parent].common_terms
         )
+        d_component = model.distributional_components[model.family.likelihood.parent]
+        X = np.column_stack([d_component.design.common[term] for term in self.common_terms])
         # XXX we need to make this more general  # pylint: disable=fixme
         mean_param_name = list(self.projector.model.family.link.keys())[0]
         eta = self.projector.model.family.link[mean_param_name].link(
-            np.array(self.projector.model.response_component.design.response)
+            model.components[model.family.likelihood.parent].design.response.design_matrix
         )
-
         # compute L1 path in the latent space
         _, coef_path, _ = lasso_path(X, eta)
         cov_order = self.first_non_zero_idx(coef_path)

diff --git a/tests/test_project.py b/tests/test_project.py
@@ -84,26 +84,6 @@ def test_different_variate_name(self, bambi_model_idata):
             # build a bad reference model object
             kpt.ProjectionPredictive(bad_model, bambi_model_idata)
 
-    def test_different_variate_dim(self, bambi_model_idata):
-        """Test that an error is raised when model and idata aren't compatible."""
-
-        # define model data
-        data = pd.DataFrame(
-            {
-                "z": np.array([1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839]),
-                "x": np.array([59, 60, 62, 56, 63, 59, 62, 60]),
-                "y": np.array([6, 13, 18, 28, 52, 53, 61, 60]),
-            }
-        )
-
-        # define model
-        formula = "z ~ x + y"
-        bad_model = bmb.Model(formula, data, family="gaussian")
-
-        with pytest.raises(UserWarning):
-            # build a bad reference model object
-            kpt.ProjectionPredictive(bad_model, bambi_model_idata)
-
     def test_no_term_names_error(self, ref_model):
         """Test that an error is raised when no term names are provided."""
 
@@ -196,9 +176,11 @@ def test_build_restricted_model(self, bambi_model, bambi_model_idata):
         )
         assert new_model.family.name == bambi_model.family.name
         assert (
-            new_model.response_component.terms.keys() == bambi_model.response_component.terms.keys()
+            new_model.components[new_model.family.likelihood.parent].common_terms.keys()
+            == bambi_model.components[bambi_model.family.likelihood.parent].common_terms.keys()
         )
-        assert set(new_model.response_component.common_terms.keys()) == set(
-            bambi_model.response_component.common_terms.keys()
-        )
-        assert new_model.response_name == bambi_model.response_name
+
+        assert set(
+            new_model.components[new_model.family.likelihood.parent].common_terms.keys()
+        ) == set(bambi_model.components[bambi_model.family.likelihood.parent].common_terms.keys())
+        assert new_model.response_component.term.name == bambi_model.response_component.term.name