Support for Go-Nogo and deadlines with missing data #358

Merged

Commits (39)
95ba1ce
added procedures to process data according to missing_data and deadli…
digicosmos86 Feb 20, 2024
3dcd497
update github actions, add support for Python 3.12
digicosmos86 Feb 20, 2024
422dc88
revert back python 3.12 support
digicosmos86 Feb 20, 2024
7bdb137
revert back python 3.12 support
digicosmos86 Feb 20, 2024
b359241
limit version of jax
digicosmos86 Feb 20, 2024
cbe62a5
limit version of jax
digicosmos86 Feb 20, 2024
3895c6b
list onnx-runtime as a dev dependency
digicosmos86 Feb 20, 2024
c69a5a0
Update github action
digicosmos86 Feb 21, 2024
54a99e9
update config and default to make room for custom response variables
digicosmos86 Feb 21, 2024
4165eac
update tests for configs
digicosmos86 Feb 21, 2024
d0cae6f
update files to support custom response variables
digicosmos86 Feb 21, 2024
b63c14d
finalize support for custom response variables
digicosmos86 Feb 21, 2024
9007918
update dependencies
digicosmos86 Feb 21, 2024
4e1246e
refactor make_model_distribution
digicosmos86 Feb 21, 2024
1a6a17d
add back in accidentally commented out code
digicosmos86 Feb 22, 2024
80a202d
fixed errors in graphing
digicosmos86 Feb 22, 2024
854c1dd
correct Enum assignment
digicosmos86 Feb 26, 2024
427c519
better handling of data passed in
digicosmos86 Feb 29, 2024
1cf5a60
add more supported onnx ops
digicosmos86 Feb 29, 2024
a4979b9
better support for corner cases in likelihood makers
digicosmos86 Feb 29, 2024
377f60b
add test onnx files
digicosmos86 Feb 29, 2024
0e960b2
assemble functions for dual networks
digicosmos86 Feb 29, 2024
4ff3ad3
tests for dual networks
digicosmos86 Feb 29, 2024
70b1aca
set decimal to 4 in assert_array_almost_equal
digicosmos86 Feb 29, 2024
accc57b
Add missing data networks to hssm
digicosmos86 Feb 29, 2024
0b97363
Update tests so they pass
digicosmos86 Feb 29, 2024
bd6bc02
fixed one bug
digicosmos86 Feb 29, 2024
ec54df0
remove one debug line
digicosmos86 Mar 4, 2024
a7305e5
Remove two debug prints
digicosmos86 Mar 4, 2024
b4db5f6
Simplify function and class creation to make it DRY
digicosmos86 Mar 4, 2024
8dcb771
Update tests
digicosmos86 Mar 4, 2024
7528bae
add pytest-xdist to enable testing cases in parallel
digicosmos86 Mar 5, 2024
f7e5634
update action configs to enable parallel testing
digicosmos86 Mar 5, 2024
d158733
Rearrange datasets with missing to enable computation
digicosmos86 Mar 5, 2024
d53c2e6
Fix bugs in distribution_utils
digicosmos86 Mar 5, 2024
dbdb21c
Update tests
digicosmos86 Mar 5, 2024
c6f4bc0
Clean up the code to make parameters more consistent
digicosmos86 Mar 6, 2024
07988a6
Update tests
digicosmos86 Mar 6, 2024
cb47100
update documentation for using HSSM with PyMC
digicosmos86 Mar 6, 2024
4 changes: 4 additions & 0 deletions src/hssm/distribution_utils/__init__.py
@@ -2,18 +2,22 @@

from ..utils import download_hf
from .dist import (
assemble_callables,
make_blackbox_op,
make_distribution,
make_family,
make_likelihood_callable,
make_missing_data_callable,
make_ssm_rv,
)

__all__ = [
"assemble_callables",
"download_hf",
"make_blackbox_op",
"make_distribution",
"make_likelihood_callable",
"make_missing_data_callable",
"make_family",
"make_ssm_rv",
]
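
The newly exported pair here is `make_missing_data_callable` and `assemble_callables`. A minimal import sketch (assuming a build of `hssm` that includes this branch):

```python
# Sketch: pulling in the updated public API from this diff.
from hssm.distribution_utils import (
    assemble_callables,
    make_likelihood_callable,
    make_missing_data_callable,
)
```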
82 changes: 76 additions & 6 deletions src/hssm/distribution_utils/dist.py
@@ -566,12 +566,6 @@ def make_likelihood_callable(
+ "but did not provide `pytensor` or `jax` as backend."
)

- if params_is_reg is None:
- raise ValueError(
- "You set `loglik_kind` to `approx_differentiable` "
- + "but did not provide `params_is_reg`."
- )

if isinstance(loglik, (str, PathLike)):
if not Path(loglik).exists():
loglik = download_hf(str(loglik))
@@ -582,6 +576,11 @@
lan_logp_pt = make_pytensor_logp(onnx_model, data_dim)
return lan_logp_pt
if backend == "jax":
if params_is_reg is None:
raise ValueError(
"You set `loglik_kind` to `approx_differentiable` "
+ "and `backend` to `jax` but did not provide `params_is_reg`."
)
logp, logp_grad, logp_nojit = make_jax_logp_funcs_from_onnx(
onnx_model,
params_is_reg,
@@ -594,3 +593,74 @@
return lan_logp_jax

raise ValueError("Incorrect likelihood specification.")


def make_missing_data_callable(
loglik: pytensor.graph.Op | Callable | PathLike | str,
is_cpn_only: bool,
backend: Literal["pytensor", "jax", "cython", "other"] | None = "jax",
params_is_reg: list[bool] | None = None,
) -> pytensor.graph.Op | Callable:
"""Make a secondary network for the likelihood function.

Please refer to the documentation of `make_likelihood_callable` for more.
"""
return make_likelihood_callable(
loglik, "approx_differentiable", backend, params_is_reg, 0 if is_cpn_only else 1
) # Just assume that the missing data network is always approx_differentiable
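
The wrapper pins `loglik_kind` to `"approx_differentiable"` and chooses the data dimension: a CPN (choice-probability network) takes no trial data, while any other missing-data network takes one extra column (e.g. a deadline). A sketch of what the forwarding amounts to (file name and flags hypothetical):

```python
# For a CPN, the forwarded data dimension is 0; otherwise it is 1.
cpn_logp = make_missing_data_callable(
    "cpn.onnx", is_cpn_only=True, backend="jax", params_is_reg=[False] * 4
)
# ...equivalent to:
# make_likelihood_callable("cpn.onnx", "approx_differentiable", "jax",
#                          [False] * 4, 0)
```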


def assemble_callables(
callable: pytensor.graph.Op | Callable,
missing_data_callable: pytensor.graph.Op | Callable,
is_cpn_only: bool,
) -> Callable:
"""Assemble the likelihood callables into a single callable.

Assembles the likelihood callables into a single callable.

Parameters
----------
callable
The callable for the likelihood function.
missing_data_callable
The callable for the secondary network for the likelihood function.
is_cpn_only
Whether the missing data model is a CPN only model, in which case we do not
apply any data to the missing data model.
"""

def likelihood_callable(data, *dist_params):
"""Compute the log-likelihoood of the model."""
# Assuming the first column of the data is always rt
data = pt.as_tensor_variable(data)
dist_params = [pt.as_tensor_variable(param) for param in dist_params]

missing_mask = pt.eq(data[:, 0], -999.0)
observed_mask = pt.bitwise_not(missing_mask)

observed_data = data[observed_mask, :]

dist_params_observed = [
param if param.ndim == 0 else param[observed_mask] for param in dist_params
]

logp_observed = callable(observed_data[:, :-1], *dist_params_observed)

dist_params_missing = [
param if param.ndim == 0 else param[missing_mask] for param in dist_params
]

if is_cpn_only:
logp_missing = missing_data_callable(*dist_params_missing)
else:
missing_data = data[missing_mask, -1:]
logp_missing = missing_data_callable(missing_data, *dist_params_missing)

logp = pt.empty_like(missing_mask, dtype=pytensor.config.floatX)
logp = pt.set_subtensor(logp[observed_mask], logp_observed)
logp = pt.set_subtensor(logp[missing_mask], logp_missing)

return logp

return likelihood_callable
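
The function's core is a two-way split on the missingness code in the first data column: observed rows go to the main likelihood (without the last data column), missing rows go to the secondary network, and the two log-density vectors are scattered back into one output. A NumPy analogue of that logic, as a sketch (the real callable builds a symbolic PyTensor graph; `-999.0` as the missing-data code comes from the diff above):

```python
import numpy as np

def assemble_numpy(logp_obs, logp_miss, data, *params, is_cpn_only=False):
    """NumPy sketch of `likelihood_callable` above."""
    missing = data[:, 0] == -999.0   # rt == -999.0 marks a missing trial
    observed = ~missing
    # Scalar parameters are shared; trial-wise parameters are split by row.
    p_obs = [p if np.ndim(p) == 0 else p[observed] for p in params]
    p_miss = [p if np.ndim(p) == 0 else p[missing] for p in params]
    out = np.empty(data.shape[0])
    out[observed] = logp_obs(data[observed, :-1], *p_obs)
    out[missing] = (
        logp_miss(*p_miss) if is_cpn_only
        else logp_miss(data[missing, -1:], *p_miss)
    )
    return out
```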
96 changes: 63 additions & 33 deletions src/hssm/distribution_utils/onnx/onnx.py
@@ -12,7 +12,7 @@
import onnx
import pytensor
import pytensor.tensor as pt
- from jax import jit, vjp, vmap
from jax import grad, jit, vjp, vmap
from numpy.typing import ArrayLike
from pytensor.graph import Apply, Op
from pytensor.link.jax.dispatch import jax_funcify
@@ -76,38 +76,44 @@ def logp_no_data(*dist_params: float) -> float:
# Makes a matrix to feed to the LAN model
input_vector = jnp.array(dist_params)

- return interpret_onnx(loaded_model.graph, input_vector)[0].squeeze()
result = interpret_onnx(loaded_model.graph, input_vector)[0]
return result.squeeze() if any(params_is_reg) else result

# The vectorization of the logp function
- vmap_logp_no_data = vmap(
- logp_no_data,
- in_axes=[0 if is_regression else None for is_regression in params_is_reg],
- )

- def vjp_vmap_logp_no_data(
- *dist_params: list[float | ArrayLike], gz: ArrayLike
- ) -> list[ArrayLike]:
- """Compute the VJP of the log-likelihood function.
-
- Parameters
- ----------
- data
- A two-column numpy array with response time and response.
- dist_params
- A list of parameters used in the likelihood computation.
- gz
- The value of vmap_logp at which the VJP is evaluated, typically is just
- vmap_logp(data, *dist_params)
-
- Returns
- -------
- list[ArrayLike]
- The VJP of the log-likelihood function computed at gz.
- """
- _, vjp_fn = vjp(vmap_logp_no_data, *dist_params)
- return vjp_fn(gz)[1:]
if any(params_is_reg):
vmap_logp_no_data = vmap(
logp_no_data,
in_axes=[
0 if is_regression else None for is_regression in params_is_reg
],
)

- return jit(vmap_logp_no_data), jit(vjp_vmap_logp_no_data), vmap_logp_no_data
def vjp_vmap_logp_no_data(
*dist_params: list[float | ArrayLike], gz: ArrayLike
) -> list[ArrayLike]:
"""Compute the VJP of the log-likelihood function.

Parameters
----------
data
A two-column numpy array with response time and response.
dist_params
A list of parameters used in the likelihood computation.
gz
The value of vmap_logp at which the VJP is evaluated, typically is
just vmap_logp(data, *dist_params)

Returns
-------
list[ArrayLike]
The VJP of the log-likelihood function computed at gz.
"""
_, vjp_fn = vjp(vmap_logp_no_data, *dist_params)
return vjp_fn(gz)[1:]

return jit(vmap_logp_no_data), jit(vjp_vmap_logp_no_data), vmap_logp_no_data

return jit(logp_no_data), jit(grad(logp_no_data)), logp_no_data
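
The restructuring gives the no-data network two return paths: if no parameter varies by trial, the logp is a plain scalar function and `jax.grad` replaces the hand-rolled VJP; otherwise the function is vmapped over the trial axis. A minimal illustration with a stand-in forward pass:

```python
import jax
import jax.numpy as jnp

def f(a, b):                        # stand-in for the ONNX forward pass
    return jnp.tanh(a) + b

# Scalar path (params_is_reg all False): plain grad suffices.
grad_f = jax.jit(jax.grad(f))
print(grad_f(0.5, 1.0))             # 1 - tanh(0.5)**2

# Vectorized path: `a` varies by trial (axis 0), `b` is shared (None).
vmap_f = jax.vmap(f, in_axes=[0, None])
print(vmap_f(jnp.arange(3.0), 1.0))
```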

def logp(data: np.ndarray, *dist_params: float) -> float:
"""Compute the log-likelihood.
@@ -383,7 +389,7 @@ def grad(self, inputs, output_gradients):
inputs
The same as the inputs produced in `make_node`.
output_gradients
- Holds the results of the perform `perform` method.
Holds the results of the `perform` method.

Notes
-----
@@ -475,10 +481,34 @@ def make_pytensor_logp(
onnx.load(str(model)) if isinstance(model, (str, PathLike)) else model
)

if data_dim == 0:

def logp_no_data(*dist_params: list[float | ArrayLike]) -> ArrayLike:
# Specify input layer of MLP
dist_params_tensors = [
pt.as_tensor_variable(param) for param in dist_params # type: ignore
]
n_rows = pt.max(
[
1 if param.ndim == 0 else param.shape[0] # type: ignore
for param in dist_params_tensors
]
)
inputs = pt.empty((n_rows, len(dist_params)))
for i, dist_param in enumerate(dist_params):
inputs = pt.set_subtensor(
inputs[:, i],
dist_param,
)

# Returns elementwise log-likelihoods
return pt.squeeze(pt_interpret_onnx(loaded_model.graph, inputs)[0])

return logp_no_data

def logp(data: np.ndarray, *dist_params: list[float | ArrayLike]) -> ArrayLike:
# Specify input layer of MLP
data = data.reshape((-1, data_dim)) if data_dim > 1 else data
- inputs = pt.zeros((data.shape[0], len(dist_params) + data_dim))
inputs = pt.empty((data.shape[0], (len(dist_params) + data_dim)))
for i, dist_param in enumerate(dist_params):
inputs = pt.set_subtensor(
inputs[:, i],
…
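
The new `logp_no_data` branch in `make_pytensor_logp` assembles the network input by broadcasting each parameter into its own column, with the longest (trial-wise) parameter setting the row count. The same trick in isolation, as a runnable sketch:

```python
import numpy as np
import pytensor
import pytensor.tensor as pt

a = pt.dscalar("a")                # shared scalar parameter
b = pt.dvector("b")                # trial-wise vector parameter
n_rows = pt.max([1, b.shape[0]])   # longest parameter decides the row count
X = pt.empty((n_rows, 2), dtype="float64")
X = pt.set_subtensor(X[:, 0], a)   # a scalar broadcasts down its column
X = pt.set_subtensor(X[:, 1], b)
f = pytensor.function([a, b], X)
print(f(0.5, np.arange(3.0)))      # [[0.5, 0.], [0.5, 1.], [0.5, 2.]]
```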
10 changes: 9 additions & 1 deletion src/hssm/distribution_utils/onnx/onnx2pt.py
@@ -12,6 +12,11 @@
from .onnx2xla import _asarray, attribute_handlers


def onnx_add(a, b, axis=None, broadcast=True):
"""Numpy-backed implementation of ONNX Add op."""
return [pt.add(a, b)]


def pytensor_gemm(
a, b, c=0.0, alpha=1.0, beta=1.0, transA=0, transB=0
): # pylint: disable=C0103
@@ -26,13 +31,16 @@ def pytensor_gemm(


pt_onnx_ops = {
"Add": pt.add,
"Add": lambda a, b: onnx_add(a, b),
"Constant": lambda value: [value],
"MatMul": lambda x, y: [pt.dot(x, y)],
"Relu": lambda x: [pt.math.max(x, 0)],
"Reshape": lambda x, shape: [pt.reshape(x, shape)],
"Tanh": lambda x: [pt.tanh(x)],
"Gemm": pytensor_gemm,
"Neg": lambda x: [-x],
"Exp": lambda x: [pt.exp(x)],
"Log": lambda x: [pt.log(x)],
}


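
For context, a table like `pt_onnx_ops` is consumed by walking the ONNX graph node by node, looking up each node's `op_type`, and threading the returned output lists through. Roughly (a sketch, not the actual `pt_interpret_onnx`):

```python
import pytensor.tensor as pt

pt_ops = {  # a two-entry slice of the table above
    "Tanh": lambda x: [pt.tanh(x)],
    "Exp": lambda x: [pt.exp(x)],
}

def eval_node(op_type, *inputs):
    # Every handler returns a list: one entry per ONNX node output.
    return pt_ops[op_type](*inputs)

x = pt.dvector("x")
(h,) = eval_node("Tanh", x)
(y,) = eval_node("Exp", h)
```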
10 changes: 3 additions & 7 deletions src/hssm/distribution_utils/onnx/onnx2xla.py
@@ -36,7 +36,6 @@
"""

import jax.numpy as jnp
- import numpy as np
import onnx
from jax import lax
from onnx import numpy_helper
@@ -99,12 +98,6 @@ def onnx_conv(

def onnx_add(a, b, axis=None, broadcast=True):
"""Numpy-backed implementation of ONNX Add op."""
- if broadcast:
- axis = (a.dim - b.ndim) if axis is None else axis % a.ndim
- assert a.shape[axis:][: b.ndim] == b.shape
- b_shape = np.ones(a.ndim, dtype="int64")
- b_shape[axis : axis + b.ndim] = b.shape
- b = jnp.reshape(b, b_shape)
return [a + b]
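
The deleted lines implemented ONNX's pre-opset-7 explicit-axis broadcast. From opset 7 on, `Add` uses NumPy-style multidirectional broadcasting, which `jnp` already provides, so `a + b` is enough (assuming the models HSSM loads target opset 7 or later). A quick check of that assumption:

```python
import jax.numpy as jnp

a = jnp.ones((4, 3))
b = jnp.arange(3.0)     # shape (3,) broadcasts against (4, 3), NumPy-style
print((a + b).shape)    # (4, 3)
```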


@@ -132,6 +125,9 @@ def onnx_gemm(
# Added by HSSM developers
"Tanh": lambda x: [jnp.tanh(x)],
"Gemm": onnx_gemm,
"Neg": lambda x: [-x],
"Exp": lambda x: [jnp.exp(x)],
"Log": lambda x: [jnp.log(x)],
}
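
The three added handlers (`Neg`, `Exp`, `Log`) compose the same way the interpreter threads node outputs; log(exp(-x)) round-trips to -x, which makes a cheap sanity check:

```python
import jax.numpy as jnp

ops = {  # the three new entries, copied from above
    "Neg": lambda x: [-x],
    "Exp": lambda x: [jnp.exp(x)],
    "Log": lambda x: [jnp.log(x)],
}
(y,) = ops["Neg"](jnp.array([0.1, 0.5]))
(y,) = ops["Exp"](y)
(y,) = ops["Log"](y)
print(y)                # [-0.1, -0.5]
```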


Expand Down
Loading
Loading