Skip to content

Commit

Permalink
Fix Code Issues to Get Unit Tests Running (#121)
Browse files Browse the repository at this point in the history
* First pass.

* Action on push

* Install deps.

* ubuntu 20?

* Ok, just mac

* Try commenting out the partially implemented models?

* Let failing tests fail

* Tests failing now only for `sample_description_from_xarray` and code-issues

* Fix location and design matrices

* Remove method init_par from Estimator

* Remove rcond because dask doesn't support?

* check chunk type for dask

* Fix tf + dask compat issue.

* Remove TF1 and TF2 Tests

* Downgrade `sparse`

* Make sure arrays are not immutable

* Remove TF mentions

* Remove TF2

* Remove TF from requirements.txt

* Undo erroneous deletion of dep

* Remove constants.

* Remove constants.

* Fix small tensorflow merge issues.

Co-authored-by: David S. Fischer <[email protected]>
  • Loading branch information
ilan-gold and davidsebfischer authored Jan 28, 2022
1 parent 4697bbe commit b5c5f49
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 38 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
on: push
jobs:
build:
runs-on: macos-latest
strategy:
matrix:
python-version: [ '3.6' ]
name: Python ${{ matrix.python-version }} Tests
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Install Dependencies
run: pip install -r requirements.txt
- name: Run Tests
run: python -m unittest
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
# Fast and scalable fitting of over-determined generalized-linear models (GLMs)

batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis for single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered.

```
pip install -r requirements.txt
```

To run unit tests:

```
pip install -e .
python -m unittest
```

<!--
# Installation
1. Install [tensorflow](https://www.tensorflow.org/install/), see below. Please use the pip installation if you are unsure.
Expand Down
6 changes: 4 additions & 2 deletions batchglm/models/base/input.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from operator import indexOf
import dask.array
import logging
import numpy as np
Expand Down Expand Up @@ -108,12 +109,13 @@ def feature_isallzero(self):
return self._feature_allzero

def fetch_x_dense(self, idx):
assert isinstance(self.x, np.ndarray), "tried to fetch dense from non ndarray"
# Better way than accessing ._meta to check type of dask chunks?
assert isinstance(self.x, np.ndarray) or isinstance(self.x._meta, np.ndarray), "tried to fetch dense from non ndarray"

return self.x[idx, :]

def fetch_x_sparse(self, idx):
assert isinstance(self.x, scipy.sparse.csr_matrix), "tried to fetch sparse from non csr_matrix"
assert isinstance(self.x, scipy.sparse.csr_matrix) or isinstance(self.x._meta, scipy.sparse.csr_matrix), "tried to fetch sparse from non csr_matrix"

data = self.x[idx, :]

Expand Down
6 changes: 6 additions & 0 deletions batchglm/models/base_glm/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ def constraints_loc(self):
def constraints_scale(self):
return np.identity(n=self.b_var.shape[0])

def param_bounds(self, dtype):
pass

def eta_loc_j(self, j) -> np.ndarray:
pass

def np_clip_param(
self,
param,
Expand Down
6 changes: 4 additions & 2 deletions batchglm/train/numpy/base_glm/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ def iwls_step(

delta_theta = np.zeros_like(self.model.a_var)
if isinstance(delta_theta, dask.array.core.Array):
delta_theta = delta_theta.compute()
# Need to copy to prevent returning an immutable view of the data
delta_theta = delta_theta.compute().copy()

if isinstance(a, dask.array.core.Array):
# Have to use a workaround to solve problems in parallel in dask here. This workaround does
Expand Down Expand Up @@ -456,7 +457,8 @@ def _b_step_loop(
"""
delta_theta = np.zeros_like(self.model.b_var)
if isinstance(delta_theta, dask.array.core.Array):
delta_theta = delta_theta.compute()
# Need to copy to prevent returning an immutable view of the data
delta_theta = delta_theta.compute().copy()

xh_scale = np.matmul(self.model.design_scale, self.model.constraints_scale).compute()
b_var = self.model.b_var.compute()
Expand Down
4 changes: 0 additions & 4 deletions batchglm/unit_test/test_acc_glm_all_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,6 @@ def get_simulator(self):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Simulator
elif self.noise_model == "norm":
from batchglm.api.models import Simulator
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Simulator
else:
raise ValueError("noise_model not recognized")

Expand Down
18 changes: 8 additions & 10 deletions batchglm/unit_test/test_extreme_values_glm_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ def _modify_sim(self, idx, val):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
elif self.noise_model == "norm":
from batchglm.api.models import Estimator, InputDataGLM
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Estimator, InputDataGLM
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -130,10 +126,11 @@ def test_norm(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestAccuracyXtremeNorm.test_norm()")
logger.info('Normal noise model not implemented for numpy')

np.random.seed(1)
self.noise_model = "norm"
self._test_all()
# np.random.seed(1)
# self.noise_model = "norm"
# self._test_all()


class TestAccuracyXtremeBeta(
Expand All @@ -148,10 +145,11 @@ def test_beta(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestAccuracyXtremeBeta.test_beta()")
logger.info('Beta noise model not implemented for numpy')

np.random.seed(1)
self.noise_model = "beta"
self._test_all()
# np.random.seed(1)
# self.noise_model = "beta"
# self._test_all()


if __name__ == '__main__':
Expand Down
40 changes: 20 additions & 20 deletions batchglm/unit_test/test_graph_glm_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ def __init__(
else:
if noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
elif noise_model == "norm":
from batchglm.api.models import Estimator, InputDataGLM
elif noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Estimator, InputDataGLM
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -112,10 +108,6 @@ def get_simulator(self):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Simulator
elif self.noise_model == "norm":
from batchglm.api.models import Simulator
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Simulator
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -283,18 +275,22 @@ def test_full_norm(self):
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmNorm.test_full_norm()")

self.noise_model = "norm"
self._test_full(sparse=False)
self._test_full(sparse=True)
logger.info('Normal noise model not implemented for numpy')

# self.noise_model = "norm"
# self._test_full(sparse=False)
# self._test_full(sparse=True)

def test_batched_norm(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmNorm.test_batched_norm()")

self.noise_model = "norm"
self._test_batched(sparse=False)
self._test_batched(sparse=True)
logger.info('Normal noise model not implemented for numpy')

# self.noise_model = "norm"
# self._test_batched(sparse=False)
# self._test_batched(sparse=True)


class TestGraphGlmBeta(
Expand All @@ -310,18 +306,22 @@ def test_full_beta(self):
logging.getLogger("batchglm").setLevel(logging.ERROR)
logger.error("TestGraphGlmBeta.test_full_beta()")

self.noise_model = "beta"
self._test_full(sparse=False)
self._test_full(sparse=True)
logger.info('Beta noise model not implemented for numpy')

# self.noise_model = "beta"
# self._test_full(sparse=False)
# self._test_full(sparse=True)

def test_batched_beta(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmBeta.test_batched_beta()")

self.noise_model = "beta"
self._test_batched(sparse=False)
self._test_batched(sparse=True)
logger.info('Beta noise model not implemented for numpy')

# self.noise_model = "beta"
# self._test_batched(sparse=False)
# self._test_batched(sparse=True)


if __name__ == '__main__':
Expand Down
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
anndata==0.7.8
dask==2021.3.0
numpy>=1.16.4
pandas==1.1.5
patsy==0.5.2
pytest==6.2.5
scipy>=1.2.1
sparse==0.9.1
toolz==0.11.2

0 comments on commit b5c5f49

Please sign in to comment.