Skip to content

Commit

Permalink
Fix Code Issues to Get Unit Tests Running (#121)
Browse files Browse the repository at this point in the history
* First pass.

* Action on push

* Install deps.

* ubuntu 20?

* Ok, just mac

* Try commenting out the partially implemented models?

* Let failing tests fail

* Tests failing now only for `sample_description_from_xarray` and code-issues

* Fix location and design matrices

* Remove method init_par from Estimator

* Remove rcond because dask doesn't support?

* check chunk type for dask

* Fix tf + dask compat issue.

* Remove TF1 and TF2 Tests

* Downgrade `sparse`

* Make sure arrays are not immutable

* Remove TF mentions

* Remove TF2

* Remove TF from requirements.txt

* Undo erroneous deletion of dep

* Remove constants.

* Remove constants.

* Fix small tensorflow merge issues.

Co-authored-by: David S. Fischer <[email protected]>
  • Loading branch information
ilan-gold and davidsebfischer authored Jan 28, 2022
1 parent 4697bbe commit b5c5f49
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 38 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
on: push
jobs:
build:
runs-on: macos-latest
strategy:
matrix:
python-version: [ '3.6' ]
name: Python ${{ matrix.python-version }} Tests
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Install Dependencies
run: pip install -r requirements.txt
- name: Run Tests
run: python -m unittest
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
# Fast and scalable fitting of over-determined generalized-linear models (GLMs)

batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis for single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered.

```
pip install -r requirements.txt
```

To run unit tests:

```
pip install -e .
python -m unittest
```

<!--
# Installation
1. Install [tensorflow](https://www.tensorflow.org/install/), see below. Please use the pip installation if you are unsure.
Expand Down
6 changes: 4 additions & 2 deletions batchglm/models/base/input.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from operator import indexOf
import dask.array
import logging
import numpy as np
Expand Down Expand Up @@ -108,12 +109,13 @@ def feature_isallzero(self):
return self._feature_allzero

def fetch_x_dense(self, idx):
assert isinstance(self.x, np.ndarray), "tried to fetch dense from non ndarray"
# Better way than accessing ._meta to check type of dask chunks?
assert isinstance(self.x, np.ndarray) or isinstance(self.x._meta, np.ndarray), "tried to fetch dense from non ndarray"

return self.x[idx, :]

def fetch_x_sparse(self, idx):
assert isinstance(self.x, scipy.sparse.csr_matrix), "tried to fetch sparse from non csr_matrix"
assert isinstance(self.x, scipy.sparse.csr_matrix) or isinstance(self.x._meta, scipy.sparse.csr_matrix), "tried to fetch sparse from non csr_matrix"

data = self.x[idx, :]

Expand Down
6 changes: 6 additions & 0 deletions batchglm/models/base_glm/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ def constraints_loc(self):
def constraints_scale(self):
return np.identity(n=self.b_var.shape[0])

def param_bounds(self, dtype):
pass

def eta_loc_j(self, j) -> np.ndarray:
pass

def np_clip_param(
self,
param,
Expand Down
6 changes: 4 additions & 2 deletions batchglm/train/numpy/base_glm/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ def iwls_step(

delta_theta = np.zeros_like(self.model.a_var)
if isinstance(delta_theta, dask.array.core.Array):
delta_theta = delta_theta.compute()
# Need to copy to prevent returning an immutable view of the data
delta_theta = delta_theta.compute().copy()

if isinstance(a, dask.array.core.Array):
# Have to use a workaround to solve problems in parallel in dask here. This workaround does
Expand Down Expand Up @@ -456,7 +457,8 @@ def _b_step_loop(
"""
delta_theta = np.zeros_like(self.model.b_var)
if isinstance(delta_theta, dask.array.core.Array):
delta_theta = delta_theta.compute()
# Need to copy to prevent returning an immutable view of the data
delta_theta = delta_theta.compute().copy()

xh_scale = np.matmul(self.model.design_scale, self.model.constraints_scale).compute()
b_var = self.model.b_var.compute()
Expand Down
4 changes: 0 additions & 4 deletions batchglm/unit_test/test_acc_glm_all_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,6 @@ def get_simulator(self):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Simulator
elif self.noise_model == "norm":
from batchglm.api.models import Simulator
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Simulator
else:
raise ValueError("noise_model not recognized")

Expand Down
18 changes: 8 additions & 10 deletions batchglm/unit_test/test_extreme_values_glm_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ def _modify_sim(self, idx, val):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
elif self.noise_model == "norm":
from batchglm.api.models import Estimator, InputDataGLM
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Estimator, InputDataGLM
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -130,10 +126,11 @@ def test_norm(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestAccuracyXtremeNorm.test_norm()")
logger.info('Normal noise model not implemented for numpy')

np.random.seed(1)
self.noise_model = "norm"
self._test_all()
# np.random.seed(1)
# self.noise_model = "norm"
# self._test_all()


class TestAccuracyXtremeBeta(
Expand All @@ -148,10 +145,11 @@ def test_beta(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestAccuracyXtremeBeta.test_beta()")
logger.info('Beta noise model not implemented for numpy')

np.random.seed(1)
self.noise_model = "beta"
self._test_all()
# np.random.seed(1)
# self.noise_model = "beta"
# self._test_all()


if __name__ == '__main__':
Expand Down
40 changes: 20 additions & 20 deletions batchglm/unit_test/test_graph_glm_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ def __init__(
else:
if noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
elif noise_model == "norm":
from batchglm.api.models import Estimator, InputDataGLM
elif noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Estimator, InputDataGLM
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -112,10 +108,6 @@ def get_simulator(self):
else:
if self.noise_model == "nb":
from batchglm.api.models.numpy.glm_nb import Simulator
elif self.noise_model == "norm":
from batchglm.api.models import Simulator
elif self.noise_model == "beta":
from batchglm.api.models.numpy.glm_beta import Simulator
else:
raise ValueError("noise_model not recognized")

Expand Down Expand Up @@ -283,18 +275,22 @@ def test_full_norm(self):
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmNorm.test_full_norm()")

self.noise_model = "norm"
self._test_full(sparse=False)
self._test_full(sparse=True)
logger.info('Normal noise model not implemented for numpy')

# self.noise_model = "norm"
# self._test_full(sparse=False)
# self._test_full(sparse=True)

def test_batched_norm(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmNorm.test_batched_norm()")

self.noise_model = "norm"
self._test_batched(sparse=False)
self._test_batched(sparse=True)
logger.info('Normal noise model not implemented for numpy')

# self.noise_model = "norm"
# self._test_batched(sparse=False)
# self._test_batched(sparse=True)


class TestGraphGlmBeta(
Expand All @@ -310,18 +306,22 @@ def test_full_beta(self):
logging.getLogger("batchglm").setLevel(logging.ERROR)
logger.error("TestGraphGlmBeta.test_full_beta()")

self.noise_model = "beta"
self._test_full(sparse=False)
self._test_full(sparse=True)
logger.info('Beta noise model not implemented for numpy')

# self.noise_model = "beta"
# self._test_full(sparse=False)
# self._test_full(sparse=True)

def test_batched_beta(self):
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logger.error("TestGraphGlmBeta.test_batched_beta()")

self.noise_model = "beta"
self._test_batched(sparse=False)
self._test_batched(sparse=True)
logger.info('Beta noise model not implemented for numpy')

# self.noise_model = "beta"
# self._test_batched(sparse=False)
# self._test_batched(sparse=True)


if __name__ == '__main__':
Expand Down
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
anndata==0.7.8
dask==2021.3.0
numpy>=1.16.4
pandas==1.1.5
patsy==0.5.2
pytest==6.2.5
scipy>=1.2.1
sparse==0.9.1
toolz==0.11.2

0 comments on commit b5c5f49

Please sign in to comment.