Skip to content

Commit

Permalink
Merge branch 'maintenance/v0.2.x' into maintenance/v0.1.x
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuqiu-Yang authored Nov 22, 2023
2 parents fae6887 + e22ad1a commit 9401f3c
Show file tree
Hide file tree
Showing 12 changed files with 151 additions and 29 deletions.
22 changes: 15 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

| Branch | Release | CI/CD | Documentation | Code Coverage |
| --- | --- | --- | --- | --- |
| main | ![Badge1](https://img.shields.io/badge/Version-v0.1.1-success) | ![Tests](https://github.com/kevin931/cytomulate/actions/workflows/ci.yml/badge.svg?branch=main) | [![Documentation Status](https://readthedocs.org/projects/cytomulate/badge/?version=dev)](https://cytomulate.readthedocs.io/en/main/?badge=main) | [![codecov](https://codecov.io/gh/kevin931/cytomulate/branch/dev/graph/badge.svg?token=F5H0QTXGMR)](https://codecov.io/gh/kevin931/cytomulate) |
| dev | ![Badge1](https://img.shields.io/badge/Version-v0.1.1-success) |![Tests](https://github.com/kevin931/cytomulate/actions/workflows/ci.yml/badge.svg?branch=dev) | [![Documentation Status](https://readthedocs.org/projects/cytomulate/badge/?version=dev)](https://cytomulate.readthedocs.io/en/dev/?badge=dev) | [![codecov](https://codecov.io/gh/kevin931/cytomulate/branch/dev/graph/badge.svg?token=F5H0QTXGMR)](https://codecov.io/gh/kevin931/cytomulate) |
| main | ![Badge1](https://img.shields.io/badge/Version-v0.2.0-success) | ![Tests](https://github.com/kevin931/cytomulate/actions/workflows/ci.yml/badge.svg?branch=main) | [![Documentation Status](https://readthedocs.org/projects/cytomulate/badge/?version=dev)](https://cytomulate.readthedocs.io/en/main/?badge=main) | [![codecov](https://codecov.io/gh/kevin931/cytomulate/branch/dev/graph/badge.svg?token=F5H0QTXGMR)](https://codecov.io/gh/kevin931/cytomulate) |
| dev | ![Badge1](https://img.shields.io/badge/Version-v0.2.0-success) |![Tests](https://github.com/kevin931/cytomulate/actions/workflows/ci.yml/badge.svg?branch=dev) | [![Documentation Status](https://readthedocs.org/projects/cytomulate/badge/?version=dev)](https://cytomulate.readthedocs.io/en/dev/?badge=dev) | [![codecov](https://codecov.io/gh/kevin931/cytomulate/branch/dev/graph/badge.svg?token=F5H0QTXGMR)](https://codecov.io/gh/kevin931/cytomulate) |


## Installation
Expand Down Expand Up @@ -124,17 +124,25 @@ guidelines, development guides, etc.

Our documentation is built automatically on the cloud! If you wish to build locally, check our detailed guide [here](https://cytomulate.readthedocs.io/en/latest/change/build.html)!

## Latest Release: v0.1.1
## Latest Release: v0.2.0

This is our first maintenance update to be released to v0.1.x,
and we are packing in lots of enhancements! All changes are
related to documentation!
Welcome to Cytomulate v0.2.0! Hooray! We are not only bringing documentation enhancements, but we
are also introducing a new feature for more accurate simulations!

### Changes and New Features
- The `utilities.univariate_noise_model()` method:
- Added `half_normal` option to the `noise_distribution` parameter
- Changed the default `noise_distribution` to `uniform` (This is a **breaking change** because of the benefits to simulated results).
- A warning is issued when no `noise_distribution` is specified, to alert users to this breaking change
- Added the `utilities.estimate_noise_model()` method to estimate the noise present in the data
- Added a built-in estimation procedure to match the amount of zeroes observed in the dataset

### Improvements
- Added 4 more detailed tutorials on [our documentation website](https://cytomulate.readthedocs.io)
- Added 4 more detailed tutorials on [our documentation website](https://cytomulate.readthedocs.io)
- Improved docstrings with more details on key parameters
- Updated the latest references and links


## References

If you are cytomulating in your workflow, citing [our paper](https://doi.org/10.1186/s13059-023-03099-1) is appreciated:
Expand Down
4 changes: 2 additions & 2 deletions cytomulate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
A package for simulating CyTOF data
"""

__version__ = "0.1.1"
__author__ = "Yuqiu Yang, Kevin Wang, Zeyu Lu, Tao Wang, Sherry Wang"
__version__ = "0.2.0"
__author__ = "Yuqiu Yang, Kevin Wang, Zeyu Lu, Tao Wang, Sherry Wang"

from cytomulate.creation.cytof_data import CreationCytofData
from cytomulate.emulation.cytof_data import EmulationCytofData
21 changes: 19 additions & 2 deletions cytomulate/cell_type_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def __init__(self,
# cell_mean and cell_covariance are used during cell differentiation
self.cell_mean = np.zeros(self.n_markers)
self.cell_covariance = np.zeros((self.n_markers, self.n_markers))

# zero_probabilities is used for adjustment
self.zero_probabilities = np.zeros(n_markers)

def sample_cell(self,
n_samples: int) -> Tuple[np.ndarray, np.ndarray]:
"""Draw random samples from the cell type model
Expand All @@ -56,6 +58,21 @@ def sample_cell(self,
"""
X = np.zeros((n_samples, self.n_markers))
X[:, :], _ = self.model.sample(n_samples)
expressed_index = (X > 0)
X = np.clip(X, a_min=0, a_max=None)
for m in range(self.n_markers):
n_zero_exp = int((self.zero_probabilities[m]) * n_samples)
n_zero_present = np.sum(X[:, m]<0.0001)
n_zero_needed = np.max([0, n_zero_exp-n_zero_present])
if n_zero_needed > 0:
non_zero_ind = np.where(X[:,m]>=0.0001)[0]
p = 1/(X[non_zero_ind, m])
p /= np.sum(p)
# if n_zero_needed is 0, this should yield
# [] which when plugged into the next statement
# shall change nothing
ind_to_zero = np.random.choice(non_zero_ind, size=n_zero_needed,
replace=False, p=p)
X[ind_to_zero, m] = 0

expressed_index = (X > 0)
return X, expressed_index
6 changes: 4 additions & 2 deletions cytomulate/emulation/cell_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self,
"""
super().__init__(label, cell_id, n_markers)


def fit(self,
data: np.ndarray,
max_components: int,
Expand Down Expand Up @@ -60,7 +59,10 @@ def fit(self,

self.cell_mean = np.mean(data, axis=0)
self.cell_covariance = np.cov(data, rowvar=False)


for m in range(self.n_markers):
self.zero_probabilities[m] = np.mean(data[:, m] < 0.0001)

# We use BIC (the smaller the better) to perform model selection
smallest_bic = np.Inf
current_bic = 0
Expand Down
56 changes: 51 additions & 5 deletions cytomulate/utilities.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
# Math computation
import numpy as np
from numpy import random as rd
from scipy.special import erfinv

# Polynomials and spline functions
from numpy.polynomial import polynomial
from scipy.interpolate import Akima1DInterpolator
from scipy.interpolate import UnivariateSpline

# Warnings
import warnings

# Typing
from typing import Union, Optional, List, Tuple, Callable
from typing import Union, Optional, List, Callable


def spline_function(x: np.ndarray,
Expand Down Expand Up @@ -176,15 +180,18 @@ def trajectories(end_values: Optional[Union[list, np.ndarray]] = None,
return trajectories_functions


def univariate_noise_model(noise_distribution: str = "normal",
def univariate_noise_model(noise_distribution: Optional[str] = None,
**kwargs) -> Callable:
"""Generate a noise distribution
This is mainly used to generate background noise in the cytof_data object
This is mainly used to generate background noise in the cytof_data object.
.. versionchanged:: 0.2.0 The default `noise_distribution` is changed to `uniform`. If no user-specified value is provided, a warning is given to inform users of the change.
.. versionadded:: 0.2.0 Added the `half_normal` option to the `noise_distribution` parameter.
Parameters
----------
noise_distribution: str
Either "normal" or "uniform"
Either "normal", "half_normal", or "uniform"
kwargs:
extra parameters needed for numpy.random.normal or numpy.random.uniform
Expand All @@ -193,7 +200,14 @@ def univariate_noise_model(noise_distribution: str = "normal",
model: Callable
A RV generator that only takes size as its input
"""
if noise_distribution == "normal":
if noise_distribution is None:
warnings.warn("The default `noise_distribution` is now changed from `normal` to `uniform` as of v0.2.0. Please see the release notes for details.")
noise_distribution = "uniform"

if noise_distribution == "half_normal":
def model(size):
return -np.abs(rd.normal(**kwargs, size=size))
elif noise_distribution == "normal":
def model(size):
return rd.normal(**kwargs, size=size)
elif noise_distribution == "uniform":
Expand All @@ -203,3 +217,35 @@ def model(size):
raise ValueError('Unknown noise distribution')
return model


def estimate_noise_model(data: np.ndarray,
                         noise_distribution: str = "uniform") -> Callable:
    """Estimate the noise model from data

    .. versionadded:: 0.2.0

    The parameters of the requested distribution are derived from ``data``
    and then handed to ``univariate_noise_model`` as keyword arguments:

    * ``"uniform"``: ``low`` is the minimum of ``data`` and ``high`` is 0.
    * ``"half_normal"``: ``loc`` is 0 and ``scale`` is obtained by matching
      the median of the entries of ``data`` that are ``<= 0`` to the median
      of a half-normal distribution.

    Any other value of ``noise_distribution`` is forwarded unchanged, with
    no estimated parameters attached.

    Parameters
    ----------
    data : np.ndarray
        The expression matrix
    noise_distribution : str, optional
        Either "half_normal" or "uniform", by default "uniform"

    Returns
    -------
    Callable
        A RV generator that only takes size as its input
    """
    model_kwargs = {"noise_distribution": noise_distribution}

    if noise_distribution == "uniform":
        model_kwargs.update(low=np.min(data), high=0)
    elif noise_distribution == "half_normal":
        non_positive_values = data[data <= 0]
        # A half-normal distribution with scale s has median
        # s * sqrt(2) * erfinv(0.5); invert that relation to recover s.
        unit_scale_median = np.sqrt(2) * erfinv(0.5)
        estimated_scale = np.abs(np.median(non_positive_values) / unit_scale_median)
        model_kwargs.update(loc=0, scale=estimated_scale)

    return univariate_noise_model(**model_kwargs)
17 changes: 13 additions & 4 deletions docs/source/change/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,21 @@ Latest Release
---------------

**************
v0.1.1
v0.2.0
**************

This is our first maintenance update to be released to v0.1.x,
and we are packing in lots of enhancements! All changes are
related to documentation!
Welcome to Cytomulate v0.2.0! Hooray! We are not only bringing documentation enhancements, but we
are also introducing a new feature for more accurate simulations!

Changes and New Features
--------------------------

- The `utilities.univariate_noise_model()` method:
- Added `half_normal` option to the `noise_distribution` parameter
- Changed the default `noise_distribution` to `uniform` (This is a **breaking change** because of the benefits to simulated results).
- A warning is issued when no `noise_distribution` is specified, to alert users to this breaking change
- Added the `utilities.estimate_noise_model()` method to estimate the noise present in the data
- Added a built-in estimation procedure to match the amount of zeroes observed in the dataset

Improvements
---------------
Expand Down
23 changes: 23 additions & 0 deletions docs/source/change/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,29 @@ This is a complete history of ``cytomulate`` releases.

-------------------

**************
v0.2.0
**************

Welcome to Cytomulate v0.2.0! Hooray! We are not only bringing documentation enhancements, but we
are also introducing a new feature for more accurate simulations!

Changes and New Features
--------------------------

- The `utilities.univariate_noise_model()` method:
- Added `half_normal` option to the `noise_distribution` parameter
- Changed the default `noise_distribution` to `uniform` (This is a **breaking change** because of the benefits to simulated results).
- A warning is issued when no `noise_distribution` is specified, to alert users to this breaking change
- Added the `utilities.estimate_noise_model()` method to estimate the noise present in the data
- Added a built-in estimation procedure to match the amount of zeroes observed in the dataset

Improvements
---------------
- Added 4 more detailed tutorials on `our documentation website <https://cytomulate.readthedocs.io>`_
- Improved docstrings with more details on key parameters
- Updated the latest references and links

**************
v0.1.1
**************
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project = 'cytomulate'
copyright = '2022-2023, cytomulate developers'
author = 'cytomulate developers'
release = '0.1.1'
release = '0.2.0'

extensions = [
"sphinx_rtd_theme",
Expand Down
3 changes: 2 additions & 1 deletion docs/source/documentation/utilities.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ Module: utilities
.. autofunction:: polynomial_function
.. autofunction:: brownian_bridge_function
.. autofunction:: trajectories
.. autofunction:: univariate_noise_model
.. autofunction:: univariate_noise_model
.. autofunction:: estimate_noise_model
4 changes: 2 additions & 2 deletions meta.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{% set name = "cytomulate" %}
{% set version = "0.1.1" %}
{% set version = "0.2.0" %}

package:
name: cytomulate
version: {{ version }}

source:
url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz
sha256: d592c1d525a8ac9aefe2cdabab37fb68fece4c3f0954a76b329c0da86b6324ad
sha256: 499008618b335573641f520bf097db5e7d40899bda499952efdad9c0ae7cb9b4

build:
noarch: python
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import shutil
import distutils.cmd

VERSION = "0.1.1"
VERSION = "0.2.0"

class PypiCommand(distutils.cmd.Command):

Expand Down
20 changes: 18 additions & 2 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
polynomial_function, \
brownian_bridge_function, \
trajectories, \
univariate_noise_model
univariate_noise_model, \
estimate_noise_model


@pytest.mark.parametrize("x, y, smoothing_factor, t, expected", [
Expand Down Expand Up @@ -58,7 +59,7 @@ def test_trajectories(end_values, coefficients, x, y, t, expected):

@pytest.mark.parametrize("kwargs, size, expected", [
({"noise_distribution":"normal", "loc":0, "scale":1}, 5, (5, )),
({"noise_distribution":"normal", "loc":0, "scale":1}, (5, 3), (5, 3)),
({"noise_distribution":"half_normal", "loc":0, "scale":1}, (5, 3), (5, 3)),
({"noise_distribution":"uniform", "low":0, "high":1}, 5, (5, )),
({"noise_distribution":"uniform", "low":0, "high":1}, (5, 3), (5, 3)),
({"noise_distribution":"gamma"}, (5, 3), (5, 3)),
Expand All @@ -69,4 +70,19 @@ def test_univariate_noise_model(kwargs, size, expected):
assert f(size).shape == expected
except ValueError:
assert True


def test_univariate_noise_model_warning():
    """Calling ``univariate_noise_model`` without arguments emits the
    default-change warning with the exact expected message."""
    expected_message = "The default `noise_distribution` is now changed from `normal` to `uniform` as of v0.2.0. Please see the release notes for details."
    with pytest.warns(UserWarning) as caught:
        univariate_noise_model()
    assert str(caught[0].message) == expected_message


@pytest.mark.parametrize("data, noise_distribution, size, expected", [
    (-np.abs(np.random.normal(size=(5,5), loc=0, scale=1)), "half_normal", 5, (5, )),
    (np.random.uniform(size=(5,5), low=-1, high=0), "uniform", 5, (5, )),
])
def test_estimate_noise_model(data, noise_distribution, size, expected):
    """The generator returned by ``estimate_noise_model`` produces samples
    of the requested shape for each parametrized noise distribution."""
    noise_model = estimate_noise_model(data=data,
                                       noise_distribution=noise_distribution)
    samples = noise_model(size)
    assert samples.shape == expected

0 comments on commit 9401f3c

Please sign in to comment.