From f4db0434ac8fb9d9ec93b0ce18d960a7e314875d Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 10:33:37 -0500 Subject: [PATCH 01/21] add api --- docs/source/api.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/source/api.rst diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..e69de29 From f59ac3d040219335d1916e41346741a4dcda1a56 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 10:36:08 -0500 Subject: [PATCH 02/21] add api --- docs/source/api.rst | 5 +++++ docs/source/index.rst | 1 + 2 files changed, 6 insertions(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index e69de29..06486cd 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -0,0 +1,5 @@ +The pychromVAR API reference +============================= + +.. automodule:: pychromvar + :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index b5b58a5..a30d7ff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -17,6 +17,7 @@ For more methdological detials, please refer to the original `paper Date: Tue, 31 Jan 2023 10:40:12 -0500 Subject: [PATCH 03/21] add api --- docs/source/api.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 06486cd..e5ab876 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1,5 +1,11 @@ -The pychromVAR API reference +API ============================= .. automodule:: pychromvar + :members: + +Preprocessing +------------------ + +.. 
automodule:: pychromvar.preprocessing :members: \ No newline at end of file From 896654289d6609580722c3ffc7b4ce1f65da9e38 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 10:53:47 -0500 Subject: [PATCH 04/21] update api --- .readthedocs.yaml | 5 ++++- docs/source/api.rst | 8 ++++++-- docs/source/index.rst | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 5889224..877ead1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,6 +18,7 @@ build: # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py + fail_on_warining: true # If using Sphinx, optionally build your docs in additional formats such as PDF # formats: @@ -26,4 +27,6 @@ sphinx: # Optionally declare the Python requirements required to build your docs python: install: - - requirements: docs/source/requirements.txt + - requirements: docs/source/requirements.txt + - method: pip + path: . diff --git a/docs/source/api.rst b/docs/source/api.rst index e5ab876..6837152 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -7,5 +7,9 @@ API Preprocessing ------------------ -.. automodule:: pychromvar.preprocessing - :members: \ No newline at end of file +.. autosummary:: + :toctree: generated + + get_bg_peaks + add_peak_seq + add_gc_bias \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index a30d7ff..ba8b9b9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,4 +1,4 @@ -pychromVAR +Welcome to pychromVAR's documentation! 
============================================================== pychromVAR is a python package for inferring transcription factor binding variability from From 67c61e58a1ec7575f944c2535fdce38705b8e829 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 10:58:01 -0500 Subject: [PATCH 05/21] update api --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 877ead1..7522b69 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,7 +18,7 @@ build: # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py - fail_on_warining: true + fail_on_warning: true # If using Sphinx, optionally build your docs in additional formats such as PDF # formats: From 3cc9fd061cb17ed7ef66005c38be2e0025967dcc Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:00:59 -0500 Subject: [PATCH 06/21] update api --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7522b69..5e95a90 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" + python: "3.10" # You can also specify other tool versions: # nodejs: "19" # rust: "1.64" From 862ca15fdd71cb44275c8a1db1b2bd625e0f7431 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:02:41 -0500 Subject: [PATCH 07/21] update api --- docs/source/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt index 877c6dd..027a9f4 100644 --- a/docs/source/requirements.txt +++ b/docs/source/requirements.txt @@ -1,5 +1,4 @@ numpydoc nbsphinx ipython -scikit-learn skranger From 56becb0553cf434cc4debc56ef3b9450d84d036f Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:05:52 -0500 Subject: [PATCH 08/21] update api --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/pyproject.toml b/pyproject.toml index cd50706..51169c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "scipy", "mudata", "scanpy", + "scikit-learn", "muon", "biopython", "MOODS-python", From cb43b1127c22cec82cacc5ac4a0216a3410070fe Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:12:38 -0500 Subject: [PATCH 09/21] update api --- pychromvar/preprocessing.py | 5 +++-- pyproject.toml | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index c2fe336..2a64724 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -18,10 +18,11 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): AnnData object with peak counts or MuData object with 'atac' modality. niterations (int, optional): Number of background peaks to sample. Defaults to 50. - n_jobs: + n_jobs: (int): + Number of cpus for compute. If set to -1, all cpus will be used. 
Default: -1 Raises: - TypeError: _description_ + TypeError: "Expected AnnData or MuData object with 'atac' modality" """ if isinstance(data, AnnData): adata = data diff --git a/pyproject.toml b/pyproject.toml index 51169c8..db2aa83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,4 +34,5 @@ dependencies = [ ] [project.urls] -Source = "https://github.com/lzj1769/pychromVAR" \ No newline at end of file +Source = "https://github.com/lzj1769/pychromVAR" +Documentation = "https://pychromvar.readthedocs.io/en/latest/" \ No newline at end of file From 79565b71b73f8f3f20cc6c75dcde139754fa7024 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:15:36 -0500 Subject: [PATCH 10/21] update api --- docs/source/api.rst | 4 +--- pychromvar/preprocessing.py | 15 +++++---------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 6837152..4dc6d49 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -10,6 +10,4 @@ Preprocessing .. autosummary:: :toctree: generated - get_bg_peaks - add_peak_seq - add_gc_bias \ No newline at end of file + get_bg_peaks \ No newline at end of file diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index 2a64724..66f7910 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -11,18 +11,13 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): """ - Find background peaks based on GC bias. + Find background peaks based on GC bias and number of reads per peak. - Args: - data (Union[AnnData, MuData]): - AnnData object with peak counts or MuData object with 'atac' modality. - niterations (int, optional): - Number of background peaks to sample. Defaults to 50. - n_jobs: (int): - Number of cpus for compute. If set to -1, all cpus will be used. Default: -1 + :parameter data (Union[AnnData, MuData]): AnnData object with peak counts or MuData object with 'atac' modality. 
+ :parameter niterations (int, optional): Number of background peaks to sample. Defaults to 50. + :parameter n_jobs: (int): Number of cpus for compute. If set to -1, all cpus will be used. Default: -1 - Raises: - TypeError: "Expected AnnData or MuData object with 'atac' modality" + :return: None """ if isinstance(data, AnnData): adata = data From cbeeeda453920032c9b3e7d9592d9a5a9a9d06da Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:24:38 -0500 Subject: [PATCH 11/21] update api --- pychromvar/preprocessing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index 66f7910..ea1fac5 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -11,12 +11,12 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): """ - Find background peaks based on GC bias and number of reads per peak. - - :parameter data (Union[AnnData, MuData]): AnnData object with peak counts or MuData object with 'atac' modality. - :parameter niterations (int, optional): Number of background peaks to sample. Defaults to 50. - :parameter n_jobs: (int): Number of cpus for compute. If set to -1, all cpus will be used. Default: -1 + Find background peaks based on GC bias and number of reads per peak + :param data: AnnData object with peak counts or MuData object with 'atac' modality + :param niterations: niterations (int, optional): Number of background peaks to sample, defaults to 50 + :param n_jobs: Number of cpus for compute. 
If set to -1, all cpus will be used, defaults to -1 + :raises TypeError: Expected AnnData or MuData object with 'atac' modality :return: None """ if isinstance(data, AnnData): From bff7a76f378b488aec79ea62f606375d20be9d65 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:34:07 -0500 Subject: [PATCH 12/21] update api --- docs/source/index.rst | 2 +- pychromvar/preprocessing.py | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index ba8b9b9..0b9aecd 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,7 @@ with `scanpy `__ and For more methdological detials, please refer to the original `paper `__. .. toctree:: - :caption: mail + :caption: pychromvar :maxdepth: 1 :hidden: diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index ea1fac5..9408333 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -9,16 +9,30 @@ from tqdm import tqdm from pynndescent import NNDescent + def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): + """Find background peaks based on GC bias and number of reads per peak + + Parameters + ---------- + data : Union[AnnData, MuData] + AnnData object with peak counts or MuData object with 'atac' modality + niterations : int, optional + Number of background peaks to sample, by default 50 + n_jobs : int, optional + Number of cpus for compute. If set to -1, all cpus will be used, by default -1 + + Returns + ------- + _type_ + _description_ + + Raises + ------ + TypeError + Expected AnnData or MuData object with 'atac' modality """ - Find background peaks based on GC bias and number of reads per peak - :param data: AnnData object with peak counts or MuData object with 'atac' modality - :param niterations: niterations (int, optional): Number of background peaks to sample, defaults to 50 - :param n_jobs: Number of cpus for compute. 
If set to -1, all cpus will be used, defaults to -1 - :raises TypeError: Expected AnnData or MuData object with 'atac' modality - :return: None - """ if isinstance(data, AnnData): adata = data elif isinstance(data, MuData) and "atac" in data.mod: From f331842b21106e6ecd0f8c838d975ba76e79624f Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:35:43 -0500 Subject: [PATCH 13/21] update api --- pychromvar/preprocessing.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index 9408333..cf4c8f6 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -24,13 +24,8 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): Returns ------- - _type_ - _description_ - Raises - ------ - TypeError - Expected AnnData or MuData object with 'atac' modality + updates `data`. """ if isinstance(data, AnnData): From a0efd50415f2326e21acd723ccadc47243ca6b64 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:44:09 -0500 Subject: [PATCH 14/21] update api --- docs/source/api.rst | 4 +++- pychromvar/preprocessing.py | 38 +++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 4dc6d49..6837152 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -10,4 +10,6 @@ Preprocessing .. autosummary:: :toctree: generated - get_bg_peaks \ No newline at end of file + get_bg_peaks + add_peak_seq + add_gc_bias \ No newline at end of file diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index cf4c8f6..8b71326 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -57,21 +57,26 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): return None - def add_peak_seq(data: Union[AnnData, MuData], genome_file: str, delimiter="-"): """ Add the DNA sequence of each peak to data object. 
The sequences will be used in GC bias estimation and motif binding sites matching. - Args: - data (Union[AnnData, MuData]): - AnnData object with peak counts or MuData object with 'atac' modality. - genome_file (str): - Filename of genome reference - delimiter (str, optional): - Delimiter that separates peaks. Defaults to "-". + Parameters + ---------- + data : Union[AnnData, MuData] + AnnData object with peak counts or MuData object with 'atac' modality. + genome_file : str + Filename of genome reference + delimiter : str, optional + Delimiter that separates peaks, by default "-" + + Returns + ------- + Update `data` """ + if isinstance(data, AnnData): adata = data elif isinstance(data, MuData) and "atac" in data.mod: @@ -89,16 +94,17 @@ def add_peak_seq(data: Union[AnnData, MuData], genome_file: str, delimiter="-"): return None - def add_gc_bias(data: Union[AnnData, MuData]): - """ - Compute GC bias for each peak. + """Compute GC bias for each peak. + + Parameters + ---------- + data : Union[AnnData, MuData] + AnnData object with peak counts or MuData object with 'atac' modality. - Args: - data (Union[AnnData, MuData]): - AnnData object with peak counts or MuData object with 'atac' modality. - Returns: - _type_: _description_ + Returns + ------- + Update data """ if isinstance(data, AnnData): From 3cb6dcbbd7a4155e13389d74eff94a8454f06b3c Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 11:48:16 -0500 Subject: [PATCH 15/21] update doc --- docs/source/index.rst | 16 ++++++++++++++-- docs/source/installation.rst | 12 ------------ 2 files changed, 14 insertions(+), 14 deletions(-) delete mode 100644 docs/source/installation.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 0b9aecd..4357a72 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,12 +11,24 @@ with `scanpy `__ and For more methdological detials, please refer to the original `paper `__. 
+Installation +============ + +**pychromVAR** requires Python version >= 3.8 to run. + +PyPI +---- +**pychromVAR** is also available on PyPI: + +.. code-block:: console + + pip install pychromvar + .. toctree:: :caption: pychromvar :maxdepth: 1 :hidden: - - installation + api .. toctree:: diff --git a/docs/source/installation.rst b/docs/source/installation.rst deleted file mode 100644 index a02d44b..0000000 --- a/docs/source/installation.rst +++ /dev/null @@ -1,12 +0,0 @@ -Installation -============ - -**pychromVAR** requires Python version >= 3 to run. - -PyPI ----- -**pychromVAR** is also available on PyPI: - -.. code-block:: console - - pip install pychromvar \ No newline at end of file From 787c594302aff84242f44720f3bf61d07e749c6f Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 12:18:17 -0500 Subject: [PATCH 16/21] update doc --- pychromvar/preprocessing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index 8b71326..28fe031 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -60,8 +60,7 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): def add_peak_seq(data: Union[AnnData, MuData], genome_file: str, delimiter="-"): """ Add the DNA sequence of each peak to data object. - The sequences will be used in GC bias estimation and motif binding sites matching. 
- + Parameters ---------- data : Union[AnnData, MuData] From 8ea9233602845141b8a41387f4f38d7990a80a79 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 13:25:06 -0500 Subject: [PATCH 17/21] update api --- docs/source/api.rst | 10 ++++++++- pychromvar/match_motif.py | 44 +++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 6837152..b471020 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -12,4 +12,12 @@ Preprocessing get_bg_peaks add_peak_seq - add_gc_bias \ No newline at end of file + add_gc_bias + +Motif match +------------------ + +.. autosummary:: + :toctree: generated + + match_motif \ No newline at end of file diff --git a/pychromvar/match_motif.py b/pychromvar/match_motif.py index bccfca0..95f78b2 100644 --- a/pychromvar/match_motif.py +++ b/pychromvar/match_motif.py @@ -13,28 +13,28 @@ def match_motif(data: Union[AnnData, MuData], motifs, pseudocounts=0.0001, p_value=5e-05, background: _BACKGROUND = "even", genome_file: str = None): - """ - Perform motif matching to predict binding sites using MOODS. - This function wraps - - Args: - data (Union[AnnData, MuData]): - AnnData object with peak counts or MuData object with 'atac' modality. - motifs: - List of motifs - pseudocounts: - Pseudocounts for each nucleotide. Default value is 0.0001 - p_value: - P-value threshold for motif matching. Default: 5e-05 - background: - Background distribution of nucleotides for computing thresholds from p-value. - Three options are available: "subject" to use the subject sequences, "genome" to use the - whole genome (need to provide a genome file), or even using 0.25 for each base. - Default: "subject". - genome_file: - If background is set to genome, a genome file must be provided. Default: None - n_jobs: - Number of cpus used for motif matching. If set to -1, all cpus will be used. 
Default: 1 + """Perform motif matching to predict binding sites using MOODS. + + Parameters + ---------- + data : Union[AnnData, MuData] + AnnData object with peak counts or MuData object with 'atac' modality. + motifs : list + List of motifs + pseudocounts : float, optional + Pseudocounts for each nucleotide, by default 0.0001 + p_value : float, optional + P-value threshold for motif matching, by default 5e-05 + background : _BACKGROUND, optional + Background distribution of nucleotides for computing thresholds from p-value. + Three options are available: "subject" to use the subject sequences, "genome" to use the + whole genome (need to provide a genome file), or even using 0.25 for each base, by default "even" + genome_file : str, optional + If background is set to genome, a genome file must be provided, by default None + + Returns + ------- + Update data. """ if isinstance(data, AnnData): From 7925dd189938324596ea5d856adb632eb35e6a26 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 13:35:13 -0500 Subject: [PATCH 18/21] update api --- docs/source/api.rst | 19 ++++++++++++- pychromvar/compute_deviations.py | 48 +++++++++++++++++++++----------- pychromvar/get_genome.py | 17 ++++++----- 3 files changed, 57 insertions(+), 27 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index b471020..c2297a9 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -20,4 +20,21 @@ Motif match .. autosummary:: :toctree: generated - match_motif \ No newline at end of file + match_motif + +Genome +------------------ + +.. autosummary:: + :toctree: generated + + get_genome + +Compute deviation +------------------ + +.. 
autosummary:: + :toctree: generated + + compute_deviations + compute_expectation \ No newline at end of file diff --git a/pychromvar/compute_deviations.py b/pychromvar/compute_deviations.py index 3a3ba9f..c4629b4 100644 --- a/pychromvar/compute_deviations.py +++ b/pychromvar/compute_deviations.py @@ -12,15 +12,20 @@ datefmt='%Y-%m-%d %H:%M:%S') -def compute_deviations(data: Union[AnnData, MuData], n_jobs=-1): - """ - Compute raw and bias-corrected deviations. - - Args: - data (Union[AnnData, MuData]): - AnnData object with peak counts or MuData object with 'atac' modality. - n_jobs: - Number of cpus used for motif matching. If set to -1, all cpus will be used. Default: -1 +def compute_deviations(data: Union[AnnData, MuData], n_jobs=-1) -> AnnData: + """Compute raw and bias-corrected deviations. + + Parameters + ---------- + data : Union[AnnData, MuData] + AnnData object with peak counts or MuData object with 'atac' modality. + n_jobs : int, optional + Number of cpus used for computing deviations. If set to -1, all cpus will be used. Default: -1. + + Returns + ------- + AnnData + An AnnData object containing estimated deviations. 
""" if isinstance(data, AnnData): @@ -60,7 +65,8 @@ def compute_deviations(data: Union[AnnData, MuData], n_jobs=-1): if n_jobs == 1: for i in range(n_bg_peaks): bg_peak_idx = adata.varm['bg_peaks'][:, i] - bg_motif_match = adata.varm['motif_match'][bg_peak_idx, :].transpose() + bg_motif_match = adata.varm['motif_match'][bg_peak_idx, :].transpose( + ) bg_dev[i, :, :] = _compute_dev((bg_motif_match, adata.X.transpose(), expectation.transpose())).transpose() @@ -69,8 +75,10 @@ def compute_deviations(data: Union[AnnData, MuData], n_jobs=-1): arguments_list = list() for i in range(n_bg_peaks): bg_peak_idx = adata.varm['bg_peaks'][:, i] - bg_motif_match = adata.varm['motif_match'][bg_peak_idx, :].transpose() - arguments = (bg_motif_match, adata.X.transpose(), expectation.transpose()) + bg_motif_match = adata.varm['motif_match'][bg_peak_idx, :].transpose( + ) + arguments = (bg_motif_match, adata.X.transpose(), + expectation.transpose()) arguments_list.append(arguments) # run the function with multiple cpus @@ -105,11 +113,17 @@ def _compute_dev(arguments): def compute_expectation(count: np.array) -> np.array: """ - Compute expetation accessibility per peak and per cell by assuming identical - read probability per peak for each cell with a sequencing depth matched to that cell - observed sequencing depth. - Args: - count (_type_): _description_ + Compute expected accessibility per peak and per cell by assuming identical read probability per peak for each cell with a sequencing depth matched to that cell's observed sequencing depth. + + Parameters + ---------- + count : np.array + Count matrix containing raw accessibility data. 
+ + Returns + ------- + np.array + Expectation matrix """ a = np.sum(count, axis=0, keepdims=True) diff --git a/pychromvar/get_genome.py b/pychromvar/get_genome.py index 6c249bb..192b0eb 100644 --- a/pychromvar/get_genome.py +++ b/pychromvar/get_genome.py @@ -13,17 +13,16 @@ } def get_genome(genome:str="hg38", output_dir:str=None): + """Download genome + + Parameters + ---------- + genome : str, optional + Which genome should be downloaded. Available options are: "hg19", "hg38", "mm10", "mm39". By default "hg38" + output_dir : str, optional + Output directory. Default: current directory. """ - Download genome - Args: - genome (str, optional): - Which genome should be downloaded. Available options are: "hg19", "hg38", "mm9", "mm10". - Defaults to "hg38". - - output_dir (str): - Output directory. Default: current directory. - """ assert genome in ["hg19", "hg38", "mm10", "mm39"], f"Cannot find {genome}!" if not os.path.exists(output_dir): From 7ce2c7bc960571406bbc1d4764c3093a25561506 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 13:39:10 -0500 Subject: [PATCH 19/21] update api --- pychromvar/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pychromvar/__init__.py b/pychromvar/__init__.py index d3eee70..2a438ca 100644 --- a/pychromvar/__init__.py +++ b/pychromvar/__init__.py @@ -2,7 +2,7 @@ __version_info__ = tuple([int(num) for num in __version__.split('.')]) # noqa: F401 from .preprocessing import * -from .match_motif import match_motif -from .compute_deviations import compute_deviations -from .get_genome import get_genome +from .match_motif import * +from .compute_deviations import * +from .get_genome import * From 29a74e378f71e26d4386e7daa88e30e0b31465df Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 13:48:58 -0500 Subject: [PATCH 20/21] update api --- pychromvar/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pychromvar/__init__.py b/pychromvar/__init__.py index 
2a438ca..d63873f 100644 --- a/pychromvar/__init__.py +++ b/pychromvar/__init__.py @@ -1,8 +1,8 @@ __version__ = "0.0.3" __version_info__ = tuple([int(num) for num in __version__.split('.')]) # noqa: F401 -from .preprocessing import * -from .match_motif import * -from .compute_deviations import * -from .get_genome import * +from .preprocessing import get_bg_peaks, add_gc_bias, add_peak_seq +from .match_motif import match_motif +from .compute_deviations import compute_deviations, compute_expectation +from .get_genome import get_genome From b66a2ecac1afc6daf0bc3ce17a5d318ccb102ec5 Mon Sep 17 00:00:00 2001 From: lzj1769 Date: Tue, 31 Jan 2023 13:51:10 -0500 Subject: [PATCH 21/21] update test --- pychromvar/preprocessing.py | 5 +++-- tests/test_compute_deviations.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pychromvar/preprocessing.py b/pychromvar/preprocessing.py index 28fe031..d55138c 100644 --- a/pychromvar/preprocessing.py +++ b/pychromvar/preprocessing.py @@ -57,9 +57,9 @@ def get_bg_peaks(data: Union[AnnData, MuData], niterations=50, n_jobs=-1): return None + def add_peak_seq(data: Union[AnnData, MuData], genome_file: str, delimiter="-"): - """ - Add the DNA sequence of each peak to data object. + """Add the DNA sequence of each peak to data object. Parameters ---------- @@ -93,6 +93,7 @@ def add_peak_seq(data: Union[AnnData, MuData], genome_file: str, delimiter="-"): return None + def add_gc_bias(data: Union[AnnData, MuData]): """Compute GC bias for each peak. diff --git a/tests/test_compute_deviations.py b/tests/test_compute_deviations.py index 13e9b9a..1f0ae38 100644 --- a/tests/test_compute_deviations.py +++ b/tests/test_compute_deviations.py @@ -1,6 +1,6 @@ import numpy as np -from pychromvar.compute_deviations import * +from pychromvar.compute_deviations import compute_expectation def test_compute_expectation(): count = np.array([[1, 0, 1, ], [0, 1, 1]], dtype=np.float32)