From de94fa89c0b4cd02f423fa3075540905aef529be Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sat, 18 Jun 2022 16:30:12 -0500 Subject: [PATCH] Release prep for v0.0.1 --- .gitignore | 3 + LICENSE_associated.txt | 2 +- README.md | 200 +++++++++++++++++++------------- docs/source/change/index.rst | 19 +-- docs/source/change/releases.rst | 27 +++-- docs/source/installation.rst | 19 ++- meta.yaml | 48 ++++++++ setup.py | 98 +++++++++++++++- 8 files changed, 310 insertions(+), 106 deletions(-) create mode 100644 meta.yaml diff --git a/.gitignore b/.gitignore index 8fce74a..c375d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ # Pyton Build Files and Directory *.egg-info +/build +/dist +/dist_conda # CI: Testing and coverage *.coverage diff --git a/LICENSE_associated.txt b/LICENSE_associated.txt index 4d258eb..4bbe596 100644 --- a/LICENSE_associated.txt +++ b/LICENSE_associated.txt @@ -30,7 +30,7 @@ Usage: Project ideation and links to HDCytoData server Project: poetic Link: https://github.com/kevin931/poetic -Usgae: Implementation of DataLoader in data.py +Usage: Implementation of DataLoader in data.py; setup.py build commands. The MIT License (MIT) diff --git a/README.md b/README.md index 3a0ddfa..74d58ce 100644 --- a/README.md +++ b/README.md @@ -5,84 +5,80 @@ This package is an all-in-one CyTOF data analysis package for your experiments. ## Installation -We're currently under development! To install, you can do the following: +You can install ``PyCytoData`` easily from ``pip``: -```shell -git clone https://github.com/kevin931/PyCytoData -cd PyCytoData -python setup.py deveop ``` +pip install PyCytoData +``` + +or from ``conda``: -This approach will allow you to use the package while developing! 
+``` +conda install pycytodata -c kevin931 -c bioconda +``` -### Dependencies +If you wish to use ``CytofDR`` along with PyCytoData, you can optionally install it as well: -We need the following dependencies: +``` +pip install CytofDR +``` -- fcsparser -- pandas -- numpy +For more information on optional dependencies or installation details, look [here](https://pycytodata.readthedocs.io/en/latest/installation.html). ## Install and Load Benchmark Datasets You can load the data easily with the following python snippet: ```python -from PyCytoData import DataLoader +>>> from PyCytoData import DataLoader -exprs = DataLoader.load_dataset(dataset = "levine13") -exprs.expression_matrix # Expression matrix -exprs.cell_types # Cell types -exprs.sample_index # Sample index -exprs.features # The feature/marker names +>>> exprs = DataLoader.load_dataset(dataset = "levine13") +>>> exprs.expression_matrix # Expression matrix +>>> exprs.cell_types # Cell types +>>> exprs.sample_index # Sample index +>>> exprs.features # The feature/marker names ``` The resulting ``exprs`` is a ``PyCytoData`` object, which is easy to use. The expression matrix, cell types (if available), and sample index are directly accessible with attributes, and they are all stored as **numpy.array**. You can also access some metadata of the object with the following attributes: ```python -exprs.n_cells -exprs.n_cell_types -exprs.n_samples -exprs.n_features +>>> exprs.n_cells +>>> exprs.n_cell_types +>>> exprs.n_samples +>>> exprs.n_features ``` All these metadata is automatically set, and there is protection in place for unintended changes. You can also add a sample with the following: ```python -exprs.add_sample(expression_matrix, cell_types, sample_index) # All inputs should be ArrayLike +>>> exprs.add_sample(expression_matrix, cell_types, sample_index) # All inputs should be ArrayLike ``` -**Note**: The data are downloaded from a server instead of being shipped with this package. 
Each dataset only needs to be downloaded once, which is automatically managed. During the first-time download of the data, a command-line confirmation is needed. To override this, you can do the following: - -```python -from PyCytoData import DataLoader - -exprs = DataLoader.load_dataset(dataset = "levine13", force_download = True) -``` +**Note**: The data are downloaded from a server instead of being shipped with this package. Each dataset only needs to be downloaded once, which is automatically managed. During the first-time download of the data, a command-line confirmation is needed. ## Bring Your Own Dataset (BYOD) Yes, you read it right! You can load your own datasets. Currently, we only support reading in plain text files with saved with delimiters. The data need to have cells as rows and features as columns. To do load them in as a ``PyCytoData`` object, you can simply do the following: ```python -from PyCytoData import FileIO +>>> from PyCytoData import FileIO -FileIO.load_delim(files="/path", # Path to file - col_names=True, # Whether the first row is feature (column) names - delim="\t" # Delimiter - ) +>>> FileIO.load_delim(files="/path", # Path to file +... col_names=True, # Whether the first row is feature (column) names +... delim="\t" # Delimiter +... ) ``` If your experiment has multiple samples, you can simply import them together: ```python -from PyCytoData import FileIO +>>> from PyCytoData import FileIO -expression_paths = ["path1", "path2", "path3"] -FileIO.load_delim(files=expression_paths, # Path to file - col_names=True, # Whether the first row is feature (column) names - delim="\t" # Delimiter - ) +>>> expression_paths = ["path1", "path2", "path3"] +>>> FileIO.load_delim(files=expression_paths, # Path to file +... col_names=True, # Whether the first row is feature (column) names +... delim="\t" # Delimiter +... ) ``` In this case, the expression matrices are concatenated automatically without any normalization. 
To access particular samples, you can access the ``sample_index`` of the attribute and use the standard ``numpy`` indexing techniques. @@ -94,44 +90,58 @@ In this case, the expression matrices are concatenated automatically without any Currently, ``levine13``, ``levine32``, and ``samusik`` have all been mostly preprocessed. All you need to do is to perform ``aecsinh`` transformaion. You can simply do this: ```python -from PyCytoData import DataLoader +>>> from PyCytoData import DataLoader -exprs = DataLoader.load_dataset(dataset = "levine13") -exprs.preprocess(arcsinh=True) +>>> exprs = DataLoader.load_dataset(dataset = "levine13") +>>> exprs.preprocess(arcsinh=True) ``` When you perform BYOD, you can have much more flexibility: ```python -from PyCytoData import FileIO - -byod = FileIO.load_delim(files="/path", # Path to file - col_names=True, # Whether the first row is feature (column) names - delim="\t" # Delimiter - ) -byod.lineage_channels = ["CD4", "CD8", "FoxP3", "CD15"] -byod.preprocess(arcsinh=True, - gate_debris_removal=True, - gate_intact_cells=True, - gate_live_cells=True, - gate_center_offset_residual=True, - bead_normalization=True) +>>> from PyCytoData import FileIO + +>>> byod = FileIO.load_delim(files="/path", # Path to file +... col_names=True, # Whether the first row is feature (column) names +... delim="\t" # Delimiter +... ) +>>> byod.lineage_channels = ["CD4", "CD8", "FoxP3", "CD15"] +>>> byod.preprocess(arcsinh=True, +... gate_debris_removal=True, +... gate_intact_cells=True, +... gate_live_cells=True, +... gate_center_offset_residual=True, +... bead_normalization=True) byod.expression_matrix # This is preprocessed ``` As the example shows, we support five unique preprocessing steps! And of course, you can use a subset of these to suit your own needs! By default, we automatically detect the necessary channels, such as "Bead1" or "Center". However, if your dataset is unconventionally named, our auto-detect algorithm may fail. 
Thus, we can perform a manual override: ```python -byod.preprocess(arcsinh=True, - gate_debris_removal=True, - gate_intact_cells=True, - gate_live_cells=True, - gate_center_offset_residual=True, - bead_normalization=True, - bead_channels = ["1bead", "2bead"], - time_channel = ["clock"]) +>>> byod.preprocess(arcsinh=True, +... gate_debris_removal=True, +... gate_intact_cells=True, +... gate_live_cells=True, +... gate_center_offset_residual=True, +... bead_normalization=True, +... bead_channels = ["1bead", "2bead"], +... time_channel = ["clock"]) ``` +## Dimension Reduction + +If you wish to run DR on your dataset, you can easily do so as well if you have ``CytofDR`` installed (assume you have loaded the dataset and preprocessed it accordingly): + +```python +>>> exprs.run_dr_methods(methods = ["PCA", "UMAP", "ICA"]) +Running PCA +Running ICA +Running UMAP +>>> type(exprs.reductions) +<class 'CytofDR.dr.Reductions'> +``` +The ``reductions`` attribute is a ``Reductions`` object from ``CytofDR``. You can perform all downstream DR workflows as usual. + ## Datasets Supported We only support the following datasets as of now. The *Literal* is the string literal used in this package to refer to the datasets whereas the *Dataset Name* is what these datasets are more commonly known for. @@ -146,29 +156,59 @@ More datasets will be added in the future to be fully compatible with HDCytoData ## Documentation -We use ``sphinx`` and ``readthedocs`` for documentation! You will need to install the following packages: +For detailed documentation along with tutorials and API Reference, please visit our [Official Documentation](https://pycytodata.readthedocs.io/en/latest/). This is automatically updated with each update. -- sphinx -- sphinx-rtd-theme -- sphinx-git -- sphinxcontrib-autoprogram -- sphinx-autodoc-typehints +If you prefer to build documentation on your own, refer to [this guide](https://pycytodata.readthedocs.io/en/latest/change/build.html) for more details. 
-We currently don't have an online documentation. You will need to build the docs on your own! More detailed docs coming soon! +## Latest Release: 0.0.1 -## Unit Testing +This is our latest pre-release with the following release notes: -You will need the following packages: +- This is the first official prerelease of the ``PyCytoData`` package. +- We have proper support for the following workflows, including: + - Downloading data + - Using PyCytoData as CyTOF data analysis pipeline + - FileIO + - CyTOF DR Integration +- Releases on PyPI and conda -- pytest -- pytest-cov -- pytest-mock -- coverage +### Known Issue + +There is a potential issue of compatibility with ``CytofDR`` on ``conda``. If a problem occurs, try +using pip instead. ## References -[Levine J.H., Simonds E.F. Bendall S.C., Davis KL, Amir el-A.D., Tadmor M.D., Litvin O., Fienberg H.G., Jager A., Zunder E.R., Finck R., Gedman A.L., Radtke I., Downing J.R., & Pe'er D., Nolan G.P. "Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis." *Cell*. 2015 Jul 2;162(1):184-97. 
doi: 10.1016/j.cell.2015.05.047.](https://pubmed.ncbi.nlm.nih.gov/26095251/) +If you use ``PyCytoData`` to perform DR, citing [our DR Review paper](https://doi.org/10.1101/2022.04.26.489549) is highly appreciated: + +``` +@article {Wang2022.04.26.489549, + author = {Wang, Kaiwen and Yang, Yuqiu and Wu, Fangjiang and Song, Bing and Wang, Xinlei and Wang, Tao}, + title = {Comparative Analysis of Dimension Reduction Methods for Cytometry by Time-of-Flight Data}, + elocation-id = {2022.04.26.489549}, + year = {2022}, + doi = {10.1101/2022.04.26.489549}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2022/06/02/2022.04.26.489549}, + eprint = {https://www.biorxiv.org/content/early/2022/06/02/2022.04.26.489549.full.pdf}, + journal = {bioRxiv} +} +``` + +If you use ``Cytomulate`` with this package, [our paper](https://doi.org/10.1101/2022.06.14.496200) can be cited here: -[Samusik et al. (2016), "Automated mapping of phenotype space with single-cell data", *Nature Methods, 13*(6), 493-496](https://www.ncbi.nlm.nih.gov/pubmed/27183440) +``` +@article {Yang2022.06.14.496200, + author = {Yang, Yuqiu and Wang, Kaiwen and Lu, Zeyu and Wang, Tao and Wang, Xinlei}, + title = {Cytomulate: Accurate and Efficient Simulation of CyTOF data}, + elocation-id = {2022.06.14.496200}, + year = {2022}, + doi = {10.1101/2022.06.14.496200}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2022/06/16/2022.06.14.496200}, + eprint = {https://www.biorxiv.org/content/early/2022/06/16/2022.06.14.496200.full.pdf}, + journal = {bioRxiv} +} +``` -[Weber L.M. and Soneson C. (2019). "HDCytoData: Collection of high-dimensional cytometry benchmark datasets in Bioconductor object formats." 
*F1000Research, 8*:1459, v2.](https://f1000research.com/articles/8-1459) +If you use the builtin datasets, please visit our [Reference Page](https://pycytodata.readthedocs.io/en/latest/references.html) and cite the papers accordingly. diff --git a/docs/source/change/index.rst b/docs/source/change/index.rst index 3e4afb7..5410747 100644 --- a/docs/source/change/index.rst +++ b/docs/source/change/index.rst @@ -11,14 +11,19 @@ Latest Release v0.0.1 ******** - - This is the first official prerelease of the ``PyCytoData`` package. - - We have proper support for the following workflows, including: - - Downloading data - - Using PyCytoData as CyTOF data analysis pipeline - - FileIO - - CyTOF DR Integration - - Releases on PyPI and conda +- This is the first official prerelease of the ``PyCytoData`` package. +- We have proper support for the following workflows, including: + - Downloading data + - Using PyCytoData as CyTOF data analysis pipeline + - FileIO + - CyTOF DR Integration +- Releases on PyPI and conda + +.. warning:: + + There is a potential issue of compatibility with ``CytofDR`` on ``conda``. If a problem occurs, try + using pip instead. .. toctree:: diff --git a/docs/source/change/releases.rst b/docs/source/change/releases.rst index b25fb9a..f766d1c 100644 --- a/docs/source/change/releases.rst +++ b/docs/source/change/releases.rst @@ -1,13 +1,22 @@ -========== +########## Releases -========== +########## +------------------ + +******** v0.0.1 ******** - - This is the first official prerelease of the ``PyCytoData`` package. - - We have proper support for the following workflows, including: - - Downloading data - - Using PyCytoData as CyTOF data analysis pipeline - - FileIO - - CyTOF DR Integration - - Releases on PyPI and conda \ No newline at end of file + +- This is the first official prerelease of the ``PyCytoData`` package. 
+- We have proper support for the following workflows, including: + - Downloading data + - Using PyCytoData as CyTOF data analysis pipeline + - FileIO + - CyTOF DR Integration +- Releases on PyPI and conda + +.. warning:: + + There is a potential issue of compatibility with ``CytofDR`` on ``conda``. If a problem occurs, try + using pip instead. \ No newline at end of file diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 8ab2718..35920a7 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -12,18 +12,29 @@ if you prefer. Just follow the instructions below and you are good to go! Conda *********** -It's a great idea to release your package on Conda! +You can install our package on ``conda``: +.. code-block:: ---------- + conda install pycytodata -c kevin931 -c bioconda + +Our ``conda`` package is published `here <https://anaconda.org/kevin931/pycytodata>`_. + +---------------- *********** PyPI *********** -It's a great idea to release your package on PyPI! +You can also install our package from ``PyPI``: ---------- +.. code-block:: + + pip install PyCytoData + +Our ``PyPI`` package is published `on this page <https://pypi.org/project/PyCytoData/>`_. + +---------------- ************* Dependencies diff --git a/meta.yaml b/meta.yaml new file mode 100644 index 0000000..6c2e4a8 --- /dev/null +++ b/meta.yaml @@ -0,0 +1,48 @@ +{% set name = "PyCytoData" %} +{% set version = "0.0.1" %} + +package: + name: "{{ name|lower }}" + version: "{{ version }}" + +source: + url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz + sha256: 8af8064b1a77e08947d8f12eb5757fdc1c25f88da28941be98d843add465c73a + +build: + noarch: python + number: 0 + script: "{{ PYTHON }} -m pip install . 
--force-reinstall --no-deps -vv " + +requirements: + build: + - python + - setuptools + host: + - python + run: + - python >=3.7 + - numpy >=1.21 + - pandas + - fcsparser + +test: + requires: + - pytest + - pytest-cov + - coverage + - pytest-mock + imports: + - PyCytoData + + +about: + home: https://github.com/kevin931/PyCytoData + license: MIT + license_family: MIT + license_file: LICENSE.txt + summary: An Elegant Data Analysis Tool for CyTOF. + +extra: + recipe-maintainers: + - kevin931 \ No newline at end of file diff --git a/setup.py b/setup.py index 714b628..ab60d33 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,99 @@ import setuptools +import os +import sys +import shutil +import distutils.cmd + +from typing import List + +VERSION = "0.0.1" + +class PypiCommand(distutils.cmd.Command): + + description = "Build and upload for PyPi." + user_options = [] + + def initialize_options(self): + pass + + + def finalize_options(self): + pass + + + def run(self): + shutil.rmtree("dist/") + + wheel_file = "PyCytoData-{}-py3-none-any.whl".format(VERSION) + tar_file = "PyCytoData-{}.tar.gz".format(VERSION) + + os.system("{} setup.py sdist bdist_wheel".format(sys.executable)) + os.system("twine upload dist/{} dist/{}".format(wheel_file, tar_file)) + + +class CondaCommand(distutils.cmd.Command): + + description = "Build and upload for conda." 
+ user_options = [] + + + @staticmethod + def move_assets(origin: str, destination: str, exclude: List[str], new_destination_dir: bool) -> None: + + if origin[-1] != "\\" and origin[-1] != "/": + origin += "/" + + if destination[-1] != "\\" and destination[-1] != "/": + destination += "/" + + if new_destination_dir: + if os.path.isdir(destination): + raise ValueError("Destination directory already exists.") + else: + os.mkdir(destination) + + all_files = os.listdir(origin) + + for files in all_files: + if files in exclude: + pass + else: + origin_path = origin + files + destination_path = destination + files + shutil.move(origin_path, destination_path) + + + def initialize_options(self): + pass + + + def finalize_options(self): + pass + + + def run(self): + self.move_assets("./PyCytoData/data/", "./temp_assets/", [], True) + shutil.rmtree("./PyCytoData/data/") + try: + shutil.rmtree("dist_conda/") + except FileNotFoundError: + pass + os.system("conda build . --output-folder dist_conda/ -c bioconda") + os.system("anaconda upload ./dist_conda/noarch/pycytodata-{}-py_0.tar.bz2".format(VERSION)) + + self.move_assets("./temp_assets/", "./PyCytoData/data/", [], False) + shutil.rmtree("./temp_assets/") -VERSION = "0.0.0" setuptools.setup( name = "PyCytoData", version = VERSION, - description = "A Python Interface to HDCytoData", + description = "An Elegant Data Analysis Tool for CyTOF", + long_description_content_type = "text/markdown", + long_description = open("README.md").read(), packages=["PyCytoData"], - python_requires=">=3.9", - install_requires=["fcsparser", "pandas", "numpy"], + python_requires=">=3.7", + install_requires=["fcsparser", "pandas", "numpy>=1.21"], test_requires=["pytest", "pytest-cov", "pytest-mock", @@ -16,5 +101,8 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", "Natural Language :: English" - ] + ], + cmdclass = {"pypi": PypiCommand, + "conda": CondaCommand + } ) \ No newline at end of file