From 157bdb0ce54d5c98a637be6db69c82f3714a8083 Mon Sep 17 00:00:00 2001 From: Michel Aguena Date: Mon, 7 Oct 2024 13:51:57 +0300 Subject: [PATCH] Issue/401/use qp+fix zinteg (#644) * implement qp * unify z_grid in integration, use qp for interpolation * add quantiles in mock_data and GCData.get_pzpdfs * lower requirement for quantiles integration * rename pzpdf column to pzquantiles when type is quantiles * update examples/demo_mock_cluster.ipynb * mv pylint to after tests in github * add examples/demo_compute_bkg_probability_details.ipynb * Install qp with pip prereqs, add qp to environment.yml * Update version to 1.14.0 --------- Co-authored-by: Hsin Fan <57552401+hsinfan1996@users.noreply.github.com> Co-authored-by: Celine Combet --- .github/workflows/build_check.yml | 8 +- INSTALL.md | 3 +- clmm/__init__.py | 2 +- clmm/gcdata.py | 36 +- clmm/redshift/tools.py | 50 +-- clmm/support/mock_data.py | 32 +- environment.yml | 2 + ...demo_compute_bkg_probability_details.ipynb | 352 ++++++++++++++++++ examples/demo_mock_cluster.ipynb | 263 +++++++++---- requirements.txt | 2 + tests/test_galaxycluster.py | 19 +- tests/test_gcdata.py | 12 + tests/test_mockdata.py | 17 +- 13 files changed, 677 insertions(+), 121 deletions(-) create mode 100644 examples/demo_compute_bkg_probability_details.ipynb diff --git a/.github/workflows/build_check.yml b/.github/workflows/build_check.yml index 424121a06..9f2419f44 100644 --- a/.github/workflows/build_check.yml +++ b/.github/workflows/build_check.yml @@ -39,10 +39,6 @@ jobs: git clone https://github.com/LSSTDESC/CCL cd CCL pip install --no-use-pep517 . - - name: Analysing the code with pylint - run: | - pip install pylint - pylint clmm --ignored-classes=astropy.units - name: Run the unit tests run: | pip install pytest pytest-cov @@ -53,6 +49,10 @@ jobs: # - name: Install Sphinx prereq # run: | # conda install -c conda-forge sphinx sphinx_rtd_theme nbconvert pandoc ipython ipython_genutils + - name: Analysing the code with pylint + run: | + pip install pylint + pylint clmm --ignored-classes=astropy.units - name: Run the docs run: | make -C docs/ html diff --git a/INSTALL.md b/INSTALL.md index 05ca78243..1213a338e 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -56,9 +56,10 @@ Note, you may choose to install some or all of the ccl, numcosmo, and/or cluster Now, you can install CLMM and its dependencies as ```bash - pip install numpy scipy astropy matplotlib + pip install numpy scipy astropy matplotlib healpy pip install pytest sphinx sphinx_rtd_theme pip install jupyter # need to have jupyter notebook tied to this environment, you can then see the environment in jupyter.nersc.gov + pip install git+https://github.com/LSSTDESC/qp.git # Install qp package git clone https://github.com/LSSTDESC/CLMM.git # If you'd like to contribute but don't have edit permissions to the CLMM repo, see below how to fork the repo instead. cd CLMM python setup.py install # build from source diff --git a/clmm/__init__.py b/clmm/__init__.py index a4b97c140..64fee9cf1 100644 --- a/clmm/__init__.py +++ b/clmm/__init__.py @@ -26,4 +26,4 @@ ) from . 
import support -__version__ = "1.13.2" +__version__ = "1.14.0" diff --git a/clmm/gcdata.py b/clmm/gcdata.py index 29f9eb5d5..8ed47f44f 100644 --- a/clmm/gcdata.py +++ b/clmm/gcdata.py @@ -6,6 +6,8 @@ from astropy.table import Table as APtable import numpy as np +import qp + class GCMetaData(OrderedDict): r"""Object to store metadata, it always has a cosmo key with protective changes @@ -65,7 +67,10 @@ def __init__(self, *args, **kwargs): metakwargs = {} if metakwargs is None else metakwargs self.meta = GCMetaData(**metakwargs) # this attribute is set when source galaxies have p(z) - self.pzpdf_info = {"type": None} + self.pzpdf_info = { + "type": None, + "unpack_quantile_zbins_limits": (0, 5, 501), + } def _str_colnames(self): """Colnames in comma separated str""" @@ -83,6 +88,12 @@ def _str_pzpdf_info(self): np.set_printoptions(edgeitems=5, threshold=10) out += " " + str(np.round(self.pzpdf_info.get("zbins"), 2)) np.set_printoptions(**default_cfg) + elif out == "quantiles": + np.set_printoptions(formatter={'float': "{0:g}".format}, edgeitems=3, threshold=6) + out += " " + str(self.pzpdf_info["quantiles"]) + out += " - unpacked with zgrid : " + str( + self.pzpdf_info["unpack_quantile_zbins_limits"] + ) return out def __repr__(self): @@ -227,6 +238,8 @@ def has_pzpdfs(self): return ("zbins" in self.pzpdf_info) and ("pzpdf" in self.columns) if pzpdf_type == "individual_bins": return ("pzbins" in self.columns) and ("pzpdf" in self.columns) + if pzpdf_type == "quantiles": + return ("quantiles" in self.pzpdf_info) and ("pzquantiles" in self.columns) raise NotImplementedError(f"PDF use '{pzpdf_type}' not implemented.") def get_pzpdfs(self): @@ -235,18 +248,35 @@ def get_pzpdfs(self): Returns ------- pzbins : array - zbins of PDF. 1D if `shared_bins`, + zbins of PDF. 1D if `shared_bins` or `quantiles`. zbins of each object in data if `individual_bins`. pzpdfs : array PDF of each object in data + + Notes + ----- + If pzpdf type is quantiles, a pdf will be unpacked on a grid contructed with + `np.linspace(*self.pzpdf_info["unpack_quantile_zbins_limits"])` """ pzpdf_type = self.pzpdf_info["type"] if pzpdf_type is None: raise ValueError("No PDF information stored!") if pzpdf_type == "shared_bins": pzbins = self.pzpdf_info["zbins"] + pzpdf = self["pzpdf"] elif pzpdf_type == "individual_bins": pzbins = self["pzbins"] + pzpdf = self["pzpdf"] + elif pzpdf_type == "quantiles": + pzbins = np.linspace(*self.pzpdf_info["unpack_quantile_zbins_limits"]) + qp_ensemble = qp.Ensemble( + qp.quant, + data={ + "quants": np.array(self.pzpdf_info["quantiles"]), + "locs": self["pzquantiles"], + }, + ) + pzpdf = qp_ensemble.pdf(pzbins) else: raise NotImplementedError(f"PDF use '{pzpdf_type}' not implemented.") - return pzbins, self["pzpdf"] + return pzbins, pzpdf diff --git a/clmm/redshift/tools.py b/clmm/redshift/tools.py index 904d0579a..9be5fe95c 100644 --- a/clmm/redshift/tools.py +++ b/clmm/redshift/tools.py @@ -2,10 +2,11 @@ import warnings import numpy as np from scipy.integrate import simpson -from scipy.interpolate import interp1d +import qp -def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=lambda z: 1.0, ngrid=1000): + +def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=None, ngrid=1000): r""" Integrates the product of a photo-z pdf with a given kernel. This function was created to allow for data with different photo-z binnings. 
@@ -30,30 +31,31 @@ def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=lambda z: 1.0, ngrid= ------- numpy.ndarray Kernel integrated with the pdf of each galaxy. - - Notes - ----- - Will be replaced by qp at some point. """ - # adding these lines to interpolate CLMM redshift grid for each galaxies - # to a constant redshift grid for all galaxies. If there is a constant grid for all galaxies - # these lines are not necessary and z_grid, pz_matrix = pzbins, pzpdf - - if hasattr(pzbins[0], "__len__"): - # First need to interpolate on a fixed grid - z_grid = np.linspace(zmin, zmax, ngrid) - pdf_interp_list = [ - interp1d(pzbin, pdf, bounds_error=False, fill_value=0.0) - for pzbin, pdf in zip(pzbins, pzpdf) - ] - pz_matrix = np.array([pdf_interp(z_grid) for pdf_interp in pdf_interp_list]) - kernel_matrix = kernel(z_grid) + + if len(np.array(pzbins).shape) > 1: + # Each galaxy has a different zbin + _qp_type = qp.interp_irregular else: - # OK perform the integration directly from the pdf binning common to all galaxies - mask = (pzbins >= zmin) * (pzbins <= zmax) - z_grid = pzbins[mask] - pz_matrix = np.array(pzpdf)[:, mask] - kernel_matrix = kernel(z_grid) + _qp_type = qp.interp + + qp_ensemble = qp.Ensemble( + _qp_type, + data={ + "xvals": np.array(pzbins), + "yvals": np.array(pzpdf), + }, + ) + + if kernel is None: + + def kernel(z): + # pylint: disable=unused-argument + return 1.0 + + z_grid = np.linspace(zmin, zmax, ngrid) + pz_matrix = qp_ensemble.pdf(z_grid) + kernel_matrix = kernel(z_grid) return simpson(pz_matrix * kernel_matrix, x=z_grid, axis=1) diff --git a/clmm/support/mock_data.py b/clmm/support/mock_data.py index a63f98ab3..27858b822 100644 --- a/clmm/support/mock_data.py +++ b/clmm/support/mock_data.py @@ -2,6 +2,9 @@ import warnings import numpy as np + +from scipy.special import erfc + from astropy import units as u from astropy.coordinates import SkyCoord @@ -39,6 +42,7 @@ def generate_galaxy_catalog( ngals=None, ngal_density=None, pz_bins=101, + pz_quantiles_conf=(5, 31), pzpdf_type="shared_bins", coordinate_system="euclidean", validate_input=True, @@ -145,12 +149,16 @@ def generate_galaxy_catalog( pz_bins: int, array Photo-z pdf bins in the given range. If int, the limits are set automatically. If is array, must be the bin edges. pz_quantiles_conf: tuple Configuration for quantiles when `pzpdf_type='quantiles'`. 
Must be with the format + `(max_sigma_dev, num_points)`, which is used as + `sigma_steps = np.linspace(-max_sigma_dev, max_sigma_dev, num_points)` pzpdf_type: str, None Type of photo-z pdf to be stored, options are: `None` - does not store PDFs; `'shared_bins'` - single binning for all galaxies `'individual_bins'` - individual binning for each galaxy - `'quantiles'` - quantiles of PDF (not implemented yet) + `'quantiles'` - quantiles of PDF nretry : int, optional The number of times that we re-draw each galaxy with non-sensical derived properties ngals : float, optional @@ -238,6 +246,7 @@ def generate_galaxy_catalog( "mean_e_err": mean_e_err, "photoz_sigma_unscaled": photoz_sigma_unscaled, "pz_bins": pz_bins, + "pz_quantiles_conf": pz_quantiles_conf, "field_size": field_size, "pzpdf_type": pzpdf_type, "coordinate_system": coordinate_system, @@ -370,6 +379,7 @@ def _generate_galaxy_catalog( mean_e_err=None, photoz_sigma_unscaled=None, pz_bins=101, + pz_quantiles_conf=(5, 31), pzpdf_type="shared_bins", coordinate_system="euclidean", field_size=None, @@ -389,7 +399,10 @@ def _generate_galaxy_catalog( if photoz_sigma_unscaled is not None: galaxy_catalog.pzpdf_info["type"] = pzpdf_type galaxy_catalog = _compute_photoz_pdfs( - galaxy_catalog, photoz_sigma_unscaled, pz_bins=pz_bins + galaxy_catalog, + photoz_sigma_unscaled, + pz_bins=pz_bins, + pz_quantiles_conf=pz_quantiles_conf, ) # Draw galaxy positions @@ -462,7 +475,10 @@ def _generate_galaxy_catalog( if all(c is not None for c in (photoz_sigma_unscaled, pzpdf_type)): if galaxy_catalog.pzpdf_info["type"] == "individual_bins": cols += ["pzbins"] - cols += ["pzpdf"] + if galaxy_catalog.pzpdf_info["type"] == "quantiles": + cols += ["pzquantiles"] + else: + cols += ["pzpdf"] if coordinate_system == "celestial": galaxy_catalog["e2"] *= -1 # flip e2 to match the celestial coordinate system @@ -529,7 +545,9 @@ def _draw_source_redshifts(zsrc, zsrc_min, zsrc_max, ngals): return GCData([zsrc_list, zsrc_list], names=("ztrue", "z")) -def _compute_photoz_pdfs(galaxy_catalog, photoz_sigma_unscaled, pz_bins=101): +def _compute_photoz_pdfs( + galaxy_catalog, photoz_sigma_unscaled, pz_bins=101, pz_quantiles_conf=(5, 31) +): """Private function to add photo-z errors and PDFs to the mock catalog. Parameters @@ -582,7 +600,11 @@ def _compute_photoz_pdfs(galaxy_catalog, photoz_sigma_unscaled, pz_bins=101): gaussian(row["pzbins"], row["z"], row["pzsigma"]) for row in galaxy_catalog ] elif galaxy_catalog.pzpdf_info["type"] == "quantiles": - raise NotImplementedError("PDF storing in quantiles not implemented.") + sigma_steps = np.linspace(-pz_quantiles_conf[0], pz_quantiles_conf[0], pz_quantiles_conf[1]) + galaxy_catalog.pzpdf_info["quantiles"] = 0.5 * erfc(-sigma_steps / np.sqrt(2)) + galaxy_catalog["pzquantiles"] = ( + galaxy_catalog["z"][:, None] + sigma_steps * galaxy_catalog["pzsigma"][:, None] + ) else: raise ValueError( "Value of pzpdf_info['type'] " f"(={galaxy_catalog.pzpdf_info['type']}) " "not valid." 
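The quantile branch added above stores, for each galaxy, the redshifts at which its Gaussian photo-z CDF reaches a fixed set of probabilities: sigma_steps = np.linspace(-max_sigma_dev, max_sigma_dev, num_points) is converted to cumulative probabilities through 0.5 * erfc(-sigma_steps / np.sqrt(2)), and the per-galaxy locations are z + sigma_steps * pzsigma. Below is a minimal standalone sketch of that construction together with the qp-based unpacking used by GCData.get_pzpdfs; the galaxy redshifts and sigma values are illustrative only, not taken from this patch.

import numpy as np
from scipy.special import erfc
import qp

# Illustrative photo-z point estimates and widths (not from the patch)
z_gal = np.array([0.6, 0.9, 1.4])
pzsigma = 0.05 * (1 + z_gal)

# Quantile construction, mirroring _compute_photoz_pdfs with the default
# pz_quantiles_conf = (max_sigma_dev, num_points) = (5, 31)
max_sigma_dev, num_points = 5, 31
sigma_steps = np.linspace(-max_sigma_dev, max_sigma_dev, num_points)
quantiles = 0.5 * erfc(-sigma_steps / np.sqrt(2))              # stored in pzpdf_info["quantiles"]
pzquantiles = z_gal[:, None] + sigma_steps * pzsigma[:, None]  # stored in the "pzquantiles" column

# Unpacking, mirroring GCData.get_pzpdfs: rebuild each PDF on a regular grid with qp,
# using the default unpack_quantile_zbins_limits = (0, 5, 501)
zgrid = np.linspace(0, 5, 501)
ensemble = qp.Ensemble(qp.quant, data={"quants": quantiles, "locs": pzquantiles})
pdfs = ensemble.pdf(zgrid)  # shape: (n_galaxies, len(zgrid))
print(pdfs.shape)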
diff --git a/environment.yml b/environment.yml index 86a876f6b..6b988d7bb 100644 --- a/environment.yml +++ b/environment.yml @@ -18,3 +18,5 @@ dependencies: - numcosmo==0.18.2 - pytest=7.4.0 - sphinx==7.2.4 + - pip: + - qp-prob @ git+https://github.com/LSSTDESC/qp.git diff --git a/examples/demo_compute_bkg_probability_details.ipynb b/examples/demo_compute_bkg_probability_details.ipynb new file mode 100644 index 000000000..b2a4c199d --- /dev/null +++ b/examples/demo_compute_bkg_probability_details.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "01425612", + "metadata": {}, + "source": [ + "# Test computation of background probablitily\n", + "\n", + "This notbook compares the computation of the source background probability, used for stacking clusters\n", + "(look at `demo_compute_deltasigma_weights.ipynb` for more details)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fe558e3", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import sys\n", + "import os\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from astropy.table import Table\n", + "import scipy\n", + "\n", + "import clmm\n", + "from clmm import Cosmology\n", + "from clmm import GalaxyCluster\n", + "from clmm.dataops import compute_galaxy_weights\n", + "from clmm.support import mock_data as mock\n", + "\n", + "clmm.__version__" + ] + }, + { + "cell_type": "markdown", + "id": "ec66ea96", + "metadata": {}, + "source": [ + "- Creating the same mock catalog with three different ways of storing the photo-z PDF information (see `demo_mock_cluster.ipynb` for details):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2570ad7b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "cosmo = Cosmology(H0=71.0, Omega_dm0=0.265 - 0.0448, Omega_b0=0.0448, Omega_k0=0.0)\n", + "cluster_z = 0.4\n", + "args = (\n", + " 1e14, # cluster_mass\n", + " cluster_z,\n", + " 4, # concentration\n", + " cosmo,\n", + ")\n", + "kwargs = dict(\n", + " zsrc=\"chang13\",\n", + " delta_so=200,\n", + " massdef=\"critical\",\n", + " halo_profile_model=\"nfw\",\n", + " zsrc_min=0.0,\n", + " zsrc_max=3.0,\n", + " field_size=10.0,\n", + " shapenoise=0.5,\n", + " photoz_sigma_unscaled=0.05,\n", + " mean_e_err=0.1,\n", + " ngals=10000,\n", + ")\n", + "\n", + "np.random.seed(41363)\n", + "noisy_data_z_sb = mock.generate_galaxy_catalog(*args, **kwargs, pzpdf_type=\"shared_bins\")\n", + "np.random.seed(41363)\n", + "noisy_data_z_ib = mock.generate_galaxy_catalog(*args, **kwargs, pzpdf_type=\"individual_bins\")\n", + "np.random.seed(41363)\n", + "noisy_data_z_qt = mock.generate_galaxy_catalog(*args, **kwargs, pzpdf_type=\"quantiles\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f5cb556", + "metadata": {}, + "outputs": [], + "source": [ + "label = \"shared bins\"\n", + "for i, data in enumerate(noisy_data_z_sb[:5]):\n", + " plt.plot(noisy_data_z_sb.pzpdf_info[\"zbins\"], data[\"pzpdf\"], lw=0.5, color=f\"C{i}\", label=label)\n", + " label = None\n", + "\n", + "label = \"individual bins\"\n", + "for i, data in enumerate(noisy_data_z_ib[:5]):\n", + " plt.plot(data[\"pzbins\"], data[\"pzpdf\"], lw=0.9, color=f\"C{i}\", ls=\"--\", label=label)\n", + " label = None\n", + "\n", + "\n", + "pzbins, pzpdfs = noisy_data_z_qt.get_pzpdfs()\n", + "label = \"quantiles\"\n", + "for i, data in enumerate(pzpdfs[:5]):\n", + " plt.plot(pzbins, data, lw=3, color=f\"C{i}\", ls=\":\", 
label=label)\n", + " label = None\n", + "plt.xlim(0.4, 2.1)\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4bf78d6c", + "metadata": {}, + "source": [ + "## Test P(z) integrals\n", + "\n", + "Probability to be in the background of the cluster i.e. to be higher than a given threshold is given by:\n", + "$$ \n", + "P(z > z_l) = \\int_{z_l}^{+\\infty} dz\\ p(z) \n", + "$$\n", + ",\n", + "\n", + "where `z_l` is the redshift of the lens.\n", + "\n", + "Below we will compare the performance of this computation with `clmm` and with `qp` considering the different types of photo-z PDF data." + ] + }, + { + "cell_type": "markdown", + "id": "f87dd517", + "metadata": {}, + "source": [ + "### From CLMM\n", + "\n", + "In `clmm`, the integration of the PDF is made with the `_integ_pzfuncs` function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b5638e0", + "metadata": {}, + "outputs": [], + "source": [ + "from clmm.dataops import _integ_pzfuncs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed6bb988", + "metadata": {}, + "outputs": [], + "source": [ + "integrals = {\n", + " \"clmm_shared\": _integ_pzfuncs(*noisy_data_z_sb.get_pzpdfs()[::-1], cluster_z),\n", + " \"clmm_individual\": _integ_pzfuncs(*noisy_data_z_ib.get_pzpdfs()[::-1], cluster_z),\n", + " \"clmm_quantiles\": _integ_pzfuncs(*noisy_data_z_qt.get_pzpdfs()[::-1], cluster_z),\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "8c26067b", + "metadata": {}, + "source": [ + "### From `qp`\n", + "\n", + "In `qp`, this integral can be done using the cdf of a data ensemble.\n", + "Below we present how to make the corresponding ensemble for each type of PDF data stored:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c5b029f", + "metadata": {}, + "outputs": [], + "source": [ + "import qp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce8568e3", + "metadata": {}, + "outputs": [], + "source": [ + "qp_dat = qp.Ensemble(\n", + " qp.interp,\n", + " data={\"xvals\": noisy_data_z_sb.pzpdf_info[\"zbins\"], \"yvals\": noisy_data_z_sb[\"pzpdf\"]},\n", + ")\n", + "integrals[\"qp_shared\"] = 1 - qp_dat.cdf(cluster_z)[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac25248b", + "metadata": {}, + "outputs": [], + "source": [ + "qp_dat2 = qp.Ensemble(\n", + " qp.interp_irregular,\n", + " data={\"xvals\": noisy_data_z_ib[\"pzbins\"], \"yvals\": noisy_data_z_ib[\"pzpdf\"]},\n", + ")\n", + "integrals[\"qp_individual\"] = 1 - qp_dat2.cdf(cluster_z)[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa130397", + "metadata": {}, + "outputs": [], + "source": [ + "qp_dat3 = qp.Ensemble(\n", + " qp.quant,\n", + " data={\n", + " \"locs\": noisy_data_z_qt[\"pzquantiles\"],\n", + " \"quants\": noisy_data_z_qt.pzpdf_info[\"quantiles\"],\n", + " },\n", + ")\n", + "integrals[\"qp_quantiles\"] = 1 - qp_dat3.cdf(cluster_z)[:, 0]" + ] + }, + { + "cell_type": "markdown", + "id": "b345fcf7", + "metadata": {}, + "source": [ + "### True Values" + ] + }, + { + "cell_type": "markdown", + "id": "30906527", + "metadata": {}, + "source": [ + "Comparison of these integrals with the analytical form.\n", + "For a gaussian distribution, the integral can be computed with the error function:\n", + "\n", + "$$\n", + "P(z > z_l) = \n", + "\\frac{1}{\\sqrt{2\\pi\\sigma_z^2}}\\int_{z_{l}}^{+\\infty} dz\\ e^{-\\frac{(z-z_{gal})^2}{2\\sigma_z^2}} =\n", + "\\frac{1}{2} {\\rm 
erfc}\\left(\\frac{z_{l}-z_{gal}}{\\sqrt{2}\\sigma_z}\\right)\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a39d2671", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.special import erfc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70451cfa", + "metadata": {}, + "outputs": [], + "source": [ + "true_integ = 0.5 * erfc(\n", + " (cluster_z - noisy_data_z_sb[\"z\"]) / (0.05 * (1 + noisy_data_z_sb[\"z\"]) * np.sqrt(2))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "87a930b9", + "metadata": {}, + "source": [ + "### Difference\n", + "\n", + "Show relative difference to the analytical form:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3ffd320", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import binned_statistic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27515ca8", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(2, sharex=True, sharey=True, figsize=(7, 5))\n", + "bins = np.linspace(0, 1, 21)\n", + "for comp_case, ax in zip((\"clmm\", \"qp\"), axes):\n", + " for i, pdf_case in enumerate((\"shared\", \"individual\", \"quantiles\")):\n", + " dx = (i - 1) * 0.01\n", + " integ = integrals[f\"{comp_case}_{pdf_case}\"]\n", + " ax.errorbar(\n", + " 0.5 * (bins[1:] + bins[:-1]) + dx,\n", + " binned_statistic(integ, (integ / true_integ - 1) * 100, bins=bins)[0],\n", + " binned_statistic(integ, (integ / true_integ - 1) * 100, bins=bins, statistic=\"std\")[0],\n", + " label=pdf_case,\n", + " lw=0.7,\n", + " )\n", + " ax.axhline(0, c=\"0\", ls=\"--\", lw=0.5)\n", + " ax.minorticks_on()\n", + " ax.grid()\n", + " ax.grid(which=\"minor\", lw=0.3)\n", + " ax.set_ylim(-20, 30)\n", + " ax.set_title(f\"{comp_case} integral\")\n", + "axes[0].legend()\n", + "axes[1].set_xlabel(\"True integral\")\n", + "for ax in axes:\n", + " ax.set_ylabel(\"rel. 
diff [%]\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/demo_mock_cluster.ipynb b/examples/demo_mock_cluster.ipynb index 02249d958..6b5a0af56 100644 --- a/examples/demo_mock_cluster.ipynb +++ b/examples/demo_mock_cluster.ipynb @@ -39,7 +39,7 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", - "%matplotlib inline\n" + "%matplotlib inline" ] }, { @@ -67,6 +67,22 @@ "clmm.__version__" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# to limit decimal places in catalog columns display\n", + "def _nice_display(cat):\n", + " for col in (\"ra\", \"dec\"):\n", + " cat[col].info.format = \".2f\"\n", + " for col in (\"e1\", \"e2\"):\n", + " cat[col].info.format = \".4f\"\n", + " cat[\"z\"].info.format = \".3f\"\n", + " return cat" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -280,16 +296,8 @@ " photoz_sigma_unscaled=0.05,\n", " pz_bins=101,\n", " ngals=ngals\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "noisy_data_photoz[:5]" + ")\n", + "_nice_display(noisy_data_photoz[:5])" ] }, { @@ -317,23 +325,105 @@ " photoz_sigma_unscaled=0.05,\n", " pz_bins=101,\n", " ngals=ngals\n", + ")\n", + "_nice_display(noisy_data_photoz_celestial[:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Clean data: source galaxy redshifts drawn from a redshift distribution instead of fixed `src_z` value. Options are `chang13` for Chang et al. 2013 or `desc_srd` for the distribution given in the DESC Science Requirement Document. No shape noise or photoz errors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ideal_with_src_dist = mock.generate_galaxy_catalog(\n", + " **cluster_kwargs, zsrc=\"chang13\", zsrc_min=zsrc_min, zsrc_max=7.0, ngals=ngals\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Noisy data: galaxies following redshift distribution, redshift error, shape noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allsystematics = mock.generate_galaxy_catalog(\n", + " **cluster_kwargs,\n", + " zsrc=\"chang13\",\n", + " zsrc_min=zsrc_min,\n", + " photoz_sigma_unscaled=0.05,\n", + " ngals=ngals,\n", + " pz_bins=101,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allsystematics2 = mock.generate_galaxy_catalog(\n", + " **cluster_kwargs,\n", + " zsrc=\"desc_srd\",\n", + " zsrc_min=zsrc_min,\n", + " zsrc_max=7.0,\n", + " photoz_sigma_unscaled=0.05,\n", + " ngals=ngals,\n", + " pz_bins=101,\n", + " shapenoise=0.05,\n", ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sanity check: checking that no galaxies were originally drawn below zsrc_min, before photoz errors are applied (when relevant)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "noisy_data_photoz_celestial[:5]" + "print(\n", + " f\"\"\"Number of galaxies below zsrc_min:\n", + " ideal_data : {np.sum(ideal_data['ztrue']=4, !=5.0 matplotlib numpy>=1.17 scipy>=1.3 +healpy +qp-prob @ git+https://github.com/LSSTDESC/qp.git diff --git a/tests/test_galaxycluster.py b/tests/test_galaxycluster.py index 565d0fffe..874f2453d 100644 --- a/tests/test_galaxycluster.py +++ b/tests/test_galaxycluster.py @@ -3,11 +3,15 @@ """ import os + import numpy as np from numpy.testing import assert_raises, assert_equal, assert_allclose, assert_warns + +from scipy.stats import multivariate_normal +from scipy.special import erfc + import clmm from clmm import GCData -from scipy.stats import multivariate_normal TOLERANCE = {"rtol": 1.0e-7, "atol": 1.0e-7} @@ -313,7 +317,7 @@ def test_integrity_of_probfuncs(): expected = np.array([1.0, 1.0, 1.0]) assert_allclose(cluster.galcat["p_bkg_true"], expected, **TOLERANCE) - # photoz + deltasigma + # photoz assert_raises(TypeError, cluster.compute_background_probability, use_photoz=True) pzbins = np.linspace(0.0001, 5, 1000) cluster.galcat.pzpdf_info["zbins"] = pzbins @@ -324,6 +328,17 @@ def test_integrity_of_probfuncs(): cluster.compute_background_probability(use_pdz=True, p_background_name="p_bkg_pz") assert_allclose(cluster.galcat["p_bkg_pz"], expected, **TOLERANCE) + # quantiles photoz + del cluster.galcat["pzpdf"] + cluster.galcat.pzpdf_info["type"] = "quantiles" + sigma_steps = np.linspace(-5, 5, 31) + cluster.galcat.pzpdf_info["quantiles"] = 0.5 * erfc(-sigma_steps / np.sqrt(2)) + cluster.galcat["pzquantiles"] = cluster.galcat["z"][:, None] + sigma_steps * 0.01 * ( + 1 + cluster.galcat["z"][:, None] + ) + cluster.compute_background_probability(use_pdz=True, p_background_name="p_bkg_pz") + assert_allclose(cluster.galcat["p_bkg_pz"], expected, rtol=2e-3) + def test_integrity_of_weightfuncs(): """test integrity of weight funcs""" diff --git a/tests/test_gcdata.py b/tests/test_gcdata.py index f197b29ae..dbe2a0a84 100644 --- a/tests/test_gcdata.py +++ b/tests/test_gcdata.py @@ -143,6 +143,18 @@ def test_pzfuncs(): assert 
isinstance(gcdata.__repr__(), str) assert isinstance(gcdata._repr_html_(), str) + # quantiles + gcdata = GCData() + gcdata.pzpdf_info["type"] = "quantiles" + assert not gcdata.has_pzpdfs() + gcdata.pzpdf_info["quantiles"] = (0.16, 0.5, 0.84) + gcdata["pzquantiles"] = [[0.1, 0.2, 0.3]] * ngals + assert gcdata.has_pzpdfs() + assert isinstance(gcdata._str_pzpdf_info(), str) + assert isinstance(gcdata.__str__(), str) + assert isinstance(gcdata.__repr__(), str) + assert isinstance(gcdata._repr_html_(), str) + # not implemented gcdata = GCData() gcdata.pzpdf_info["type"] = "other" diff --git a/tests/test_mockdata.py b/tests/test_mockdata.py index 4b2943d6a..edb202410 100644 --- a/tests/test_mockdata.py +++ b/tests/test_mockdata.py @@ -64,18 +64,6 @@ def test_mock_data(): photoz_sigma_unscaled=0.1, pzpdf_type="xxx", ) - assert_raises( - NotImplementedError, - mock.generate_galaxy_catalog, - 1e15, - 0.3, - 4, - cosmo, - 0.8, - ngals=100, - photoz_sigma_unscaled=0.1, - pzpdf_type="quantiles", - ) # Test pdz with bad arguments assert_raises( @@ -108,6 +96,7 @@ def test_mock_data(): {"pzpdf_type": None}, {"pzpdf_type": "individual_bins"}, {"pzpdf_type": "shared_bins"}, + {"pzpdf_type": "quantiles"}, {}, ): for shapenoise in (None, 0.5): @@ -127,9 +116,13 @@ def test_mock_data(): ) if kwargs.get("pzpdf_type", "shared_bins") == "shared_bins": assert "zbins" in cat.pzpdf_info + assert "pzpdf" in cat.colnames elif kwargs.get("pzpdf_type", "shared_bins") == "individual_bins": assert "pzbins" in cat.colnames assert "pzpdf" in cat.colnames + elif kwargs.get("pzpdf_type", "shared_bins") == "quantiles": + assert "quantiles" in cat.pzpdf_info + assert "pzquantiles" in cat.colnames elif kwargs.get("pzpdf_type", "shared_bins") is None: assert "zbins" not in cat.pzpdf_info assert "pzbins" not in cat.colnames
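The tests above exercise the quantiles path end to end: mock generation fills pzpdf_info["quantiles"] plus a "pzquantiles" column, GCData.get_pzpdfs rebuilds the PDFs through qp, and the background-probability integral runs through the qp-backed _integ_pzfuncs. A short usage sketch of that workflow, assuming the patched clmm (1.14.0) and qp are installed; the arguments mirror examples/demo_compute_bkg_probability_details.ipynb, with a smaller ngals chosen here only to keep the example quick.

import numpy as np
from clmm import Cosmology
from clmm.dataops import _integ_pzfuncs
from clmm.support import mock_data as mock

# Mock catalog that stores photo-z PDFs as quantiles
cosmo = Cosmology(H0=71.0, Omega_dm0=0.265 - 0.0448, Omega_b0=0.0448, Omega_k0=0.0)
cluster_z = 0.4
np.random.seed(41363)
cat = mock.generate_galaxy_catalog(
    1e14,  # cluster mass
    cluster_z,
    4,  # concentration
    cosmo,
    zsrc="chang13",
    delta_so=200,
    massdef="critical",
    halo_profile_model="nfw",
    zsrc_min=0.0,
    zsrc_max=3.0,
    field_size=10.0,
    photoz_sigma_unscaled=0.05,
    pzpdf_type="quantiles",
    ngals=1000,
)

# Quantile metadata lives in pzpdf_info; per-galaxy locations in the "pzquantiles" column
print(cat.pzpdf_info["quantiles"])
print(np.shape(cat["pzquantiles"]))

# get_pzpdfs() rebuilds the PDFs on a regular grid via qp; _integ_pzfuncs then returns
# the background probability P(z > cluster_z) for each galaxy
pzbins, pzpdfs = cat.get_pzpdfs()
p_bkg = _integ_pzfuncs(pzpdfs, pzbins, cluster_z)
print(p_bkg[:5])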