Skip to content

Commit

Permalink
Issue/401/use qp+fix zinteg (#644)
Browse files Browse the repository at this point in the history
* implement qp

* unify z_grid in integration, use qp for interpolation

* add quantiles in mock_data and GCData.get_pzpdfs

* lower requirement for quantiles integration

* rename pzpdf column to pzquantiles when type is quantiles

* update examples/demo_mock_cluster.ipynb

* mv pylint to after tests in github

* add examples/demo_compute_bkg_probability_details.ipynb

* Install qp with pip prereqs, add qp to environment.yml

* Update version to 1.14.0

---------

Co-authored-by: Hsin Fan <[email protected]>
Co-authored-by: Celine Combet <[email protected]>
  • Loading branch information
3 people authored Oct 7, 2024
1 parent 7d90ee1 commit 157bdb0
Show file tree
Hide file tree
Showing 13 changed files with 677 additions and 121 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ jobs:
git clone https://github.com/LSSTDESC/CCL
cd CCL
pip install --no-use-pep517 .
- name: Analysing the code with pylint
run: |
pip install pylint
pylint clmm --ignored-classes=astropy.units
- name: Run the unit tests
run: |
pip install pytest pytest-cov
Expand All @@ -53,6 +49,10 @@ jobs:
# - name: Install Sphinx prereq
# run: |
# conda install -c conda-forge sphinx sphinx_rtd_theme nbconvert pandoc ipython ipython_genutils
- name: Analysing the code with pylint
run: |
pip install pylint
pylint clmm --ignored-classes=astropy.units
- name: Run the docs
run: |
make -C docs/ html
Expand Down
3 changes: 2 additions & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ Note, you may choose to install some or all of the ccl, numcosmo, and/or cluster
Now, you can install CLMM and its dependencies as

```bash
pip install numpy scipy astropy matplotlib
pip install numpy scipy astropy matplotlib healpy
pip install pytest sphinx sphinx_rtd_theme
pip install jupyter # need to have jupyter notebook tied to this environment, you can then see the environment in jupyter.nersc.gov
pip install git+https://github.com/LSSTDESC/qp.git # Install qp package
git clone https://github.com/LSSTDESC/CLMM.git # If you'd like to contribute but don't have edit permissions to the CLMM repo, see below how to fork the repo instead.
cd CLMM
python setup.py install # build from source
Expand Down
2 changes: 1 addition & 1 deletion clmm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@
)
from . import support

__version__ = "1.13.2"
__version__ = "1.14.0"
36 changes: 33 additions & 3 deletions clmm/gcdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from astropy.table import Table as APtable
import numpy as np

import qp


class GCMetaData(OrderedDict):
r"""Object to store metadata, it always has a cosmo key with protective changes
Expand Down Expand Up @@ -65,7 +67,10 @@ def __init__(self, *args, **kwargs):
metakwargs = {} if metakwargs is None else metakwargs
self.meta = GCMetaData(**metakwargs)
# this attribute is set when source galaxies have p(z)
self.pzpdf_info = {"type": None}
self.pzpdf_info = {
"type": None,
"unpack_quantile_zbins_limits": (0, 5, 501),
}

def _str_colnames(self):
"""Colnames in comma separated str"""
Expand All @@ -83,6 +88,12 @@ def _str_pzpdf_info(self):
np.set_printoptions(edgeitems=5, threshold=10)
out += " " + str(np.round(self.pzpdf_info.get("zbins"), 2))
np.set_printoptions(**default_cfg)
elif out == "quantiles":
np.set_printoptions(formatter={'float': "{0:g}".format}, edgeitems=3, threshold=6)
out += " " + str(self.pzpdf_info["quantiles"])
out += " - unpacked with zgrid : " + str(
self.pzpdf_info["unpack_quantile_zbins_limits"]
)
return out

def __repr__(self):
Expand Down Expand Up @@ -227,6 +238,8 @@ def has_pzpdfs(self):
return ("zbins" in self.pzpdf_info) and ("pzpdf" in self.columns)
if pzpdf_type == "individual_bins":
return ("pzbins" in self.columns) and ("pzpdf" in self.columns)
if pzpdf_type == "quantiles":
return ("quantiles" in self.pzpdf_info) and ("pzquantiles" in self.columns)
raise NotImplementedError(f"PDF use '{pzpdf_type}' not implemented.")

def get_pzpdfs(self):
Expand All @@ -235,18 +248,35 @@ def get_pzpdfs(self):
Returns
-------
pzbins : array
zbins of PDF. 1D if `shared_bins`,
zbins of PDF. 1D if `shared_bins` or `quantiles`.
zbins of each object in data if `individual_bins`.
pzpdfs : array
PDF of each object in data
Notes
-----
If pzpdf type is quantiles, a pdf will be unpacked on a grid contructed with
`np.linspace(*self.pzpdf_info["unpack_quantile_zbins_limits"])`
"""
pzpdf_type = self.pzpdf_info["type"]
if pzpdf_type is None:
raise ValueError("No PDF information stored!")
if pzpdf_type == "shared_bins":
pzbins = self.pzpdf_info["zbins"]
pzpdf = self["pzpdf"]
elif pzpdf_type == "individual_bins":
pzbins = self["pzbins"]
pzpdf = self["pzpdf"]
elif pzpdf_type == "quantiles":
pzbins = np.linspace(*self.pzpdf_info["unpack_quantile_zbins_limits"])
qp_ensemble = qp.Ensemble(
qp.quant,
data={
"quants": np.array(self.pzpdf_info["quantiles"]),
"locs": self["pzquantiles"],
},
)
pzpdf = qp_ensemble.pdf(pzbins)
else:
raise NotImplementedError(f"PDF use '{pzpdf_type}' not implemented.")
return pzbins, self["pzpdf"]
return pzbins, pzpdf
50 changes: 26 additions & 24 deletions clmm/redshift/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import warnings
import numpy as np
from scipy.integrate import simpson
from scipy.interpolate import interp1d

import qp

def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=lambda z: 1.0, ngrid=1000):

def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=None, ngrid=1000):
r"""
Integrates the product of a photo-z pdf with a given kernel.
This function was created to allow for data with different photo-z binnings.
Expand All @@ -30,30 +31,31 @@ def _integ_pzfuncs(pzpdf, pzbins, zmin=0.0, zmax=5, kernel=lambda z: 1.0, ngrid=
-------
numpy.ndarray
Kernel integrated with the pdf of each galaxy.
Notes
-----
Will be replaced by qp at some point.
"""
# adding these lines to interpolate CLMM redshift grid for each galaxies
# to a constant redshift grid for all galaxies. If there is a constant grid for all galaxies
# these lines are not necessary and z_grid, pz_matrix = pzbins, pzpdf

if hasattr(pzbins[0], "__len__"):
# First need to interpolate on a fixed grid
z_grid = np.linspace(zmin, zmax, ngrid)
pdf_interp_list = [
interp1d(pzbin, pdf, bounds_error=False, fill_value=0.0)
for pzbin, pdf in zip(pzbins, pzpdf)
]
pz_matrix = np.array([pdf_interp(z_grid) for pdf_interp in pdf_interp_list])
kernel_matrix = kernel(z_grid)

if len(np.array(pzbins).shape) > 1:
# Each galaxy as a diiferent zbin
_qp_type = qp.interp_irregular
else:
# OK perform the integration directly from the pdf binning common to all galaxies
mask = (pzbins >= zmin) * (pzbins <= zmax)
z_grid = pzbins[mask]
pz_matrix = np.array(pzpdf)[:, mask]
kernel_matrix = kernel(z_grid)
_qp_type = qp.interp

qp_ensemble = qp.Ensemble(
_qp_type,
data={
"xvals": np.array(pzbins),
"yvals": np.array(pzpdf),
},
)

if kernel is None:

def kernel(z):
# pylint: disable=unused-argument
return 1.0

z_grid = np.linspace(zmin, zmax, ngrid)
pz_matrix = qp_ensemble.pdf(z_grid)
kernel_matrix = kernel(z_grid)

return simpson(pz_matrix * kernel_matrix, x=z_grid, axis=1)

Expand Down
32 changes: 27 additions & 5 deletions clmm/support/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import warnings
import numpy as np

from scipy.special import erfc

from astropy import units as u
from astropy.coordinates import SkyCoord

Expand Down Expand Up @@ -39,6 +42,7 @@ def generate_galaxy_catalog(
ngals=None,
ngal_density=None,
pz_bins=101,
pz_quantiles_conf=(5, 31),
pzpdf_type="shared_bins",
coordinate_system="euclidean",
validate_input=True,
Expand Down Expand Up @@ -145,12 +149,16 @@ def generate_galaxy_catalog(
pz_bins: int, array
Photo-z pdf bins in the given range. If int, the limits are set automatically.
If is array, must be the bin edges.
pz_quantiles_conf: tuple
Configuration for quantiles when `pzpdf_type='quantiles'`. Must be with the format
`(max_sigma_dev, num_points)`, which is used as
`sigma_steps = np.linspace(-max_sigma_dev, max_sigma_dev, num_points)`
pzpdf_type: str, None
Type of photo-z pdf to be stored, options are:
`None` - does not store PDFs;
`'shared_bins'` - single binning for all galaxies
`'individual_bins'` - individual binning for each galaxy
`'quantiles'` - quantiles of PDF (not implemented yet)
`'quantiles'` - quantiles of PDF
nretry : int, optional
The number of times that we re-draw each galaxy with non-sensical derived properties
ngals : float, optional
Expand Down Expand Up @@ -238,6 +246,7 @@ def generate_galaxy_catalog(
"mean_e_err": mean_e_err,
"photoz_sigma_unscaled": photoz_sigma_unscaled,
"pz_bins": pz_bins,
"pz_quantiles_conf": pz_quantiles_conf,
"field_size": field_size,
"pzpdf_type": pzpdf_type,
"coordinate_system": coordinate_system,
Expand Down Expand Up @@ -370,6 +379,7 @@ def _generate_galaxy_catalog(
mean_e_err=None,
photoz_sigma_unscaled=None,
pz_bins=101,
pz_quantiles_conf=(5, 31),
pzpdf_type="shared_bins",
coordinate_system="euclidean",
field_size=None,
Expand All @@ -389,7 +399,10 @@ def _generate_galaxy_catalog(
if photoz_sigma_unscaled is not None:
galaxy_catalog.pzpdf_info["type"] = pzpdf_type
galaxy_catalog = _compute_photoz_pdfs(
galaxy_catalog, photoz_sigma_unscaled, pz_bins=pz_bins
galaxy_catalog,
photoz_sigma_unscaled,
pz_bins=pz_bins,
pz_quantiles_conf=pz_quantiles_conf,
)

# Draw galaxy positions
Expand Down Expand Up @@ -462,7 +475,10 @@ def _generate_galaxy_catalog(
if all(c is not None for c in (photoz_sigma_unscaled, pzpdf_type)):
if galaxy_catalog.pzpdf_info["type"] == "individual_bins":
cols += ["pzbins"]
cols += ["pzpdf"]
if galaxy_catalog.pzpdf_info["type"] == "quantiles":
cols += ["pzquantiles"]
else:
cols += ["pzpdf"]

if coordinate_system == "celestial":
galaxy_catalog["e2"] *= -1 # flip e2 to match the celestial coordinate system
Expand Down Expand Up @@ -529,7 +545,9 @@ def _draw_source_redshifts(zsrc, zsrc_min, zsrc_max, ngals):
return GCData([zsrc_list, zsrc_list], names=("ztrue", "z"))


def _compute_photoz_pdfs(galaxy_catalog, photoz_sigma_unscaled, pz_bins=101):
def _compute_photoz_pdfs(
galaxy_catalog, photoz_sigma_unscaled, pz_bins=101, pz_quantiles_conf=(5, 31)
):
"""Private function to add photo-z errors and PDFs to the mock catalog.
Parameters
Expand Down Expand Up @@ -582,7 +600,11 @@ def _compute_photoz_pdfs(galaxy_catalog, photoz_sigma_unscaled, pz_bins=101):
gaussian(row["pzbins"], row["z"], row["pzsigma"]) for row in galaxy_catalog
]
elif galaxy_catalog.pzpdf_info["type"] == "quantiles":
raise NotImplementedError("PDF storing in quantiles not implemented.")
sigma_steps = np.linspace(-pz_quantiles_conf[0], pz_quantiles_conf[0], pz_quantiles_conf[1])
galaxy_catalog.pzpdf_info["quantiles"] = 0.5 * erfc(-sigma_steps / np.sqrt(2))
galaxy_catalog["pzquantiles"] = (
galaxy_catalog["z"][:, None] + sigma_steps * galaxy_catalog["pzsigma"][:, None]
)
else:
raise ValueError(
"Value of pzpdf_info['type'] " f"(={galaxy_catalog.pzpdf_info['type']}) " "not valid."
Expand Down
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ dependencies:
- numcosmo==0.18.2
- pytest=7.4.0
- sphinx==7.2.4
- pip:
- qp-prob @ git+https://github.com/LSSTDESC/qp.git
Loading

0 comments on commit 157bdb0

Please sign in to comment.