Skip to content

Commit

Permalink
smart binning, spline interpolation
Browse files Browse the repository at this point in the history
  • Loading branch information
johannesulf committed May 12, 2022
1 parent 3592bf0 commit ddddb80
Show file tree
Hide file tree
Showing 13 changed files with 808 additions and 454 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
__pycache__
*.hdf5
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ from tabcorr import TabCorr
rp_bins = np.logspace(-1, 1, 20)
halocat = CachedHaloCatalog(simname='bolplanck')
halotab = TabCorr.tabulate(halocat, wp, rp_bins, pi_max=40)
halotab = TabCorr.tabulate(halocat, wp, rp_bins, pi_max=40, verbose=True,
num_threads=4)
# We can save the result for later use.
halotab.write('bolplanck.hdf5')
Expand Down
Binary file added scripts/bolplanck_ds.hdf5
Binary file not shown.
Binary file added scripts/bolplanck_wp.hdf5
Binary file not shown.
Binary file added scripts/ds_decomposition.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
45 changes: 45 additions & 0 deletions scripts/example_ds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy as np
import matplotlib.pyplot as plt
from halotools.sim_manager import CachedHaloCatalog
from halotools.mock_observables import mean_delta_sigma
from halotools.mock_observables import return_xyz_formatted_array
from halotools.empirical_models import PrebuiltHodModelFactory
from tabcorr import TabCorr

# First, we tabulate the correlation functions in the halo catalog.
rp_bins = np.logspace(-1, 1, 20)

halocat = CachedHaloCatalog(simname='bolplanck')
ptcl = halocat.ptcl_table
pos_ptcl = return_xyz_formatted_array(ptcl['x'], ptcl['y'], ptcl['z'])
effective_particle_mass = halocat.particle_mass * 2048**3 / len(ptcl)
halotab = TabCorr.tabulate(
halocat, mean_delta_sigma, pos_ptcl, effective_particle_mass, rp_bins,
mode='cross', verbose=True, num_threads=4)

# We can save the result for later use.
halotab.write('bolplanck_ds.hdf5')

# We could read it in like this. Thus, we can skip the previous steps in the
# future.
halotab = TabCorr.read('bolplanck_ds.hdf5')

# Now, we're ready to calculate correlation functions for a specific model.
model = PrebuiltHodModelFactory('zheng07', threshold=-21)

rp_ave = 0.5 * (rp_bins[1:] + rp_bins[:-1])

ngal, ds = halotab.predict(model)
plt.plot(rp_ave, rp_ave * ds / 1e12, label='total')

ngal, ds = halotab.predict(model, separate_gal_type=True)
for key in ds.keys():
plt.plot(rp_ave, rp_ave * ds[key] / 1e12, label=key, ls='--')

plt.xscale('log')
plt.xlabel(r'$r_p \ [h^{-1} \ \mathrm{Mpc}]$')
plt.ylabel(r'$r_p \times \Delta\Sigma \ [10^6 \, M_\odot / \mathrm{pc}]$')
plt.legend(loc='best', frameon=False)
plt.tight_layout(pad=0.3)
plt.savefig('ds_decomposition.png', dpi=300)
plt.close()
7 changes: 4 additions & 3 deletions scripts/example.py → scripts/example_wp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
rp_bins = np.logspace(-1, 1, 20)

halocat = CachedHaloCatalog(simname='bolplanck')
halotab = TabCorr.tabulate(halocat, wp, rp_bins, pi_max=40)
halotab = TabCorr.tabulate(halocat, wp, rp_bins, pi_max=40, verbose=True,
num_threads=4)

# We can save the result for later use.
halotab.write('bolplanck.hdf5')
halotab.write('bolplanck_wp.hdf5')

# We could read it in like this. Thus, we can skip the previous steps in the
# future.
halotab = TabCorr.read('bolplanck.hdf5')
halotab = TabCorr.read('bolplanck_wp.hdf5')

# Now, we're ready to calculate correlation functions for a specific model.
model = PrebuiltHodModelFactory('zheng07', threshold=-18)
Expand Down
Binary file modified scripts/wp_decomposition.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified scripts/wp_vs_logm1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from setuptools import setup
from tabcorr import __version__

setup(name='tabcorr',
version='0.8.1',
description='Tabulated correlation functions for halotools',
version=__version__,
description='Tabulated Correlation functions for halotools',
url='https://github.com/johannesulf/TabCorr',
author='Johannes U. Lange',
author_email='[email protected]',
Expand Down
6 changes: 4 additions & 2 deletions tabcorr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from .tabcorr import *
from .tabcorr import TabCorr
from .interpolate import Interpolator

__all__ = ["TabCorr", "TabCorrInterpolation"]
__version__ = '0.9.0'
__all__ = ["TabCorr", "Interpolator"]
290 changes: 290 additions & 0 deletions tabcorr/interpolate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
"""Module for interpolation between TabCorr instances."""

import numpy as np
from scipy.spatial import Delaunay


class Interpolator:
"""Class for interpolation of multiple TabCorr instances."""

def __init__(self, tabcorr_list, param_dict_table, spline=True):
"""Initialize an interpolation of multiple TabCorr instances.
Parameters
----------
tabcorr_list : list or numpy.ndarray
TabCorr instances used to interpolate.
param_dict_table : astropy.table.Table
Table containing the keywords and values corresponding to each
instance in the TabCorr list. Must have the same length and
ordering as `tabcorr_list`.
spline : bool, optional
For multi-dimensional interpolation, whether the interpolation
is performed over a regular grid using spline interpolation or
over an irregular grid using linear barycentric interpolation.
Spline interpolation is more accurate but slower and the TabCorr
instances need to be arranged on a grid. Default is True.
Raises
------
ValueError
If `spline` is True and `param_dict_table` does not describe a
grid.
"""
if len(tabcorr_list) != len(param_dict_table):
raise ValueError("The number of TabCorr instances does not match" +
" the number of entries in 'param_dict_table'.")

self.tabcorr_list = tabcorr_list
self.keys = param_dict_table.colnames
self.param_dict_table = param_dict_table.copy()
self.spline = spline or len(self.keys) == 1

if self.spline:
self.xp = []
self.a = []
for key in self.keys:
self.xp.append(np.sort(np.unique(param_dict_table[key])))
self.a.append(spline_interpolation_matrix(self.xp[-1]))

try:
# Check that the table has the right length to describe the
# grid.
assert (np.prod([len(xp) for xp in self.xp]) ==
len(self.param_dict_table))
# Check that no combination of values in the table appears
# twice.
assert np.all(
np.unique(np.stack(self.param_dict_table),
return_counts=True)[1] == 1)
except AssertionError:
raise ValueError(
"The 'param_dict_table' does not describe a grid.")

self.param_dict_table['tabcorr_index'] = np.arange(len(
self.param_dict_table))
self.param_dict_table.sort(self.keys)

else:

if len(self.param_dict_table) <= len(self.keys):
raise ValueError(
'The number of TabCorr instances provided must be ' +
'larger than the number of dimensions.')

self.xp = np.zeros((len(self.param_dict_table), len(self.keys)))
for i, key in enumerate(self.keys):
self.xp[:, i] = self.param_dict_table[key].data
self.delaunay = Delaunay(self.xp)

def predict(self, model, n_gauss_prim=10, extrapolate=False,
same_halos=False, **occ_kwargs):
"""Interpolate the predictions from multiple TabCorr instances.
The values of parameters to interpolate should be in the parameter
dictionary of the model.
Parameters
----------
model : HodModelFactory
Instance of ``halotools.empirical_models.HodModelFactory``
describing the model for which predictions are made.
n_gauss_prim : int, optional
The number of points used in the Gaussian quadrature to calculate
the mean occupation averaged over the primary halo property in each
halo bin. Default is 10.
extrapolate : bool, optional
Whether to allow extrapolation beyond points sampled by the input
TabCorr instances. Default is False.
same_halos : bool, optional
If True, assume that all TabCorr instances are using the same
simulation and halo bins. This is typically the case if we want
to interpolate between phase-space parameters. Can speed up
calculation considerably. Default is False.
**occ_kwargs : dict, optional
Keyword arguments passed to the ``mean_occupation`` functions of
the model.
Returns
-------
ngal : float
Galaxy number density.
xi : numpy.ndarray
Correlation function values.
Raises
------
ValueError
If `extrapolate` is set to True and values are outside the
interpolation range.
"""
x_model = np.empty(len(self.keys))
for i, key in enumerate(self.keys):
try:
x_model[i] = model.param_dict[key]
except KeyError:
raise ValueError(
'The key {} is not present in the parameter '.format(key) +
'dictionary of the model.')

if self.spline:
for i in range(len(self.param_dict_table)):
tabcorr = self.tabcorr_list[
self.param_dict_table['tabcorr_index'][i]]
if i == 0 and same_halos:
model = tabcorr.mean_occupation(
model, n_gauss_prim=n_gauss_prim, **occ_kwargs)
ngal_i, xi_i = tabcorr.predict(
model, n_gauss_prim=n_gauss_prim, **occ_kwargs)
if i == 0:
ngal = np.zeros(np.prod([len(xp) for xp in self.xp]))
xi = np.zeros([np.prod([len(xp) for xp in self.xp])] +
list(xi_i.shape))
ngal[i] = ngal_i
xi[i] = xi_i
ngal = ngal.reshape([len(xp) for xp in self.xp])
xi = xi.reshape([len(xp) for xp in self.xp] + list(xi_i.shape))
return (spline_interpolate(x_model, self.xp, self.a, ngal),
spline_interpolate(x_model, self.xp, self.a, xi))

i_simplex = self.delaunay.find_simplex(x_model)

if i_simplex == -1:
if not extrapolate:
raise ValueError(
'The x-coordinates are outside of the interpolation ' +
'range and extrapolation is turned off.')
x_cm = np.mean(self.xp[self.delaunay.simplices], axis=1)
i_simplex = np.argmin(np.sum((x_model - x_cm)**2, axis=1))

simplex = self.delaunay.simplices[i_simplex]
b = self.delaunay.transform[i_simplex, :-1].dot(
x_model - self.delaunay.transform[i_simplex, -1])
w = np.append(b, 1 - np.sum(b))

for i, k in enumerate(simplex):
ngal_i, xi_i = self.tabcorr_list[k].predict(
model, **occ_kwargs)
if i == 0:
ngal = ngal_i * w[i]
xi = xi_i * w[i]
else:
ngal += ngal_i * w[i]
xi += xi_i * w[i]

return ngal, xi


def spline_interpolation_matrix(xp):
"""Calculate a matrix for quick cubic not-a-knot spline interpolation.
Parameters
----------
xp : numpy.ndarray
Abscissa for which to perform spline interpolation.
Returns
-------
Matrix `a` such that ``np.einsum('ij,j,i', a[i], y, x0**np.arange(4))``
is the value `i`-th spline evalualated at `x0`.
Raises
------
ValueError
If `xp` does not have at least 4 entries.
"""
if len(xp) < 4:
raise ValueError('Cannot perform spline interpolation with less than' +
' 4 values.')

n = len(xp) - 1
m = np.zeros((4 * n, 4 * n))

# Ensure spline goes through y-values.
for i in range(n):
m[i][i*4:(i+1)*4] = xp[i]**np.arange(4)
m[i+n][i*4:(i+1)*4] = xp[i+1]**np.arange(4)

# Ensure continuity first and second derivative at the x-values.
for i in range(n - 1):
m[i+2*n][i*4+1:(i+1)*4] = np.array([1, 2, 3]) * xp[i+1]**np.arange(3)
m[i+2*n][(i+1)*4+1:(i+2)*4] = -(
np.array([1, 2, 3]) * xp[i+1]**np.arange(3))
m[i+3*n-1][i*4+2:(i+1)*4] = np.array([2, 6]) * xp[i+1]**np.arange(2)
m[i+3*n-1][(i+1)*4+2:(i+2)*4] = -(
np.array([2, 6]) * xp[i+1]**np.arange(2))

# Ensure continuity of third derivative in first and last x-values.
m[-1][3] = 6 * xp[1]
m[-1][7] = -6 * xp[1]
m[-2][-5] = 6 * xp[-2]
m[-2][-1] = -6 * xp[-2]

# Invert matrix and determine matrix for y-values.
m = np.linalg.inv(m)
a = np.zeros((4 * n, len(xp)))
a[:, :-1] = m[:, :n]
a[:, 1:] += m[:, n:2*n]

return a.reshape((n, 4, len(xp)))


def spline_interpolate(x, xp, a, yp, extrapolate=False):
"""Interpolate one or more possible multi-dimensional functions.
This function can be used to interpolate multiple functions simultaneously
as long as the x-coordinates of each interpolation are the same.
Parameters
----------
x : float or numpy.ndarray
The single x-coordinate at which to evaluate the interpolated values.
xp : numpy.ndarray or list of numpy.ndarray
The x-coordinates of the data points. If multi-dimensional
interpolation is performed, `x` and `xp` must have the same length.
a : numpy.ndarray or list of numpy.ndarray
The spline interpolation matrix or matrices calculated with
``spline_interpolation_matrix``. For multi-dimensional interpolation,
must have the same length and order as `xp`.
yp : numpy.ndarray
The y-coordinates of the data points. If it has more dimensions than
`x`, the interpolation is performed along the first ``len(x)`` axes.
extrapolate : bool, optional
Whether to allow extrapolation beyond points sampled by x. If set to
false, attempting to extrapolate will result in a RuntimeError. Default
is False.
Returns
-------
Interpolated values with shape ``y.shape[len(x):]``.
Raises
------
ValueError
If `x` falls outside the interpolation range and `extrapolate` is
False.
"""
if not isinstance(xp, list):
xp = [xp]
if not isinstance(a, list):
a = [a]
x = np.atleast_1d(x)

for xi, ai, xpi in zip(x, a, xp):
i_spline = np.digitize(xi, xpi) - 1
if xi == xpi[-1]:
i_spline = len(xpi) - 2
if i_spline < 0 or i_spline > len(xpi) - 1:
if not extrapolate:
raise ValueError(
'The x-coordinates are outside of the interpolation ' +
'range and extrapolation is turned off.')
else:
i_spline = min(max(i_spline, 0), len(xpi) - 1)
yp = np.einsum('ij,j...,i', ai[i_spline], yp, xi**np.arange(4))

return yp
Loading

0 comments on commit ddddb80

Please sign in to comment.