Skip to content

Commit

Permalink
improved performance and updated API for interpolation
Browse files Browse the repository at this point in the history
  • Loading branch information
johannesulf committed Jun 30, 2020
1 parent 0a64f88 commit 395d4dc
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 164 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='tabcorr',
version='0.5.1',
version='0.6',
description='Tabulated correlation functions for halotools',
url='https://github.com/johannesulf/TabCorr',
author='Johannes Ulf Lange',
Expand Down
306 changes: 143 additions & 163 deletions tabcorr/tabcorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,13 @@ def tabulate(cls, halocat, tpcf, *tpcf_args,
if project_xyz and mode == 'auto':
tpcf_matrix /= 3.0

if mode == 'auto':
tpcf_matrix_flat = []
for i in range(tpcf_matrix.shape[0]):
tpcf_matrix_flat.append(symmetric_matrix_to_array(
tpcf_matrix[i]))
tpcf_matrix = np.array(tpcf_matrix_flat)

halotab.attrs = {}
halotab.attrs['tpcf'] = tpcf.__name__
halotab.attrs['mode'] = mode
Expand Down Expand Up @@ -367,15 +374,7 @@ def read(cls, fname):
for key in fstream.attrs.keys():
halotab.attrs[key] = fstream.attrs[key]

tpcf_matrix = fstream['tpcf_matrix'][()]
if halotab.attrs['mode'] == 'auto' and len(tpcf_matrix.shape) == 2:
halotab.tpcf_matrix = []
for i in range(tpcf_matrix.shape[0]):
halotab.tpcf_matrix.append(array_to_symmetric_matrix(
tpcf_matrix[i]))
halotab.tpcf_matrix = np.array(halotab.tpcf_matrix)
else:
halotab.tpcf_matrix = tpcf_matrix
halotab.tpcf_matrix = fstream['tpcf_matrix'][()]

halotab.tpcf_args = []
for key in fstream['tpcf_args'].keys():
Expand Down Expand Up @@ -420,15 +419,7 @@ def write(self, fname, overwrite=False, max_args_size=1000000,
for key in keys:
fstream.attrs[key] = self.attrs[key]

if self.attrs['mode'] == 'auto':
tpcf_matrix_write = []
for i in range(self.tpcf_matrix.shape[0]):
tpcf_matrix_write.append(symmetric_matrix_to_array(
self.tpcf_matrix[i]))
fstream['tpcf_matrix'] = np.array(tpcf_matrix_write,
dtype=matrix_dtype)
else:
fstream['tpcf_matrix'] = self.tpcf_matrix.astype(matrix_dtype)
fstream['tpcf_matrix'] = self.tpcf_matrix.astype(matrix_dtype)

for i, arg in enumerate(self.tpcf_args):
if (type(arg) is not np.ndarray or
Expand Down Expand Up @@ -525,16 +516,16 @@ def predict(self, model, separate_gal_type=False, **occ_kwargs):
ngal = mean_occupation * self.gal_type['n_h'].data

if self.attrs['mode'] == 'auto':
xi = self.tpcf_matrix * np.outer(ngal, ngal) / np.sum(ngal)**2
ngal_sq = np.outer(ngal, ngal)
ngal_sq = 2 * ngal_sq - np.diag(np.diag(ngal_sq))
ngal_sq = symmetric_matrix_to_array(ngal_sq)
xi = self.tpcf_matrix * ngal_sq / np.sum(ngal_sq)
elif self.attrs['mode'] == 'cross':
xi = self.tpcf_matrix * ngal / np.sum(ngal)

if not separate_gal_type:
ngal = np.sum(ngal)
if self.attrs['mode'] == 'auto':
xi = np.sum(xi, axis=(1, 2)).reshape(self.tpcf_shape)
elif self.attrs['mode'] == 'cross':
xi = np.sum(xi, axis=1).reshape(self.tpcf_shape)
xi = np.sum(xi, axis=1).reshape(self.tpcf_shape)
return ngal, xi
else:
ngal_dict = {}
Expand All @@ -545,17 +536,17 @@ def predict(self, model, separate_gal_type=False, **occ_kwargs):
ngal_dict[gal_type] = np.sum(ngal[mask])

if self.attrs['mode'] == 'auto':
grid = np.meshgrid(self.gal_type['gal_type'],
self.gal_type['gal_type'])
for gal_type_1, gal_type_2 in (
itertools.combinations_with_replacement(
np.unique(self.gal_type['gal_type']), 2)):
mask = (((gal_type_1 == grid[0]) &
(gal_type_2 == grid[1])) |
((gal_type_1 == grid[1]) &
(gal_type_2 == grid[0])))
mask = symmetric_matrix_to_array(np.outer(
gal_type_1 == self.gal_type['gal_type'],
gal_type_2 == self.gal_type['gal_type']) |
np.outer(
gal_type_2 == self.gal_type['gal_type'],
gal_type_1 == self.gal_type['gal_type']))
xi_dict['%s-%s' % (gal_type_1, gal_type_2)] = np.sum(
xi * mask, axis=(1, 2)).reshape(self.tpcf_shape)
xi * mask, axis=1).reshape(self.tpcf_shape)

elif self.attrs['mode'] == 'cross':
for gal_type in np.unique(self.gal_type['gal_type']):
Expand All @@ -566,166 +557,155 @@ def predict(self, model, separate_gal_type=False, **occ_kwargs):
return ngal_dict, xi_dict


def interpolate_predict(tabcorr_arr, x, xi, model, extrapolate=True,
**occ_kwargs):
"""
Linearly interprets the predictions from multiple TabCorr instances. Each
TabCorr instance will have a corresponding x-value and this function
finds a linear/barycentric interpolation for an intermediate point xi. For
example, this function can be used for predict correlation functions
for continues choices of concentrations for satellites.
Parameters
----------
tabcorr_arr : array_like
TabCorr instances used to interpolate.
x : array_like
Array of shape (npts) or (npts, ndim). Here, npts is the number of
TabCorr instances and ndim the number of dimensions over which
we will interpolate.
xi : float or array_like
x-value at which to interpolate. If x has shape (npts), xi must have
be a float. On the other hand, if x has shape (npts, ndim), xi must
be an array of len ndim.
model : HodModelFactory
Instance of ``halotools.empirical_models.HodModelFactory``
describing the model for which predictions are made.
extrapolate : boolean, optional
Whether to allow extrapolation beyond points sampled by x. If set to
False, attempting to extrapolate will result in a RuntimeError.
**occ_kwargs : dict, optional
Keyword arguments passed to the ``mean_occupation`` functions of
the model.
Returns
-------
ngal : numpy.array or dict
Array containing the number densities for each galaxy type stored in
self.gal_type. The total galaxy number density is the sum of all
elements of this array.
xi : numpy.array or dict
Array storing the prediction for the correlation function.
"""
class TabCorrInterpolation:

try:
assert isinstance(x, list) or isinstance(x, np.ndarray)
x = np.array(x)
assert (x.ndim == 1) or (x.ndim == 2)
except AssertionError:
raise RuntimeError('x must be a one or two-dimensional array.')
def __init__(self, tabcorr_list, param_dict_table):
"""
Initialize an interpolation of multiple TabCorr instances.
if x.ndim == 1:
n_points = x.shape[0]
n_dim = 1
elif x.ndim == 2:
n_points = x.shape[0]
n_dim = x.shape[1]
if n_points <= n_dim:
raise RuntimeError('x must contain more points than dimensions.')
Parameters
----------
tabcorr_list : array_like
TabCorr instances used to interpolate.
try:
if n_dim == 1:
assert isinstance(xi, float)
param_dict_table : astropy.table.Table
Table containing the keywords and values corresponding to the
TabCorr list. Must have the same length and ordering as
tabcorr_list.
"""

if len(tabcorr_list) != len(param_dict_table):
raise RuntimeError('The length of tabcorr_list does not match ' +
'the number of entries in param_dict_table.')

self.keys = param_dict_table.colnames
self.n_dim = len(self.keys)
self.n_pts = len(param_dict_table)
self.tabcorr_list = tabcorr_list

if self.n_pts < self.n_dim + 1:
raise RuntimeError('The number of TabCorr instances provided ' +
'must be at least the number of dimensions ' +
'plus 1.')

if self.n_dim == 0:
raise RuntimeError('param_dict_table is empty.')
if self.n_dim == 1:
self.x = param_dict_table.columns[0].data
else:
assert isinstance(xi, list) or isinstance(xi, np.ndarray)
xi = np.array(xi)
assert xi.ndim == 1
assert len(xi) == n_dim
except AssertionError:
raise RuntimeError('xi must match the dimensionality of x.')
self.x = np.empty((self.n_pts, self.n_dim))
for i, key in enumerate(param_dict_table.colnames):
self.x[:, i] = param_dict_table[key].data
self.delaunay = Delaunay(self.x)

if n_points != len(tabcorr_arr):
raise RuntimeError('The length of tabcorr_arr does not match the ' +
'number of points provided via x.')
def predict(self, model, extrapolate=True, **occ_kwargs):
"""
Linearly interpolate the predictions from multiple TabCorr instances.
For example, this function can be used for predict correlation
functions for continues choices of concentrations for satellites.
Parameters to linearly interpolate over should be in the parameter
dictionary of the model.
if n_dim > 1:
Parameters
----------
model : HodModelFactory
Instance of ``halotools.empirical_models.HodModelFactory``
describing the model for which predictions are made.
tri = Delaunay(x)
i = tri.find_simplex(xi)
if i != -1:
s = tri.simplices[i]
if i == -1:
if not extrapolate:
raise RuntimeError('x is outside of the interpolation range.')
else:
x_cm = np.mean(x[tri.simplices], axis=1)
i = np.argmin(np.sum((xi - x_cm)**2, axis=1)) # closest simplex
separate_gal_type : boolean, optional
If True, the return values are dictionaries divided by each galaxy
types contribution to the output result.
s = tri.simplices[i]
b = tri.transform[i, :-1].dot(xi - tri.transform[i, -1])
w = np.append(b, 1 - np.sum(b))
extrapolate : boolean, optional
Whether to allow extrapolation beyond points sampled by x. If set
to False, attempting to extrapolate will result in a RuntimeError.
else:
**occ_kwargs : dict, optional
Keyword arguments passed to the ``mean_occupation`` functions
of the model.
if np.any(x < xi) and np.any(x > xi):
s = [np.ma.MaskedArray.argmax(np.ma.masked_array(x, mask=(x > xi))),
np.ma.MaskedArray.argmin(np.ma.masked_array(x, mask=(x < xi)))]
else:
if not extrapolate:
raise RuntimeError('x is outside of the interpolation range.')
else:
s = np.argsort(np.abs(xi - x))[:2]
Returns
-------
ngal : numpy.array or dict
Array containing the number densities for each galaxy type stored
in self.gal_type. The total galaxy number density is the sum of all
elements of this array.
w = np.array([x[s[1]] - xi, xi - x[s[0]]]) / (x[s[1]] - x[s[0]])
xi : numpy.array or dict
Array storing the prediction for the correlation function.
"""

for i in range(len(s)):
ngal_i, xi_i = tabcorr_arr[s[i]].predict(model, **occ_kwargs)
if i == 0:
ngal = ngal_i * w[i]
xi = xi_i * w[i]
else:
ngal += ngal_i * w[i]
xi += xi_i * w[i]
x_model = np.empty(self.n_dim)
for i in range(self.n_dim):
try:
x_model[i] = model.param_dict[self.keys[i]]
except KeyError:
raise RuntimeError('key {} not present '.format(self.keys[i]) +
'in the parameter dictionary of the model.')

return ngal, xi
if self.n_dim > 1:

i_simplex = self.delaunay.find_simplex(x_model)

def symmetric_matrix_to_array(matrix):
if i_simplex == -1:
if not extrapolate:
raise RuntimeError('The parameters of the model are ' +
'outside of the interpolation range ' +
'and extrapolation is turned off.')
else:
x_cm = np.mean(self.x[self.delaunay.simplices], axis=1)
i_simplex = np.argmin(np.sum((x_model - x_cm)**2, axis=1))

try:
assert matrix.shape[0] == matrix.shape[1]
assert np.all(matrix == matrix.T)
except AssertionError:
raise RuntimeError('The matrix you provided is not symmetric.')
simplex = self.delaunay.simplices[i_simplex]
b = self.delaunay.transform[i_simplex, :-1].dot(
x_model - self.delaunay.transform[i_simplex, -1])
w = np.append(b, 1 - np.sum(b))

array = np.zeros((np.prod(matrix.shape) + matrix.shape[0]) // 2,
dtype=matrix.dtype)
else:

k = 0
for i in range(matrix.shape[0]):
for j in range(matrix.shape[1]):
if j > i:
continue
if np.any(x_model < self.x) and np.any(x_model > self.x):
simplex = [np.ma.MaskedArray.argmax(
np.ma.masked_array(self.x, mask=(self.x > x_model))),
np.ma.MaskedArray.argmin(
np.ma.masked_array(self.x, mask=(self.x < x_model)))]
else:
array[k] = matrix[i, j]
k = k + 1

return array
if not extrapolate:
raise RuntimeError('The parameters of the model are ' +
'outside of the interpolation range ' +
'and extrapolation is turned off.')
else:
simplex = np.argsort(np.abs(x_model - self.x))[:2]

w1 = (self.x[simplex[1]] - x_model) / (
self.x[simplex[1]] - self.x[simplex[0]])
w = [w1, 1 - w1]

for i, k in enumerate(simplex):
ngal_i, xi_i = self.tabcorr_list[k].predict(model, **occ_kwargs)
if i == 0:
ngal = ngal_i * w[i]
xi = xi_i * w[i]
else:
ngal += ngal_i * w[i]
xi += xi_i * w[i]

return ngal, xi

def array_to_symmetric_matrix(array):

dim = int(np.rint(-0.5 + np.sqrt(0.25 + len(array) * 2)))
def symmetric_matrix_to_array(matrix):

try:
assert len(array) == (dim**2 + dim) / 2
assert matrix.shape[0] == matrix.shape[1]
assert np.all(matrix == matrix.T)
except AssertionError:
raise RuntimeError('The length of the array does not correspond to a' +
' symmetric matrix.')
raise RuntimeError('The matrix you provided is not symmetric.')

matrix = np.zeros((dim, dim), dtype=array.dtype)
n_dim = matrix.shape[0]
sel = np.zeros((n_dim**2 + n_dim) // 2, dtype=np.int)

k = 0
for i in range(matrix.shape[0]):
matrix[i, :i+1] = array[k:k+i+1]
k = k + i + 1
sel[(i*(i+1))//2:(i*(i+1))//2+(i+1)] = np.arange(
i*n_dim, i*n_dim + i + 1)

matrix = matrix + matrix.T - matrix * np.identity(matrix.shape[0])
return matrix.ravel()[sel]

return matrix

0 comments on commit 395d4dc

Please sign in to comment.