Skip to content

Commit

Permalink
Hashing (#968)
Browse files Browse the repository at this point in the history
Hashing
  • Loading branch information
nikfilippas authored Mar 6, 2023
1 parent 2c88c59 commit fa73ff2
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 10 deletions.
21 changes: 12 additions & 9 deletions pyccl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,20 @@
environ["CLASS_PARAM_DIR"] = path.dirname(path.abspath(__file__))
del environ, path

# Patch for deprecated alias in Numpy >= 1.20.0 (used in ISiTGR & FAST-PT).
# Deprecation cycle starts in Numpy 1.20 and ends in Numpy 1.24.
from packaging.version import parse
import numpy
numpy.int = int if parse(numpy.__version__) >= parse("1.20.0") else numpy.int
del parse, numpy

# SWIG-generated
from . import ccllib as lib

# monkey patch for isitgr and fast-pt if Numpy>=1.24
from packaging.version import parse
import numpy as np
if parse(np.__version__) >= parse('1.24'):
np.int = int
del parse
del np
# CCL base
from .base import (
hash_,
)

# Errors
from .errors import (
Expand Down Expand Up @@ -135,11 +139,9 @@
sigma2_B_from_mask,
)


# Miscellaneous
from .pyutils import debug_mode, resample_array


# Deprecated & Renamed modules
from .halomodel import (
halomodel_matter_power,
Expand All @@ -164,6 +166,7 @@

__all__ = (
'lib',
'hash_',
'CCLParameters', 'spline_params', 'gsl_params', 'physical_constants',
'CCLError', 'CCLWarning', 'CCLDeprecationWarning',
'Cosmology', 'CosmologyVanillaLCDM', 'CosmologyCalculator',
Expand Down
45 changes: 45 additions & 0 deletions pyccl/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sys
from collections import OrderedDict
import numpy as np


def _to_hashable(obj):
    """Make unhashable objects hashable in a consistent manner.

    Equal inputs are mapped to equal outputs, so that the result can be
    used as the basis of a content-based hash.

    Parameters
    ----------
    obj : object
        The object to convert. Numbers and strings are returned as they
        are; arrays, dictionaries and other iterables are converted
        recursively; anything else is returned unchanged.

    Returns
    -------
    object
        ``obj`` itself, or a (possibly nested) tuple standing in for it.

    Raises
    ------
    TypeError
        If ``obj`` is not iterable and exposes no ``__hash__`` attribute.
    """
    if isinstance(obj, (int, float, str)):
        # Strings and numbers are hashed directly.
        return obj

    if hasattr(obj, "__iter__"):
        # Encapsulate all the iterables to quickly discard as needed.

        if isinstance(obj, np.ndarray):
            # Numpy arrays: the raw data buffer alone cannot tell apart
            # arrays of different shape or dtype that happen to share the
            # same bytes, so keep both next to the byte string.
            return (obj.shape, str(obj.dtype), obj.tobytes())

        if isinstance(obj, dict):
            # Dictionaries: build a tuple from key-value pairs,
            # where all values are converted to hashables.
            out = {key: _to_hashable(value) for key, value in obj.items()}
            if isinstance(obj, OrderedDict):
                # The order is part of the content: keep it.
                return tuple(out.items())
            # Sort unordered dictionaries for hash consistency.
            try:
                return tuple(sorted(out.items()))
            except TypeError:
                # Keys of mixed types cannot be ordered against each
                # other; order them by their representation instead.
                return tuple(sorted(out.items(), key=lambda kv: repr(kv[0])))

        # Other iterables: build a tuple from values converted to hashables.
        out = [_to_hashable(item) for item in obj]
        if isinstance(obj, (set, frozenset)):
            # Equal sets may iterate in different orders, so impose one.
            # Sorting by representation also works for mixed-type items.
            out.sort(key=repr)
        return tuple(out)

    # NOTE: unhashable types still expose ``__hash__`` (set to None), so
    # this check is effectively always true and the object is returned.
    if hasattr(obj, "__hash__"):
        return obj

    # NotImplemented: can't hash safely, so raise TypeError.
    raise TypeError(f"Hashing for {type(obj)} not implemented.")


def hash_(obj):
    """Generic hash method, which changes between processes.

    The object is first passed through ``_to_hashable``; the built-in
    hash of the representation of that result is then offset by
    ``sys.maxsize + 1``.
    """
    stand_in = _to_hashable(obj)
    text_hash = hash(repr(stand_in))
    return text_hash + sys.maxsize + 1
3 changes: 2 additions & 1 deletion pyccl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,8 @@ def compute_nonlin_power(self):
if (mps != 'emu') and (mps is not None):
self.compute_linear_power()

if mps == "camb":
if mps == "camb" and self.has_nonlin_power:
# Already computed
return

if mps is None:
Expand Down
1 change: 1 addition & 0 deletions pyccl/halos/halo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class HMCalculator(object):
determines what is considered a "very large" scale.
Default: 1E-5.
"""

def __init__(self, cosmo, massfunc, hbias, mass_def,
log10M_min=8., log10M_max=16.,
nlog10M=128, integration_method_M='simpson',
Expand Down
2 changes: 2 additions & 0 deletions pyccl/halos/massdef.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def __init__(self, Delta, rho_type, c_m_relation=None):
def __eq__(self, other):
    """Compare this mass definition with another object.

    Anything that is not a ``MassDef`` compares unequal; otherwise both
    ``Delta`` and ``rho_type`` have to match.
    """
    if isinstance(other, MassDef):
        same_delta = self.Delta == other.Delta
        return same_delta and (self.rho_type == other.rho_type)
    return False

Expand Down
34 changes: 34 additions & 0 deletions pyccl/tests/test_hashing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Test the hashing function of CCL."""
import pytest
import pyccl as ccl
import numpy as np
from collections import OrderedDict

# Sample inputs for the parametrized smoke test, one per line.
OBJECTS = [
    ccl.Cosmology,                                      # class
    (0, 1, 2),                                          # tuple
    [0, 1, 2],                                          # list
    {0, 1, 2},                                          # set
    np.arange(3),                                       # array
    {0: None, 1: None, 2: None},                        # dict
    {0: None, 1: None, 2: {2.1: None, 2.2: None}},      # nested dict
    OrderedDict([(0, None), (1, None), (2, None)]),     # OrderedDict
    ccl.CosmologyVanillaLCDM(),                         # something else
    None,                                               # something else
]


@pytest.mark.parametrize("obj", OBJECTS)
def test_hashing_smoke(obj):
    """Check that ``ccl.hash_`` yields an ``int`` for the given object."""
    digest = ccl.hash_(obj)
    assert isinstance(digest, int)


def test_hashing_large_array():
    """Check that a change deep inside a large array changes its hash."""
    # Hashing ultimately uses the representation of the object, and the
    # representation of a large numpy array only shows its start and end.
    # Two arrays differing in a single central element must still differ.
    original = np.random.random(64**3).reshape(64, 64, 64)
    modified = original.copy()
    modified[31, 31, 31] += 1.  # this is now the max value
    peak_text = str(modified.max())[:6]
    # Make sure the modified element does not show in the representation.
    assert peak_text not in repr(modified)
    assert ccl.hash_(original) != ccl.hash_(modified)

0 comments on commit fa73ff2

Please sign in to comment.