Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into fix_metadata
Browse files Browse the repository at this point in the history
SSL32081 authored Dec 20, 2024
2 parents 2dc1d8f + 5d23f9a commit 718cfad
Showing 3 changed files with 247 additions and 27 deletions.
80 changes: 63 additions & 17 deletions nrcatalogtools/catalog.py
Original file line number Diff line number Diff line change
@@ -15,19 +15,39 @@ def waveform_filepath_from_simname(self, sim_name):
raise NotImplementedError()

@abstractmethod
def metadata_filename_from_simname(self, sim_name):
def waveform_url_from_simname(self, sim_name):
raise NotImplementedError()

@abstractmethod
def metadata_filepath_from_simname(self, sim_name):
def download_waveform_data(self, sim_name):
raise NotImplementedError()

@abstractmethod
def download_waveform_data(self, sim_name):
def psi4_filename_from_simname(self, sim_name):
raise NotImplementedError()

@abstractmethod
def waveform_url_from_simname(self, sim_name):
def psi4_filepath_from_simname(self, sim_name):
raise NotImplementedError()

@abstractmethod
def psi4_url_from_simname(self, sim_name):
raise NotImplementedError()

@abstractmethod
def download_psi4_data(self, sim_name):
    """Abstract hook: download the psi4 data of one simulation.

    Concrete catalogs must override this method; this base version only
    raises.

    Args:
        sim_name (str): Name of simulation in catalog

    Raises:
        NotImplementedError: Always, when not overridden.
    """
    raise NotImplementedError

@abstractmethod
def metadata_filename_from_simname(self, sim_name):
    """Abstract hook: name of the metadata file of one simulation.

    Concrete catalogs must override this method; this base version only
    raises.

    Args:
        sim_name (str): Name of simulation in catalog

    Raises:
        NotImplementedError: Always, when not overridden.
    """
    raise NotImplementedError

@abstractmethod
def metadata_filepath_from_simname(self, sim_name):
    """Abstract hook: path of the metadata file of one simulation.

    Concrete catalogs must override this method; this base version only
    raises.

    Args:
        sim_name (str): Name of simulation in catalog

    Raises:
        NotImplementedError: Always, when not overridden.
    """
    raise NotImplementedError

@abstractmethod
def metadata_url_from_simname(self, sim_name):
    """Abstract hook: URL of the metadata of one simulation.

    Concrete catalogs must override this method; this base version only
    raises.

    Args:
        sim_name (str): Name of simulation in catalog

    Raises:
        NotImplementedError: Always, when not overridden.
    """
    raise NotImplementedError


@@ -39,14 +59,17 @@ def __init__(self, *args, **kwargs) -> None:
def simulations_list(self):
    """Return the entries obtained by iterating `self.simulations`.

    Returns:
        list: A new list built from one pass over `self.simulations`.
    """
    return [*self.simulations]

def get(self, sim_name):
"""Retrieve waveform modes for this simulation
def get(self, sim_name, quantity="waveform"):
"""Retrieve specific quantities for one simulation
Args:
sim_name (str): Name of simulation in catalog
quantity (str): Name of quantity to fetch.
Options: {waveform, psi4}
Raises:
IOError: If `sim_name` not found in the catalog
IOError: If `quantity` is not one of the options above
Returns:
nrcatalogtools.waveform.WaveformModes: Waveform modes
@@ -56,22 +79,45 @@ def get(self, sim_name):
f"Simulation {sim_name} not found in catalog."
f"Please check that it exists"
)
filepath = self.waveform_filepath_from_simname(sim_name)
if not os.path.exists(filepath) or os.path.getsize(filepath) == 0:
if self._verbosity > 1:
print(
f"..As data does not exist in cache:"
f" (in {filepath}),\n"
f"..we will now download it from"
" {}".format(self.waveform_url_from_simname(sim_name))
)
self.download_waveform_data(sim_name)
metadata = self.get_metadata(sim_name)
if type(metadata) is not dict and hasattr(metadata, "to_dict"):
metadata = metadata.to_dict()
elif isinstance(metadata, dict):
metadata = dict(metadata.items())
return waveform.WaveformModes.load_from_h5(filepath, metadata=metadata)

if quantity.lower() == "waveform":
filepath = self.waveform_filepath_from_simname(sim_name)
if not os.path.exists(filepath) or os.path.getsize(filepath) == 0:
if self._verbosity > 1:
print(
f"..As data does not exist in cache:"
f" (in {filepath}),\n"
f"..we will now download it from"
" {}".format(self.waveform_url_from_simname(sim_name))
)
self.download_waveform_data(sim_name)
return waveform.WaveformModes.load_from_h5(filepath, metadata=metadata)
elif quantity.lower() == "psi4":
filepath = self.psi4_filepath_from_simname(sim_name)
if not os.path.exists(filepath) or os.path.getsize(filepath) == 0:
if self._verbosity > 1:
print(
f"..As data does not exist in cache:"
f" (in {filepath}),\n"
f"..we will now download it from"
" {}".format(self.psi4_url_from_simname(sim_name))
)
self.download_psi4_data(sim_name)
try:
return waveform.WaveformModes.load_from_h5(filepath, metadata=metadata)
except OSError:
return waveform.WaveformModes.load_from_targz(
filepath, metadata=metadata
)
else:
raise IOError(
f"Cannot provide quantity: {quantity}. Only supported options are [waveform, psi4]"
)

def get_metadata(self, sim_name):
"""Get Metadata for this simulation
27 changes: 21 additions & 6 deletions nrcatalogtools/maya.py
Original file line number Diff line number Diff line change
@@ -233,6 +233,15 @@ def files(self):

return file_infos

def metadata_filename_from_simname(self, sim_name):
    """Return the metadata file name (without directory) of a simulation.

    Args:
        sim_name (str): Name of simulation in catalog

    Returns:
        str: Last component of the path given by
            `metadata_filepath_from_simname(sim_name)`.
    """
    full_path = self.metadata_filepath_from_simname(sim_name)
    _, file_name = os.path.split(full_path)
    return file_name

def metadata_filepath_from_simname(self, sim_name, ext="txt"):
    """Return the path of a simulation's metadata file as a string.

    Args:
        sim_name (str): Name of simulation in catalog
        ext (str, optional): File extension, without the leading dot.
            Defaults to "txt".

    Returns:
        str: `self.metadata_dir` joined with `<sim_name>.<ext>`.
    """
    # `metadata_dir` must support the `/` operator (presumably a
    # pathlib.Path -- it is defined outside this view).
    file_name = f"{sim_name}.{ext}"
    return str(self.metadata_dir / file_name)

def metadata_url_from_simname(self, sim_name):
    """Placeholder: no metadata URL is provided here.

    Args:
        sim_name (str): Name of simulation in catalog (unused).

    Returns:
        None: Always.
    """
    return None

def waveform_filename_from_simname(self, sim_name):
    """Return the waveform file name of a simulation.

    Args:
        sim_name (str): Name of simulation in catalog

    Returns:
        str: `sim_name` followed by the `.h5` extension.
    """
    extension = ".h5"
    return sim_name + extension

@@ -255,12 +264,6 @@ def waveform_url_from_simname(self, sim_name, maya_format=False):
format = "lvcnr_format"
return f"{self.waveform_data_url}/{format}/{self.waveform_filename_from_simname(sim_name)}"

def metadata_filename_from_simname(self, sim_name):
return os.path.basename(self.metadata_filepath_from_simname(sim_name))

def metadata_filepath_from_simname(self, sim_name, ext="txt"):
return str(self.metadata_dir / f"{sim_name}.{ext}")

def download_waveform_data(self, sim_name, maya_format=True, use_cache=None):
if use_cache is None:
use_cache = self.use_cache
@@ -313,3 +316,15 @@ def download_waveform_data(self, sim_name, maya_format=True, use_cache=None):
print(
"... ... but couldnt find link: {}".format(str(file_path_web))
)

def psi4_filename_from_simname(self, sim_name):
    """Placeholder: no psi4 file name is provided here.

    Args:
        sim_name (str): Name of simulation in catalog (unused).

    Returns:
        None: Always.
    """
    return None

def psi4_filepath_from_simname(self, sim_name):
    """Placeholder: no psi4 file path is provided here.

    Args:
        sim_name (str): Name of simulation in catalog (unused).

    Returns:
        None: Always.
    """
    # NOTE(review): a caller that passes this result to `os.path.exists`
    # (as the generic `get(..., quantity="psi4")` does with the value of
    # this hook) would fail on None -- confirm whether an explicit error
    # is preferable here.
    return None

def psi4_url_from_simname(self, sim_name):
    """Placeholder: no psi4 URL is provided here.

    Args:
        sim_name (str): Name of simulation in catalog (unused).

    Returns:
        None: Always.
    """
    return None

def download_psi4_data(self, sim_name):
    """Placeholder: nothing is downloaded.

    Args:
        sim_name (str): Name of simulation in catalog (unused).

    Returns:
        None: Always.
    """
    return None
167 changes: 163 additions & 4 deletions nrcatalogtools/waveform.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,8 @@
translate_data_type_to_sxs_string,
)

ELL_MIN, ELL_MAX = 2, 10


class WaveformModes(sxs_WaveformModes):
def __new__(
@@ -111,16 +113,13 @@ def load_from_h5(cls, file_path_or_open_file, metadata={}, verbosity=0):

# Set the file path attribute
cls._filepath = h5_file.filename
# If _metadata is not already
# a set attribute, then set
# it here.

# If _metadata is not already a set attribute, then set it here.
try:
cls._sim_metadata
except AttributeError:
cls._sim_metadata = metadata

ELL_MIN, ELL_MAX = 2, 10
ell_min, ell_max = 99, -1
LM = []
t_min, t_max, dt = -1e99, 1e99, 1
@@ -189,6 +188,166 @@ def load_from_h5(cls, file_path_or_open_file, metadata={}, verbosity=0):
**w_attributes,
)

@classmethod
def load_from_targz(cls, file_path, metadata=None, verbosity=0):
    """Load SWSH waveform modes from a gzip-compressed tar archive.

    Every regular archive member whose name contains the archive's own tag
    (its base name with two extensions removed) and one of the ASCII
    extensions `.asc`, `.dat` or `.txt` is considered. The mode numbers are
    parsed from the `l<ell>_m<em>` fragment of the member name; members
    without such a fragment are skipped. Each mode file is read with
    `numpy.loadtxt`, and its first three columns are used as time, real
    part and imaginary part. All modes are then resampled with splines onto
    one uniform time grid. Modes between `ELL_MIN` and `ELL_MAX` that are
    missing from the archive are filled with zeros.

    Args:
        file_path (str): Path to the `.tar.gz` archive holding one ASCII
            file per mode.
        metadata (dict, optional): Dictionary containing metadata (Note
            that keys will be NR group specific). Defaults to an empty
            dictionary.
        verbosity (int, optional): Verbosity level with which to
            print messages during execution. Defaults to 0.

    Raises:
        RuntimeError: If `file_path` does not exist or is empty, or if no
            mode file is found in the archive.

    Returns:
        WaveformModes: Object containing time-series of SWSH modes.
    """
    if not os.path.exists(file_path) or os.path.getsize(file_path) == 0:
        raise RuntimeError(f"Could not use or open {file_path}")

    import re
    import tarfile

    # One or more digits for ell and an optionally signed em, so that
    # two-digit modes such as `l10_m-10` are parsed as well.
    mode_pattern = re.compile(r"l(\d+)_m(-?\d+)")

    def get_tag(name):
        # Base name with two extensions removed, e.g. `X.tar.gz` -> `X`.
        base_name = os.path.basename(name)
        return os.path.splitext(os.path.splitext(base_name)[0])[0]

    def get_el_em_from_filename(filename: str):
        # Return (ell, em) parsed from `filename`, or None if it carries
        # no `l<ell>_m<em>` fragment.
        found = mode_pattern.search(filename)
        if found is None:
            return None
        return int(found.group(1)), int(found.group(2))

    # A fresh dictionary per call instead of a shared mutable default.
    if metadata is None:
        metadata = {}

    # Set the file path attribute
    cls._filepath = file_path

    # Store the simulation metadata under the same attribute name that
    # `load_from_h5` uses, unless it has been set already.
    if not hasattr(cls, "_sim_metadata"):
        cls._sim_metadata = metadata

    ell_min, ell_max = 99, -1
    t_min, t_max, dt = -1e99, 1e99, 1

    file_tag = get_tag(file_path)
    mode_data = {}
    reference_mode_num_for_length = ()
    possible_ascii_extensions = ["asc", "dat", "txt"]

    with tarfile.open(file_path, "r:gz") as tar:
        if verbosity > 4:
            print(f"Opening tarfile: {file_path}")
        for dat_file in tar.getmembers():
            dat_file_name = dat_file.name
            if verbosity > 4:
                print(f"dat_file_name is: {dat_file_name}")
            has_no_ascii_extension = not any(
                f".{ext}" in dat_file_name for ext in possible_ascii_extensions
            )
            if file_tag not in dat_file_name or has_no_ascii_extension:
                if verbosity > 5:
                    print(
                        f"{file_tag} not in {dat_file_name} is {file_tag not in dat_file_name}"
                    )
                    print("the other flag is: ", has_no_ascii_extension)
                continue
            # Directories and links cannot be read as data files.
            if not dat_file.isfile():
                continue
            mode_numbers = get_el_em_from_filename(dat_file_name)
            if mode_numbers is None:
                # An ASCII file that is not a mode file.
                if verbosity > 4:
                    print(f"No mode numbers found in {dat_file_name}, skipping")
                continue
            ell, em = mode_numbers
            with tar.extractfile(dat_file) as f:
                samples = np.loadtxt(f)
            # Convert to row-major form
            nrows, ncols = np.shape(samples)
            if nrows < ncols:
                samples = samples.T
            mode_data[(ell, em)] = samples
            # The last mode read provides the shape and time axis used
            # for zero-filled modes below.
            reference_mode_num_for_length = (ell, em)
            # get the minimum time and maximum time stamps for all modes
            t_min = max(t_min, samples[0, 0])
            t_max = min(t_max, samples[-1, 0])
            # `stat_mode` is defined outside this view (presumably
            # scipy.stats.mode, i.e. the most common time step).
            dt = min(
                dt,
                stat_mode(np.diff(samples[:, 0]), keepdims=True)[0][0],
            )
            ell_min = min(ell_min, ell)
            ell_max = max(ell_max, ell)

    # Fail with a clear message before the zero-filling below, which needs
    # at least one mode as a reference.
    if not mode_data:
        raise RuntimeError(
            "We did not find even one mode in the archive "
            f"{file_path}. Perhaps its members are not named with the "
            f"tag `{file_tag}`, an `l?_m?` fragment and one of the "
            f"extensions {possible_ascii_extensions}?"
        )

    # We populate LM here because it has to be ordered, as the WaveformModes
    # class expects an ordered data set.
    reference_mode = mode_data[reference_mode_num_for_length]
    LM = []
    for ell in range(ELL_MIN, ELL_MAX + 1):
        for em in range(-ell, ell + 1):
            if (ell, em) not in mode_data:
                # Missing mode: zeros on the reference mode's time axis.
                filler = np.zeros(np.shape(reference_mode))
                filler[:, 0] = reference_mode[:, 0]
                mode_data[(ell, em)] = filler
            LM.append([ell, em])

    times = np.arange(t_min, t_max + 0.5 * dt, dt)
    data = np.empty((len(times), len(LM)), dtype=complex)
    for idx, (ell, em) in enumerate(LM):
        samples = mode_data[(ell, em)]
        mode_time, mode_real, mode_imag = (
            samples[:, 0],
            samples[:, 1],
            samples[:, 2],
        )
        if verbosity > 5:
            print(f"Interpolating mode {ell}, {em}. Data length: {len(mode_time)}")
        mode_real_interp = InterpolatedUnivariateSpline(mode_time, mode_real)
        mode_imag_interp = InterpolatedUnivariateSpline(mode_time, mode_imag)
        data[:, idx] = mode_real_interp(times) + 1j * mode_imag_interp(times)

    # Imported where it is first needed, so that the input checks above do
    # not depend on it.
    import quaternionic

    # NOTE(review): the data type is fixed to `h` (defined outside this
    # view), although the catalog's `get` also uses this loader as the
    # fallback for psi4 files -- confirm this is intended.
    data_type = h
    w_attributes = {
        "metadata": metadata,
        "history": "",
        "frame": quaternionic.array([[1.0, 0.0, 0.0, 0.0]]),
        "frame_type": "inertial",
        "spin_weight": translate_data_type_to_spin_weight(data_type),
        "data_type": translate_data_type_to_sxs_string(data_type),
        "r_is_scaled_out": True,
        "m_is_scaled_out": True,
    }

    # NOTE(review): `data` holds one column for every mode from ELL_MIN to
    # ELL_MAX, while `ell_min`/`ell_max` are the extremes found in the
    # archive -- confirm that the constructor accepts a mismatch.
    return cls(
        data,
        time=times,
        time_axis=0,
        modes_axis=1,
        ell_min=ell_min,
        ell_max=ell_max,
        verbosity=verbosity,
        **w_attributes,
    )

@property
def filepath(self):
"""Return the data file path"""

0 comments on commit 718cfad

Please sign in to comment.