From cd542dd35d07c2287c7c156c5ed05374a9c88bd7 Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Mon, 14 Aug 2023 16:51:57 +0100 Subject: [PATCH 1/8] Adding some very initial scaffolding, this doesn't work yet --- swiftsimio/reader.py | 138 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index aa4e5407..04c6e9d0 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -25,7 +25,7 @@ from datetime import datetime from typing import Union, Callable, List - +from pathlib import Path class MassTable(object): """ @@ -1019,6 +1019,7 @@ def generate_getter( ): """ Generates a function that: + ORIGINALLY WE WOULD DO THIS: a) If self._`name` exists, return it b) If not, open `filename` @@ -1026,6 +1027,13 @@ def generate_getter( d) Set self._`name` e) Return self._`name`. + + BUT NOW WE WANT TO DO THIS: + a) If self._`name` exists, return it + b) If not, send a request to _the server_ + c) Receive a response from the server + d) Set self._`name` + e) Return self._`name`. Parameters ---------- @@ -1575,3 +1583,131 @@ def create_particle_datasets(self): ) return + + +class RemoteSWIFTDataset(object): + """ + A collection object for: + + + SWIFTUnits, + + SWIFTMetadata, + + SWIFTParticleDataset + + ...when requested remotely from the server. + + This object, in essence, completely represents a SWIFT snapshot. You can access + the different particles as follows: + + + SWIFTDataset.gas.particle_ids + + SWIFTDataset.dark_matter.masses + + SWIFTDataset.gas.smoothing_lengths + + These arrays all have units that are determined by the unit system in the file. + + The unit system is available as SWIFTDataset.units and the metadata as + SWIFTDataset.metadata. + + Methods + ------- + def get_units(self): + Loads the units from the SWIFT snapshot. + def get_metadata(self): + Loads the metadata from the SWIFT snapshot. + def create_particle_datasets(self): + Creates particle datasets for whatever particle types and names + are specified in metadata.particle_types. + """ + + def __init__(self, server_address, credentials, simulation_alias, local_file, mask=None): + """ + Constructor for SWIFTDataset class + + Parameters + ---------- + server_address : str + URL of API serving HDF5 files. + credentials : SWIFTCredentials + Server access credentials. + simulation_alias : str + The aliased name of a particular simulation. + local_file : Path + Path to local file containing snapshot + mask : np.ndarray, optional + mask object containing dataset to selected particles + """ + self.server_address = server_address + self.credentials = credentials + self.simulation_alias = simulation_alias + self.filename = local_file + self.mask = mask + + if mask is not None: + self.mask.convert_masks_to_ranges() + + self.get_units() + self.get_metadata() + self.create_particle_datasets() + + return + + def __str__(self): + """ + Prints out some more useful information, rather than just + the memory location. + """ + + return f"SWIFT dataset at {self.filename}." + + def __repr__(self): + return self.__str__() + + def get_units(self): + """ + Requests units from the SWIFT snapshot on the server side. + + Ordinarily this happens automatically, but you can call + this function again if you mess things up. 
+ """ + + # run some API call here with server address and credentials + # server-side processing returns SWIFTUnits + + self.units = SWIFTUnits(self.filename) + + return + + def get_metadata(self): + """ + Requests metadata from the SWIFT snapshot on the server side. + + Ordinarily this happens automatically, but you can call + this function again if you mess things up. + """ + # run some API call here with server address and credentials + # server-side processing returns SWIFTMetadata + + self.metadata = SWIFTMetadata(self.filename, self.units) + + return + + def create_particle_datasets(self): + """ + Creates particle datasets for whatever particle types and names + are specified in metadata.particle_types. + + These can then be accessed using their underscore names, e.g. gas. + """ + + if not hasattr(self, "metadata"): + self.get_metadata() + + for particle_name in self.metadata.present_particle_names: + setattr( + self, + particle_name, + generate_dataset( # DELETE THIS COMMENT: generate_dataset should point to some remote function + getattr(self.metadata, f"{particle_name}_properties"), self.mask + ), + ) + + return \ No newline at end of file From 082ddb3dd87cca8cc2da2630e035d04eb94efd1d Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Thu, 31 Aug 2023 15:36:39 +0100 Subject: [PATCH 2/8] Add support for units from remote server --- pyproject.toml | 1 + swiftsimio/reader.py | 204 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 200 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d6e3b1b6..3abe9923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "h5py", "unyt>=2.9.0", "numba>=0.50.0", + "requests>=2.31.0", ] [project.urls] diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 04c6e9d0..eb60eb27 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -18,8 +18,10 @@ import re import h5py +import json import unyt import numpy as np +import requests import warnings from datetime import datetime @@ -1164,6 +1166,160 @@ def getter(self): return getattr(self, f"_{name}") return getter + +def generate_getter_remote( + server_address: str, + credentials: str, + simulation_alias: str, + name: str, + field: str, + unit: unyt.unyt_quantity, + mask: Union[None, np.ndarray], + mask_size: int, + cosmo_factor: cosmo_factor, + description: str, + compression: str, + columns: Union[None, np.lib.index_tricks.IndexExpression] = None, +): + """ + Generates a function that: + + a) If self._`name` exists, return it + b) If not, send a request to _the server_ + c) Receive a response from the server + d) Set self._`name` + e) Return self._`name`. + Parameters + ---------- + + server_address: str + URI for the API that will serve HDF5 file contents. + + credentials: str + Credentials for the HDF5 API. + + simulation_alias: str + String identifier of HDF5 file that everything will be read from on the server side. + Used to generate the HDF5 dataset. + + name: str + Output name (snake_case) of the field. + + field: str + Full path of field, including e.g. particle type. Examples include + ``/PartType0/Velocities``. + + unit: unyt.unyt_quantity + Output unit of the resultant ``cosmo_array`` + + mask: None or np.ndarray + Mask to be used with ``accelerated.read_ranges_from_file``, i.e. an array of + integers that describe ranges to be read from the file. + + mask_size: int + Size of the mask if present. + + cosmo_factor: cosmo_factor + Cosmology factor object corresponding to this array. 
+ + description: str + Description (read from HDF5 file) of the data. + + compression: str + String describing the lossy compression filters that were applied to the + data (read from the HDF5 file). + + columns: np.lib.index_tricks.IndexEpression, optional + Index expression corresponding to which columns to read from the numpy array. + If not provided, we read all columns and return an n-dimensional array. + + + Returns + ------- + + getter: callable + A callable object that gets the value of the array that has been saved to + ``_name``. This function takes only ``self`` from the + :obj:``__SWIFTParticleDataset`` class. + + + Notes + ----- + + The major use of this function is for its side effect of setting ``_name`` as + a member of the class on first read. When the attribute is accessed, it will + be dynamically read from the file, to keep initial memory usage as minimal + as possible. + + If the resultant array is modified, it will not be re-read from the file. + + """ + + # Must do this _outside_ getter because of weird locality issues with the + # use of None as the default. + # Here, we need to ensure that in the cases where we're using columns, + # during a partial read, that we respect the single-column dataset nature. + use_columns = columns is not None + + if not use_columns: + columns = np.s_[:] + + def getter(self): + current_value = getattr(self, f"_{name}") + + if current_value is not None: + return current_value + else: + # call from server, do most of these operations server-side + try: + if mask is not None: + request_parameters = { + "alias": simulation_alias, + "field": field, + "mask": mask, + "mask_size": mask_size, + "columns": columns, + } + + array_data = requests.get(server_address, credentials, params=request_parameters) + + setattr( + self, + f"_{name}", + cosmo_array( + array_data, + unit, + cosmo_factor=cosmo_factor, + name=description, + compression=compression, + ), + ) + else: + + request_parameters = { + "alias": simulation_alias, + "field": field, + "columns": columns + } + array_data = requests.get(server_address, credentials, params=request_parameters) + setattr( + self, + f"_{name}", + cosmo_array( + array_data, + unit, + cosmo_factor=cosmo_factor, + name=description, + compression=compression, + ), + ) + except KeyError: + print(f"Could not read {field}") + return None + + return getattr(self, f"_{name}") + + return getter def generate_setter(name: str): @@ -1584,6 +1740,8 @@ def create_particle_datasets(self): return +class RemoteSWIFTUnitsException(Exception): + pass class RemoteSWIFTDataset(object): """ @@ -1638,6 +1796,10 @@ def __init__(self, server_address, credentials, simulation_alias, local_file, ma self.server_address = server_address self.credentials = credentials self.simulation_alias = simulation_alias + + #self.filename = get_filename() + # filename needs to be either the actual file path or an alias + self.filename = local_file self.mask = mask @@ -1649,7 +1811,7 @@ def __init__(self, server_address, credentials, simulation_alias, local_file, ma self.create_particle_datasets() return - + def __str__(self): """ Prints out some more useful information, rather than just @@ -1661,6 +1823,30 @@ def __str__(self): def __repr__(self): return self.__str__() + @staticmethod + def create_unyt_quantities_from_json(input_json: str) -> dict: + swift_unit_dict = json.loads(input_json) + + excluded_fields = ["filename", "units"] + try: + swift_unit_dict["units"] = { + key: unyt.unyt_quantity.from_string(value) + for key, value in 
swift_unit_dict["units"].items() + } + swift_unit_dict = { + key: ( + unyt.unyt_quantity.from_string(value) + if key not in excluded_fields + else value + ) + for key, value in swift_unit_dict.items() + } + except KeyError as error: + message = f"Missing key {error} in units object" + raise RemoteSWIFTUnitsException(message) from error + + return swift_unit_dict + def get_units(self): """ Requests units from the SWIFT snapshot on the server side. @@ -1671,9 +1857,17 @@ def get_units(self): # run some API call here with server address and credentials # server-side processing returns SWIFTUnits - - self.units = SWIFTUnits(self.filename) - + payload = { + "alias": self.simulation_alias, + "filename": self.filename + } + units_response = requests.post( + self.server_address, + json = payload + ) + json_response = units_response.json() + + self.units = RemoteSWIFTDataset.create_unyt_quantities_from_json(json_response) return def get_metadata(self): @@ -1705,7 +1899,7 @@ def create_particle_datasets(self): setattr( self, particle_name, - generate_dataset( # DELETE THIS COMMENT: generate_dataset should point to some remote function + generate_dataset_remote( # DELETE THIS COMMENT: generate_dataset should point to some remote function getattr(self.metadata, f"{particle_name}_properties"), self.mask ), ) From 11886a5217b04c3d5b1601ce1908789c10ccc8be Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Wed, 13 Sep 2023 11:28:07 +0100 Subject: [PATCH 3/8] Updates for reader.py --- swiftsimio/reader.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index eb60eb27..7ad341c4 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -202,6 +202,15 @@ def get_unit_dictionary(self): ) +class RemoteSWIFTUnits: + def __init__(self, unit_dict=None): + if unit_dict is not None: + for key, value in unit_dict.items(): + setattr(self, key, unyt.unyt_quantity.from_string(value)) + if isinstance(unit_dict[key], dict): + for nested_key, nested_value in unit_dict[key].items(): + setattr(self, nested_key, nested_value) + class SWIFTMetadata(object): """ Loads all metadata (apart from Units, those are handled by SWIFTUnits) @@ -1743,7 +1752,7 @@ def create_particle_datasets(self): class RemoteSWIFTUnitsException(Exception): pass -class RemoteSWIFTDataset(object): +class RemoteSWIFTDataset(): """ A collection object for: @@ -1776,7 +1785,7 @@ def create_particle_datasets(self): are specified in metadata.particle_types. """ - def __init__(self, server_address, credentials, simulation_alias, local_file, mask=None): + def __init__(self, server_address, credentials, simulation_alias, filename, mask=None): """ Constructor for SWIFTDataset class @@ -1788,11 +1797,16 @@ def __init__(self, server_address, credentials, simulation_alias, local_file, ma Server access credentials. simulation_alias : str The aliased name of a particular simulation. - local_file : Path - Path to local file containing snapshot + filename : Path + Full path to file containing snapshot mask : np.ndarray, optional mask object containing dataset to selected particles """ + + # Perhaps make an abstract base class that both this and SWIFTDataset are based on + # this concrete implementation can't have the same init as SWIFTDataset as that calls a bunch + # of things that rely on locally available files. 
+ self.server_address = server_address self.credentials = credentials self.simulation_alias = simulation_alias @@ -1800,7 +1814,7 @@ def __init__(self, server_address, credentials, simulation_alias, local_file, ma #self.filename = get_filename() # filename needs to be either the actual file path or an alias - self.filename = local_file + self.filename = filename self.mask = mask if mask is not None: @@ -1862,12 +1876,15 @@ def get_units(self): "filename": self.filename } units_response = requests.post( - self.server_address, + f"{self.server_address}/swiftunits", json = payload ) json_response = units_response.json() - self.units = RemoteSWIFTDataset.create_unyt_quantities_from_json(json_response) + units_dict = RemoteSWIFTDataset.create_unyt_quantities_from_json(json_response) + + self.units = RemoteSWIFTUnits(units_dict) + return def get_metadata(self): @@ -1880,6 +1897,10 @@ def get_metadata(self): # run some API call here with server address and credentials # server-side processing returns SWIFTMetadata + # Again this needs to happen remotely + # We can retrieve a units-like object here and send that through + # or create the units on the server. + self.metadata = SWIFTMetadata(self.filename, self.units) return From ccfe74fac3ea109d296f8083b031910d4440c1ee Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Wed, 20 Sep 2023 17:11:11 +0100 Subject: [PATCH 4/8] Add cloudpickle to dependencies, modify metadata endpoint --- pyproject.toml | 1 + swiftsimio/reader.py | 88 ++++++++++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 36 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3abe9923..c3db34ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "cloudpickle>=2.2.1", "numpy", "h5py", "unyt>=2.9.0", diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 7ad341c4..38aed584 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -16,6 +16,7 @@ from swiftsimio.objects import cosmo_array, cosmo_factor, a from swiftsimio.conversions import swift_cosmology_to_astropy +import cloudpickle import re import h5py import json @@ -29,6 +30,7 @@ from typing import Union, Callable, List from pathlib import Path + class MassTable(object): """ Extracts a mass table to local variables based on the @@ -211,6 +213,7 @@ def __init__(self, unit_dict=None): for nested_key, nested_value in unit_dict[key].items(): setattr(self, nested_key, nested_value) + class SWIFTMetadata(object): """ Loads all metadata (apart from Units, those are handled by SWIFTUnits) @@ -363,12 +366,14 @@ def postprocess_header(self): """ # These are just read straight in to variables - header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, + header_unpack_arrays_units = ( + metadata.metadata_fields.generate_units_header_unpack_arrays( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) ) for field, name in metadata.metadata_fields.header_unpack_arrays.items(): @@ -433,12 +438,14 @@ def postprocess_header(self): # These must be unpacked as they are stored as length-1 arrays - header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, 
+ header_unpack_float_units = ( + metadata.metadata_fields.generate_units_header_unpack_single_float( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) ) for field, names in metadata.metadata_fields.header_unpack_single_float.items(): @@ -894,7 +901,7 @@ def get_units(unit_attribute): # Need to check if the exponent is 0 manually because of float precision unit_exponent = unit_attribute[f"U_{exponent} exponent"][0] if unit_exponent != 0.0: - units *= unit ** unit_exponent + units *= unit**unit_exponent except KeyError: # Can't load that data! # We should probably warn the user here... @@ -967,7 +974,7 @@ def get_cosmo(dataset): # Can't load, 'graceful' fallback. cosmo_exponent = 0.0 - a_factor_this_dataset = a ** cosmo_exponent + a_factor_this_dataset = a**cosmo_exponent return cosmo_factor(a_factor_this_dataset, current_scale_factor) @@ -1175,7 +1182,8 @@ def getter(self): return getattr(self, f"_{name}") return getter - + + def generate_getter_remote( server_address: str, credentials: str, @@ -1203,7 +1211,7 @@ def generate_getter_remote( server_address: str URI for the API that will serve HDF5 file contents. - + credentials: str Credentials for the HDF5 API. @@ -1289,9 +1297,11 @@ def getter(self): "mask_size": mask_size, "columns": columns, } - - array_data = requests.get(server_address, credentials, params=request_parameters) - + + array_data = requests.get( + server_address, credentials, params=request_parameters + ) + setattr( self, f"_{name}", @@ -1308,9 +1318,11 @@ def getter(self): request_parameters = { "alias": simulation_alias, "field": field, - "columns": columns + "columns": columns, } - array_data = requests.get(server_address, credentials, params=request_parameters) + array_data = requests.get( + server_address, credentials, params=request_parameters + ) setattr( self, f"_{name}", @@ -1749,10 +1761,12 @@ def create_particle_datasets(self): return + class RemoteSWIFTUnitsException(Exception): pass -class RemoteSWIFTDataset(): + +class RemoteSWIFTDataset: """ A collection object for: @@ -1785,7 +1799,9 @@ def create_particle_datasets(self): are specified in metadata.particle_types. """ - def __init__(self, server_address, credentials, simulation_alias, filename, mask=None): + def __init__( + self, server_address, credentials, simulation_alias, filename, mask=None + ): """ Constructor for SWIFTDataset class @@ -1802,7 +1818,7 @@ def __init__(self, server_address, credentials, simulation_alias, filename, mask mask : np.ndarray, optional mask object containing dataset to selected particles """ - + # Perhaps make an abstract base class that both this and SWIFTDataset are based on # this concrete implementation can't have the same init as SWIFTDataset as that calls a bunch # of things that rely on locally available files. 
@@ -1811,7 +1827,7 @@ def __init__(self, server_address, credentials, simulation_alias, filename, mask self.credentials = credentials self.simulation_alias = simulation_alias - #self.filename = get_filename() + # self.filename = get_filename() # filename needs to be either the actual file path or an alias self.filename = filename @@ -1825,7 +1841,7 @@ def __init__(self, server_address, credentials, simulation_alias, filename, mask self.create_particle_datasets() return - + def __str__(self): """ Prints out some more useful information, rather than just @@ -1871,16 +1887,12 @@ def get_units(self): # run some API call here with server address and credentials # server-side processing returns SWIFTUnits - payload = { - "alias": self.simulation_alias, - "filename": self.filename - } + payload = {"alias": self.simulation_alias, "filename": self.filename} units_response = requests.post( - f"{self.server_address}/swiftunits", - json = payload + f"{self.server_address}/swiftunits", json=payload ) json_response = units_response.json() - + units_dict = RemoteSWIFTDataset.create_unyt_quantities_from_json(json_response) self.units = RemoteSWIFTUnits(units_dict) @@ -1900,8 +1912,12 @@ def get_metadata(self): # Again this needs to happen remotely # We can retrieve a units-like object here and send that through # or create the units on the server. + payload = {"alias": self.simulation_alias, "filename": self.filename} + metadata_response_pickle = requests.post( + f"{self.server_address}/swiftmetadata", json=payload + ) - self.metadata = SWIFTMetadata(self.filename, self.units) + self.metadata = cloudpickle.loads(metadata_response_pickle) return @@ -1920,9 +1936,9 @@ def create_particle_datasets(self): setattr( self, particle_name, - generate_dataset_remote( # DELETE THIS COMMENT: generate_dataset should point to some remote function + generate_dataset_remote( # DELETE THIS COMMENT: generate_dataset should point to some remote function getattr(self.metadata, f"{particle_name}_properties"), self.mask ), ) - return \ No newline at end of file + return From a5805d426d84e2a7f7093afb45ead1647fe3f732 Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Wed, 20 Sep 2023 21:23:33 +0100 Subject: [PATCH 5/8] Adding remote array request functionality for masked and unmasked data --- swiftsimio/reader.py | 281 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 226 insertions(+), 55 deletions(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 38aed584..2f020646 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -15,6 +15,7 @@ from swiftsimio.accelerated import read_ranges_from_file from swiftsimio.objects import cosmo_array, cosmo_factor, a from swiftsimio.conversions import swift_cosmology_to_astropy +from swiftsimio.masks import SWIFTMask import cloudpickle import re @@ -22,6 +23,7 @@ import json import unyt import numpy as np +import numpy.typing as npt import requests import warnings @@ -31,6 +33,13 @@ from pathlib import Path +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + return json.JSONEncoder.default(self, obj) + + class MassTable(object): """ Extracts a mass table to local variables based on the @@ -1037,7 +1046,6 @@ def generate_getter( ): """ Generates a function that: - ORIGINALLY WE WOULD DO THIS: a) If self._`name` exists, return it b) If not, open `filename` @@ -1045,13 +1053,6 @@ def generate_getter( d) Set self._`name` e) Return self._`name`. 
- - BUT NOW WE WANT TO DO THIS: - a) If self._`name` exists, return it - b) If not, send a request to _the server_ - c) Receive a response from the server - d) Set self._`name` - e) Return self._`name`. Parameters ---------- @@ -1184,10 +1185,27 @@ def getter(self): return getter +def load_ndarray_from_json( + json_array: str, + data_type: str | None, +) -> npt.NDArray: + """Convert JSON to a Numpy NDArray. + + Args: + json_array (str): Numpy array as JSON + data_type (str): Data type of elements in the original array + + Returns + ------- + npt.NDArray: Numpy NDArray object + """ + loaded_json = json.loads(json_array) + return np.asarray(loaded_json, dtype=data_type) + + def generate_getter_remote( server_address: str, - credentials: str, - simulation_alias: str, + filename: str, name: str, field: str, unit: unyt.unyt_quantity, @@ -1212,13 +1230,6 @@ def generate_getter_remote( server_address: str URI for the API that will serve HDF5 file contents. - credentials: str - Credentials for the HDF5 API. - - simulation_alias: str - String identifier of HDF5 file that everything will be read from on the server side. - Used to generate the HDF5 dataset. - name: str Output name (snake_case) of the field. @@ -1287,21 +1298,30 @@ def getter(self): if current_value is not None: return current_value else: - # call from server, do most of these operations server-side try: if mask is not None: - request_parameters = { - "alias": simulation_alias, + payload = { + "filename": filename, "field": field, - "mask": mask, - "mask_size": mask_size, + "mask_array_json": json.dumps(mask, cls=NumpyEncoder), + "mask_size": int(mask_size), "columns": columns, } - array_data = requests.get( - server_address, credentials, params=request_parameters + array_data_response = requests.post( + f"{server_address}/masked_dataset", json=payload ) + if not array_data_response.status_code == 200: + raise RemoteSWIFTDatasetException( + "ERROR: Array data not found following request for masked dataset." + ) + return None + + array_data_json = array_data_response.json() + array_data = np.asarray( + array_data_json["array"], dtype=array_data_json["dtype"] + ) setattr( self, f"_{name}", @@ -1314,15 +1334,27 @@ def getter(self): ), ) else: - - request_parameters = { - "alias": simulation_alias, + payload = { + "filename": filename, "field": field, "columns": columns, } - array_data = requests.get( - server_address, credentials, params=request_parameters + array_data_response = requests.post( + f"{server_address}/unmasked_dataset", json=payload + ) + + if not array_data_response.status_code == 200: + raise RemoteSWIFTDatasetException( + "ERROR: Array data not found following request for unmasked dataset." + ) + return None + + array_data_json = array_data_response.json() + + array_data = np.asarray( + array_data_json["array"], dtype=array_data_json["dtype"] ) + setattr( self, f"_{name}", @@ -1515,7 +1547,7 @@ def __eq__(self, other): return self.named_columns == other.named_columns and self.name == other.name -def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask): +def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask: SWIFTMask): """ Generates a SWIFTParticleDataset _class_ that corresponds to the particle type given. 
@@ -1650,6 +1682,147 @@ def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask): return empty_dataset +def generate_remote_dataset( + particle_metadata: SWIFTParticleTypeMetadata, mask: SWIFTMask, server_address: str +): + """ + Generates a SWIFTParticleDataset _class_ that corresponds to the + particle type given. + + We _must_ do the following _outside_ of the class itself, as one + can assign properties to a _class_ but not _within_ a class + dynamically. + + Here we loop through all of the possible properties in the metadata file. + We then use the builtin property() function and some generators to + create setters and getters for those properties. This will allow them + to be accessed from outside by using SWIFTParticleDataset.name, where + the name is, for example, coordinates. + + Parameters + ---------- + particle_metadata : SWIFTParticleTypeMetadata + the metadata for the particle type + mask : SWIFTMask + the mask object for the dataset + server_address : str + API URL + """ + + filename = particle_metadata.filename + particle_type = particle_metadata.particle_type + particle_name = particle_metadata.particle_name + particle_nice_name = metadata.particle_types.particle_name_class[particle_type] + + # Mask is an object that contains all masks for all possible datasets. + if mask is not None: + mask_array = getattr(mask, particle_name) + mask_size = getattr(mask, f"{particle_name}_size") + else: + mask_array = None + mask_size = -1 + + # Set up an iterator for us to loop over for all fields + field_paths = particle_metadata.field_paths + field_names = particle_metadata.field_names + field_cosmologies = particle_metadata.field_cosmologies + field_units = particle_metadata.field_units + field_descriptions = particle_metadata.field_descriptions + field_compressions = particle_metadata.field_compressions + field_named_columns = particle_metadata.named_columns + + dataset_iterator = zip( + field_paths, + field_names, + field_cosmologies, + field_units, + field_descriptions, + field_compressions, + ) + + # This 'nice' piece of code ensures that our datasets have different _types_ + # for different particle types. We initially fill a dict with the properties that + # we want, and then create a single instance of our class. + + this_dataset_bases = (__SWIFTParticleDataset, object) + this_dataset_dict = {} + + for ( + field_path, + field_name, + field_cosmology, + field_unit, + field_description, + field_compression, + ) in dataset_iterator: + named_columns = field_named_columns[field_path] + + if named_columns is None: + field_property = property( + generate_getter_remote( + server_address, + filename, + field_name, + field_path, + unit=field_unit, + mask=mask_array, + mask_size=mask_size, + cosmo_factor=field_cosmology, + description=field_description, + compression=field_compression, + ), + generate_setter(field_name), + generate_deleter(field_name), + ) + else: + # TODO: Handle this case with recursion. + + # Here we want to create an extra middleman object. So we can do something + # like {ptype}.{ThisNamedColumnDataset}.column_name. This follows from the + # above templating. 
+ + this_named_column_dataset_bases = (__SWIFTNamedColumnDataset, object) + this_named_column_dataset_dict = {} + + for index, column in enumerate(named_columns): + this_named_column_dataset_dict[column] = property( + generate_getter_remote( + server_address, + filename, + column, + field_path, + unit=field_unit, + mask=mask_array, + mask_size=mask_size, + cosmo_factor=field_cosmology, + description=f"{field_description} [Column {index}, {column}]", + compression=field_compression, + columns=np.s_[index], + ), + generate_setter(column), + generate_deleter(column), + ) + + ThisNamedColumnDataset = type( + f"{particle_nice_name}{field_path.split('/')[-1]}Columns", + this_named_column_dataset_bases, + this_named_column_dataset_dict, + ) + + field_property = ThisNamedColumnDataset( + field_path=field_path, named_columns=named_columns, name=field_name + ) + + this_dataset_dict[field_name] = field_property + + ThisDataset = type( + f"{particle_nice_name}Dataset", this_dataset_bases, this_dataset_dict + ) + empty_dataset = ThisDataset(particle_metadata) + + return empty_dataset + + class SWIFTDataset(object): """ A collection object for: @@ -1766,6 +1939,10 @@ class RemoteSWIFTUnitsException(Exception): pass +class RemoteSWIFTDatasetException(Exception): + pass + + class RemoteSWIFTDataset: """ A collection object for: @@ -1799,38 +1976,35 @@ def create_particle_datasets(self): are specified in metadata.particle_types. """ - def __init__( - self, server_address, credentials, simulation_alias, filename, mask=None - ): + def __init__(self, server_address, simulation_alias=None, filename=None, mask=None): """ Constructor for SWIFTDataset class + One of simulation alias or filename should be set when initialising instances of this class. + Parameters ---------- server_address : str - URL of API serving HDF5 files. - credentials : SWIFTCredentials - Server access credentials. - simulation_alias : str + Base URL of API serving HDF5 files. + simulation_alias : str, optional The aliased name of a particular simulation. - filename : Path + filename : str, optional Full path to file containing snapshot - mask : np.ndarray, optional - mask object containing dataset to selected particles + mask : SWIFTMask, optional + Optional SWIFTMask object for data masking """ - # Perhaps make an abstract base class that both this and SWIFTDataset are based on - # this concrete implementation can't have the same init as SWIFTDataset as that calls a bunch - # of things that rely on locally available files. - self.server_address = server_address - self.credentials = credentials self.simulation_alias = simulation_alias + self.filename = filename - # self.filename = get_filename() - # filename needs to be either the actual file path or an alias + if not self.filename and self.simulation_alias: + payload = {"alias": self.simulation_alias, "filename": self.filename} + filepath_response = requests.post( + f"{self.server_address}/filepath", json=payload + ) + self.filename = filepath_response.json() - self.filename = filename self.mask = mask if mask is not None: @@ -1906,12 +2080,7 @@ def get_metadata(self): Ordinarily this happens automatically, but you can call this function again if you mess things up. """ - # run some API call here with server address and credentials - # server-side processing returns SWIFTMetadata - # Again this needs to happen remotely - # We can retrieve a units-like object here and send that through - # or create the units on the server. 
payload = {"alias": self.simulation_alias, "filename": self.filename} metadata_response_pickle = requests.post( f"{self.server_address}/swiftmetadata", json=payload @@ -1936,8 +2105,10 @@ def create_particle_datasets(self): setattr( self, particle_name, - generate_dataset_remote( # DELETE THIS COMMENT: generate_dataset should point to some remote function - getattr(self.metadata, f"{particle_name}_properties"), self.mask + generate_remote_dataset( + getattr(self.metadata, f"{particle_name}_properties"), + self.mask, + remote=True, ), ) From f3f48f8e7fa317aa0bb3d5597cb79cbcbb39628e Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Thu, 19 Oct 2023 13:41:29 +0100 Subject: [PATCH 6/8] Update reader.py to reflect latest changes in the SWIFTsimIO API --- swiftsimio/reader.py | 146 +++++++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 54 deletions(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 2f020646..e92ba5ab 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -15,7 +15,6 @@ from swiftsimio.accelerated import read_ranges_from_file from swiftsimio.objects import cosmo_array, cosmo_factor, a from swiftsimio.conversions import swift_cosmology_to_astropy -from swiftsimio.masks import SWIFTMask import cloudpickle import re @@ -215,12 +214,17 @@ def get_unit_dictionary(self): class RemoteSWIFTUnits: def __init__(self, unit_dict=None): + excluded_fields = ["filename"] if unit_dict is not None: for key, value in unit_dict.items(): - setattr(self, key, unyt.unyt_quantity.from_string(value)) - if isinstance(unit_dict[key], dict): - for nested_key, nested_value in unit_dict[key].items(): - setattr(self, nested_key, nested_value) + if key not in excluded_fields and not isinstance( + value, unyt.unyt_quantity + ): + if isinstance(unit_dict[key], dict): + for nested_key, nested_value in unit_dict[key].items(): + setattr(self, nested_key, nested_value) + else: + setattr(self, key, unyt.unyt_quantity.from_string(value)) class SWIFTMetadata(object): @@ -375,14 +379,12 @@ def postprocess_header(self): """ # These are just read straight in to variables - header_unpack_arrays_units = ( - metadata.metadata_fields.generate_units_header_unpack_arrays( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) + header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, ) for field, name in metadata.metadata_fields.header_unpack_arrays.items(): @@ -447,14 +449,12 @@ def postprocess_header(self): # These must be unpacked as they are stored as length-1 arrays - header_unpack_float_units = ( - metadata.metadata_fields.generate_units_header_unpack_single_float( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) + header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, ) for field, names in metadata.metadata_fields.header_unpack_single_float.items(): @@ -910,7 +910,7 @@ def get_units(unit_attribute): # Need to check if the exponent is 0 manually because of float precision unit_exponent = unit_attribute[f"U_{exponent} exponent"][0] if unit_exponent != 0.0: - units *= unit**unit_exponent + units *= unit ** unit_exponent except KeyError: # 
Can't load that data! # We should probably warn the user here... @@ -983,7 +983,7 @@ def get_cosmo(dataset): # Can't load, 'graceful' fallback. cosmo_exponent = 0.0 - a_factor_this_dataset = a**cosmo_exponent + a_factor_this_dataset = a ** cosmo_exponent return cosmo_factor(a_factor_this_dataset, current_scale_factor) @@ -1185,10 +1185,7 @@ def getter(self): return getter -def load_ndarray_from_json( - json_array: str, - data_type: str | None, -) -> npt.NDArray: +def load_ndarray_from_json(json_array: str, data_type: str | None) -> npt.NDArray: """Convert JSON to a Numpy NDArray. Args: @@ -1205,6 +1202,7 @@ def load_ndarray_from_json( def generate_getter_remote( server_address: str, + session: requests.Session, filename: str, name: str, field: str, @@ -1301,15 +1299,17 @@ def getter(self): try: if mask is not None: payload = { - "filename": filename, - "field": field, - "mask_array_json": json.dumps(mask, cls=NumpyEncoder), - "mask_size": int(mask_size), - "columns": columns, + "data_spec": { + "filename": filename, + "field": field, + "mask_array_json": json.dumps(mask, cls=NumpyEncoder), + "mask_size": int(mask_size), + "columns": columns, + } } - array_data_response = requests.post( - f"{server_address}/masked_dataset", json=payload + array_data_response = session.post( + f"{server_address}/swiftdata/masked_dataset", json=payload ) if not array_data_response.status_code == 200: raise RemoteSWIFTDatasetException( @@ -1335,12 +1335,14 @@ def getter(self): ) else: payload = { - "filename": filename, - "field": field, - "columns": columns, + "data_spec": { + "filename": filename, + "field": field, + "columns": columns, + } } - array_data_response = requests.post( - f"{server_address}/unmasked_dataset", json=payload + array_data_response = session.post( + f"{server_address}/swiftdata/unmasked_dataset", json=payload ) if not array_data_response.status_code == 200: @@ -1547,7 +1549,7 @@ def __eq__(self, other): return self.named_columns == other.named_columns and self.name == other.name -def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask: SWIFTMask): +def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask): """ Generates a SWIFTParticleDataset _class_ that corresponds to the particle type given. @@ -1683,7 +1685,10 @@ def generate_dataset(particle_metadata: SWIFTParticleTypeMetadata, mask: SWIFTMa def generate_remote_dataset( - particle_metadata: SWIFTParticleTypeMetadata, mask: SWIFTMask, server_address: str + particle_metadata: SWIFTParticleTypeMetadata, + mask, + server_address: str, + session: requests.Session, ): """ Generates a SWIFTParticleDataset _class_ that corresponds to the @@ -1707,6 +1712,8 @@ def generate_remote_dataset( the mask object for the dataset server_address : str API URL + session : requests.Session + Session object for persistent HTTP connection """ filename = particle_metadata.filename @@ -1761,6 +1768,7 @@ def generate_remote_dataset( field_property = property( generate_getter_remote( server_address, + session, filename, field_name, field_path, @@ -1788,6 +1796,7 @@ def generate_remote_dataset( this_named_column_dataset_dict[column] = property( generate_getter_remote( server_address, + session, filename, column, field_path, @@ -1976,7 +1985,15 @@ def create_particle_datasets(self): are specified in metadata.particle_types. 
""" - def __init__(self, server_address, simulation_alias=None, filename=None, mask=None): + def __init__( + self, + server_address: str, + username: str, + password: str, + simulation_alias=None, + filename=None, + mask=None, + ): """ Constructor for SWIFTDataset class @@ -1986,6 +2003,10 @@ def __init__(self, server_address, simulation_alias=None, filename=None, mask=No ---------- server_address : str Base URL of API serving HDF5 files. + username : str + VirgoDB username + password : str + VirgoDB password simulation_alias : str, optional The aliased name of a particular simulation. filename : str, optional @@ -1998,10 +2019,21 @@ def __init__(self, server_address, simulation_alias=None, filename=None, mask=No self.simulation_alias = simulation_alias self.filename = filename + self.session = requests.Session() + token_response = self.session.post( + f"{self.server_address}/token", + json={"username": username, "password": password}, + ) + token = token_response.json()["access_token"] + + self.session.headers.update({"Authorization": f"Bearer {token}"}) + if not self.filename and self.simulation_alias: - payload = {"alias": self.simulation_alias, "filename": self.filename} - filepath_response = requests.post( - f"{self.server_address}/filepath", json=payload + payload = { + "data_spec": {"alias": self.simulation_alias, "filename": self.filename} + } + filepath_response = self.session.post( + f"{self.server_address}/swiftdata/filepath", json=payload ) self.filename = filepath_response.json() @@ -2028,8 +2060,8 @@ def __repr__(self): return self.__str__() @staticmethod - def create_unyt_quantities_from_json(input_json: str) -> dict: - swift_unit_dict = json.loads(input_json) + def create_unyt_quantities_from_json(swift_unit_dict: dict) -> dict: + # swift_unit_dict = json.loads(input_json) excluded_fields = ["filename", "units"] try: @@ -2061,9 +2093,11 @@ def get_units(self): # run some API call here with server address and credentials # server-side processing returns SWIFTUnits - payload = {"alias": self.simulation_alias, "filename": self.filename} - units_response = requests.post( - f"{self.server_address}/swiftunits", json=payload + payload = { + "data_spec": {"alias": self.simulation_alias, "filename": self.filename} + } + units_response = self.session.post( + f"{self.server_address}/swiftdata/units_dict", json=payload ) json_response = units_response.json() @@ -2081,10 +2115,13 @@ def get_metadata(self): this function again if you mess things up. 
""" - payload = {"alias": self.simulation_alias, "filename": self.filename} - metadata_response_pickle = requests.post( - f"{self.server_address}/swiftmetadata", json=payload + payload = { + "data_spec": {"alias": self.simulation_alias, "filename": self.filename} + } + metadata_response = self.session.post( + f"{self.server_address}/swiftdata/metadata", json=payload ) + metadata_response_pickle = metadata_response.content self.metadata = cloudpickle.loads(metadata_response_pickle) @@ -2108,7 +2145,8 @@ def create_particle_datasets(self): generate_remote_dataset( getattr(self.metadata, f"{particle_name}_properties"), self.mask, - remote=True, + self.server_address, + self.session, ), ) From fcb86d4353452e7c4de6f0af1d34b1c11fe24b58 Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Fri, 20 Oct 2023 08:18:56 +0100 Subject: [PATCH 7/8] Remove pipe character for older python compatibility --- swiftsimio/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index b899b497..4f4234fc 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -1223,7 +1223,7 @@ def getter(self): return getter -def load_ndarray_from_json(json_array: str, data_type: str | None) -> npt.NDArray: +def load_ndarray_from_json(json_array: str, data_type: Union[str, None]) -> npt.NDArray: """Convert JSON to a Numpy NDArray. Args: From fca9cd3fe2f426226837efbef40bbb704bd2f71f Mon Sep 17 00:00:00 2001 From: Harry Moss Date: Fri, 20 Oct 2023 08:25:39 +0100 Subject: [PATCH 8/8] Removing comment, adding docstring --- swiftsimio/reader.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 4f4234fc..a4e2e0d0 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -2098,8 +2098,18 @@ def __repr__(self): @staticmethod def create_unyt_quantities_from_json(swift_unit_dict: dict) -> dict: - # swift_unit_dict = json.loads(input_json) + """ + Converts strings to unyt quantities in a Unit object returned by the API. + + + Here we loop through all of the possible properties in the returned Units object. + We exclude the filename and 'units' fields, or anything that is not a string + representation of a quantity with units. + Parameters + ---------- + swift_unit_dict : Dictionary containing quantities with units from the API + """ excluded_fields = ["filename", "units"] try: swift_unit_dict["units"] = {