From 87923d3c781cfed9ccc43b3d9b6b281a571fd7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 2 Jul 2024 17:31:27 +0200 Subject: [PATCH 1/8] Preparation work: Refactoring --- src/IO/HDF5/HDF5IOHandler.cpp | 172 +++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 74 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 124a65559e..eff8eadfbe 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -74,6 +74,26 @@ namespace openPMD } while (0) #endif +constexpr char const *const init_json_shadow_str = &R"( +{ + "dataset": { + "chunks": null + }, + "independent_stores": null +})"[1]; +constexpr char const *dataset_cfg_mask = &R"( +{ + "dataset": { + "chunks": null, + "permanent_filters": null + } +} +)"[1]; +constexpr char const *const flush_cfg_mask = &R"( +{ + "independent_stores": null +})"[1]; + HDF5IOHandlerImpl::HDF5IOHandlerImpl( AbstractIOHandler *handler, json::TracingJSON config, @@ -149,23 +169,6 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_config = config["hdf5"]; { - constexpr char const *const init_json_shadow_str = R"( - { - "dataset": { - "chunks": null - }, - "independent_stores": null - })"; - constexpr char const *const dataset_cfg_mask = R"( - { - "dataset": { - "chunks": null - } - })"; - constexpr char const *const flush_cfg_mask = R"( - { - "independent_stores": null - })"; m_global_dataset_config = m_config.json(); json::filterByTemplate( m_global_dataset_config, @@ -460,72 +463,28 @@ void HDF5IOHandlerImpl::createPath( "creation"); } -void HDF5IOHandlerImpl::createDataset( - Writable *writable, Parameter const &parameters) +namespace { - if (access::readOnly(m_handler->m_backendAccess)) - throw std::runtime_error( - "[HDF5] Creating a dataset in a file opened as read only is not " - "possible."); - - if (parameters.joinedDimension.has_value()) + using chunking_t = std::vector; + struct DatasetParams { - 
error::throwOperationUnsupportedInBackend( - "HDF5", "Joined Arrays currently only supported in ADIOS2"); - } - - if (!writable->written) + std::optional chunking; + bool resizable = false; + }; + + auto parse_dataset_config( + json::TracingJSON &config, + std::vector const &dims, + Datatype const d) -> DatasetParams { - /* Sanitize name */ - std::string name = parameters.name; - if (auxiliary::starts_with(name, '/')) - name = auxiliary::replace_first(name, "/", ""); - if (auxiliary::ends_with(name, '/')) - name = auxiliary::replace_last(name, "/", ""); - - std::vector dims; - std::uint64_t num_elements = 1u; - for (auto const &val : parameters.extent) - { - dims.push_back(static_cast(val)); - num_elements *= val; - } - - Datatype d = parameters.dtype; - if (d == Datatype::UNDEFINED) - { - // TODO handle unknown dtype - std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " - "creation (serial HDF5)" - << std::endl; - d = Datatype::BOOL; - } - - json::TracingJSON config = [&]() { - auto parsed_config = json::parseOptions( - parameters.options, /* considerFiles = */ false); - if (auto hdf5_config_it = parsed_config.config.find("hdf5"); - hdf5_config_it != parsed_config.config.end()) - { - auto copy = m_global_dataset_config; - json::merge(copy, hdf5_config_it.value()); - hdf5_config_it.value() = std::move(copy); - } - else - { - parsed_config.config["hdf5"] = m_global_dataset_config; - } - return parsed_config; - }(); + DatasetParams res; // general - bool is_resizable_dataset = false; if (config.json().contains("resizable")) { - is_resizable_dataset = config["resizable"].json().get(); + res.resizable = config["resizable"].json().get(); } - using chunking_t = std::vector; using compute_chunking_t = std::variant; @@ -609,6 +568,71 @@ void HDF5IOHandlerImpl::createDataset( }}, std::move(compute_chunking)); + return res; + } +} // namespace + +void HDF5IOHandlerImpl::createDataset( + Writable *writable, Parameter const &parameters) +{ + if 
(access::readOnly(m_handler->m_backendAccess)) + throw std::runtime_error( + "[HDF5] Creating a dataset in a file opened as read only is not " + "possible."); + + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "HDF5", "Joined Arrays currently only supported in ADIOS2"); + } + + if (!writable->written) + { + /* Sanitize name */ + std::string name = parameters.name; + if (auxiliary::starts_with(name, '/')) + name = auxiliary::replace_first(name, "/", ""); + if (auxiliary::ends_with(name, '/')) + name = auxiliary::replace_last(name, "/", ""); + + std::vector dims; + std::uint64_t num_elements = 1u; + for (auto const &val : parameters.extent) + { + dims.push_back(static_cast(val)); + num_elements *= val; + } + + Datatype d = parameters.dtype; + if (d == Datatype::UNDEFINED) + { + // TODO handle unknown dtype + std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " + "creation (serial HDF5)" + << std::endl; + d = Datatype::BOOL; + } + + json::TracingJSON config = [&]() { + auto parsed_config = json::parseOptions( + parameters.options, /* considerFiles = */ false); + if (auto hdf5_config_it = parsed_config.config.find("hdf5"); + hdf5_config_it != parsed_config.config.end()) + { + auto copy = m_global_dataset_config; + json::merge(copy, hdf5_config_it.value()); + hdf5_config_it.value() = std::move(copy); + } + else + { + parsed_config.config["hdf5"] = m_global_dataset_config; + } + return parsed_config; + }(); + + auto [chunking, is_resizable_dataset] = + parse_dataset_config(config, dims, d); + parameters.warnUnusedParameters( config, "hdf5", From e146fa6553dac9f7d8b06e10cfb87a06d2d5ff2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 2 Jul 2024 18:55:10 +0200 Subject: [PATCH 2/8] Basic compression/filtering in HDF5 --- examples/7_extended_write_serial.cpp | 12 ++- src/IO/HDF5/HDF5IOHandler.cpp | 120 +++++++++++++++++++++++++-- 2 files changed, 124 insertions(+), 8 deletions(-) diff 
--git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 63d9cb38ff..d9e13aa38c 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -144,7 +144,17 @@ int main() d = io::Dataset(dtype, mpiDims); electrons["positionOffset"]["x"].resetDataset(d); - auto dset = io::Dataset(io::determineDatatype(), {2}); + auto dset = io::Dataset( + io::determineDatatype(), + {2}, + R"( + hdf5.dataset.chunks = "auto" + + hdf5.dataset.permanent_filters = [ + {type = "zlib", aggression = 5}, + 2 + ] + )"); electrons.particlePatches["numParticles"].resetDataset(dset); electrons.particlePatches["numParticlesOffset"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index eff8eadfbe..b865ff2b06 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -26,6 +26,7 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" +#include #include #include #include @@ -468,8 +469,19 @@ namespace using chunking_t = std::vector; struct DatasetParams { + struct Zlib + { + unsigned aggression = 1; + }; + using filter_t = std::variant< + // generic + H5Z_filter_t, + // H5Pset_deflate + Zlib>; + std::optional chunking; bool resizable = false; + std::vector filters; }; auto parse_dataset_config( @@ -504,6 +516,19 @@ namespace } }; + auto filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + "Must be either a scalar filter or a vector of filters, " + "where a filter is either an integer ID for the filter or " + "a JSON object identifying a builtin filter."); + }; + auto builtin_filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + R"(A builtin filter is a JSON object with mandatory string type key "type". 
The only supported filter is currently "zlib", which optionally takes an unsigned integer type key "aggression" (default value 1).)"); + }; + compute_chunking_t compute_chunking = auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); @@ -542,8 +567,75 @@ namespace throw_chunking_error(); } } + + if (datasetConfig.json().contains("permanent_filters")) + { + auto parse_filter = + [&filter_error, &builtin_filter_error]( + auto &filter_config, + auto &&json_accessor) -> DatasetParams::filter_t { + if (json_accessor(filter_config).is_number_integer()) + { + return json_accessor(filter_config) + .template get(); + } + else if (json_accessor(filter_config).is_object()) + { + if (!json_accessor(filter_config).contains("type")) + { + throw builtin_filter_error(); + } + if (auto const &type_config = + json::asLowerCaseStringDynamic( + json_accessor(filter_config["type"])); + !type_config.has_value() || *type_config != "zlib") + { + throw builtin_filter_error(); + } + + DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw builtin_filter_error(); + } + zlib.aggression = + aggression_config.template get(); + } + return zlib; + } + else + { + throw filter_error(); + } + }; + auto permanent_filters = datasetConfig["permanent_filters"]; + if (permanent_filters.json().is_array()) + { + permanent_filters.declareFullyRead(); + res.filters.reserve(permanent_filters.json().size()); + for (auto const &entry : permanent_filters.json()) + { + res.filters.push_back(parse_filter( + entry, [](auto const &j) -> nlohmann::json const & { + return j; + })); + } + } + else + { + res.filters = {parse_filter( + permanent_filters, + [](auto &&j) -> nlohmann::json const & { + return j.json(); + })}; + } + } } - std::optional chunking = std::visit( + res.chunking = std::visit( auxiliary::overloaded{ [&](chunking_t 
&&explicitly_specified) -> std::optional { @@ -630,7 +722,7 @@ void HDF5IOHandlerImpl::createDataset( return parsed_config; }(); - auto [chunking, is_resizable_dataset] = + auto [chunking, is_resizable_dataset, filters] = parse_dataset_config(config, dims, d); parameters.warnUnusedParameters( @@ -755,11 +847,25 @@ void HDF5IOHandlerImpl::createDataset( } } - std::string const &compression = ""; // @todo read from JSON - if (!compression.empty()) - std::cerr - << "[HDF5] Compression not yet implemented in HDF5 backend." - << std::endl; + for (auto const &filter : filters) + { + herr_t status = std::visit( + auxiliary::overloaded{ + [&](H5Z_filter_t filter_id) { + return H5Pset_filter( + datasetCreationProperty, filter_id, 0, 0, nullptr); + }, + [&](DatasetParams::Zlib const &zlib) { + return H5Pset_deflate( + datasetCreationProperty, zlib.aggression); + }}, + filter); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to set filter during dataset " + "creation"); + } + /* { std::vector< std::string > args = auxiliary::split(compression, From 1c30b10de20b49a308cdd8bb55ad8a2f1254f0a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 11:18:41 +0200 Subject: [PATCH 3/8] Configure generic filters via JSON object --- examples/7_extended_write_serial.cpp | 2 +- src/IO/HDF5/HDF5IOHandler.cpp | 205 +++++++++++++++++++-------- 2 files changed, 148 insertions(+), 59 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index d9e13aa38c..36376961e6 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -152,7 +152,7 @@ int main() hdf5.dataset.permanent_filters = [ {type = "zlib", aggression = 5}, - 2 + {id = "shuffle"} ] )"); electrons.particlePatches["numParticles"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index b865ff2b06..92480a69f3 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ 
b/src/IO/HDF5/HDF5IOHandler.cpp @@ -469,13 +469,19 @@ namespace using chunking_t = std::vector; struct DatasetParams { + struct ByID + { + H5Z_filter_t id = 0; + unsigned int flags = 0; + std::vector c_values; + }; struct Zlib { unsigned aggression = 1; }; using filter_t = std::variant< // generic - H5Z_filter_t, + ByID, // H5Pset_deflate Zlib>; @@ -484,6 +490,140 @@ namespace std::vector filters; }; + template + auto parse_filter(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::filter_t + { + auto filter_error = []() { + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters"}, + "Must be either a JSON object or a vector of JSON objects."); + }; + if (!json_accessor(filter_config).is_object()) + { + throw filter_error(); + } + + enum class filter_type + { + ByID, + Zlib + }; + + filter_type type = [&]() -> filter_type { + if (json_accessor(filter_config).contains("type")) + { + auto res = json::asLowerCaseStringDynamic( + json_accessor(filter_config["type"])); + if (!res.has_value()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + "Must be of type string."); + } + using pair_t = std::pair; + std::array filter_types{ + pair_t{"by_id", filter_type::ByID}, + pair_t{"zlib", filter_type::Zlib}}; + for (auto const &[key, res_type] : filter_types) + { + if (*res == key) + { + return res_type; + } + } + std::stringstream error; + error << "Must be one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "type"}, + error.str()); + } + else + { + return filter_type::ByID; + } + }(); + + switch (type) + { + case filter_type::ByID: { + DatasetParams::ByID byID; + if (!json_accessor(filter_config).contains("id")) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + "Required key for selecting a filter by ID."); + } + 
byID.id = [&]() -> H5Z_filter_t { + auto const &id_config = json_accessor(filter_config["id"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"deflate", H5Z_FILTER_DEFLATE}, + pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, + pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, + pair_t{"szip", H5Z_FILTER_SZIP}, + pair_t{"nbit", H5Z_FILTER_NBIT}, + pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; + auto id_error = [&]() { + std::stringstream error; + error + << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + error.str()); + }; + if (id_config.is_number_integer()) + { + return id_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(id_config); + if (!maybe_string.has_value()) + { + throw id_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw id_error(); + }(); + return byID; + } + break; + case filter_type::Zlib: { + DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "aggression"}, + "Must be of unsigned integer type."); + } + zlib.aggression = aggression_config.template get(); + } + return zlib; + } + break; + } + throw std::runtime_error("Unreachable!"); + } + auto parse_dataset_config( json::TracingJSON &config, std::vector const &dims, @@ -516,19 +656,6 @@ namespace } }; - auto filter_error = []() { - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters"}, - "Must be either a scalar filter or a vector of filters, " - "where a filter is either an integer ID for the filter or " - "a JSON object 
identifying a builtin filter."); - }; - auto builtin_filter_error = []() { - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters"}, - R"(A builtin filter is a JSON object with mandatory string type key "type". The only supported filter is currently "zlib", which optionally takes an unsigned integer type key "aggression" (default value 1).)"); - }; - compute_chunking_t compute_chunking = auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); @@ -570,48 +697,6 @@ namespace if (datasetConfig.json().contains("permanent_filters")) { - auto parse_filter = - [&filter_error, &builtin_filter_error]( - auto &filter_config, - auto &&json_accessor) -> DatasetParams::filter_t { - if (json_accessor(filter_config).is_number_integer()) - { - return json_accessor(filter_config) - .template get(); - } - else if (json_accessor(filter_config).is_object()) - { - if (!json_accessor(filter_config).contains("type")) - { - throw builtin_filter_error(); - } - if (auto const &type_config = - json::asLowerCaseStringDynamic( - json_accessor(filter_config["type"])); - !type_config.has_value() || *type_config != "zlib") - { - throw builtin_filter_error(); - } - - DatasetParams::Zlib zlib; - if (json_accessor(filter_config).contains("aggression")) - { - auto const &aggression_config = - json_accessor(filter_config["aggression"]); - if (!aggression_config.is_number_integer()) - { - throw builtin_filter_error(); - } - zlib.aggression = - aggression_config.template get(); - } - return zlib; - } - else - { - throw filter_error(); - } - }; auto permanent_filters = datasetConfig["permanent_filters"]; if (permanent_filters.json().is_array()) { @@ -851,9 +936,13 @@ void HDF5IOHandlerImpl::createDataset( { herr_t status = std::visit( auxiliary::overloaded{ - [&](H5Z_filter_t filter_id) { + [&](DatasetParams::ByID const &by_id) { return H5Pset_filter( - datasetCreationProperty, filter_id, 0, 0, nullptr); + datasetCreationProperty, + by_id.id, + by_id.flags, + by_id.c_values.size(), 
+ by_id.c_values.data()); }, [&](DatasetParams::Zlib const &zlib) { return H5Pset_deflate( From 07f165e74b94bd16bd9f08100cf707420d31f0ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 11:40:46 +0200 Subject: [PATCH 4/8] Full support for the set_filter API --- examples/7_extended_write_serial.cpp | 11 ++++- src/IO/HDF5/HDF5IOHandler.cpp | 60 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 36376961e6..1223a540d7 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -118,6 +118,15 @@ int main() } ] } + }, + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": { + "id": "fletcher32", + "flags": "optional" + } + } } })END"; d.options = datasetConfig; @@ -152,7 +161,7 @@ int main() hdf5.dataset.permanent_filters = [ {type = "zlib", aggression = 5}, - {id = "shuffle"} + {id = "shuffle", "flags" = "MANDATORY"} ] )"); electrons.particlePatches["numParticles"].resetDataset(dset); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 92480a69f3..c8361865a7 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -27,6 +27,7 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" #include +#include #include #include #include @@ -600,6 +601,65 @@ namespace } throw id_error(); }(); + byID.flags = [&]() -> unsigned int { + if (!json_accessor(filter_config).contains("flags")) + { + return 0; + } + auto const &flag_config = json_accessor(filter_config["flags"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"optional", H5Z_FLAG_OPTIONAL}, + pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; + auto flag_error = [&]() { + std::stringstream error; + error + << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" 
<< pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "flags"}, + error.str()); + }; + if (flag_config.is_number_integer()) + { + return flag_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(flag_config); + if (!maybe_string.has_value()) + { + throw flag_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw flag_error(); + }(); + if (json_accessor(filter_config).contains("c_values")) + { + auto const &c_values_config = + json_accessor(filter_config["c_values"]); + try + { + + byID.c_values = + c_values_config + .template get>(); + } + catch (nlohmann::json::type_error const &) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "c_values"}, + "Must be an array of unsigned integers."); + } + } return byID; } break; From c5f11e75b3a6fbb12d832668aa31a3881b7642d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:16:33 +0200 Subject: [PATCH 5/8] Fix: captured structured bindings are a C++20 extension --- src/IO/HDF5/HDF5IOHandler.cpp | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index c8361865a7..c72bf5323e 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -954,25 +954,27 @@ void HDF5IOHandlerImpl::createDataset( { if (chunking->size() != parameters.extent.size()) { - std::string chunking_printed = [&]() { - if (chunking->empty()) - { - return std::string("[]"); - } - else - { - std::stringstream s; - auto it = chunking->begin(); - auto end = chunking->end(); - s << '[' << *it++; - for (; it != end; ++it) + // captured structured bindings are a C++20 extension + std::string chunking_printed = + [&, &captured_chunking = chunking]() { + if (captured_chunking->empty()) { - s << 
", " << *it; + return std::string("[]"); } - s << ']'; - return s.str(); - } - }(); + else + { + std::stringstream s; + auto it = captured_chunking->begin(); + auto end = captured_chunking->end(); + s << '[' << *it++; + for (; it != end; ++it) + { + s << ", " << *it; + } + s << ']'; + return s.str(); + } + }(); std::cerr << "[HDF5] Chunking for dataset '" << name << "' was specified as " << chunking_printed << ", but dataset has dimensionality " From a982ec56b83f821fcced1d7fa2fd747c74d4570c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:26:00 +0200 Subject: [PATCH 6/8] Refactoring to satisfy the Github bot --- src/IO/HDF5/HDF5IOHandler.cpp | 263 +++++++++++++++++----------------- 1 file changed, 135 insertions(+), 128 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index c72bf5323e..72ce883a60 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -491,6 +491,137 @@ namespace std::vector filters; }; + template + auto parse_filter_by_id(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::ByID + { + DatasetParams::ByID byID; + if (!json_accessor(filter_config).contains("id")) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + "Required key for selecting a filter by ID."); + } + byID.id = [&]() -> H5Z_filter_t { + auto const &id_config = json_accessor(filter_config["id"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"deflate", H5Z_FILTER_DEFLATE}, + pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, + pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, + pair_t{"szip", H5Z_FILTER_SZIP}, + pair_t{"nbit", H5Z_FILTER_NBIT}, + pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; + auto id_error = [&]() { + std::stringstream error; + error << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return 
error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "id"}, + error.str()); + }; + if (id_config.is_number_integer()) + { + return id_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(id_config); + if (!maybe_string.has_value()) + { + throw id_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw id_error(); + }(); + byID.flags = [&]() -> unsigned int { + if (!json_accessor(filter_config).contains("flags")) + { + return 0; + } + auto const &flag_config = json_accessor(filter_config["flags"]); + using pair_t = std::pair; + std::array filter_types{ + pair_t{"optional", H5Z_FLAG_OPTIONAL}, + pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; + auto flag_error = [&]() { + std::stringstream error; + error << "Must be either of unsigned integer type or one of:"; + for (auto const &pair : filter_types) + { + error << " '" << pair.first << "'"; + } + error << "."; + return error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "flags"}, + error.str()); + }; + if (flag_config.is_number_integer()) + { + return flag_config.template get(); + } + auto maybe_string = json::asLowerCaseStringDynamic(flag_config); + if (!maybe_string.has_value()) + { + throw flag_error(); + } + for (auto const &[key, res_type] : filter_types) + { + if (*maybe_string == key) + { + return res_type; + } + } + throw flag_error(); + }(); + if (json_accessor(filter_config).contains("c_values")) + { + auto const &c_values_config = + json_accessor(filter_config["c_values"]); + try + { + + byID.c_values = + c_values_config.template get>(); + } + catch (nlohmann::json::type_error const &) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "c_values"}, + "Must be an array of unsigned integers."); + } + } + return byID; + } + + template + auto parse_filter_zlib(JSON &filter_config, Accessor &&json_accessor) + -> DatasetParams::Zlib + { + 
DatasetParams::Zlib zlib; + if (json_accessor(filter_config).contains("aggression")) + { + auto const &aggression_config = + json_accessor(filter_config["aggression"]); + if (!aggression_config.is_number_integer()) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "permanent_filters", "aggression"}, + "Must be of unsigned integer type."); + } + zlib.aggression = aggression_config.template get(); + } + return zlib; + } + template auto parse_filter(JSON &filter_config, Accessor &&json_accessor) -> DatasetParams::filter_t @@ -552,134 +683,10 @@ namespace switch (type) { - case filter_type::ByID: { - DatasetParams::ByID byID; - if (!json_accessor(filter_config).contains("id")) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "id"}, - "Required key for selecting a filter by ID."); - } - byID.id = [&]() -> H5Z_filter_t { - auto const &id_config = json_accessor(filter_config["id"]); - using pair_t = std::pair; - std::array filter_types{ - pair_t{"deflate", H5Z_FILTER_DEFLATE}, - pair_t{"shuffle", H5Z_FILTER_SHUFFLE}, - pair_t{"fletcher32", H5Z_FILTER_FLETCHER32}, - pair_t{"szip", H5Z_FILTER_SZIP}, - pair_t{"nbit", H5Z_FILTER_NBIT}, - pair_t{"scaleoffset", H5Z_FILTER_SCALEOFFSET}}; - auto id_error = [&]() { - std::stringstream error; - error - << "Must be either of unsigned integer type or one of:"; - for (auto const &pair : filter_types) - { - error << " '" << pair.first << "'"; - } - error << "."; - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "id"}, - error.str()); - }; - if (id_config.is_number_integer()) - { - return id_config.template get(); - } - auto maybe_string = json::asLowerCaseStringDynamic(id_config); - if (!maybe_string.has_value()) - { - throw id_error(); - } - for (auto const &[key, res_type] : filter_types) - { - if (*maybe_string == key) - { - return res_type; - } - } - throw id_error(); - }(); - byID.flags = [&]() -> unsigned int { - if 
(!json_accessor(filter_config).contains("flags")) - { - return 0; - } - auto const &flag_config = json_accessor(filter_config["flags"]); - using pair_t = std::pair; - std::array filter_types{ - pair_t{"optional", H5Z_FLAG_OPTIONAL}, - pair_t{"mandatory", H5Z_FLAG_MANDATORY}}; - auto flag_error = [&]() { - std::stringstream error; - error - << "Must be either of unsigned integer type or one of:"; - for (auto const &pair : filter_types) - { - error << " '" << pair.first << "'"; - } - error << "."; - return error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "flags"}, - error.str()); - }; - if (flag_config.is_number_integer()) - { - return flag_config.template get(); - } - auto maybe_string = json::asLowerCaseStringDynamic(flag_config); - if (!maybe_string.has_value()) - { - throw flag_error(); - } - for (auto const &[key, res_type] : filter_types) - { - if (*maybe_string == key) - { - return res_type; - } - } - throw flag_error(); - }(); - if (json_accessor(filter_config).contains("c_values")) - { - auto const &c_values_config = - json_accessor(filter_config["c_values"]); - try - { - - byID.c_values = - c_values_config - .template get>(); - } - catch (nlohmann::json::type_error const &) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "c_values"}, - "Must be an array of unsigned integers."); - } - } - return byID; - } - break; - case filter_type::Zlib: { - DatasetParams::Zlib zlib; - if (json_accessor(filter_config).contains("aggression")) - { - auto const &aggression_config = - json_accessor(filter_config["aggression"]); - if (!aggression_config.is_number_integer()) - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "permanent_filters", "aggression"}, - "Must be of unsigned integer type."); - } - zlib.aggression = aggression_config.template get(); - } - return zlib; - } - break; + case filter_type::ByID: + return parse_filter_by_id(filter_config, json_accessor); + case filter_type::Zlib: + return 
parse_filter_zlib(filter_config, json_accessor); } throw std::runtime_error("Unreachable!"); } From 875beff6cfa79de48900d31bdc63e11be4cb9b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 12:29:06 +0200 Subject: [PATCH 7/8] Fix includes --- src/IO/HDF5/HDF5IOHandler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 72ce883a60..40fd9c113b 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -26,8 +26,6 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" -#include -#include #include #include #include From 9f2b4a9b248cc185336de4a12b63c5e085975ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 3 Jul 2024 14:07:30 +0200 Subject: [PATCH 8/8] Switch to JSON config for NVidia compiler's benefit --- examples/7_extended_write_serial.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/7_extended_write_serial.cpp b/examples/7_extended_write_serial.cpp index 1223a540d7..579cc8b8e9 100644 --- a/examples/7_extended_write_serial.cpp +++ b/examples/7_extended_write_serial.cpp @@ -157,13 +157,23 @@ int main() io::determineDatatype(), {2}, R"( - hdf5.dataset.chunks = "auto" - - hdf5.dataset.permanent_filters = [ - {type = "zlib", aggression = 5}, - {id = "shuffle", "flags" = "MANDATORY"} - ] - )"); + { + "hdf5": { + "dataset": { + "chunks": "auto", + "permanent_filters": [ + { + "aggression": 5, + "type": "zlib" + }, + { + "flags": "MANDATORY", + "id": "shuffle" + } + ] + } + } + })"); electrons.particlePatches["numParticles"].resetDataset(dset); electrons.particlePatches["numParticlesOffset"].resetDataset(dset);