From beb8de9be108f330913058168923fb73e2f18b47 Mon Sep 17 00:00:00 2001 From: Thomas Hahn Date: Mon, 22 Apr 2024 09:58:23 -0400 Subject: [PATCH] Clean up in h5::array_interface --- c++/h5/array_interface.cpp | 86 ++++++---- c++/h5/array_interface.hpp | 83 +++++----- c++/h5/scalar.hpp | 18 +-- c++/h5/stl/array.hpp | 30 ++-- c++/h5/stl/vector.hpp | 24 +-- doc/DoxygenLayout.xml | 4 +- doc/ex1.md | 26 +-- doc/groups.dox | 2 +- python/h5/h5py_io.cpp | 44 ++--- test/c++/h5_array_interface.cpp | 274 +++++++++++++++++++++++--------- 10 files changed, 375 insertions(+), 216 deletions(-) diff --git a/c++/h5/array_interface.cpp b/c++/h5/array_interface.cpp index e2dd071f..bcf3eee4 100644 --- a/c++/h5/array_interface.cpp +++ b/c++/h5/array_interface.cpp @@ -38,13 +38,13 @@ namespace h5::array_interface { namespace { // Create an HDF5 memory dataspace. - dataspace make_mem_dspace(h5_array_view const &v) { + dataspace make_mem_dspace(array_view const &v) { // scalar case if (v.rank() == 0) return H5Screate(H5S_SCALAR); - // create a dataspace of rank v.rank() and with shape v.L_tot - dataspace dspace = H5Screate_simple(v.slab.rank(), v.L_tot.data(), nullptr); - if (!dspace.is_valid()) throw std::runtime_error("Error in make_mem_dspace: Creating the dataspace for an h5_array_view failed"); + // create a dataspace of rank v.rank() and with shape v.parent_shape + dataspace dspace = H5Screate_simple(v.slab.rank(), v.parent_shape.data(), nullptr); + if (!dspace.is_valid()) throw std::runtime_error("Error in make_mem_dspace: Creating the dataspace for an array_view failed"); // select the hyperslab according to v.slab herr_t err = H5Sselect_hyperslab(dspace, H5S_SELECT_SET, v.slab.offset.data(), v.slab.stride.data(), v.slab.count.data(), @@ -57,32 +57,52 @@ namespace h5::array_interface { } // namespace - std::pair get_L_tot_and_strides_h5(long const *np_strides, int rank, long view_size) { + std::pair get_parent_shape_and_h5_strides(long const *np_strides, int rank, long view_size) { // scalar case: return empty vectors if (rank == 0) return {}; - // empty nd-array case: return (0,0,0), (1,1,1) + // empty view case: return (0,0,0), (1,1,1) if (view_size == 0) return {v_t(rank, 0), v_t(rank, 1)}; - // general case - v_t Ltot(rank), h5_strides(rank); + // for the general case, we would like to find a parent_shape and h5_strides such that the following equations hold (rank = N): + // 1. np_strides[N - 1] = h5_strides[N - 1] + // 2. np_strides[N - 2] = h5_strides[N - 2] * parent_shape[N - 1] + // 3. np_strides[N - 3] = h5_strides[N - 3] * parent_shape[N - 1] * parent_shape[N - 2] + // ... + // N. np_strides[N - N] = h5_strides[N - N] * parent_shape[N - 1] * parent_shape[N - 2] * ... * parent_shape[1] + // + // note that np_strides[N - i] = m * s[N - 1] * s[N - 2] * ... * s[N - i + 1], where 0 < m < s[N - i] and + // s is the true shape of the parent array + + // initialize h5_strides with the np_strides + v_t parent_shape(rank), h5_strides(rank); for (int u = 0; u < rank; ++u) h5_strides[u] = np_strides[u]; - Ltot[0] = view_size; + // from the above equations it follows that parent_shape[0] (size of the slowest varying dimension) is arbitrary + parent_shape[0] = view_size; + + // to solve the equations, we use the following algorithm: + // - Eq. 1 is already satisfied + // - parent_shape[N - i] = gcd(np_strides[N - i], np_strides[N - i - 1], ..., np_strides[0]) + // - h5_strides[N - 1] = np_strides[N - 1] / parent_shape[N - 1] + // - divide equations i, i+1, ..., N by parent_shape[N - i] + // - repeat until all equations are satisfied + // that way we guarantee that the parent_shape[i] >= s[i] (see note above), which is important to make sure that + // the view elements are not out of bounds when HDF5 selects the hyperslab for (int u = rank - 2; u >= 0; --u) { - // L[u+1] as gcd of size and stride[u] ... stride[0] - hsize_t L = h5_strides[u]; - // L becomes the gcd - for (int v = u - 1; v >= 0; --v) { L = std::gcd(L, h5_strides[v]); } - // divides - for (int v = u; v >= 0; --v) { h5_strides[v] /= L; } - Ltot[u + 1] = L; + // calculate the gcd + hsize_t gcd = h5_strides[u]; + for (int v = u - 1; v >= 0; --v) { gcd = std::gcd(gcd, h5_strides[v]); } + // set partent_shape to the gcd + parent_shape[u + 1] = gcd; + // divide the remaining equations by the gcd + for (int v = u; v >= 0; --v) { h5_strides[v] /= gcd; } } - return {Ltot, h5_strides}; + return {parent_shape, h5_strides}; } - h5_lengths_type get_h5_lengths_type(group g, std::string const &name) { + dataset_info get_dataset_info(group g, std::string const &name) { // open dataset dataset ds = g.open_dataset(name); @@ -97,7 +117,7 @@ namespace h5::array_interface { return {std::move(dims_out), ty, has_complex_attribute}; } - void write(group g, std::string const &name, h5_array_view const &v, bool compress) { + void write(group g, std::string const &name, array_view const &v, bool compress) { // unlink the dataset if it already exists g.unlink(name); @@ -141,15 +161,15 @@ namespace h5::array_interface { if (v.is_complex) h5_write_attribute(ds, "__complex__", "1"); } - void write_slice(group g, std::string const &name, h5_array_view const &v, h5_lengths_type lt, hyperslab sl) { + void write_slice(group g, std::string const &name, array_view const &v, dataset_info ds_info, hyperslab sl) { // empty hyperslab if (sl.empty()) return; // check consistency of input: block shape {1, ..., 1} is assumed if (v.slab.count != sl.count) throw std::runtime_error("Error in h5::array_interface::write_slice: Memory and file slabs are incompatible"); - if (not hdf5_type_equal(v.ty, lt.ty)) + if (not hdf5_type_equal(v.ty, ds_info.ty)) throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty) - + " != " + get_name_of_h5_type(lt.ty)); + + " != " + get_name_of_h5_type(ds_info.ty)); // open existing dataset, get dataspace and select hyperslab dataset ds = g.open_dataset(name); @@ -169,7 +189,7 @@ namespace h5::array_interface { } } - void write_attribute(object obj, std::string const &name, h5_array_view v) { + void write_attribute(object obj, std::string const &name, array_view v) { // check if the attribute already exists if (H5LTfind_attribute(obj, name.c_str()) != 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Attribute " + name + " already exists"); @@ -186,7 +206,7 @@ namespace h5::array_interface { if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Writing to the attribute " + name + " failed"); } - void read(group g, std::string const &name, h5_array_view v, h5_lengths_type lt, hyperslab sl) { + void read(group g, std::string const &name, array_view v, dataset_info ds_info, hyperslab sl) { // open dataset and get dataspace dataset ds = g.open_dataset(name); dataspace file_dspace = H5Dget_space(ds); @@ -199,20 +219,20 @@ namespace h5::array_interface { } // check consistency of input - if (H5Tget_class(v.ty) != H5Tget_class(lt.ty)) + if (H5Tget_class(v.ty) != H5Tget_class(ds_info.ty)) throw std::runtime_error("Error in h5::array_interface::read: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty) - + " != " + get_name_of_h5_type(lt.ty)); + + " != " + get_name_of_h5_type(ds_info.ty)); - if (not hdf5_type_equal(v.ty, lt.ty)) - std::cerr << "WARNING: HDF5 type mismatch while reading into an h5_array_view: " + get_name_of_h5_type(v.ty) - + " != " + get_name_of_h5_type(lt.ty) + "\n"; + if (not hdf5_type_equal(v.ty, ds_info.ty)) + std::cerr << "WARNING: HDF5 type mismatch while reading into an array_view: " + get_name_of_h5_type(v.ty) + + " != " + get_name_of_h5_type(ds_info.ty) + "\n"; - if (lt.rank() != v.rank()) + if (ds_info.rank() != v.rank()) throw std::runtime_error("Error in h5::array_interface::read: Incompatible ranks: " + std::to_string(v.rank()) - + " != " + std::to_string(lt.rank())); + + " != " + std::to_string(ds_info.rank())); // block shape of {1, ..., 1} is assumed - if (sl.empty() and lt.lengths != v.slab.count) throw std::runtime_error("Error in h5::array_interface::read: Incompatible shapes"); + if (sl.empty() and ds_info.lengths != v.slab.count) throw std::runtime_error("Error in h5::array_interface::read: Incompatible shapes"); // memory dataspace dataspace mem_dspace = make_mem_dspace(v); @@ -225,7 +245,7 @@ namespace h5::array_interface { } } - void read_attribute(object obj, std::string const &name, h5_array_view v) { + void read_attribute(object obj, std::string const &name, array_view v) { // open attribute attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT); if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::read_attribute: Opening the attribute " + name + " failed"); diff --git a/c++/h5/array_interface.hpp b/c++/h5/array_interface.hpp index ced6baf7..e5164c72 100644 --- a/c++/h5/array_interface.hpp +++ b/c++/h5/array_interface.hpp @@ -35,25 +35,25 @@ namespace h5::array_interface { * @{ */ - /// Simple struct to store basic information about an n-dimensional array/dataspace. - struct h5_lengths_type { - /// Shape of the array/dataspace. + /// Simple struct to store basic information about an HDF5 dataset. + struct dataset_info { + /// Shape of the dataspace in the dataset. v_t lengths; - /// h5::datatype stored in the array/dataspace. + /// h5::datatype stored in the dataset. datatype ty; /// Whether the stored values are complex. bool has_complex_attribute; - /// Get the rank of the array/dataspace. + /// Get the rank of the dataspace in the dataset. [[nodiscard]] int rank() const { return static_cast(lengths.size()); } }; /** * @brief Struct representing an HDF5 hyperslab. * - * @details A hyperslab is used to select elements form an n-dimensional array/dataspace and it is defined + * @details A hyperslab is used to select elements from an n-dimensional array/dataspace and it is defined * by 4 arrays of the same size as the rank of the underlying dataspace * (see HDF5 docs): * - `offset`: Origin of the hyperslab in the dataspace. @@ -129,11 +129,14 @@ namespace h5::array_interface { /** * @brief Struct representing a view on an n-dimensional array/dataspace. * - * @details A view consists of the parent array and of an h5::array_interface::hyperslab - * specifying a selection. The array is defined by a pointer to its data and its shape. If - * the data of the array is complex, its imaginary part is treated as just another dimension. + * @details A view consists of the parent array and of an h5::array_interface::hyperslab specifying a selection. + * The parent array is defined by a pointer to its data and its shape. Note that the shape of the parent array + * does not necessarily have to correspond to the actual shape and size of the underlying memory. It is only used + * to select the correct elements in the hyperslab. + * + * If the data of the array is complex, its imaginary part is treated as just another dimension. */ - struct h5_array_view { + struct array_view { /// h5::datatype stored in the array. datatype ty; @@ -141,7 +144,7 @@ namespace h5::array_interface { void *start; /// Shape of the (contiguous) parent array. - v_t L_tot; + v_t parent_shape; /// h5::array_interface::hyperslab specifying the selection of the view. hyperslab slab; @@ -160,9 +163,9 @@ namespace h5::array_interface { * @param rank Rank of the parent array (excluding the possible added imaginary dimension). * @param is_complex Whether the data is complex valued. */ - h5_array_view(datatype ty, void *start, int rank, bool is_complex) - : ty(std::move(ty)), start(start), L_tot(rank + is_complex), slab(rank, is_complex), is_complex(is_complex) { - if (is_complex) L_tot[rank] = 2; + array_view(datatype ty, void *start, int rank, bool is_complex) + : ty(std::move(ty)), start(start), parent_shape(rank + is_complex), slab(rank, is_complex), is_complex(is_complex) { + if (is_complex) parent_shape[rank] = 2; } /// Get the rank of the view (including the possible added imaginary dimension). @@ -171,31 +174,39 @@ namespace h5::array_interface { /** * @brief Given a view on an n-dimensional array (dataspace) by specifying its numpy/nda-style strides and - * its size, calculate the shape of the underlying parent array and the HDF5 strides of the view. + * its size (number of elements in the view), calculate the shape of a possible parent array and the corresponding + * HDF5 strides of the view. + * + * @details The memory layout is assumend to be in C-order. Suppose `parent_shape` is an array containing the shape of + * the n-dimensional parent array, `np_strides` contains the numpy strides and `h5_strides` are the HDF5 strides. Then + * the following equations have to hold (rank = N): + * - `np_strides[N - 1] = h5_strides[N - 1]` + * - `np_strides[N - 2] = h5_strides[N - 2] * parent_shape[N - 1]` + * - `np_strides[N - 3] = h5_strides[N - 3] * parent_shape[N - 1] * parent_shape[N - 2]` + * - `...` + * - `np_strides[N - N] = h5_strides[N - N] * parent_shape[N - 1] * parent_shape[N - 2] * ... * parent_shape[1]`. * - * @warning This function contains a bug and only works as intended in special cases. + * Note that `np_strides[N - i] = m * s[N - 1] * s[N - 2] * ... * s[N - i + 1]`, where `0 < m < s[N - i]` is an integer + * an `s` is the true shape of the parent array. * - * @details The memory layout is assumend to be in C-order. Suppose `L` is an array containing the shape of the - * n-dimensional parent array, `np_strides` contains the numpy strides and `h5_strides` are the HDF5 strides. Then - * - `np_strides[n - 1] = h5_strides[n - 1]`, - * - `np_strides[n - i] = L[n - 1] * ... * L[n - i - 1] * h5_strides[n - i]` and - * - `np_strides[0] = L[n - 1] * ... * L[1] * h5_strides[0]`. + * @note The shape of the parent array as well as the HDF5 strides that fulfill the above equations are not unique. + * This is not a problem as long as HDF5 manages to select the correct elements in memory. * * @param np_strides Numpy/nda-style strides. * @param rank Rank of the n-dimensional parent array. * @param view_size Number of elements in the given view. * @return std::pair containing the shape of the parent array and the HDF5 strides of the view. */ - std::pair get_L_tot_and_strides_h5(long const *np_strides, int rank, long view_size); + std::pair get_parent_shape_and_h5_strides(long const *np_strides, int rank, long view_size); /** * @brief Retrieve the shape and the h5::datatype from a dataset. * * @param g h5::group containing the dataset. * @param name Name of the dataset. - * @return h5::array_interface::h5_lengths_type containing the shape and HDF5 type of the dataset. + * @return h5::array_interface::dataset_info containing the shape and HDF5 type of the dataset. */ - h5_lengths_type get_h5_lengths_type(group g, std::string const &name); + dataset_info get_dataset_info(group g, std::string const &name); /** * @brief Write an array view to an HDF5 dataset. @@ -206,10 +217,10 @@ namespace h5::array_interface { * * @param g h5::group in which the dataset is created. * @param name Name of the dataset - * @param v h5::array_interface::h5_array_view to be written. + * @param v h5::array_interface::array_view to be written. * @param compress Whether to compress the dataset. */ - void write(group g, std::string const &name, h5_array_view const &v, bool compress); + void write(group g, std::string const &name, array_view const &v, bool compress); /** * @brief Write an array view to a selected hyperslab of an existing HDF5 dataset. @@ -221,40 +232,40 @@ namespace h5::array_interface { * * @param g h5::group which contains the dataset. * @param name Name of the dataset. - * @param v h5::array_interface::h5_array_view to be written. - * @param lt h5::array_interface::h5_lengths_type of the file dataset (only used to check the consistency of the input). + * @param v h5::array_interface::array_view to be written. + * @param ds_info h5::array_interface::dataset_info of the file dataset (only used to check the consistency of the input). * @param sl h5::array_interface::hyperslab specifying the selection to be written to. */ - void write_slice(group g, std::string const &name, h5_array_view const &v, h5_lengths_type lt, hyperslab sl); + void write_slice(group g, std::string const &name, array_view const &v, dataset_info ds_info, hyperslab sl); /** * @brief Write an array view to an HDF5 attribute. * * @param obj h5::object to which the attribute is attached. * @param name Name of the attribute. - * @param v v h5::array_interface::h5_array_view to be written. + * @param v v h5::array_interface::array_view to be written. */ - void write_attribute(object obj, std::string const &name, h5_array_view v); + void write_attribute(object obj, std::string const &name, array_view v); /** * @brief Read a given hyperslab from an HDF5 dataset into an array view. * * @param g h5::group which contains the dataset. * @param name Name of the dataset. - * @param v h5::array_interface::h5_array_view to read into. - * @param lt h5::array_interface::h5_lengths_type of the file dataset (only used to check the consistency of the input). + * @param v h5::array_interface::array_view to read into. + * @param ds_info h5::array_interface::dataset_info of the file dataset (only used to check the consistency of the input). * @param sl h5::array_interface::hyperslab specifying the selection to read from. */ - void read(group g, std::string const &name, h5_array_view v, h5_lengths_type lt, hyperslab sl = {}); + void read(group g, std::string const &name, array_view v, dataset_info ds_info, hyperslab sl = {}); /** * @brief Read from an HDF5 attribute into an array view. * * @param obj h5::object to which the attribute is attached. * @param name Name of the attribute. - * @param v h5::array_interface::h5_array_view to read into. + * @param v h5::array_interface::array_view to read into. */ - void read_attribute(object obj, std::string const &name, h5_array_view v); + void read_attribute(object obj, std::string const &name, array_view v); /** @} */ diff --git a/c++/h5/scalar.hpp b/c++/h5/scalar.hpp index ff3d1b8e..8daac1c7 100644 --- a/c++/h5/scalar.hpp +++ b/c++/h5/scalar.hpp @@ -42,10 +42,10 @@ namespace h5 { * * @tparam T Scalar type. * @param x Scalar value. - * @return h5::array_interface::h5_array_view of rank 0. + * @return h5::array_interface::array_view of rank 0. */ template - h5_array_view h5_array_view_from_scalar(T &x) { + array_view array_view_from_scalar(T &x) { return {hdf5_type>(), (void *)(&x), 0, is_complex_v>}; } @@ -68,7 +68,7 @@ namespace h5 { */ template void h5_write(group g, std::string const &name, T const &x) H5_REQUIRES(std::is_arithmetic_v or is_complex_v or std::is_same_v) { - array_interface::write(g, name, array_interface::h5_array_view_from_scalar(x), false); + array_interface::write(g, name, array_interface::array_view_from_scalar(x), false); } /** @@ -96,19 +96,19 @@ namespace h5 { } } - // get h5_lengths_type - auto lt = array_interface::get_h5_lengths_type(g, name); + // get dataset_info + auto ds_info = array_interface::get_dataset_info(g, name); // read complex values stored as a compound HDF5 datatype if constexpr (is_complex_v) { - if (hdf5_type_equal(lt.ty, hdf5_type())) { + if (hdf5_type_equal(ds_info.ty, hdf5_type())) { h5_read(g, name, reinterpret_cast(x)); // NOLINT (reinterpret_cast is safe here) return; } } // read scalar value - array_interface::read(g, name, array_interface::h5_array_view_from_scalar(x), lt); + array_interface::read(g, name, array_interface::array_view_from_scalar(x), ds_info); } /** @@ -123,7 +123,7 @@ namespace h5 { */ template void h5_write_attribute(object obj, std::string const &name, T const &x) H5_REQUIRES(std::is_arithmetic_v or is_complex_v) { - array_interface::write_attribute(obj, name, array_interface::h5_array_view_from_scalar(x)); + array_interface::write_attribute(obj, name, array_interface::array_view_from_scalar(x)); } /** @@ -138,7 +138,7 @@ namespace h5 { */ template void h5_read_attribute(object obj, std::string const &name, T &x) H5_REQUIRES(std::is_arithmetic_v or is_complex_v) { - array_interface::read_attribute(obj, name, array_interface::h5_array_view_from_scalar(x)); + array_interface::read_attribute(obj, name, array_interface::array_view_from_scalar(x)); } /** @} */ diff --git a/c++/h5/stl/array.hpp b/c++/h5/stl/array.hpp index c80c57d7..2bf4eae3 100644 --- a/c++/h5/stl/array.hpp +++ b/c++/h5/stl/array.hpp @@ -58,10 +58,10 @@ namespace h5 { } else if constexpr (std::is_arithmetic_v or is_complex_v or std::is_same_v or std::is_same_v or std::is_same_v) { // array of arithmetic/complex types or char* or const char* - h5::array_interface::h5_array_view v{hdf5_type(), (void *)a.data(), 1, is_complex_v}; - v.slab.count[0] = N; - v.slab.stride[0] = 1; - v.L_tot[0] = N; + h5::array_interface::array_view v{hdf5_type(), (void *)a.data(), 1, is_complex_v}; + v.slab.count[0] = N; + v.slab.stride[0] = 1; + v.parent_shape[0] = N; h5::array_interface::write(g, name, v, true); } else { // array of generic type @@ -91,21 +91,21 @@ namespace h5 { } else if constexpr (std::is_arithmetic_v or is_complex_v or std::is_same_v or std::is_same_v or std::is_same_v) { // array of arithmetic/complex types or char* or const char* - auto lt = array_interface::get_h5_lengths_type(g, name); - H5_EXPECTS(lt.rank() == 1 + lt.has_complex_attribute); - H5_EXPECTS(N == lt.lengths[0]); + auto ds_info = array_interface::get_dataset_info(g, name); + H5_EXPECTS(ds_info.rank() == 1 + ds_info.has_complex_attribute); + H5_EXPECTS(N == ds_info.lengths[0]); if constexpr (is_complex_v) { // read complex values stored as a compound HDF5 datatype - if (hdf5_type_equal(lt.ty, hdf5_type())) { + if (hdf5_type_equal(ds_info.ty, hdf5_type())) { h5_read(g, name, reinterpret_cast &>(a)); // NOLINT (reinterpret_cast is safe here) return; } // read non-complex data into std::array - if (!lt.has_complex_attribute) { + if (!ds_info.has_complex_attribute) { std::cerr << "WARNING: HDF5 type mismatch while reading into a std::array: std::complex<" + get_name_of_h5_type(hdf5_type()) - + "> != " + get_name_of_h5_type(lt.ty) + "\n"; + + "> != " + get_name_of_h5_type(ds_info.ty) + "\n"; std::array tmp{}; h5_read(g, name, tmp); std::copy(begin(tmp), end(tmp), begin(a)); @@ -114,11 +114,11 @@ namespace h5 { } // use array_interface to read - array_interface::h5_array_view v{hdf5_type(), (void *)(a.data()), 1, is_complex_v}; - v.slab.count[0] = N; - v.slab.stride[0] = 1; - v.L_tot[0] = N; - array_interface::read(g, name, v, lt); + array_interface::array_view v{hdf5_type(), (void *)(a.data()), 1, is_complex_v}; + v.slab.count[0] = N; + v.slab.stride[0] = 1; + v.parent_shape[0] = N; + array_interface::read(g, name, v, ds_info); } else { // array of generic type auto g2 = g.open_group(name); diff --git a/c++/h5/stl/vector.hpp b/c++/h5/stl/vector.hpp index 8db806bd..e37cfc83 100644 --- a/c++/h5/stl/vector.hpp +++ b/c++/h5/stl/vector.hpp @@ -39,17 +39,17 @@ namespace h5 { /** * @ingroup rw_arrayinterface - * @brief Create an h5::array_interface::h5_array_view for a std::vector. + * @brief Create an h5::array_interface::array_view for a std::vector. * * @tparam T Value type of std::vector. * @param v std::vector. - * @return h5::array_interface::h5_array_view of rank 1. + * @return h5::array_interface::array_view of rank 1. */ template - h5_array_view h5_array_view_from_vector(std::vector const &v) { - h5_array_view res{hdf5_type(), (void *)v.data(), 1, is_complex_v>}; - res.slab.count[0] = v.size(); - res.L_tot[0] = v.size(); + array_view array_view_from_vector(std::vector const &v) { + array_view res{hdf5_type(), (void *)v.data(), 1, is_complex_v>}; + res.slab.count[0] = v.size(); + res.parent_shape[0] = v.size(); return res; } @@ -119,7 +119,7 @@ namespace h5 { void h5_write(group g, std::string const &name, std::vector const &v) { if constexpr (std::is_arithmetic_v or is_complex_v) { // vector of arithmetic/complex types - array_interface::write(g, name, array_interface::h5_array_view_from_vector(v), true); + array_interface::write(g, name, array_interface::array_view_from_vector(v), true); } else if constexpr (std::is_same_v or std::is_same_v>) { // vector (of vectors) of strings h5_write(g, name, to_char_buf(v)); @@ -158,11 +158,11 @@ namespace h5 { } else { if constexpr (std::is_arithmetic_v or is_complex_v) { // vector of arithmetic/complex types - auto lt = array_interface::get_h5_lengths_type(g, name); - if (lt.rank() != 1 + is_complex_v) - throw make_runtime_error("Error in h5_read: Reading a vector from an array of rank ", lt.rank(), " is not allowed"); - v.resize(lt.lengths[0]); - array_interface::read(g, name, array_interface::h5_array_view_from_vector(v), lt); + auto ds_info = array_interface::get_dataset_info(g, name); + if (ds_info.rank() != 1 + is_complex_v) + throw make_runtime_error("Error in h5_read: Reading a vector from an array of rank ", ds_info.rank(), " is not allowed"); + v.resize(ds_info.lengths[0]); + array_interface::read(g, name, array_interface::array_view_from_vector(v), ds_info); } else if constexpr (std::is_same_v or std::is_same_v>) { // vector of strings or vector of vector of strings char_buf cb; diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml index 20cc3cd0..8877f100 100644 --- a/doc/DoxygenLayout.xml +++ b/doc/DoxygenLayout.xml @@ -44,8 +44,8 @@ - - + + diff --git a/doc/ex1.md b/doc/ex1.md index a3f0c6f6..62db43e7 100644 --- a/doc/ex1.md +++ b/doc/ex1.md @@ -7,7 +7,7 @@ This example shows how to use the **h5** array interface to write/read a 2-dimen We first write a 2-dimensional `5x5` array to an HDF5 file. Then we use an h5::array_interface::hyperslab to select every other column from the original `5x5` matrix and -read it into a `5x3` h5::array_interface::h5_array_view. +read it into a `5x3` h5::array_interface::array_view. Finally, we output the result to stdout. @@ -25,33 +25,33 @@ int main() { std::vector data (25, 0); std::iota(data.begin(), data.end(), 0); - // create an h5_array_view of rank 2 with dimensions 5x5 of the original data + // create an array_view of rank 2 with dimensions 5x5 of the original data int rank = 2; int rows_w = 5; int cols_w = 5; - h5::array_interface::h5_array_view view(h5::hdf5_type(), (void*) data.data(), rank, false); + h5::array_interface::array_view view(h5::hdf5_type(), (void*) data.data(), rank, false); view.slab.count[0] = rows_w; view.slab.count[1] = cols_w; - view.L_tot[0] = rows_w; - view.L_tot[1] = cols_w; + view.parent_shape[0] = rows_w; + view.parent_shape[1] = cols_w; // create file in read/write mode h5::file file("view.h5", 'w'); - // write h5_array_view to file + // write array_view to file h5::array_interface::write(file, "view", view, false); // reserve memory for reading std::vector read_data(15, 0); - // create an h5_array_view or rank 2 with dimensions 5x3 of the read memory + // create an array_view or rank 2 with dimensions 5x3 of the read memory int rows_r = 5; int cols_r = 3; - h5::array_interface::h5_array_view read_view(h5::hdf5_type(), (void*) read_data.data(), rank, false); + h5::array_interface::array_view read_view(h5::hdf5_type(), (void*) read_data.data(), rank, false); read_view.slab.count[0] = rows_r; read_view.slab.count[1] = cols_r; - read_view.L_tot[0] = rows_r; - read_view.L_tot[1] = cols_r; + read_view.parent_shape[0] = rows_r; + read_view.parent_shape[1] = cols_r; // create an hyperslab to select the data to be read from the file (every other column -> stride in second dimension is 2) h5::array_interface::hyperslab read_slab(rank, false); @@ -60,11 +60,11 @@ int main() { read_slab.stride[0] = 1; read_slab.stride[1] = 2; - // get h5_lengths_type from the dataset in the file - auto lengths_type = h5::array_interface::get_h5_lengths_type(file, "view"); + // get dataset_info from the dataset in the file + auto ds_info = h5::array_interface::get_dataset_info(file, "view"); // read data from file - h5::array_interface::read(file, "view", read_view, lengths_type, read_slab); + h5::array_interface::read(file, "view", read_view, ds_info, read_slab); // output data for (int i = 0; i < rows_r; ++i) { diff --git a/doc/groups.dox b/doc/groups.dox index 031c6cd0..1d24de8d 100644 --- a/doc/groups.dox +++ b/doc/groups.dox @@ -85,7 +85,7 @@ * @ingroup readwrite * @brief Specialized functions to read/write n-dimensional arrays from/to HDF5. * - * @details It provides specialized functions to read/write an h5::array_interface::h5_array_view from/to HDF5. An + * @details It provides specialized functions to read/write an h5::array_interface::array_view from/to HDF5. An * array view mimics a view on a generic n-dimensional array. It uses HDF5's hyperslab concept to perform the * reading/writing. * diff --git a/python/h5/h5py_io.cpp b/python/h5/h5py_io.cpp index 1bac7710..7b118cc5 100644 --- a/python/h5/h5py_io.cpp +++ b/python/h5/h5py_io.cpp @@ -126,8 +126,8 @@ namespace h5 { //-------------------------------------- - // Make a h5_array_view from the numpy object - static array_interface::h5_array_view make_av_from_npy(PyArrayObject *arr_obj) { + // Make a array_view from the numpy object + static array_interface::array_view make_av_from_npy(PyArrayObject *arr_obj) { #ifdef PYTHON_NUMPY_VERSION_LT_17 int elementsType = arr_obj->descr->type_num; @@ -139,7 +139,7 @@ namespace h5 { datatype dt = npy_to_h5(elementsType); const bool is_complex = (elementsType == NPY_CDOUBLE) or (elementsType == NPY_CLONGDOUBLE) or (elementsType == NPY_CFLOAT); - array_interface::h5_array_view res{dt, PyArray_DATA(arr_obj), rank, is_complex}; + array_interface::array_view res{dt, PyArray_DATA(arr_obj), rank, is_complex}; std::vector c_strides(rank + is_complex, 0); long total_size = 1; @@ -155,10 +155,10 @@ namespace h5 { } // be careful to consider the last dim if complex, but do NOT copy it - auto [Ltot, stri] = h5::array_interface::get_L_tot_and_strides_h5(c_strides.data(), rank + is_complex, total_size * (is_complex ? 2 : 1)); + auto [Ltot, stri] = h5::array_interface::get_parent_shape_and_h5_strides(c_strides.data(), rank + is_complex, total_size * (is_complex ? 2 : 1)); for (int i = 0; i < rank; ++i) { - res.L_tot[i] = Ltot[i]; - res.slab.stride[i] = stri[i]; + res.parent_shape[i] = Ltot[i]; + res.slab.stride[i] = stri[i]; } return res; @@ -230,27 +230,27 @@ namespace h5 { PyObject *h5_read_bare(group g, std::string const &name) { // There should be no errors from h5 reading import_numpy(); - array_interface::h5_lengths_type lt = array_interface::get_h5_lengths_type(g, name); + array_interface::dataset_info ds_info = array_interface::get_dataset_info(g, name); // First case, we have a scalar - if (lt.rank() == 0) { - if (H5Tget_class(lt.ty) == H5T_FLOAT) { + if (ds_info.rank() == 0) { + if (H5Tget_class(ds_info.ty) == H5T_FLOAT) { double x; h5_read(g, name, x); return PyFloat_FromDouble(x); } - if (H5Tget_class(lt.ty) == H5T_INTEGER) { return h5_read_any_int(g, name, lt.ty); } - if (H5Tequal(lt.ty, h5::hdf5_type())) { + if (H5Tget_class(ds_info.ty) == H5T_INTEGER) { return h5_read_any_int(g, name, ds_info.ty); } + if (H5Tequal(ds_info.ty, h5::hdf5_type())) { bool x; h5_read(g, name, x); return PyBool_FromLong(long(x)); } - if (H5Tget_class(lt.ty) == H5T_STRING) { + if (H5Tget_class(ds_info.ty) == H5T_STRING) { std::string x; h5_read(g, name, x); return PyUnicode_FromString(x.c_str()); } - if (H5Tequal(lt.ty, hdf5_type())) { + if (H5Tequal(ds_info.ty, hdf5_type())) { dcplx_t x; h5_read(g, name, x); return PyComplex_FromDoubles(x.r, x.i); @@ -261,19 +261,19 @@ namespace h5 { } // A scalar complex is a special case - if ((lt.rank() == 1) and lt.has_complex_attribute) { + if ((ds_info.rank() == 1) and ds_info.has_complex_attribute) { std::complex z; h5_read(g, name, z); return PyComplex_FromDoubles(z.real(), z.imag()); } - if (H5Tget_class(lt.ty) == H5T_STRING) { - if (lt.rank() == 1) { + if (H5Tget_class(ds_info.ty) == H5T_STRING) { + if (ds_info.rank() == 1) { auto x = h5_read>(g, name); return cpp2py::convert_to_python(x); } - if (lt.rank() == 2) { + if (ds_info.rank() == 2) { auto x = h5_read>>(g, name); return cpp2py::convert_to_python(x); } @@ -283,10 +283,10 @@ namespace h5 { } // Last case : it is an array - std::vector L(lt.rank()); // Make the lengths - std::copy(lt.lengths.begin(), lt.lengths.end(), L.begin()); //npy_intp and size_t may differ, so I can not use = - int elementsType = h5_to_npy(lt.ty, lt.has_complex_attribute); // element_type in Python from the hdf5 type and complex tag - if (lt.has_complex_attribute) + std::vector L(ds_info.rank()); // Make the lengths + std::copy(ds_info.lengths.begin(), ds_info.lengths.end(), L.begin()); //npy_intp and size_t may differ, so I can not use = + int elementsType = h5_to_npy(ds_info.ty, ds_info.has_complex_attribute); // element_type in Python from the hdf5 type and complex tag + if (ds_info.has_complex_attribute) L.pop_back(); // remove the last dim which is 2 in complex case, // since we are going to build a array of complex @@ -296,7 +296,7 @@ namespace h5 { // in case of allocation error // read from the file - read(g, name, make_av_from_npy((PyArrayObject *)ob), lt); + read(g, name, make_av_from_npy((PyArrayObject *)ob), ds_info); return ob; } diff --git a/test/c++/h5_array_interface.cpp b/test/c++/h5_array_interface.cpp index 080712ec..0e6cc5e0 100644 --- a/test/c++/h5_array_interface.cpp +++ b/test/c++/h5_array_interface.cpp @@ -20,82 +20,210 @@ #include #include -// not working and probably should not work -// TEST(H5, GetLtotAndStridesH5) { -// // test the get_L_tot_and_strides_h5 function for a 10x10 array with every 2nd element in the view -// h5::v_t arr_shape{10, 10}; -// h5::v_t view_shape{5, 5}; -// long view_size = 25; -// std::vector np_strides{20, 2}; -// h5::v_t h5_strides{2, 2}; - -// auto [L_tot, strides] = h5::array_interface::get_L_tot_and_strides_h5(np_strides.data(), 2, view_size); -// EXPECT_EQ(L_tot, arr_shape); -// EXPECT_EQ(strides, h5_strides); -// } +// Print container. +template +void print(const C &c) { + for (const auto &v : c) std::cout << v << " "; + std::cout << std::endl; +} -// not working, should definitely work -// TEST(H5, GetLtotAndStridesH5ContiguousView) { -// // test the get_L_tot_and_strides_h5 function for a 10x10 array with every element in the view -// h5::v_t arr_shape{10, 10}; -// h5::v_t view_shape{10, 10}; -// long view_size = 100; -// std::vector np_strides{10, 1}; -// h5::v_t h5_strides{1, 1}; - -// auto [L_tot, strides] = h5::array_interface::get_L_tot_and_strides_h5(np_strides.data(), 2, view_size); -// EXPECT_EQ(L_tot, arr_shape); -// EXPECT_EQ(strides, h5_strides); -// } +// Check the set of equations that relate numpy/nda-style strides to HDF5-style strides. +void check_strides(const std::vector &np_strides, const std::vector &view_shape) { + // get view size and rank + auto view_size = std::accumulate(view_shape.begin(), view_shape.end(), 1l, std::multiplies<>()); + int rank = static_cast(view_shape.size()); -TEST(H5, ArrayInterface2DArray) { - // write a 5x5 array and read a 5x3 array (every other column) - std::vector data(25, 0); - std::iota(data.begin(), data.end(), 0); + // get parent shape and h5 strides + auto [parent_shape, h5_strides] = h5::array_interface::get_parent_shape_and_h5_strides(np_strides.data(), rank, view_size); - // create an h5_array_view of rank 2 with dimensions 5x5 of the original data - int rank = 2; - int rows_w = 5; - int cols_w = 5; - h5::array_interface::h5_array_view view(h5::hdf5_type(), (void *)data.data(), rank, false); - view.slab.count[0] = rows_w; - view.slab.count[1] = cols_w; - view.L_tot[0] = rows_w; - view.L_tot[1] = cols_w; - - // create file in read/write mode - h5::file file("2d_array.h5", 'w'); - - // write h5_array_view to file - h5::array_interface::write(file, "view", view, false); - - // reserve memory for reading - std::vector data_in(15, 0); - - // create an h5_array_view or rank 2 with dimensions 5x3 of the read memory - int rows_in = 5; - int cols_in = 3; - h5::array_interface::h5_array_view view_in(h5::hdf5_type(), (void *)data_in.data(), rank, false); - view_in.slab.count[0] = rows_in; - view_in.slab.count[1] = cols_in; - view_in.L_tot[0] = rows_in; - view_in.L_tot[1] = cols_in; - - // create an hyperslab to select the data to be read from the file (every other column -> stride in second dimension is 2) - h5::array_interface::hyperslab slab_in(rank, false); - slab_in.count[0] = rows_in; - slab_in.count[1] = cols_in; - slab_in.stride[0] = 1; - slab_in.stride[1] = 2; - - // get h5_lengths_type from the dataset in the file - auto lengths_type = h5::array_interface::get_h5_lengths_type(file, "view"); - - // read data from file - h5::array_interface::read(file, "view", view_in, lengths_type, slab_in); + // check that the number of strides is the same + EXPECT_EQ(np_strides.size(), h5_strides.size()); - // check results - for (int i = 0; i < rows_in; ++i) { - for (int j = 0; j < cols_in; ++j) { EXPECT_EQ(data_in[i * cols_in + j], data[i * cols_w + j * 2]); } + // check that the number of strides is the same as the rank of the parent shape + EXPECT_EQ(np_strides.size(), parent_shape.size()); + + // check that the strides are consistent with the parent shape + h5::hsize_t product = 1; + for (int i = static_cast(np_strides.size()) - 1; i >= 0; --i) { + EXPECT_EQ(np_strides[i], product * h5_strides[i]); + product *= parent_shape[i]; + EXPECT_TRUE(parent_shape[i] >= view_shape[i]); } } + +TEST(H5, GetParentShapeAndH5Strides1D) { + std::vector np_strides(1), view_shape(1); + + // 1D array of size 10: contiguous data, all elements + np_strides = {1}; + view_shape = {10}; + check_strides(np_strides, view_shape); + + // 1D array of size 10: contiguous data, first 5 elements + view_shape = {5}; + check_strides(np_strides, view_shape); + + // 1D array of size 10: every other element + np_strides = {2}; + view_shape = {5}; + check_strides(np_strides, view_shape); + + // 1D array of size 10: every third element + np_strides = {3}; + view_shape = {4}; + check_strides(np_strides, view_shape); + + // 1D array of size 10: every seventh element + np_strides = {7}; + view_shape = {2}; + check_strides(np_strides, view_shape); +} + +TEST(H5, GetParentShapeAndH5Strides2D) { + std::vector np_strides(2), view_shape(2); + + // 2D array of size 10x10: contiguous data, all elements + np_strides = {10, 1}; + view_shape = {10, 10}; + check_strides(np_strides, view_shape); + + // 2D array of size 10x10: contiguous data, first 5x5 elements + view_shape = {5, 5}; + check_strides(np_strides, view_shape); + + // 2D array of size 10x10: every other row and column + np_strides = {20, 2}; + view_shape = {5, 5}; + check_strides(np_strides, view_shape); + + // 2D array of size 10x10: every third row and the first four columns + np_strides = {30, 1}; + view_shape = {4, 4}; + check_strides(np_strides, view_shape); +} + +TEST(H5, GetParentShapeAndH5Strides3D) { + std::vector np_strides(3), view_shape(3); + + // 3D array of size 10x10x10: contiguous data, all elements + np_strides = {100, 10, 1}; + view_shape = {10, 10, 10}; + check_strides(np_strides, view_shape); + + // 3D array of size 10x10x10: contiguous data, first 2x3x4 elements + view_shape = {2, 3, 4}; + check_strides(np_strides, view_shape); + + // 3D array of size 10x10x10: every other element + np_strides = {200, 20, 2}; + view_shape = {5, 5, 5}; + check_strides(np_strides, view_shape); +} + +TEST(H5, ArrayInterface1DArray) { + // test the array interface for a 1D array + h5::file file("1d_array.h5", 'w'); + std::vector data(100, 0); + std::iota(data.begin(), data.end(), 0); + int rank = 1; + int size = static_cast(data.size()); + + // write the full array + h5::array_interface::array_view full_view(h5::hdf5_type(), (void *)data.data(), rank, false); + full_view.slab.count[0] = size; + full_view.parent_shape[0] = size; + h5::array_interface::write(file, "full_view", full_view, true); + + // read the full array + std::vector full_data_in(data.size(), 0); + h5::array_interface::array_view full_view_in(h5::hdf5_type(), (void *)full_data_in.data(), rank, false); + full_view_in.slab.count[0] = size; + full_view_in.parent_shape[0] = size; + auto full_view_ds_info = h5::array_interface::get_dataset_info(file, "full_view"); + h5::array_interface::read(file, "full_view", full_view_in, full_view_ds_info, full_view.slab); + + // check results + for (int i = 0; i < size; ++i) { EXPECT_EQ(full_data_in[i], data[i]); } + + // // write blocks of size 10, skipping every other block, starting with the second block + // h5::array_interface::array_view block_view(h5::hdf5_type(), (void *)data.data(), rank, false); + // block_view.slab.offset[0] = 10; + // block_view.slab.stride[0] = 20; + // block_view.slab.count[0] = 5; + // block_view.slab.block[0] = 10; + // block_view.parent_shape[0] = size; + // h5::array_interface::write(file, "block_view", block_view, true); + + // // read blocks of size 10, skipping every other block, starting with the second block + // std::vector block_data_in(50, 0); + // h5::array_interface::array_view block_view_in(h5::hdf5_type(), (void *)block_data_in.data(), rank, false); + // block_view_in.slab.count[0] = 50; + // block_view_in.parent_shape[0] = 50; + // h5::array_interface::read(file, "full_view", block_view_in, full_view_lt, block_view_in.slab); + + // read the block view + // std::vector block_data_in(50, 0); + // h5::array_interface::array_view block_view_in(h5::hdf5_type(), (void *)block_data_in.data(), rank, false); + // block_view_in.slab.count[0] = 50; + // block_view_in.parent_shape[0] = 50; + // auto block_view_lt = h5::array_interface::get_dataset_info(file, "block_view"); + // h5::array_interface::read(file, "block_view", block_view_in, block_view_lt, block_view_in.slab); +} + +// TEST(H5, ArrayInterface2DArray) { +// // write a 5x5 array and read a 5x3 array (every other column) +// std::vector data(25, 0); +// std::iota(data.begin(), data.end(), 0); + +// // create an array_view of rank 2 with dimensions 5x5 of the original data +// int rank = 2; +// int rows_w = 5; +// int cols_w = 5; +// h5::array_interface::array_view view(h5::hdf5_type(), (void *)data.data(), rank, false); +// // view.slab.count[0] = rows_w; +// // view.slab.count[1] = cols_w; +// // view.parent_shape[0] = rows_w; +// // view.parent_shape[1] = cols_w; +// view.slab.count[0] = 1; +// view.slab.count[1] = 1; +// view.slab.block[0] = 5; +// view.slab.block[1] = 5; +// view.parent_shape[0] = rows_w; +// view.parent_shape[1] = cols_w; + +// // create file in read/write mode +// h5::file file("2d_array.h5", 'w'); + +// // write array_view to file +// h5::array_interface::write(file, "view", view, false); + +// // reserve memory for reading +// std::vector data_in(15, 0); + +// // create an array_view or rank 2 with dimensions 5x3 of the read memory +// int rows_in = 5; +// int cols_in = 3; +// h5::array_interface::array_view view_in(h5::hdf5_type(), (void *)data_in.data(), rank, false); +// view_in.slab.count[0] = rows_in; +// view_in.slab.count[1] = cols_in; +// view_in.parent_shape[0] = rows_in; +// view_in.parent_shape[1] = cols_in; + +// // create an hyperslab to select the data to be read from the file (every other column -> stride in second dimension is 2) +// h5::array_interface::hyperslab slab_in(rank, false); +// slab_in.count[0] = rows_in; +// slab_in.count[1] = cols_in; +// slab_in.stride[0] = 1; +// slab_in.stride[1] = 2; + +// // get dataset_info from the dataset in the file +// auto lengths_type = h5::array_interface::get_dataset_info(file, "view"); + +// // read data from file +// h5::array_interface::read(file, "view", view_in, lengths_type, slab_in); + +// // check results +// for (int i = 0; i < rows_in; ++i) { +// for (int j = 0; j < cols_in; ++j) { EXPECT_EQ(data_in[i * cols_in + j], data[i * cols_w + j * 2]); } +// } +// }