Skip to content

Commit

Permalink
Allow arbitrary block sizes in h5::array_interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Thoemi09 authored and Wentzell committed Apr 24, 2024
1 parent beb8de9 commit 88d12d4
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 126 deletions.
41 changes: 23 additions & 18 deletions c++/h5/array_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,8 @@ namespace h5::array_interface {
return {parent_shape, h5_strides};
}

dataset_info get_dataset_info(group g, std::string const &name) {
// open dataset
dataset ds = g.open_dataset(name);

// retrieve shape information
dataset_info get_dataset_info(dataset ds) {
// retrieve shape and datatype information
datatype ty = H5Dget_type(ds);
bool has_complex_attribute = H5LTfind_attribute(ds, "__complex__");
dataspace dspace = H5Dget_space(ds);
Expand All @@ -117,10 +114,18 @@ namespace h5::array_interface {
return {std::move(dims_out), ty, has_complex_attribute};
}

dataset_info get_dataset_info(group g, std::string const &name) {
dataset ds = g.open_dataset(name);
return get_dataset_info(ds);
}

void write(group g, std::string const &name, array_view const &v, bool compress) {
// unlink the dataset if it already exists
g.unlink(name);

// shape of the hyperslab in memory
auto hs_shape = v.slab.shape();

// chunk the dataset and add compression
proplist cparms = H5P_DEFAULT;
if (compress and (v.rank() != 0)) {
Expand All @@ -131,16 +136,16 @@ namespace h5::array_interface {
for (int i = v.rank() - 1; i >= 0; --i) {
H5_ASSERT(max_chunk_size >= chunk_size);
hsize_t max_dim = max_chunk_size / chunk_size;
chunk_dims[i] = std::clamp(v.slab.count[i], hsize_t{1}, max_dim);
chunk_dims[i] = std::clamp(hs_shape[i], hsize_t{1}, max_dim);
chunk_size *= chunk_dims[i];
}
cparms = H5Pcreate(H5P_DATASET_CREATE);
H5Pset_chunk(cparms, n_dims, chunk_dims.data());
H5Pset_deflate(cparms, 1);
}

// dataspace for the dataset in the file: v.slab.block shape {1, ..., 1} is assumed
dataspace file_dspace = H5Screate_simple(v.slab.rank(), v.slab.count.data(), nullptr);
// dataspace for the dataset in the file
dataspace file_dspace = H5Screate_simple(v.slab.rank(), hs_shape.data(), nullptr);

// create the dataset in the file
dataset ds = H5Dcreate2(g, name.c_str(), v.ty, file_dspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
Expand All @@ -161,12 +166,14 @@ namespace h5::array_interface {
if (v.is_complex) h5_write_attribute(ds, "__complex__", "1");
}

void write_slice(group g, std::string const &name, array_view const &v, dataset_info ds_info, hyperslab sl) {
void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl) {
// empty hyperslab
if (sl.empty()) return;

// check consistency of input: block shape {1, ..., 1} is assumed
if (v.slab.count != sl.count) throw std::runtime_error("Error in h5::array_interface::write_slice: Memory and file slabs are incompatible");
// check consistency of input
if (v.slab.size() != sl.size()) throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible sizes");

auto ds_info = get_dataset_info(g, name);
if (not hdf5_type_equal(v.ty, ds_info.ty))
throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
+ " != " + get_name_of_h5_type(ds_info.ty));
Expand Down Expand Up @@ -206,7 +213,7 @@ namespace h5::array_interface {
if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Writing to the attribute " + name + " failed");
}

void read(group g, std::string const &name, array_view v, dataset_info ds_info, hyperslab sl) {
void read(group g, std::string const &name, array_view v, hyperslab sl) {
// open dataset and get dataspace
dataset ds = g.open_dataset(name);
dataspace file_dspace = H5Dget_space(ds);
Expand All @@ -219,6 +226,7 @@ namespace h5::array_interface {
}

// check consistency of input
auto ds_info = get_dataset_info(g, name);
if (H5Tget_class(v.ty) != H5Tget_class(ds_info.ty))
throw std::runtime_error("Error in h5::array_interface::read: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
+ " != " + get_name_of_h5_type(ds_info.ty));
Expand All @@ -227,12 +235,9 @@ namespace h5::array_interface {
std::cerr << "WARNING: HDF5 type mismatch while reading into an array_view: " + get_name_of_h5_type(v.ty)
+ " != " + get_name_of_h5_type(ds_info.ty) + "\n";

if (ds_info.rank() != v.rank())
throw std::runtime_error("Error in h5::array_interface::read: Incompatible ranks: " + std::to_string(v.rank())
+ " != " + std::to_string(ds_info.rank()));

// block shape of {1, ..., 1} is assumed
if (sl.empty() and ds_info.lengths != v.slab.count) throw std::runtime_error("Error in h5::array_interface::read: Incompatible shapes");
auto sl_size = sl.size();
if (sl.empty()) { sl_size = std::accumulate(ds_info.lengths.begin(), ds_info.lengths.end(), (hsize_t)1, std::multiplies<>()); }
if (sl_size != v.slab.size()) throw std::runtime_error("Error in h5::array_interface::read: Incompatible sizes");

// memory dataspace
dataspace mem_dspace = make_mem_dspace(v);
Expand Down
40 changes: 29 additions & 11 deletions c++/h5/array_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "./group.hpp"
#include "./object.hpp"

#include <algorithm>
#include <numeric>
#include <string>
#include <utility>

Expand Down Expand Up @@ -124,15 +126,29 @@ namespace h5::array_interface {

/// Check whether the hyperslab is empty (has been initialized).
[[nodiscard]] bool empty() const { return count.empty(); }

/// Get the shape of the selected hyperslab.
[[nodiscard]] v_t shape() const {
v_t shape(rank());
std::transform(count.begin(), count.end(), block.begin(), shape.begin(), std::multiplies<>());
return shape;
}

/// Get the total number of elements in the hyperslab.
[[nodiscard]] auto size() const {
auto sh = shape();
return std::accumulate(sh.begin(), sh.end(), (hsize_t)1, std::multiplies<>());
}
};

/**
* @brief Struct representing a view on an n-dimensional array/dataspace.
*
* @details A view consists of the parent array and of an h5::array_interface::hyperslab specifying a selection.
* The parent array is defined by a pointer to its data and its shape. Note that the shape of the parent array
* does not necessarily have to correspond to the actual shape and size of the underlying memory. It is only used
* to select the correct elements in the hyperslab.
* The parent array is defined by a pointer to its data and its shape.
*
* Note that the shape of the parent array does not necessarily have to correspond to the actual shape and size of
* the underlying memory. It is only used to select the correct elements in the hyperslab.
*
* If the data of the array is complex, its imaginary part is treated as just another dimension.
*/
Expand Down Expand Up @@ -202,6 +218,14 @@ namespace h5::array_interface {
/**
* @brief Retrieve the shape and the h5::datatype from a dataset.
*
* @param ds h5::dataset.
* @return h5::array_interface::dataset_info containing the shape and HDF5 type of the dataset.
*/
dataset_info get_dataset_info(dataset ds);

/**
* @brief Retrieve the shape and the h5::datatype from a dataset with a given name in the given group.
*
* @param g h5::group containing the dataset.
* @param name Name of the dataset.
* @return h5::array_interface::dataset_info containing the shape and HDF5 type of the dataset.
Expand All @@ -213,8 +237,6 @@ namespace h5::array_interface {
*
* @details If a link with the given name already exists, it is first unlinked.
*
* @warning This function only works consistently if the blocks of the hyperslab contain only a single element!
*
* @param g h5::group in which the dataset is created.
* @param name Name of the dataset
* @param v h5::array_interface::array_view to be written.
Expand All @@ -228,15 +250,12 @@ namespace h5::array_interface {
* @details It checks if the number of elements in the view is the same as selected in the hyperslab and if the
* datatypes are compatible. Otherwise, an exception is thrown.
*
* @warning This function only works consistently if the blocks of both hyperslabs contain only a single element!
*
* @param g h5::group which contains the dataset.
* @param name Name of the dataset.
* @param v h5::array_interface::array_view to be written.
* @param ds_info h5::array_interface::dataset_info of the file dataset (only used to check the consistency of the input).
* @param sl h5::array_interface::hyperslab specifying the selection to be written to.
*/
void write_slice(group g, std::string const &name, array_view const &v, dataset_info ds_info, hyperslab sl);
void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl);

/**
* @brief Write an array view to an HDF5 attribute.
Expand All @@ -253,10 +272,9 @@ namespace h5::array_interface {
* @param g h5::group which contains the dataset.
* @param name Name of the dataset.
* @param v h5::array_interface::array_view to read into.
* @param ds_info h5::array_interface::dataset_info of the file dataset (only used to check the consistency of the input).
* @param sl h5::array_interface::hyperslab specifying the selection to read from.
*/
void read(group g, std::string const &name, array_view v, dataset_info ds_info, hyperslab sl = {});
void read(group g, std::string const &name, array_view v, hyperslab sl = {});

/**
* @brief Read from an HDF5 attribute into an array view.
Expand Down
2 changes: 1 addition & 1 deletion c++/h5/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ namespace h5 {
}

// read scalar value
array_interface::read(g, name, array_interface::array_view_from_scalar(x), ds_info);
array_interface::read(g, name, array_interface::array_view_from_scalar(x));
}

/**
Expand Down
2 changes: 1 addition & 1 deletion c++/h5/stl/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ namespace h5 {
v.slab.count[0] = N;
v.slab.stride[0] = 1;
v.parent_shape[0] = N;
array_interface::read(g, name, v, ds_info);
array_interface::read(g, name, v);
} else {
// array of generic type
auto g2 = g.open_group(name);
Expand Down
2 changes: 1 addition & 1 deletion c++/h5/stl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ namespace h5 {
if (ds_info.rank() != 1 + is_complex_v<T>)
throw make_runtime_error("Error in h5_read: Reading a vector from an array of rank ", ds_info.rank(), " is not allowed");
v.resize(ds_info.lengths[0]);
array_interface::read(g, name, array_interface::array_view_from_vector(v), ds_info);
array_interface::read(g, name, array_interface::array_view_from_vector(v));
} else if constexpr (std::is_same_v<T, std::string> or std::is_same_v<T, std::vector<std::string>>) {
// vector of strings or vector of vector of strings
char_buf cb;
Expand Down
2 changes: 1 addition & 1 deletion doc/documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ the interface.
* @ref rw_variant
* @ref rw_vector

The @ref rw_array "array interface" helps with loading and storing n-dimensional arrays.
The @ref rw_arrayinterface "array interface" helps with loading and storing n-dimensional arrays.

Furthermore, the generic design of the read/write functionality makes it easily extendible to support custom user types as well.
@ref ex2 shows how to make a user defined type HDF5 serializable.
Expand Down
2 changes: 1 addition & 1 deletion python/h5/h5py_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ namespace h5 {
// in case of allocation error

// read from the file
read(g, name, make_av_from_npy((PyArrayObject *)ob), ds_info);
read(g, name, make_av_from_npy((PyArrayObject *)ob));
return ob;
}

Expand Down
Loading

0 comments on commit 88d12d4

Please sign in to comment.