From b2959d29f7c9c01155d6f5427254e4b1e6dde7d6 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Sat, 31 Aug 2024 23:57:20 -0700 Subject: [PATCH] Read ElectricalSeries.data example working --- src/BaseIO.cpp | 13 ----- src/BaseIO.hpp | 97 ++++++++++++++++++++++----------- src/hdf5/HDF5IO.cpp | 5 +- src/nwb/NWBFile.hpp | 3 +- src/nwb/base/TimeSeries.hpp | 9 +++ src/nwb/file/ElectrodeTable.hpp | 3 +- tests/testEcephys.cpp | 62 +++++++++++++++++++++ tests/testHDF5IO.cpp | 5 +- 8 files changed, 145 insertions(+), 52 deletions(-) diff --git a/src/BaseIO.cpp b/src/BaseIO.cpp index 7c2c5ded..353f709d 100644 --- a/src/BaseIO.cpp +++ b/src/BaseIO.cpp @@ -92,19 +92,6 @@ Status BaseIO::createTimestampsAttributes(const std::string& path) return Status::Success; } -std::unique_ptr BaseIO::lazyReadDataset( - const std::string& dataPath) -{ - return std::make_unique(this, dataPath); -} - -std::unique_ptr BaseIO::lazyReadAttribute( - const std::string& dataPath) -{ - return std::make_unique(std::shared_ptr(this), - dataPath); -} - // BaseRecordingData BaseRecordingData::BaseRecordingData() {} diff --git a/src/BaseIO.hpp b/src/BaseIO.hpp index 65625807..44c730d1 100644 --- a/src/BaseIO.hpp +++ b/src/BaseIO.hpp @@ -7,6 +7,8 @@ #include #include +#include // TODO move this and function def to the cpp file + #include "Types.hpp" #define DEFAULT_STR_SIZE 256 @@ -88,20 +90,75 @@ class BaseDataType class DataBlockGeneric { public: + /** + * @ brief The untyped data values + */ std::any data; + /** + * \brief The 1D vector with the n-dimensional shape of the data + */ std::vector shape; }; /** - * @brief Structure to hold data and shape + * @brief Structure to hold data and shape for a typed data vector + * + * @tparam DTYPE The data type of the vector */ -template +template class DataBlock { public: - std::vector data; + /** + * @ brief The 1D vector with the data values of type DTYPE + */ + std::vector data; + /** + * \brief The 1D vector with the n-dimensional shape of the data + */ std::vector shape; + /// Constructor + DataBlock(const std::vector& data, const std::vector& shape) + : data(data) + , shape(shape) + { + } + + /** + * \brief Transform the data to a boost multi-dimensional array for convenient + * access + * + * The function uses boost::const_multi_array_ref to avoid copying of the data + * + * @tparam NDIMS The number of dimensions of the array. Same as shape.size() + */ + template + boost::const_multi_array_ref as_multi_array() const + { + if (shape.size() != NDIMS) { + throw std::invalid_argument( + "Shape size does not match the number of dimensions."); + } + + // Calculate the total number of elements expected + SizeType expected_size = 1; + for (SizeType dim : shape) { + expected_size *= dim; + } + + if (data.size() != expected_size) { + throw std::invalid_argument("Data size does not match the shape."); + } + + // Convert the shape vector to a boost::array + boost::array boost_shape; + std::copy(shape.begin(), shape.end(), boost_shape.begin()); + + // Construct and return the boost::const_multi_array_ref + return boost::const_multi_array_ref(data.data(), boost_shape); + } + /** * @brief Factory method to create an DataBlock from a DataBlockGeneric. * @@ -112,13 +169,10 @@ class DataBlock * * @return A DataBlock structure containing the data and shape. */ - static DataBlock fromGeneric(const DataBlockGeneric& genericData) + static DataBlock fromGeneric(const DataBlockGeneric& genericData) { - DataBlock result; - // Extract the data from the std::any object - result.data = std::any_cast>(genericData.data); - // Copy the shape information - result.shape = genericData.shape; + auto result = DataBlock( + std::any_cast>(genericData.data), genericData.shape); return result; } }; @@ -208,16 +262,6 @@ class BaseIO const std::vector& stride = {}, const std::vector& block = {}) = 0; - /** - * @brief Create a ReadDatasetWrapper for a dataset for lazy reading - * - * @param dataPath The path to the dataset within the file. - * - * @return A ReadDatasetWrapper object for lazy reading from the dataset - */ - virtual std::unique_ptr lazyReadDataset( - const std::string& dataPath); - /** * @brief Reads a attribute and determines the data type * @@ -231,16 +275,6 @@ class BaseIO */ virtual DataBlockGeneric readAttribute(const std::string& dataPath) = 0; - /** - * @brief Create a ReadAttributeWrapper for an attribute for lazy reading - * - * @param dataPath The path to the dataset within the file. - * - * @return A ReadAttributeWrapper object for lazy reading from the dataset - */ - virtual std::unique_ptr lazyReadAttribute( - const std::string& dataPath); - /** * @brief Creates an attribute at a given location in the file. * @param type The base data type of the attribute. @@ -469,7 +503,8 @@ class ReadDatasetWrapper /** * @brief Default constructor. */ - ReadDatasetWrapper(BaseIO* io, std::string dataPath) + ReadDatasetWrapper(const std::shared_ptr io, // BaseIO* io, + std::string dataPath) : io(io) , dataPath(dataPath) { @@ -535,7 +570,7 @@ class ReadDatasetWrapper /** * @brief Pointer to the I/O object to use for reading. */ - BaseIO* io; + const std::shared_ptr io; // BaseIO* io; /** * @brief Path to the dataset or attribute to read */ diff --git a/src/hdf5/HDF5IO.cpp b/src/hdf5/HDF5IO.cpp index 6b5a4b99..331480e4 100644 --- a/src/hdf5/HDF5IO.cpp +++ b/src/hdf5/HDF5IO.cpp @@ -163,10 +163,8 @@ AQNWB::DataBlockGeneric HDF5IO::readAttribute(const std::string& dataPath) // Read the attribute H5::Attribute attribute = file->openAttribute(dataPath); H5::DataType dataType = attribute.getDataType(); - // Determine the size of the attribute size_t numElements = attribute.getStorageSize(); - // Read the attribute into a vector of the appropriate type if (dataType == H5::PredType::C_S1) { result.data = readStringDataHelper(attribute, numElements); @@ -190,8 +188,7 @@ AQNWB::DataBlockGeneric HDF5IO::readAttribute(const std::string& dataPath) throw std::runtime_error("Unsupported data type"); } // Attributes do not have a shape, so set shape to empty - result.shape.clear(); - + result.shape.clear(); // TODO is not correct for return result; } diff --git a/src/nwb/NWBFile.hpp b/src/nwb/NWBFile.hpp index 13896b08..58f0299a 100644 --- a/src/nwb/NWBFile.hpp +++ b/src/nwb/NWBFile.hpp @@ -29,7 +29,6 @@ class NWBFile : public Container public: /** * @brief Constructor for NWBFile class. - * @param idText The identifier text for the NWBFile. * @param io The shared pointer to the IO object. */ NWBFile(std::shared_ptr io); @@ -52,6 +51,8 @@ class NWBFile : public Container /** * @brief Initializes the NWB file by opening and setting up the file * structure. + * + * @param idText The identifier text for the NWBFile. */ Status initialize(const std::string& idText); diff --git a/src/nwb/base/TimeSeries.hpp b/src/nwb/base/TimeSeries.hpp index 704f8556..8548bbfe 100644 --- a/src/nwb/base/TimeSeries.hpp +++ b/src/nwb/base/TimeSeries.hpp @@ -87,6 +87,15 @@ class TimeSeries : public Container */ BaseDataType timestampsType = BaseDataType::F64; + inline std::string dataPath() { return (this->path + std::string("/data")); } + + std::unique_ptr dataLazy() + { + std::string dataPath = this->dataPath(); + std::cout << "Path: " << dataPath << std::endl; + return std::make_unique(this->io, dataPath); + } + private: /** * @brief The neurodataType of the TimeSeries. diff --git a/src/nwb/file/ElectrodeTable.hpp b/src/nwb/file/ElectrodeTable.hpp index da8b6ad4..909e1440 100644 --- a/src/nwb/file/ElectrodeTable.hpp +++ b/src/nwb/file/ElectrodeTable.hpp @@ -18,7 +18,6 @@ class ElectrodeTable : public DynamicTable /** * @brief Constructor. * @param io The shared pointer to the BaseIO object. - * @param description The description of the table (default: "metadata about * extracellular electrodes"). */ ElectrodeTable(std::shared_ptr io); @@ -36,6 +35,8 @@ class ElectrodeTable : public DynamicTable * * Initializes the ElectrodeTable by creating NWB related attributes and * adding required columns. + * + * @param description The description of the table (default: "metadata about */ void initialize(const std::string& description = "metadata about extracellular electrodes"); diff --git a/tests/testEcephys.cpp b/tests/testEcephys.cpp index 2725e047..aeb0106c 100644 --- a/tests/testEcephys.cpp +++ b/tests/testEcephys.cpp @@ -68,6 +68,68 @@ TEST_CASE("ElectricalSeries", "[ecephys]") es.writeChannel( ch, numSamples, mockData[ch].data(), mockTimestamps.data()); } + + io->flush(); + + // TODO START Move this check to a proper test and expand on it + std::string esdataPath = es.dataPath(); + std::string expectedDataPath = dataPath + "/data"; + REQUIRE(esdataPath == expectedDataPath); + + auto readDataWrapper = es.dataLazy(); + DataBlock dataValues = readDataWrapper->values(); + REQUIRE(dataValues.data.size() == (numSamples * numChannels)); + REQUIRE(dataValues.shape[0] == numSamples); + REQUIRE(dataValues.shape[1] == numChannels); + for (SizeType s = 0; s < numSamples; s++) { + std::vector temp; + temp.resize(numChannels); + for (SizeType c = 0; c < numChannels; c++) { + temp[c] = mockData[c][s]; + } + std::vector selectedRange( + dataValues.data.begin() + (s * numChannels), + dataValues.data.begin() + ((s + 1) * numChannels)); + REQUIRE_THAT(selectedRange, Catch::Matchers::Approx(temp).margin(1)); + } + // Check the boost multi-array feature + auto boostMulitArray = dataValues.as_multi_array<2>(); + for (SizeType s = 0; s < numSamples; s++) { + // Get timestamp s (i.e., the values from all channels at time index s) + // Accessing [a, :] + auto row_s = boostMulitArray[s]; // This returns a 1D array representing + // the first row + // Convert the row from boost to a std:vector + std::vector row_s_vector(row_s.begin(), row_s.end()); + + // Data for comparison + std::vector temp; + temp.resize(numChannels); + for (SizeType c = 0; c < numChannels; c++) { + temp[c] = mockData[c][s]; + } + REQUIRE_THAT(row_s_vector, Catch::Matchers::Approx(temp).margin(1)); + } + + // std::cout<<"Data shape:" << dataValues.shape.size() << std::endl; + + // + // REQUIRE_THAT(readDataValues, Catch::Matchers::Approx(data).margin(1)); + + // TODO Add an attribute getter and test that it works as well + // auto dataLazyWrapper = es.dataLazy(); + // auto dataValueGeneric = dataLazyWrapper->valuesGeneric(); + // TODO Check why this causes Assertion failed: + // (this->file->attrExists(dataPath)), function readAttribute, file + // HDF5IO.cpp, line 161. + // the attribute does not seem to exists so either the path is wrong or + // something is bad with the write? + // REQUIRE(unitValueGeneric.shape.size() == 0); + + // std::string esUnit = es.unit(); + // REQUIRE(esUnit == std::string("volts")); + // TODO END Move this code + io->close(); // Read data back from file diff --git a/tests/testHDF5IO.cpp b/tests/testHDF5IO.cpp index b0de58ab..6d6954ab 100644 --- a/tests/testHDF5IO.cpp +++ b/tests/testHDF5IO.cpp @@ -425,7 +425,7 @@ TEST_CASE("readDataset", "[hdf5io]") { // open file std::string path = getTestFilePath("Read1DData1DDataset.h5"); - std::unique_ptr hdf5io = std::make_unique(path); + std::shared_ptr hdf5io = std::make_shared(path); hdf5io->open(); // Set up test data @@ -450,7 +450,8 @@ TEST_CASE("readDataset", "[hdf5io]") REQUIRE(readDataTyped.data == testData); // Confirm using lazy read as well - auto readDataWrapper = hdf5io->lazyReadDataset(dataPath); + auto readDataWrapper = + std::make_unique(hdf5io, dataPath); auto readDataGeneric = readDataWrapper->valuesGeneric(); REQUIRE(readDataGeneric.shape[0] == 10); auto readDataTypedV2 = readDataWrapper->values();