Skip to content

Commit

Permalink
Merge pull request #22 from lbl-cbg/add-validation-tests
Browse files Browse the repository at this point in the history
Add validation tests
  • Loading branch information
stephprince authored May 2, 2024
2 parents 922965a + 601d640 commit a2519a0
Show file tree
Hide file tree
Showing 17 changed files with 164 additions and 30 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,43 @@ jobs:
- name: Test
working-directory: build
run: ctest --output-on-failure --no-tests=error -C Release -j 2

- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: test-files-${{ matrix.os }}
path: |
build/tests/data/*.nwb
validate:
needs: tests
defaults:
run:
shell: bash
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}
cancel-in-progress: true
strategy:
fail-fast: false
matrix:
os: [macos-latest, ubuntu-latest]

runs-on: ${{ matrix.os }}

steps:
- name: Download test files
uses: actions/download-artifact@v3
with:
name: test-files-${{ matrix.os }}
path: nwb_files

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install pynwb and run validation
run: |
python -m pip install --upgrade pip
python -m pip install nwbinspector
nwbinspector nwb_files --threshold BEST_PRACTICE_VIOLATION
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# folders
**/.DS_Store
/tests/data/*
/resources/schema/*

# IDEs
.idea/
Expand Down
45 changes: 36 additions & 9 deletions resources/generate_spec_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,44 @@
from ruamel.yaml import YAML

# TODO - setup git submodule or cloning
schema_dir = Path('./resources/schema/')

# Convert every namespace YAML file found under `schema_dir` into the
# compact-JSON specification files cached under
# ./resources/spec/<namespace-name>/<namespace-version>/.
for file in schema_dir.rglob(r"*namespace.yaml"):
    # Parse the namespace file. Use the opened stream `f` rather than
    # re-opening the path inside yaml.load().
    yaml = YAML(typ='safe')
    with open(file) as f:
        namespace = yaml.load(f)

    # A namespace file may declare several namespaces; emit each one.
    for ns in namespace['namespaces']:
        spec_dir = Path(f"./resources/spec/{ns['name']}/{ns['version']}")
        spec_dir.mkdir(parents=True, exist_ok=True)

        # Load each referenced schema source (entries with a 'source' key;
        # entries with a 'namespace' key reference other namespaces and have
        # no file of their own) and write it out as compact JSON.
        for s in ns['schema']:
            if 'source' in s:
                schema_file = file.parent / s['source']
                with open(schema_file) as f:
                    spec = yaml.load(f)

                spec_file = (spec_dir / s['source']).with_suffix('.json')
                print(f'Generating file {spec_file}')
                with open(spec_file, 'w') as fo:
                    json.dump(spec, fo, separators=(',', ':'))

        # Strip the '.yaml' extension from schema sources so the cached
        # namespace references the generated JSON files instead.
        schema = []
        for s in ns['schema']:
            if 'source' in s:
                s = {'source': s['source'].split('.yaml')[0]}
            schema.append(s)
        ns['schema'] = schema

        # Write the namespace itself (wrapped as a single-namespace document)
        # as compact JSON next to its schema files.
        ns_file = (spec_dir / file.name).with_suffix('.json')
        ns_output = {'namespaces': [ns]}
        print(f'Generating file {ns_file}')
        with open(ns_file, 'w') as fo:
            json.dump(ns_output, fo, separators=(',', ':'))
2 changes: 1 addition & 1 deletion resources/spec/core/2.7.0/nwb.file.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion resources/spec/core/2.7.0/nwb.namespace.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"namespaces":[{"name":"core","doc":"NWB namespace","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter","Keith Godfrey","Jeff Teeters"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"NWB core","schema":[{"namespace":"hdmf-common"},{"doc":"This source module contains base data types used throughout the NWB data format.","source":"nwb.base.yaml","title":"Base data types"},{"doc":"This source module contains neurodata_types for device data.","source":"nwb.device.yaml","title":"Devices"},{"doc":"This source module contains neurodata_types for epoch data.","source":"nwb.epoch.yaml","title":"Epochs"},{"doc":"This source module contains neurodata_types for image data.","source":"nwb.image.yaml","title":"Image data"},{"doc":"Main NWB file specification.","source":"nwb.file.yaml","title":"NWB file"},{"doc":"Miscellaneous types.","source":"nwb.misc.yaml","title":"Miscellaneous neurodata_types."},{"doc":"This source module contains neurodata_types for behavior data.","source":"nwb.behavior.yaml","title":"Behavior"},{"doc":"This source module contains neurodata_types for extracellular electrophysiology data.","source":"nwb.ecephys.yaml","title":"Extracellular electrophysiology"},{"doc":"This source module contains neurodata_types for intracellular electrophysiology data.","source":"nwb.icephys.yaml","title":"Intracellular electrophysiology"},{"doc":"This source module contains neurodata_types for opto-genetics data.","source":"nwb.ogen.yaml","title":"Optogenetics"},{"doc":"This source module contains neurodata_types for optical physiology data.","source":"nwb.ophys.yaml","title":"Optical physiology"},{"doc":"This source module contains neurodata_type for retinotopy data.","source":"nwb.retinotopy.yaml","title":"Retinotopy"}],"version":"2.7.0"}]}
{"namespaces":[{"name":"core","doc":"NWB namespace","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter","Keith Godfrey","Jeff Teeters"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"NWB core","schema":[{"namespace":"hdmf-common"},{"source":"nwb.base"},{"source":"nwb.device"},{"source":"nwb.epoch"},{"source":"nwb.image"},{"source":"nwb.file"},{"source":"nwb.misc"},{"source":"nwb.behavior"},{"source":"nwb.ecephys"},{"source":"nwb.icephys"},{"source":"nwb.ogen"},{"source":"nwb.ophys"},{"source":"nwb.retinotopy"}],"version":"2.7.0"}]}
2 changes: 1 addition & 1 deletion resources/spec/hdmf-common/1.8.0/namespace.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"namespaces":[{"name":"hdmf-common","doc":"Common data structures provided by HDMF","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"HDMF Common","schema":[{"doc":"base data types","source":"base.yaml","title":"Base data types"},{"doc":"data types for a column-based table","source":"table.yaml","title":"Table data types"},{"doc":"data types for different types of sparse matrices","source":"sparse.yaml","title":"Sparse data types"}],"version":"1.5.0"},{"name":"hdmf-experimental","doc":"Experimental data structures provided by HDMF. These are not guaranteed to be available in the future","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"HDMF Experimental","schema":[{"namespace":"hdmf-common"},{"doc":"Experimental data types","source":"experimental.yaml","title":"Experimental data types"},{"doc":"data types for storing references to web accessible resources","source":"resources.yaml","title":"Resource reference data types"}],"version":"0.1.0"}]}
{"namespaces":[{"name":"hdmf-common","doc":"Common data structures provided by HDMF","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"HDMF Common","schema":[{"source":"base"},{"source":"table"},{"source":"sparse"}],"version":"1.8.0"}]}
1 change: 0 additions & 1 deletion resources/spec/hdmf-common/1.8.0/resources.json

This file was deleted.

1 change: 1 addition & 0 deletions resources/spec/hdmf-experimental/0.5.0/namespace.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"namespaces":[{"name":"hdmf-experimental","doc":"Experimental data structures provided by HDMF. These are not guaranteed to be available in the future.","author":["Andrew Tritt","Oliver Ruebel","Ryan Ly","Ben Dichter","Matthew Avaylon"],"contact":["[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"],"full_name":"HDMF Experimental","schema":[{"namespace":"hdmf-common"},{"source":"experimental"},{"source":"resources"}],"version":"0.5.0"}]}
1 change: 1 addition & 0 deletions resources/spec/hdmf-experimental/0.5.0/resources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"groups":[{"data_type_def":"HERD","data_type_inc":"Container","doc":"HDMF External Resources Data Structure. A set of six tables for tracking external resource references in a file or across multiple files.","datasets":[{"data_type_inc":"Data","name":"keys","doc":"A table for storing user terms that are used to refer to external resources.","dtype":[{"name":"key","dtype":"text","doc":"The user term that maps to one or more resources in the `resources` table, e.g., \"human\"."}],"dims":["num_rows"],"shape":[null]},{"data_type_inc":"Data","name":"files","doc":"A table for storing object ids of files used in external resources.","dtype":[{"name":"file_object_id","dtype":"text","doc":"The object id (UUID) of a file that contains objects that refers to external resources."}],"dims":["num_rows"],"shape":[null]},{"data_type_inc":"Data","name":"entities","doc":"A table for mapping user terms (i.e., keys) to resource entities.","dtype":[{"name":"entity_id","dtype":"text","doc":"The compact uniform resource identifier (CURIE) of the entity, in the form [prefix]:[unique local identifier], e.g., 'NCBI_TAXON:9606'."},{"name":"entity_uri","dtype":"text","doc":"The URI for the entity this reference applies to. This can be an empty string. e.g., https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=9606"}],"dims":["num_rows"],"shape":[null]},{"data_type_inc":"Data","name":"objects","doc":"A table for identifying which objects in a file contain references to external resources.","dtype":[{"name":"files_idx","dtype":"uint","doc":"The row index to the file in the `files` table containing the object."},{"name":"object_id","dtype":"text","doc":"The object id (UUID) of the object."},{"name":"object_type","dtype":"text","doc":"The data type of the object."},{"name":"relative_path","dtype":"text","doc":"The relative path from the data object with the `object_id` to the dataset or attribute with the value(s) that is associated with an external resource. This can be an empty string if the object is a dataset that contains the value(s) that is associated with an external resource."},{"name":"field","dtype":"text","doc":"The field within the compound data type using an external resource. This is used only if the dataset or attribute is a compound data type; otherwise this should be an empty string."}],"dims":["num_rows"],"shape":[null]},{"data_type_inc":"Data","name":"object_keys","doc":"A table for identifying which objects use which keys.","dtype":[{"name":"objects_idx","dtype":"uint","doc":"The row index to the object in the `objects` table that holds the key"},{"name":"keys_idx","dtype":"uint","doc":"The row index to the key in the `keys` table."}],"dims":["num_rows"],"shape":[null]},{"data_type_inc":"Data","name":"entity_keys","doc":"A table for identifying which keys use which entity.","dtype":[{"name":"entities_idx","dtype":"uint","doc":"The row index to the entity in the `entities` table."},{"name":"keys_idx","dtype":"uint","doc":"The row index to the key in the `keys` table."}],"dims":["num_rows"],"shape":[null]}]}]}
10 changes: 10 additions & 0 deletions src/BaseIO.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class BaseDataType
T_F32, ///< 32-bit floating point
T_F64, ///< 64-bit floating point
T_STR, ///< String
V_STR, ///< Variable length string
};

/**
Expand Down Expand Up @@ -217,6 +218,15 @@ class BaseIO
virtual Status createStringDataSet(const std::string& path,
const std::string& value) = 0;

/**
* @brief Creates a dataset that holds an array of string values.
* @param path The location in the file of the dataset.
* @param values The vector of string values of the dataset.
* @return The status of the dataset creation operation.
*/
virtual Status createStringDataSet(
const std::string& path, const std::vector<std::string>& values) = 0;

/**
* @brief Creates a dataset that holds an array of references to groups within
* the file.
Expand Down
27 changes: 19 additions & 8 deletions src/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
#include <iomanip>
#include <sstream>

#include <boost/date_time.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

#include "boost/date_time/c_local_time_adjustor.hpp"

namespace AQNWB
{
/**
Expand All @@ -27,17 +30,25 @@ inline std::string generateUuid()
*/
inline std::string getCurrentTime()
{
  // Set up the boost local-time adjustor and a facet that formats a
  // time_duration as a signed "+HH:MM" UTC offset.
  using local_adj =
      boost::date_time::c_local_adjustor<boost::posix_time::ptime>;
  boost::posix_time::time_facet* f = new boost::posix_time::time_facet();
  f->time_duration_format("%+%H:%M");

  // Get the current UTC time, the equivalent local time, and the offset
  // between the two.
  auto now = boost::posix_time::microsec_clock::universal_time();
  auto utc_now = local_adj::utc_to_local(now);
  boost::posix_time::time_duration td = utc_now - now;

  // Format the UTC offset through the facet. The std::locale constructed
  // from a facet pointer takes ownership of it, so no explicit delete is
  // needed here.
  std::ostringstream oss_offset;
  oss_offset.imbue(std::locale(oss_offset.getloc(), f));
  oss_offset << td;

  // ISO 8601 local timestamp followed by its UTC offset,
  // e.g. 2024-05-02T12:34:56.789012-07:00.
  std::string currentTime = to_iso_extended_string(utc_now);
  currentTime += oss_offset.str();

  return currentTime;
}
} // namespace AQNWB
26 changes: 26 additions & 0 deletions src/hdf5/HDF5IO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,26 @@ Status HDF5IO::createStringDataSet(const std::string& path,
return Status::Success;
}

/** @brief Creates a dataset of variable-length strings at `path` and writes
 *  `values` into it.
 *  @param path The location in the file of the dataset.
 *  @param values The vector of string values of the dataset.
 *  @return Status::Success on success, Status::Failure if the file is not
 *          open or the dataset could not be created.
 */
Status HDF5IO::createStringDataSet(const std::string& path,
                                   const std::vector<std::string>& values)
{
  if (!opened)
    return Status::Failure;

  // Build an array of C-string pointers for the variable-length string
  // write; `values` outlives the write, so the pointers stay valid.
  std::vector<const char*> cStrs;
  cStrs.reserve(values.size());
  for (const auto& str : values) {
    cStrs.push_back(str.c_str());
  }

  std::unique_ptr<BaseRecordingData> dataset(createDataSet(
      BaseDataType::V_STR, SizeArray {values.size()}, SizeArray {1}, path));
  if (dataset == nullptr) {
    // Dataset creation failed; avoid dereferencing a null pointer.
    return Status::Failure;
  }
  dataset->writeDataBlock(1, BaseDataType::V_STR, cStrs.data());

  return Status::Success;
}

AQNWB::BaseRecordingData* HDF5IO::getDataSet(const std::string& path)
{
std::unique_ptr<DataSet> data;
Expand Down Expand Up @@ -441,6 +461,9 @@ H5::DataType HDF5IO::getNativeType(BaseDataType type)
case BaseDataType::Type::T_STR:
return StrType(PredType::C_S1, type.typeSize);
break;
case BaseDataType::Type::V_STR:
return StrType(PredType::C_S1, H5T_VARIABLE);
break;
default:
baseType = PredType::NATIVE_INT32;
}
Expand Down Expand Up @@ -489,6 +512,9 @@ H5::DataType HDF5IO::getH5Type(BaseDataType type)
case BaseDataType::Type::T_STR:
return StrType(PredType::C_S1, type.typeSize);
break;
case BaseDataType::Type::V_STR:
return StrType(PredType::C_S1, H5T_VARIABLE);
break;
default:
return PredType::STD_I32LE;
}
Expand Down
9 changes: 9 additions & 0 deletions src/hdf5/HDF5IO.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ class HDF5IO : public BaseIO
Status createStringDataSet(const std::string& path,
const std::string& value) override;

/**
* @brief Creates a dataset that holds an array of string values.
* @param path The location in the file of the dataset.
* @param values The vector of string values of the dataset.
* @return The status of the dataset creation operation.
*/
Status createStringDataSet(const std::string& path,
const std::vector<std::string>& values) override;

/**
* @brief Creates a dataset that holds an array of references to groups within
* the file.
Expand Down
11 changes: 6 additions & 5 deletions src/nwb/NWBFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,8 @@ void NWBFile::finalize()

Status NWBFile::createFileStructure()
{
io->createAttribute("core", "/", "namespace");
io->createAttribute("NWBFile", "/", "neurodata_type");
io->createCommonNWBAttributes("/", "core", "NWBFile", "");
io->createAttribute(NWBVersion, "/", "nwb_version");
io->createAttribute(identifierText, "/", "object_id");

io->createGroup("/acquisition");
io->createGroup("/analysis");
Expand All @@ -59,15 +57,18 @@ Status NWBFile::createFileStructure()
io->createGroup("general/extracellular_ephys");

io->createGroup("/specifications");
io->createReferenceAttribute("/specifications", "/", ".specloc");
cacheSpecifications("core/", NWBVersion);
cacheSpecifications("hdmf-common/", HDMFVersion);
cacheSpecifications("hdmf-experimental/", HDMFExperimentalVersion);

std::string time = getCurrentTime();
io->createStringDataSet("/file_create_date", time); // TODO - change to array
std::vector<std::string> timeVec = {time};
io->createStringDataSet("/file_create_date", timeVec);
io->createStringDataSet("/session_description", "a recording session");
io->createStringDataSet("/session_start_time", time);
io->createStringDataSet("/timestamps_reference_time", time);
io->createStringDataSet("/identifier", "test-identifier");
io->createStringDataSet("/identifier", identifierText);

return Status::Success;
}
Expand Down
5 changes: 5 additions & 0 deletions src/nwb/NWBFile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class NWBFile
*/
const std::string HDMFVersion = "1.8.0";

/**
* @brief Indicates the HDMF experimental version.
*/
const std::string HDMFExperimentalVersion = "0.5.0";

protected:
/**
* @brief Creates the default file structure.
Expand Down
Loading

0 comments on commit a2519a0

Please sign in to comment.