From 4a3bb033d425968315cb4c50f5b5bf1bdbac311d Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 23 May 2023 16:49:08 -0600 Subject: [PATCH] add terra15 v6 support (#140) --- dascore/data_registry.txt | 1 + dascore/io/terra15/core.py | 8 ++ dascore/io/terra15/utils.py | 8 +- pyproject.toml | 1 + tests/conftest.py | 6 + tests/test_io/test_terra15/test_terra15.py | 30 +++++ tests/test_io/test_terra15/test_terra15_v6.py | 114 ++++++++++++++++++ 7 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 tests/test_io/test_terra15/test_terra15.py create mode 100644 tests/test_io/test_terra15/test_terra15_v6.py diff --git a/dascore/data_registry.txt b/dascore/data_registry.txt index a90c1cc4..5fd2d9f5 100644 --- a/dascore/data_registry.txt +++ b/dascore/data_registry.txt @@ -2,6 +2,7 @@ terra15_das_1_trimmed.hdf5 18e08792c1cd08c9afd18334e17e21787be0b646151b39802541ee11a516976a https://github.com/dasdae/test_data/raw/master/das/terra15_das_1_trimmed.hdf5 terra15_das_unfinished.hdf5 087e98cb228d9be369783998ef96c5221fdd3d9aaa9da1e4f45c56effe771222 https://github.com/dasdae/test_data/raw/master/das/terra15_das_unfinished.hdf5 terra15_v5_test_file.hdf5 b8d27d4b690928da84c913fb1c8658e84c9afd7381ea36ed93790d1f23947b18 https://github.com/dasdae/test_data/raw/master/das/terra15_v5_test_file.hdf5 +terra15_v6_test_file.hdf5 63f130ed93c1fe4105f2a355272d136a51abfe5c212a40be3a29074404669387 https://github.com/dasdae/test_data/raw/master/das/terra15_v6_test_file.hdf5 iDAS005_hdf5_example.626.h5 28f4f9b1f2b248fa0419b41c11e971f2d395c3ed43afc1ef5d5c35f399e99190 https://github.com/dasdae/test_data/raw/master/das/iDAS005_hdf5_example.626.h5 iDAS005_tdms_example.626.tdms 0c31f75015bc671d958967c7fdf32e2e64fdb467f997d331ae6a7d4c989ab380 https://github.com/dasdae/test_data/raw/master/das/iDAS005_tdms_example.626.tdms sample_tdms_file_v4713.tdms 22a79c4c3166ce3cc8467265540cdb3ad2b54460209d39641c1e8f20e64eca65 
https://github.com/dasdae/test_data/raw/master/das/sample_tdms_file_v4713.tdms diff --git a/dascore/io/terra15/core.py b/dascore/io/terra15/core.py index 7b2f382b..56681ba8 100644 --- a/dascore/io/terra15/core.py +++ b/dascore/io/terra15/core.py @@ -68,3 +68,11 @@ class Terra15FormatterV5(Terra15FormatterV4): """ version = "5" + + +class Terra15FormatterV6(Terra15FormatterV4): + """ + Support for Terra15 data format, version 6. + """ + + version = "6" diff --git a/dascore/io/terra15/utils.py b/dascore/io/terra15/utils.py index 489a0ff5..949753ec 100644 --- a/dascore/io/terra15/utils.py +++ b/dascore/io/terra15/utils.py @@ -3,6 +3,7 @@ from typing import Optional import numpy as np +from tables.exceptions import NoSuchNodeError from dascore.constants import timeable_types from dascore.core import Patch @@ -37,7 +38,10 @@ def _get_terra15_version_str(hdf_fi) -> str: def _get_scanned_time_min_max(data_node): """Get the min/max time from time array.""" - time = data_node["gps_time"] + try: + time = data_node["gps_time"] + except (NoSuchNodeError, IndexError): + time = data_node["posix_time"] t_len = len(time) # first try fast path by tacking first/last of time tmin, tmax = time[0], time[-1] @@ -70,7 +74,7 @@ def _get_version_data_node(root): if version == "4": data_type = root._v_attrs.data_product data_node = root[data_type] - elif version == "5": + elif version in {"5", "6"}: data_node = root["data_product"] else: raise NotImplementedError("Unknown Terra15 version") diff --git a/pyproject.toml b/pyproject.toml index da4ab7b8..6c99b8f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,5 +86,6 @@ PICKLE = "dascore.io.pickle.core:PickleIO" TDMS__V4713 = "dascore.io.tdms.core:TDMSFormatterV4713" TERRA15__V4 = "dascore.io.terra15.core:Terra15FormatterV4" TERRA15__V5 = "dascore.io.terra15.core:Terra15FormatterV5" +TERRA15__V6 = "dascore.io.terra15.core:Terra15FormatterV6" QUANTX__V2 = "dascore.io.quantx.core:QuantXV2" WAV = "dascore.io.wav.core:WavIO" diff --git 
a/tests/conftest.py b/tests/conftest.py index 3d69e664..87ba39de 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -103,6 +103,12 @@ def terra15_v5_path(): return fetch("terra15_v5_test_file.hdf5") +@pytest.fixture(scope="class") +def terra15_v6_path(): + """Get the path to terra15 V6 file, download if not cached.""" + return fetch("terra15_v6_test_file.hdf5") + + @pytest.fixture() @register_func(SPOOL_FIXTURES) def terra15_das_spool(terra15_das_example_path) -> SpoolType: diff --git a/tests/test_io/test_terra15/test_terra15.py b/tests/test_io/test_terra15/test_terra15.py new file mode 100644 index 00000000..da9fb0ec --- /dev/null +++ b/tests/test_io/test_terra15/test_terra15.py @@ -0,0 +1,30 @@ +""" +Misc. tests for Terra15. +""" +import shutil + +import numpy as np +import pandas as pd +import pytest +import tables + +import dascore as dc + + +class TestTerra15: + """Misc tests for Terra15.""" + + @pytest.fixture(scope="class") + def missing_gps_terra15_hdf5(self, terra15_v5_path, tmp_path_factory): + """Creates a terra15 file with missing GPS Time.""" + new = tmp_path_factory.mktemp("missing_gps") / "missing.hdf5" + shutil.copy(terra15_v5_path, new) + with tables.open_file(new, "a") as fi: + fi.root.data_product.gps_time._f_remove() + return new + + def test_missing_gps_time(self, missing_gps_terra15_hdf5): + """Tests for when GPS time isn't found.""" + patch = dc.read(missing_gps_terra15_hdf5)[0] + assert isinstance(patch, dc.Patch) + assert not np.any(pd.isnull(patch.coords["time"])) diff --git a/tests/test_io/test_terra15/test_terra15_v6.py b/tests/test_io/test_terra15/test_terra15_v6.py new file mode 100644 index 00000000..4b6f3f1e --- /dev/null +++ b/tests/test_io/test_terra15/test_terra15_v6.py @@ -0,0 +1,114 @@ +""" +Tests for reading terra15 format, version 6. 
+""" +import numpy as np +import pytest + +import dascore as dc +from dascore.constants import REQUIRED_DAS_ATTRS +from dascore.core.schema import PatchFileSummary +from dascore.io.terra15.core import Terra15FormatterV6 + + +@pytest.fixture(scope="class") +def terra15_v6_patch(terra15_v6_path): + """Read the terra15 v6 file.""" + patch = dc.read(terra15_v6_path)[0] + return patch + + +class TestReadTerra15V6: + """Tests for reading the terra15 format.""" + + def test_type(self, terra15_v6_patch): + """Ensure the expected type is returned.""" + assert isinstance(terra15_v6_patch, dc.Patch) + + def test_attributes(self, terra15_v6_patch): + """Ensure a few of the expected attrs exist in array.""" + attrs = dict(terra15_v6_patch.attrs) + expected_attrs = {"time_min", "time_max", "distance_min", "data_units"} + assert set(expected_attrs).issubset(set(attrs)) + + def test_has_required_attrs(self, terra15_v6_patch): + """Ensure the required das attrs are found""" + assert set(REQUIRED_DAS_ATTRS).issubset(set(dict(terra15_v6_patch.attrs))) + + def test_coord_attr_time_equal(self, terra15_v6_patch): + """The time reported in the attrs and coords should match""" + attr_time = terra15_v6_patch.attrs["time_max"] + coord_time = terra15_v6_patch.coords["time"].max() + assert attr_time == coord_time + + def test_read_with_limits(self, terra15_v6_patch, terra15_v6_path): + """If start/end times are selected, the same patch ought to be returned.""" + attrs = terra15_v6_patch.attrs + time = (attrs["time_min"], attrs["time_max"]) + dist = (attrs["distance_min"], attrs["distance_max"]) + patch = Terra15FormatterV6().read( + terra15_v6_path, + time=time, + distance=dist, + )[0] + assert attrs["time_max"] == patch.attrs["time_max"] + + def test_time_dist_slice(self, terra15_v6_patch, terra15_v6_path): + """Ensure slicing distance and time works from read func.""" + time_array = terra15_v6_patch.coords["time"] + dist_array = terra15_v6_patch.coords["distance"] + t1, t2 = time_array[10], 
time_array[40] + d1, d2 = dist_array[10], dist_array[40] + patch = Terra15FormatterV6().read( + terra15_v6_path, time=(t1, t2), distance=(d1, d2) + )[0] + attrs, coords = patch.attrs, patch.coords + assert attrs["time_min"] == coords["time"].min() == t1 + assert attrs["time_max"] == coords["time"].max() + # since we use floats sometimes these are a little off. + assert (attrs["time_max"] - t2) < (attrs["d_time"] / 4) + assert attrs["distance_min"] == coords["distance"].min() == d1 + assert attrs["distance_max"] == coords["distance"].max() == d2 + + def test_no_arrays_in_attrs(self, terra15_das_patch): + """ + Ensure that the attributes are not arrays. + Originally, attrs like time_min could be arrays with empty shapes. + """ + for key, val in terra15_das_patch.attrs.items(): + assert not isinstance(val, np.ndarray) + + +class TestIsTerra15V6: + """Tests for function to determine if a file is a terra15 file.""" + + def test_format_and_version(self, terra15_v6_path): + """Ensure the expected format name and version are returned.""" + name, version = Terra15FormatterV6().get_format(terra15_v6_path) + assert (name, version) == (Terra15FormatterV6.name, Terra15FormatterV6.version) + + def test_not_terra15_not_hdf5(self, dummy_text_file): + """Test for not even a hdf5 file.""" + parser = Terra15FormatterV6() + assert not parser.get_format(dummy_text_file) + assert not parser.get_format(dummy_text_file.parent) + + def test_hdf5file_not_terra15(self, generic_hdf5): + """Assert that the generic hdf5 file is not a terra15.""" + parser = Terra15FormatterV6() + assert not parser.get_format(generic_hdf5) + + +class TestScanTerra15V6: + """Tests for scanning terra15 file.""" + + def test_basic_scan(self, terra15_v6_path): + """Tests for getting summary info from terra15 data.""" + parser = Terra15FormatterV6() + out = parser.scan(terra15_v6_path) + assert isinstance(out, list) + assert len(out) == 1 + assert isinstance(out[0], PatchFileSummary) + + data = out[0] + assert data.file_format == parser.name + assert 
data.file_version == parser.version