From 93856d58add0d2ea40e01524d352f850312e46ca Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 13 Sep 2022 20:23:21 -0600 Subject: [PATCH] add cable/instrument ids to min_size for directory spool (#49) --- dascore/utils/hdf5.py | 2 ++ tests/conftest.py | 2 +- tests/test_clients/test_dirspool.py | 21 ++++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/dascore/utils/hdf5.py b/dascore/utils/hdf5.py index 6edd41a6..ef71eed8 100644 --- a/dascore/utils/hdf5.py +++ b/dascore/utils/hdf5.py @@ -123,6 +123,8 @@ class HDFPatchIndexManager: "station": 8, "dims": 40, "file_version": 9, + "cable_id": 40, + "instrument_id": 40, } # columns which should be indexed for fast querying _query_columns = ("time_min", "time_max", "distance_min", "distance_max") diff --git a/tests/conftest.py b/tests/conftest.py index 0aeb1f3c..4ddd34ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -229,7 +229,7 @@ def diverse_spool_directory(diverse_spool): @pytest.fixture(scope="class") -def diverse_file_spool(diverse_spool_directory): +def diverse_directory_spool(diverse_spool_directory): """Save the diverse spool contents to a directory.""" out = dascore.spool(diverse_spool_directory).update() return out diff --git a/tests/test_clients/test_dirspool.py b/tests/test_clients/test_dirspool.py index ce721876..150f9548 100644 --- a/tests/test_clients/test_dirspool.py +++ b/tests/test_clients/test_dirspool.py @@ -10,6 +10,7 @@ import dascore as dc from dascore.constants import ONE_SECOND from dascore.core.schema import PatchFileSummary +from dascore.utils.hdf5 import HDFPatchIndexManager from dascore.utils.misc import register_func FILE_SPOOLS = [] @@ -113,9 +114,9 @@ def test_is_in_tag(self, basic_file_spool, spool_tag): out = basic_file_spool.select(tag=tag_collection).get_contents() assert out["tag"].isin(tag_collection).all() - def test_multiple_selects(self, diverse_file_spool): + def test_multiple_selects(self, diverse_directory_spool): """Ensure 
selects can be stacked.""" - spool = diverse_file_spool + spool = diverse_directory_spool contents = spool.get_contents() duration = contents["time_max"] - contents["time_min"] new_max = (contents["time_min"] + duration.mean() / 2).median() @@ -145,15 +146,15 @@ def test_select_time_tuple_with_string(self, basic_file_spool): for pa1, pa2 in zip(spool1, spool2): assert pa1.attrs["time_max"] == pa2.attrs["time_max"] - def test_select_non_zero_index(self, diverse_file_spool): + def test_select_non_zero_index(self, diverse_directory_spool): """ A Bug caused the contents of the source dataframe to have non-zero based indices, thus spools didnt work. This fixes the issue. """ - contents = diverse_file_spool.get_contents() + contents = diverse_directory_spool.get_contents() end_time = contents["time_max"].min() - sub = diverse_file_spool.select( + sub = diverse_directory_spool.select( time=(None, end_time), distance=(100, 200), ) @@ -194,6 +195,16 @@ def test_sub_chunk(self, one_file_file_spool): assert isinstance(patch, dc.Patch) +class TestGetContents: + """Tests for getting the contents of the spool.""" + + def test_str_columns_in_dataframe(self, diverse_directory_spool): + """Ensure all the string columns are in the index.""" + df = diverse_directory_spool.get_contents() + expected = HDFPatchIndexManager._min_itemsize + assert set(df.columns).issuperset(set(expected)) + + class TestFileSpoolIntegrations: """Small integration tests for the file spool."""