Skip to content

Commit

Permalink
add cable/instrument ids to min_size for directory spool (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
d-chambers authored Sep 14, 2022
1 parent 7208acb commit 93856d5
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
2 changes: 2 additions & 0 deletions dascore/utils/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class HDFPatchIndexManager:
"station": 8,
"dims": 40,
"file_version": 9,
"cable_id": 40,
"instrument_id": 40,
}
# columns which should be indexed for fast querying
_query_columns = ("time_min", "time_max", "distance_min", "distance_max")
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def diverse_spool_directory(diverse_spool):


@pytest.fixture(scope="class")
def diverse_file_spool(diverse_spool_directory):
def diverse_directory_spool(diverse_spool_directory):
"""Save the diverse spool contents to a directory."""
out = dascore.spool(diverse_spool_directory).update()
return out
Expand Down
21 changes: 16 additions & 5 deletions tests/test_clients/test_dirspool.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import dascore as dc
from dascore.constants import ONE_SECOND
from dascore.core.schema import PatchFileSummary
from dascore.utils.hdf5 import HDFPatchIndexManager
from dascore.utils.misc import register_func

FILE_SPOOLS = []
Expand Down Expand Up @@ -113,9 +114,9 @@ def test_is_in_tag(self, basic_file_spool, spool_tag):
out = basic_file_spool.select(tag=tag_collection).get_contents()
assert out["tag"].isin(tag_collection).all()

def test_multiple_selects(self, diverse_file_spool):
def test_multiple_selects(self, diverse_directory_spool):
"""Ensure selects can be stacked."""
spool = diverse_file_spool
spool = diverse_directory_spool
contents = spool.get_contents()
duration = contents["time_max"] - contents["time_min"]
new_max = (contents["time_min"] + duration.mean() / 2).median()
Expand Down Expand Up @@ -145,15 +146,15 @@ def test_select_time_tuple_with_string(self, basic_file_spool):
for pa1, pa2 in zip(spool1, spool2):
assert pa1.attrs["time_max"] == pa2.attrs["time_max"]

def test_select_non_zero_index(self, diverse_file_spool):
def test_select_non_zero_index(self, diverse_directory_spool):
"""
A Bug caused the contents of the source dataframe to have
non-zero based indices, thus spools didn't work. This fixes
the issue.
"""
contents = diverse_file_spool.get_contents()
contents = diverse_directory_spool.get_contents()
end_time = contents["time_max"].min()
sub = diverse_file_spool.select(
sub = diverse_directory_spool.select(
time=(None, end_time),
distance=(100, 200),
)
Expand Down Expand Up @@ -194,6 +195,16 @@ def test_sub_chunk(self, one_file_file_spool):
assert isinstance(patch, dc.Patch)


class TestGetContents:
    """Tests for retrieving the contents dataframe from a directory spool."""

    def test_str_columns_in_dataframe(self, diverse_directory_spool):
        """Every string column tracked by the HDF index must appear in the contents."""
        contents = diverse_directory_spool.get_contents()
        # _min_itemsize maps string-column names to their fixed HDF5 widths;
        # its keys are exactly the string columns the index should expose.
        string_columns = HDFPatchIndexManager._min_itemsize
        assert set(string_columns).issubset(contents.columns)


class TestFileSpoolIntegrations:
"""Small integration tests for the file spool."""

Expand Down

0 comments on commit 93856d5

Please sign in to comment.