From e67126fd42be23128360021c73bb478ae207c29e Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Wed, 2 Oct 2024 13:55:47 -0400 Subject: [PATCH 1/5] fix indentation after `pad_widt` computation --- databroker/mongo_normalized.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/databroker/mongo_normalized.py b/databroker/mongo_normalized.py index 6e76f1433..226bad75b 100644 --- a/databroker/mongo_normalized.py +++ b/databroker/mongo_normalized.py @@ -2244,11 +2244,11 @@ def default_validate_shape(key, data, expected_shape): else: # margin == 0 padding.append((0, 0)) trimming.append(slice(None, None)) - # TODO Rethink this! - # We cannot do NaN because that does not work for integers - # and it is too late to change our mind about the data type. - padded = numpy.pad(data, padding, "edge") - padded_and_trimmed = padded[tuple(trimming)] + # TODO Rethink this! + # We cannot do NaN because that does not work for integers + # and it is too late to change our mind about the data type. + padded = numpy.pad(data, padding, "edge") + padded_and_trimmed = padded[tuple(trimming)] return padded_and_trimmed From 172286dde96162f33cde7efb833c9f84f1557559 Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Mon, 7 Oct 2024 17:32:43 -0400 Subject: [PATCH 2/5] add tests for default_validate_shape --- databroker/tests/test_validate_shape.py | 66 ++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/databroker/tests/test_validate_shape.py b/databroker/tests/test_validate_shape.py index 01a37df6c..48cfabeb9 100644 --- a/databroker/tests/test_validate_shape.py +++ b/databroker/tests/test_validate_shape.py @@ -1,11 +1,13 @@ from bluesky import RunEngine from bluesky.plans import count -from ophyd.sim import img +from ophyd.sim import img, DirectImage from tiled.client import Context, from_context from tiled.server.app import build_app -from ..mongo_normalized import MongoAdapter +from ..mongo_normalized import MongoAdapter, BadShapeMetadata +import numpy as np +import pytest def test_validate_shape(tmpdir): # custom_validate_shape will mutate this to show it has been called @@ -29,3 +31,63 @@ def post_document(name, doc): assert not shapes client[uid]["primary"]["data"]["img"][:] assert shapes + + +@pytest.mark.parametrize("shape,expected_shape",[ + ( (10,), (11,) ), + ( (10,20), (10,21) ), + ( (10,20,30), (10,21,30) ), + ( (10,20,30), (10,20,31) ), + ( (20,20,20,20), (20,21,20,22) ), +]) +def test_padding(tmpdir, shape, expected_shape): + adapter = MongoAdapter.from_mongomock() + + direct_img = DirectImage( + func=lambda: np.array(np.ones(shape)), name="direct", labels={"detectors"} + ) + direct_img.img.name = "img" + + with Context.from_app(build_app(adapter), token_cache=tmpdir) as context: + client = from_context(context) + + def post_document(name, doc): + if name == "descriptor": + doc["data_keys"]["img"]["shape"] = expected_shape + + client.post_document(name, doc) + + RE = RunEngine() + RE.subscribe(post_document) + (uid,) = RE(count([direct_img])) + assert client[uid]["primary"]["data"]["img"][0].shape == expected_shape + +@pytest.mark.parametrize("shape,expected_shape",[ + ( (10,), (11,12) ), + ( (10,20), (10,200) ), + #( (20,20,20,20), (20,21,20,200) ), # range with page_size=0 because expected byte size is too large + #( (10,20), (9,20) ), # docstring says this should raise BadShapeMetadata, but code doesn't +]) +def test_default_validate_shape(tmpdir, shape, expected_shape): + adapter = MongoAdapter.from_mongomock() + + direct_img = DirectImage( + func=lambda: np.array(np.ones(shape)), name="direct", labels={"detectors"} + ) + direct_img.img.name = "img" + + with Context.from_app(build_app(adapter), token_cache=tmpdir) as context: + client = from_context(context) + + def post_document(name, doc): + if name == "descriptor": + doc["data_keys"]["img"]["shape"] = expected_shape + + client.post_document(name, doc) + + RE = RunEngine() + RE.subscribe(post_document) + (uid,) = RE(count([direct_img])) + with pytest.raises(BadShapeMetadata): + client[uid]["primary"]["data"]["img"][:] + From c1ec41f5bd0379a374db122b55c9fb4c2a8bc726 Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Wed, 9 Oct 2024 00:52:45 -0400 Subject: [PATCH 3/5] lint with black --- databroker/tests/test_validate_shape.py | 35 +++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/databroker/tests/test_validate_shape.py b/databroker/tests/test_validate_shape.py index 48cfabeb9..88a5230f1 100644 --- a/databroker/tests/test_validate_shape.py +++ b/databroker/tests/test_validate_shape.py @@ -9,6 +9,7 @@ import numpy as np import pytest + def test_validate_shape(tmpdir): # custom_validate_shape will mutate this to show it has been called shapes = [] @@ -33,13 +34,16 @@ def post_document(name, doc): assert shapes -@pytest.mark.parametrize("shape,expected_shape",[ - ( (10,), (11,) ), - ( (10,20), (10,21) ), - ( (10,20,30), (10,21,30) ), - ( (10,20,30), (10,20,31) ), - ( (20,20,20,20), (20,21,20,22) ), -]) +@pytest.mark.parametrize( + "shape,expected_shape", + [ + ((10,), (11,)), + ((10, 20), (10, 21)), + ((10, 20, 30), (10, 21, 30)), + ((10, 20, 30), (10, 20, 31)), + ((20, 20, 20, 20), (20, 21, 20, 22)), + ], +) def test_padding(tmpdir, shape, expected_shape): adapter = MongoAdapter.from_mongomock() @@ -62,12 +66,16 @@ def post_document(name, doc): (uid,) = RE(count([direct_img])) assert client[uid]["primary"]["data"]["img"][0].shape == expected_shape -@pytest.mark.parametrize("shape,expected_shape",[ - ( (10,), (11,12) ), - ( (10,20), (10,200) ), - #( (20,20,20,20), (20,21,20,200) ), # range with page_size=0 because expected byte size is too large - #( (10,20), (9,20) ), # docstring says this should raise BadShapeMetadata, but code doesn't -]) + +@pytest.mark.parametrize( + "shape,expected_shape", + [ + ((10,), (11, 12)), + ((10, 20), (10, 200)), + # ( (20,20,20,20), (20,21,20,200) ), # range with page_size=0 because expected byte size is too large + # ( (10,20), (9,20) ), # docstring says this should raise BadShapeMetadata, but code doesn't + ], +) def test_default_validate_shape(tmpdir, shape, expected_shape): adapter = MongoAdapter.from_mongomock() @@ -90,4 +98,3 @@ def post_document(name, doc): (uid,) = RE(count([direct_img])) with pytest.raises(BadShapeMetadata): client[uid]["primary"]["data"]["img"][:] - From 78931a241207ec3e911a350cdd83b2f78caa1226 Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Thu, 10 Oct 2024 15:55:58 -0400 Subject: [PATCH 4/5] add more test cases, touchups to --- databroker/mongo_normalized.py | 17 +++++------------ databroker/tests/test_validate_shape.py | 5 +++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/databroker/mongo_normalized.py b/databroker/mongo_normalized.py index 226bad75b..44585265e 100644 --- a/databroker/mongo_normalized.py +++ b/databroker/mongo_normalized.py @@ -977,7 +977,7 @@ def populate_columns(keys, min_seq_num, max_seq_num): # their size if we need to squeeze more performance out here. But maybe # we can get away with never adding that complexity. for key, est_row_bytesize in zip(nonscalars, estimated_nonscalar_row_bytesizes): - page_size = TARGET_PAGE_BYTESIZE // est_row_bytesize + page_size = max(1, TARGET_PAGE_BYTESIZE // est_row_bytesize) boundaries = list(range(min_seq_num, 1 + max_seq_num, page_size)) if boundaries[-1] != max_seq_num: boundaries.append(max_seq_num) @@ -2209,10 +2209,11 @@ def default_validate_shape(key, data, expected_shape): Check that data.shape == expected.shape. * If number of dimensions differ, raise BadShapeMetadata - * If any dimension is larger than expected, raise BadShapeMetadata. + * If any dimension differs by more than MAX_SIZE_DIFF, raise BadShapeMetadata. * If some dimensions are smaller than expected,, pad "right" edge of each dimension that falls short with NaN. """ + MAX_SIZE_DIFF = 2 if data.shape == expected_shape: return data if len(data.shape) != len(expected_shape): @@ -2228,8 +2229,7 @@ def default_validate_shape(key, data, expected_shape): for actual, expected in zip(data.shape, expected_shape): margin = expected - actual # Limit how much padding or trimming we are willing to do. - SOMEWHAT_ARBITRARY_LIMIT_OF_WHAT_IS_REASONABLE = 2 - if abs(margin) > SOMEWHAT_ARBITRARY_LIMIT_OF_WHAT_IS_REASONABLE: + if abs(margin) > MAX_SIZE_DIFF: raise BadShapeMetadata( f"For data key {key} " f"shape {data.shape} does not " @@ -2237,19 +2237,12 @@ def default_validate_shape(key, data, expected_shape): ) if margin > 0: padding.append((0, margin)) - trimming.append(slice(None, None)) elif margin < 0: padding.append((0, 0)) - trimming.append(slice(None)) else: # margin == 0 padding.append((0, 0)) - trimming.append(slice(None, None)) - # TODO Rethink this! - # We cannot do NaN because that does not work for integers - # and it is too late to change our mind about the data type. padded = numpy.pad(data, padding, "edge") - padded_and_trimmed = padded[tuple(trimming)] - return padded_and_trimmed + return padded def build_summary(run_start_doc, run_stop_doc, stream_names): diff --git a/databroker/tests/test_validate_shape.py b/databroker/tests/test_validate_shape.py index 88a5230f1..0a31e2368 100644 --- a/databroker/tests/test_validate_shape.py +++ b/databroker/tests/test_validate_shape.py @@ -39,6 +39,7 @@ def post_document(name, doc): [ ((10,), (11,)), ((10, 20), (10, 21)), + ((10, 20), (10, 19)), ((10, 20, 30), (10, 21, 30)), ((10, 20, 30), (10, 20, 31)), ((20, 20, 20, 20), (20, 21, 20, 22)), @@ -72,8 +73,8 @@ def post_document(name, doc): [ ((10,), (11, 12)), ((10, 20), (10, 200)), - # ( (20,20,20,20), (20,21,20,200) ), # range with page_size=0 because expected byte size is too large - # ( (10,20), (9,20) ), # docstring says this should raise BadShapeMetadata, but code doesn't + ( (20,20,20,20), (20,21,20,200) ), + ( (10,20), (5,20) ), ], ) def test_default_validate_shape(tmpdir, shape, expected_shape): From e3723ea59a947f2ba8e9aa11056d6dccd87f88d3 Mon Sep 17 00:00:00 2001 From: Hiran Wijesinghe Date: Thu, 10 Oct 2024 16:20:50 -0400 Subject: [PATCH 5/5] lint --- databroker/mongo_normalized.py | 1 - databroker/tests/test_validate_shape.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/databroker/mongo_normalized.py b/databroker/mongo_normalized.py index 44585265e..26c3ece19 100644 --- a/databroker/mongo_normalized.py +++ b/databroker/mongo_normalized.py @@ -2225,7 +2225,6 @@ def default_validate_shape(key, data, expected_shape): ) # Pad at the "end" along any dimension that is too short. padding = [] - trimming = [] for actual, expected in zip(data.shape, expected_shape): margin = expected - actual # Limit how much padding or trimming we are willing to do. diff --git a/databroker/tests/test_validate_shape.py b/databroker/tests/test_validate_shape.py index 0a31e2368..df563e384 100644 --- a/databroker/tests/test_validate_shape.py +++ b/databroker/tests/test_validate_shape.py @@ -73,8 +73,8 @@ def post_document(name, doc): [ ((10,), (11, 12)), ((10, 20), (10, 200)), - ( (20,20,20,20), (20,21,20,200) ), - ( (10,20), (5,20) ), + ((20, 20, 20, 20), (20, 21, 20, 200)), + ((10, 20), (5, 20)), ], ) def test_default_validate_shape(tmpdir, shape, expected_shape):