From 1a75824974911939c78f789288165103a56ef9b8 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Fri, 12 Apr 2024 19:05:58 -0400 Subject: [PATCH 1/4] feat: extended Bento DRS responses with access data --- chord_drs/routes.py | 36 +++++++++++++++++++++++++++++++----- chord_drs/types.py | 9 +++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/chord_drs/routes.py b/chord_drs/routes.py index 405f4e7..9263050 100644 --- a/chord_drs/routes.py +++ b/chord_drs/routes.py @@ -28,7 +28,7 @@ from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO from .db import db from .models import DrsBlob, DrsBundle -from .types import DRSAccessMethodDict, DRSContentsDict, DRSObjectDict +from .types import DRSAccessMethodDict, DRSContentsDict, DRSObjectBentoDict, DRSObjectDict from .utils import drs_file_checksum @@ -153,7 +153,20 @@ def build_contents(bundle: DrsBundle, expand: bool) -> list[DRSContentsDict]: return content -def build_bundle_json(drs_bundle: DrsBundle, expand: bool = False) -> DRSObjectDict: +def build_bento_object_json(drs_object: DrsMixin) -> DRSObjectBentoDict: + return { + "project_id": drs_object.project_id, + "dataset_id": drs_object.dataset_id, + "data_type": drs_object.data_type, + "public": drs_object.public, + } + + +def build_bundle_json( + drs_bundle: DrsBundle, + expand: bool = False, + with_bento_properties: bool = False, +) -> DRSObjectDict: return { "contents": build_contents(drs_bundle, expand), "checksums": [ @@ -169,10 +182,15 @@ def build_bundle_json(drs_bundle: DrsBundle, expand: bool = False) -> DRSObjectD **({"description": drs_bundle.description} if drs_bundle.description is not None else {}), "id": drs_bundle.id, "self_uri": create_drs_uri(drs_bundle.id), + **(build_bento_object_json(drs_bundle) if with_bento_properties else {}), } -def build_blob_json(drs_blob: DrsBlob, inside_container: bool = False) -> DRSObjectDict: +def build_blob_json( + drs_blob: DrsBlob, + inside_container: bool = False, + with_bento_properties: bool = False, +) -> DRSObjectDict: data_source = current_app.config["SERVICE_DATA_SOURCE"] blob_url: str = urllib.parse.urljoin( @@ -227,6 +245,7 @@ def build_blob_json(drs_blob: DrsBlob, inside_container: bool = False) -> DRSObj **({"description": drs_blob.description} if drs_blob.description is not None else {}), "id": drs_blob.id, "self_uri": create_drs_uri(drs_blob.id), + **(build_bento_object_json(drs_bundle) if with_bento_properties else {}), } @@ -278,7 +297,13 @@ def object_info(object_id: str): # The requester can specify object internal path to be added to the response use_internal_path: bool = str_to_bool(request.args.get("internal_path", "")) - return jsonify(build_blob_json(drs_object, inside_container=use_internal_path)) + + # The requester can ask for additional, non-spec-compliant Bento properties to be included in the response + with_bento_properties: bool = str_to_bool(request.args.get("with_bento_properties", "")) + + return jsonify( + build_blob_json(drs_object, inside_container=use_internal_path, with_bento_properties=with_bento_properties) + ) @drs_service.route("/objects//access/", methods=["GET"]) @@ -304,6 +329,7 @@ def object_search(): fuzzy_name: str | None = request.args.get("fuzzy_name") search_q: str | None = request.args.get("q") internal_path: bool = str_to_bool(request.args.get("internal_path", "")) + with_bento_properties: bool = str_to_bool(request.args.get("with_bento_properties", "")) if name: objects = DrsBlob.query.filter_by(name=name).all() @@ -325,7 +351,7 @@ def object_search(): # TODO: map objects to resources to avoid duplicate calls to same resource in check_objects_permission for obj, p in zip(objects, check_objects_permission(list(objects), P_QUERY_DATA)): if p: # Only include the blob in the search results if we have permissions to view it. - response.append(build_blob_json(obj, internal_path)) + response.append(build_blob_json(obj, internal_path, with_bento_properties=with_bento_properties)) authz_middleware.mark_authz_done(request) return jsonify(response) diff --git a/chord_drs/types.py b/chord_drs/types.py index e812c1c..391e99b 100644 --- a/chord_drs/types.py +++ b/chord_drs/types.py @@ -5,6 +5,7 @@ "DRSAccessMethodDict", "DRSChecksumDict", "DRSContentsDict", + "DRSObjectBentoDict", "DRSObjectDict", ] @@ -52,6 +53,13 @@ class DRSContentsDict(_DRSContentsDictBase, total=False): contents: list["DRSContentsDict"] +class DRSObjectBentoDict(TypedDict): + project_id: str | None + dataset_id: str | None + data_type: str | None + public: bool + + class DRSObjectDict(_DRSObjectDictBase, total=False): access_methods: list[DRSAccessMethodDict] name: str @@ -61,3 +69,4 @@ class DRSObjectDict(_DRSObjectDictBase, total=False): mime_type: str contents: list[DRSContentsDict] aliases: list[str] + bento: DRSObjectBentoDict From 974f234457b2aecf30f7daa9cff1ce31434435ad Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 15 Apr 2024 09:53:04 -0400 Subject: [PATCH 2/4] fix: various undefined errors --- chord_drs/routes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chord_drs/routes.py b/chord_drs/routes.py index 9263050..0b3c580 100644 --- a/chord_drs/routes.py +++ b/chord_drs/routes.py @@ -27,7 +27,7 @@ from .constants import BENTO_SERVICE_KIND, SERVICE_NAME, SERVICE_TYPE from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO from .db import db -from .models import DrsBlob, DrsBundle +from .models import DrsMixin, DrsBlob, DrsBundle from .types import DRSAccessMethodDict, DRSContentsDict, DRSObjectBentoDict, DRSObjectDict from .utils import drs_file_checksum @@ -245,7 +245,7 @@ def build_blob_json( **({"description": drs_blob.description} if drs_blob.description is not None else {}), "id": drs_blob.id, "self_uri": create_drs_uri(drs_blob.id), - **(build_bento_object_json(drs_bundle) if with_bento_properties else {}), + **(build_bento_object_json(drs_blob) if with_bento_properties else {}), } From 5f258c52308f6b4cca15bd5fd2c1cedbfd0fe0da Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 15 Apr 2024 10:07:00 -0400 Subject: [PATCH 3/4] fix(routes): misc issues with Bento responses --- chord_drs/routes.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/chord_drs/routes.py b/chord_drs/routes.py index 0b3c580..a9551f3 100644 --- a/chord_drs/routes.py +++ b/chord_drs/routes.py @@ -155,10 +155,12 @@ def build_contents(bundle: DrsBundle, expand: bool) -> list[DRSContentsDict]: def build_bento_object_json(drs_object: DrsMixin) -> DRSObjectBentoDict: return { - "project_id": drs_object.project_id, - "dataset_id": drs_object.dataset_id, - "data_type": drs_object.data_type, - "public": drs_object.public, + "bento": { + "project_id": drs_object.project_id, + "dataset_id": drs_object.dataset_id, + "data_type": drs_object.data_type, + "public": drs_object.public, + } } @@ -291,16 +293,16 @@ def get_drs_object(object_id: str) -> tuple[DrsBlob | DrsBundle | None, bool]: def object_info(object_id: str): drs_object, is_bundle = fetch_and_check_object_permissions(object_id, P_QUERY_DATA) + # The requester can ask for additional, non-spec-compliant Bento properties to be included in the response + with_bento_properties: bool = str_to_bool(request.args.get("with_bento_properties", "")) + if is_bundle: expand: bool = str_to_bool(request.args.get("expand", "")) - return jsonify(build_bundle_json(drs_object, expand=expand)) + return jsonify(build_bundle_json(drs_object, expand=expand, with_bento_properties=with_bento_properties)) # The requester can specify object internal path to be added to the response use_internal_path: bool = str_to_bool(request.args.get("internal_path", "")) - # The requester can ask for additional, non-spec-compliant Bento properties to be included in the response - with_bento_properties: bool = str_to_bool(request.args.get("with_bento_properties", "")) - return jsonify( build_blob_json(drs_object, inside_container=use_internal_path, with_bento_properties=with_bento_properties) ) From 6cf5e0d5a6016465278172798f03eb8ac465846c Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 15 Apr 2024 10:07:07 -0400 Subject: [PATCH 4/4] test(routes): test Bento responses --- tests/test_routes.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/test_routes.py b/tests/test_routes.py index 0005595..3076688 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -12,7 +12,12 @@ NON_EXISTENT_ID = "123" -def validate_object_fields(data, existing_id=None, with_internal_path=False): +def validate_object_fields( + data, + existing_id: bool = None, + with_internal_path: bool = False, + with_bento_properties: bool = False, +): is_local = current_app.config["SERVICE_DATA_SOURCE"] == DATA_SOURCE_LOCAL is_minio = current_app.config["SERVICE_DATA_SOURCE"] == DATA_SOURCE_MINIO @@ -36,6 +41,16 @@ def validate_object_fields(data, existing_id=None, with_internal_path=False): if existing_id: assert "id" in data and data["id"] == existing_id + if with_bento_properties: + assert "bento" in data + bento_data = data["bento"] + assert "project_id" in bento_data + assert "dataset_id" in bento_data + assert "data_type" in bento_data + assert "public" in bento_data + else: + assert "bento" not in data + def test_service_info(client): from chord_drs.app import application @@ -100,10 +115,15 @@ def test_object_access_fail(client): def _test_object_and_download(client, obj, test_range=False): res = client.get(f"/objects/{obj.id}") data = res.get_json() - assert res.status_code == 200 validate_object_fields(data, existing_id=obj.id) + # Check that we can get extra Bento data + res = client.get(f"/objects/{obj.id}?with_bento_properties=true") + data = res.get_json() + assert res.status_code == 200 + validate_object_fields(data, existing_id=obj.id, with_bento_properties=True) + # Check that we don't have access via an access ID (since we don't generate them) res = client.get(f"/objects/{obj.id}/access/no_access") assert res.status_code == 404 @@ -178,8 +198,8 @@ def test_object_and_download_minio(client_minio, drs_object_minio): @responses.activate def test_object_and_download_minio_specific_perms(client_minio, drs_object_minio): - # _test_object_and_download does 3 different accesses - authz_drs_specific_obj(iters=4) + # _test_object_and_download does 5 different accesses + authz_drs_specific_obj(iters=5) _test_object_and_download(client_minio, drs_object_minio) @@ -191,8 +211,8 @@ def test_object_and_download(client, drs_object): @responses.activate def test_object_and_download_specific_perms(client, drs_object): - # _test_object_and_download does 3 different accesses - authz_drs_specific_obj(iters=4) + # _test_object_and_download does 5 different accesses + authz_drs_specific_obj(iters=5) _test_object_and_download(client, drs_object)