From 69ae7c566c8e01da3aa492a99ec262f469f57f09 Mon Sep 17 00:00:00 2001 From: Thomas Zajac Date: Tue, 1 Oct 2024 09:48:27 +0200 Subject: [PATCH] [FIS] Fix bucket_id propagation (#58) --- services/fis/README.md | 8 +++----- services/fis/config_schema.json | 6 ------ services/fis/dev_config.yaml | 1 - services/fis/example_config.yaml | 1 - services/fis/openapi.yaml | 6 +++++- services/fis/pyproject.toml | 2 +- .../fis/src/fis/adapters/inbound/fastapi_/routes.py | 1 - services/fis/src/fis/core/ingest.py | 10 +--------- services/fis/src/fis/core/models.py | 1 + services/fis/src/fis/ports/inbound/ingest.py | 6 +++--- services/fis/tests_fis/fixtures/joint.py | 1 + services/fis/tests_fis/fixtures/test_config.yaml | 1 - services/fis/tests_fis/test_api_call.py | 4 ++-- services/fis/tests_fis/test_outbox_dao.py | 2 +- 14 files changed, 18 insertions(+), 32 deletions(-) diff --git a/services/fis/README.md b/services/fis/README.md index 560b719a..cf129818 100644 --- a/services/fis/README.md +++ b/services/fis/README.md @@ -15,13 +15,13 @@ We recommend using the provided Docker container. A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/file-ingest-service): ```bash -docker pull ghga/file-ingest-service:3.1.1 +docker pull ghga/file-ingest-service:4.0.0 ``` Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile): ```bash # Execute in the repo's root dir: -docker build -t ghga/file-ingest-service:3.1.1 . +docker build -t ghga/file-ingest-service:4.0.0 . 
``` For production-ready deployment, we recommend using Kubernetes, however, @@ -29,7 +29,7 @@ for simple use cases, you could execute the service using docker on a single server: ```bash # The entrypoint is preconfigured: -docker run -p 8080:8080 ghga/file-ingest-service:3.1.1 --help +docker run -p 8080:8080 ghga/file-ingest-service:4.0.0 --help ``` If you prefer not to use containers, you may install the service from source: @@ -199,8 +199,6 @@ The service requires the following configuration parameters: - **`private_key`** *(string, required)*: Base64 encoded private key of the keypair whose public key is used to encrypt the payload. -- **`source_bucket_id`** *(string, required)*: ID of the bucket the object(s) corresponding to the upload metadata have been uploaded to. This should currently point to the staging bucket. - - **`token_hashes`** *(array, required)*: List of token hashes corresponding to the tokens that can be used to authenticate calls to this service. - **Items** *(string)* diff --git a/services/fis/config_schema.json b/services/fis/config_schema.json index 3e65252d..1ad23d76 100644 --- a/services/fis/config_schema.json +++ b/services/fis/config_schema.json @@ -172,11 +172,6 @@ "title": "Private Key", "type": "string" }, - "source_bucket_id": { - "description": "ID of the bucket the object(s) corresponding to the upload metadata have been uploaded to. 
This should currently point to the staging bucket.", - "title": "Source Bucket Id", - "type": "string" - }, "token_hashes": { "description": "List of token hashes corresponding to the tokens that can be used to authenticate calls to this service.", "items": { @@ -419,7 +414,6 @@ "vault_url", "vault_path", "private_key", - "source_bucket_id", "token_hashes", "file_upload_validation_success_topic", "kafka_servers", diff --git a/services/fis/dev_config.yaml b/services/fis/dev_config.yaml index 0df53b80..b3032f0c 100644 --- a/services/fis/dev_config.yaml +++ b/services/fis/dev_config.yaml @@ -1,5 +1,4 @@ service_name: fis -source_bucket_id: staging private_key: dummy-key token_hashes: [abcdef, ghijkl] diff --git a/services/fis/example_config.yaml b/services/fis/example_config.yaml index f704743f..264c0a04 100644 --- a/services/fis/example_config.yaml +++ b/services/fis/example_config.yaml @@ -28,7 +28,6 @@ private_key: dummy-key service_account_token_path: /var/run/secrets/kubernetes.io/serviceaccount/token service_instance_id: '1' service_name: fis -source_bucket_id: staging token_hashes: - abcdef - ghijkl diff --git a/services/fis/openapi.yaml b/services/fis/openapi.yaml index 9553bc9c..426880f0 100644 --- a/services/fis/openapi.yaml +++ b/services/fis/openapi.yaml @@ -28,6 +28,9 @@ components: in place of the actual secret.' 
properties: + bucket_id: + title: Bucket Id + type: string encrypted_md5_checksums: items: type: string @@ -65,6 +68,7 @@ components: required: - file_id - object_id + - bucket_id - part_size - unencrypted_size - encrypted_size @@ -104,7 +108,7 @@ info: description: A service to ingest s3 file upload metadata produced by thedata-steward-kit upload command title: File Ingest Service - version: 3.1.1 + version: 4.0.0 openapi: 3.1.0 paths: /federated/ingest_metadata: diff --git a/services/fis/pyproject.toml b/services/fis/pyproject.toml index 689f8d52..c6e904e4 100644 --- a/services/fis/pyproject.toml +++ b/services/fis/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "fis" -version = "3.1.1" +version = "4.0.0" description = "File Ingest Service - A lightweight service to propagate file upload metadata to the GHGA file backend services" readme = "README.md" authors = [ diff --git a/services/fis/src/fis/adapters/inbound/fastapi_/routes.py b/services/fis/src/fis/adapters/inbound/fastapi_/routes.py index b70f2929..7e160d50 100644 --- a/services/fis/src/fis/adapters/inbound/fastapi_/routes.py +++ b/services/fis/src/fis/adapters/inbound/fastapi_/routes.py @@ -103,7 +103,6 @@ async def ingest_metadata( await upload_metadata_processor.populate_by_event( upload_metadata=payload, secret_id=secret_id ) - return Response(status_code=202) diff --git a/services/fis/src/fis/core/ingest.py b/services/fis/src/fis/core/ingest.py index 2b5a7058..4a8a63b3 100644 --- a/services/fis/src/fis/core/ingest.py +++ b/services/fis/src/fis/core/ingest.py @@ -46,11 +46,6 @@ class ServiceConfig(BaseSettings): description="Base64 encoded private key of the keypair whose public key is used " + "to encrypt the payload.", ) - source_bucket_id: str = Field( - default=..., - description="ID of the bucket the object(s) corresponding to the upload metadata " - + "have been uploaded to. 
This should currently point to the staging bucket.", - ) token_hashes: list[str] = Field( default=..., description="List of token hashes corresponding to the tokens that can be used " @@ -62,7 +57,6 @@ async def _send_file_metadata( *, dao: FileUploadValidationSuccessDao, upload_metadata: models.UploadMetadataBase, - source_bucket_id: str, secret_id: str, ): """Send FileUploadValidationSuccess event to downstream services""" @@ -70,7 +64,7 @@ async def _send_file_metadata( upload_date=now_as_utc().isoformat(), file_id=upload_metadata.file_id, object_id=upload_metadata.object_id, - bucket_id=source_bucket_id, + bucket_id=upload_metadata.bucket_id, s3_endpoint_alias=upload_metadata.storage_alias, decrypted_size=upload_metadata.unencrypted_size, decryption_secret_id=secret_id, @@ -125,7 +119,6 @@ async def populate_by_event( await _send_file_metadata( dao=self._file_validation_success_dao, secret_id=secret_id, - source_bucket_id=self._config.source_bucket_id, upload_metadata=upload_metadata, ) @@ -170,7 +163,6 @@ async def populate_by_event( await _send_file_metadata( dao=self._file_validation_success_dao, secret_id=secret_id, - source_bucket_id=self._config.source_bucket_id, upload_metadata=upload_metadata, ) diff --git a/services/fis/src/fis/core/models.py b/services/fis/src/fis/core/models.py index 856f02cf..21af9e35 100644 --- a/services/fis/src/fis/core/models.py +++ b/services/fis/src/fis/core/models.py @@ -33,6 +33,7 @@ class UploadMetadataBase(BaseModel): file_id: str object_id: str + bucket_id: str part_size: int unencrypted_size: int encrypted_size: int diff --git a/services/fis/src/fis/ports/inbound/ingest.py b/services/fis/src/fis/ports/inbound/ingest.py index c7065c47..56aad887 100644 --- a/services/fis/src/fis/ports/inbound/ingest.py +++ b/services/fis/src/fis/ports/inbound/ingest.py @@ -20,7 +20,7 @@ class DecryptionError(RuntimeError): - """Thrown when decryption with the provided private key failed""" + """Raised when decryption with the provided 
private key failed""" def __init__(self): message = "Could not decrypt received payload with the given key." @@ -28,14 +28,14 @@ def __init__(self): class VaultCommunicationError(RuntimeError): - """Thrown when interaction with the vault resulted in an error""" + """Raised when interaction with the vault resulted in an error""" def __init__(self, *, message) -> None: super().__init__(message) class WrongDecryptedFormatError(RuntimeError): - """Thrown when the decrypted payload""" + """Raised when the decrypted payload does not conform to the expected format""" def __init__(self, *, cause: str): message = f"Decrypted payload does not conform to expected format: { diff --git a/services/fis/tests_fis/fixtures/joint.py b/services/fis/tests_fis/fixtures/joint.py index fafaac40..49f0d208 100644 --- a/services/fis/tests_fis/fixtures/joint.py +++ b/services/fis/tests_fis/fixtures/joint.py @@ -44,6 +44,7 @@ TEST_PAYLOAD = UploadMetadataBase( file_id="abc", + bucket_id="staging", object_id="happy_little_object", part_size=16 * 1024**2, unencrypted_size=50 * 1024**2, diff --git a/services/fis/tests_fis/fixtures/test_config.yaml b/services/fis/tests_fis/fixtures/test_config.yaml index 0df53b80..b3032f0c 100644 --- a/services/fis/tests_fis/fixtures/test_config.yaml +++ b/services/fis/tests_fis/fixtures/test_config.yaml @@ -1,5 +1,4 @@ service_name: fis -source_bucket_id: staging private_key: dummy-key token_hashes: [abcdef, ghijkl] diff --git a/services/fis/tests_fis/test_api_call.py b/services/fis/tests_fis/test_api_call.py index 3663bc01..2bc28d67 100644 --- a/services/fis/tests_fis/test_api_call.py +++ b/services/fis/tests_fis/test_api_call.py @@ -114,7 +114,7 @@ async def test_api_calls(monkeypatch, joint_fixture: JointFixture): upload_date=expected_upload_date, file_id=TEST_PAYLOAD.file_id, object_id=TEST_PAYLOAD.object_id, - bucket_id=joint_fixture.config.source_bucket_id, + bucket_id=TEST_PAYLOAD.bucket_id, s3_endpoint_alias=TEST_PAYLOAD.storage_alias, decrypted_size=TEST_PAYLOAD.unencrypted_size, 
decryption_secret_id=secret_id, @@ -198,7 +198,7 @@ async def test_legacy_api_calls(monkeypatch, joint_fixture: JointFixture): upload_date=expected_upload_date, file_id=TEST_PAYLOAD.file_id, object_id=TEST_PAYLOAD.object_id, - bucket_id=joint_fixture.config.source_bucket_id, + bucket_id=TEST_PAYLOAD.bucket_id, s3_endpoint_alias=TEST_PAYLOAD.storage_alias, decrypted_size=TEST_PAYLOAD.unencrypted_size, decryption_secret_id=secret_id, diff --git a/services/fis/tests_fis/test_outbox_dao.py b/services/fis/tests_fis/test_outbox_dao.py index 6c31baac..890084d4 100644 --- a/services/fis/tests_fis/test_outbox_dao.py +++ b/services/fis/tests_fis/test_outbox_dao.py @@ -60,7 +60,7 @@ async def test_dto_to_event(joint_fixture: JointFixture): upload_date=now_as_utc().isoformat(), file_id=TEST_PAYLOAD.file_id, object_id=TEST_PAYLOAD.object_id, - bucket_id=joint_fixture.config.source_bucket_id, + bucket_id=TEST_PAYLOAD.bucket_id, s3_endpoint_alias=TEST_PAYLOAD.storage_alias, decrypted_size=TEST_PAYLOAD.unencrypted_size, decryption_secret_id="",