Process dataset deletion events (#14)
Cito authored Aug 11, 2023
1 parent 9d308fb commit 20d6db4
Showing 21 changed files with 335 additions and 71 deletions.
5 changes: 3 additions & 2 deletions .devcontainer/.dev_config.yaml
@@ -16,8 +16,9 @@ kafka_servers: ["kafka:9092"]
 
 download_access_url: "http://127.0.0.1:8080/download-access"
 
-dataset_overview_event_topic: metadata
-dataset_overview_event_type: metadata_dataset_overview
+dataset_change_event_topic: metadata_datasets
+dataset_upsertion_event_type: dataset_created
+dataset_deletion_event_type: dataset_deleted
 
 # the default keys are invalid but set for creating the example specs
 auth_key: "{}"
2 changes: 1 addition & 1 deletion .github/workflows/cd.yaml
@@ -32,7 +32,7 @@ jobs:
       name: Verify tag format
       # format must be compatible with semantic versioning
       run: |
-        SEMVER_REGEX="^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
+        SEMVER_REGEX="^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(?:-((?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
         if echo "${{ steps.get_version_tag.outputs.version }}" | grep -Eq "$SEMVER_REGEX"; then
           echo "Tag format is valid"
         else
2 changes: 2 additions & 0 deletions .static_files
@@ -15,6 +15,8 @@
 scripts/script_utils/__init__.py
 scripts/script_utils/cli.py
 
+scripts/__init__.py
+scripts/update_all.py
 scripts/license_checker.py
 scripts/get_package_name.py
 scripts/update_config_docs.py
12 changes: 7 additions & 5 deletions README.md
@@ -52,21 +52,21 @@ We recommend using the provided Docker container.
 
 A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/work-package-service):
 ```bash
-docker pull ghga/work-package-service:0.1.3
+docker pull ghga/work-package-service:0.1.4
 ```
 
 Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile):
 ```bash
 # Execute in the repo's root dir:
-docker build -t ghga/work-package-service:0.1.3 .
+docker build -t ghga/work-package-service:0.1.4 .
 ```
 
 For production-ready deployment, we recommend using Kubernetes, however,
 for simple use cases, you could execute the service using docker
 on a single server:
 ```bash
 # The entrypoint is preconfigured:
-docker run -p 8080:8080 ghga/work-package-service:0.1.3 --help
+docker run -p 8080:8080 ghga/work-package-service:0.1.4 --help
 ```
 
 If you prefer not to use containers, you may install the service from source:
@@ -102,9 +102,11 @@ The service requires the following configuration parameters:
 
 - **Items** *(string)*
 
-- **`dataset_overview_event_topic`** *(string)*: Name of the topic for events that inform about datasets.
+- **`dataset_change_event_topic`** *(string)*: Name of the topic for events that inform about datasets.
 
-- **`dataset_overview_event_type`** *(string)*: The type to use for events that inform about datasets.
+- **`dataset_upsertion_event_type`** *(string)*: The type of events that inform about new and changed datasets.
+
+- **`dataset_deletion_event_type`** *(string)*: The type of events that inform about deleted datasets.
 
 - **`download_access_url`** *(string)*: URL pointing to the internal download access API.
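For context, the renamed settings map onto a pydantic settings class along these lines; a minimal sketch, assuming a pydantic v1 `BaseSettings`-style config as used by hexkit-based services (`EventConfig` is a hypothetical stand-in for the service's real `Config` class):

```python
# Minimal sketch only -- EventConfig is a hypothetical stand-in for the
# service's real Config class, which carries many more settings.
from pydantic import BaseSettings, Field


class EventConfig(BaseSettings):
    """The event settings renamed/added by this commit."""

    dataset_change_event_topic: str = Field(
        ..., description="Name of the topic for events that inform about datasets."
    )
    dataset_upsertion_event_type: str = Field(
        ..., description="The type of events that inform about new and changed datasets."
    )
    dataset_deletion_event_type: str = Field(
        ..., description="The type of events that inform about deleted datasets."
    )

    class Config:
        env_prefix = "wps_"  # matches the wps_* env_names in config_schema.json


# Values can come from the environment (WPS_DATASET_CHANGE_EVENT_TOPIC etc.)
# or be passed explicitly, as in example_config.yaml below:
config = EventConfig(
    dataset_change_event_topic="metadata_datasets",
    dataset_upsertion_event_type="dataset_created",
    dataset_deletion_event_type="dataset_deleted",
)
```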
32 changes: 21 additions & 11 deletions config_schema.json
@@ -91,21 +91,30 @@
       "type": "string"
       }
     },
-    "dataset_overview_event_topic": {
-      "title": "Dataset Overview Event Topic",
+    "dataset_change_event_topic": {
+      "title": "Dataset Change Event Topic",
       "description": "Name of the topic for events that inform about datasets.",
-      "example": "metadata",
+      "example": "metadata_datasets",
       "env_names": [
-        "wps_dataset_overview_event_topic"
+        "wps_dataset_change_event_topic"
       ],
       "type": "string"
     },
-    "dataset_overview_event_type": {
-      "title": "Dataset Overview Event Type",
-      "description": "The type to use for events that inform about datasets.",
-      "example": "metadata_dataset_overview",
+    "dataset_upsertion_event_type": {
+      "title": "Dataset Upsertion Event Type",
+      "description": "The type of events that inform about new and changed datasets.",
+      "example": "dataset_created",
       "env_names": [
-        "wps_dataset_overview_event_type"
+        "wps_dataset_upsertion_event_type"
       ],
       "type": "string"
     },
+    "dataset_deletion_event_type": {
+      "title": "Dataset Deletion Event Type",
+      "description": "The type of events that inform about deleted datasets.",
+      "example": "dataset_deleted",
+      "env_names": [
+        "wps_dataset_deletion_event_type"
+      ],
+      "type": "string"
+    },
@@ -306,8 +315,9 @@
     "db_connection_str",
     "service_instance_id",
     "kafka_servers",
-    "dataset_overview_event_topic",
-    "dataset_overview_event_type",
+    "dataset_change_event_topic",
+    "dataset_upsertion_event_type",
+    "dataset_deletion_event_type",
     "download_access_url",
     "auth_key"
   ],
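To see how the three now-required settings are meant to work together, here is a minimal sketch of a subscriber that dispatches on the two event types, assuming hexkit's `EventSubscriberProtocol`; the `repository` and its `register_dataset`/`delete_dataset` methods are illustrative names, not the service's confirmed API:

```python
# Illustrative sketch only: how the two configured event types could be
# dispatched by a subscriber. The repository method names are assumptions.
from hexkit.custom_types import Ascii, JsonObject
from hexkit.protocols.eventsub import EventSubscriberProtocol


class DatasetEventSubscriber(EventSubscriberProtocol):
    """Consumes dataset change events from the configured topic (sketch)."""

    def __init__(self, *, config, repository):
        self.topics_of_interest = [config.dataset_change_event_topic]
        self.types_of_interest = [
            config.dataset_upsertion_event_type,
            config.dataset_deletion_event_type,
        ]
        self._config = config
        self._repository = repository

    async def _consume_validated(
        self, *, payload: JsonObject, type_: Ascii, topic: Ascii
    ) -> None:
        # deletion events carry only the accession (cf. MetadataDatasetID below)
        if type_ == self._config.dataset_deletion_event_type:
            await self._repository.delete_dataset(payload["accession"])
        else:  # upsertion events carry the full dataset overview
            await self._repository.register_dataset(payload)
```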
5 changes: 3 additions & 2 deletions example_config.yaml
@@ -13,8 +13,9 @@ cors_allow_credentials: false
 cors_allowed_headers: []
 cors_allowed_methods: []
 cors_allowed_origins: []
-dataset_overview_event_topic: metadata
-dataset_overview_event_type: metadata_dataset_overview
+dataset_change_event_topic: metadata_datasets
+dataset_deletion_event_type: dataset_deleted
+dataset_upsertion_event_type: dataset_created
 datasets_collection: datasets
 db_connection_str: '**********'
 db_name: dev-db
2 changes: 1 addition & 1 deletion openapi.yaml
@@ -138,7 +138,7 @@ components:
 info:
   description: A service managing work packages for the GHGA CLI
   title: Work Package Service
-  version: 0.1.3
+  version: 0.1.4
 openapi: 3.0.2
 paths:
   /health:
17 changes: 17 additions & 0 deletions scripts/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
+# for the German Human Genome-Phenome Archive (GHGA)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Scripts and utils used during development or in CI pipelines."""
51 changes: 51 additions & 0 deletions scripts/update_all.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
+# for the German Human Genome-Phenome Archive (GHGA)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Run all update scripts that are present in the repository in the correct order"""
+
+try:
+    from scripts.update_template_files import main as update_template
+except ImportError:
+    pass
+else:
+    print("Pulling in updates from template repository")
+    update_template()
+
+try:
+    from scripts.update_config_docs import main as update_config
+except ImportError:
+    pass
+else:
+    print("Updating config docs")
+    update_config()
+
+try:
+    from scripts.update_openapi_docs import main as update_openapi
+except ImportError:
+    pass
+else:
+    print("Updating OpenAPI docs")
+    update_openapi()
+
+try:
+    from scripts.update_readme import main as update_readme
+except ImportError:
+    pass
+else:
+    print("Updating README")
+    update_readme()
2 changes: 1 addition & 1 deletion scripts/update_template_files.py
@@ -33,7 +33,7 @@
 try:
     from script_utils.cli import echo_failure, echo_success, run
 except ImportError:
-    echo_failure = echo_success = print  # type: ignore
+    echo_failure = echo_success = print
 
     def run(main_fn):
         """Run main function without cli tools (typer)."""
6 changes: 3 additions & 3 deletions setup.cfg
@@ -35,9 +35,9 @@ zip_safe = False
 include_package_data = True
 packages = find:
 install_requires =
-    ghga-service-commons[api,auth,crypt]==0.4.1
-    ghga-event-schemas==0.13.2
-    hexkit[akafka,mongodb]==0.10.0
+    ghga-service-commons[api,auth,crypt]==0.5.0
+    ghga-event-schemas==0.13.4
+    hexkit[akafka,mongodb]==0.10.2
     httpx==0.23.3
     typer==0.7.0
Expand Down
32 changes: 32 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,32 @@
+# Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
+# for the German Human Genome-Phenome Archive (GHGA)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Shared fixtures"""
+
+import pytest
+from hexkit.providers.akafka.testutils import get_kafka_fixture
+from hexkit.providers.mongodb.testutils import MongoDbFixture, get_mongodb_fixture
+from hexkit.providers.testing.utils import get_event_loop
+
+event_loop = get_event_loop("session")
+kafka_fixture = get_kafka_fixture("session")
+mongodb_fixture = get_mongodb_fixture("session")
+
+
+@pytest.fixture(autouse=True)
+def reset_db(mongodb_fixture: MongoDbFixture):  # noqa: F811
+    """Clear the database before tests."""
+    mongodb_fixture.empty_collections()
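A hypothetical test using these session-scoped fixtures might look as follows (assuming the fixture exposes `client` and `config` attributes as in hexkit's testutils; the autouse `reset_db` fixture above guarantees empty collections at test start):

```python
# Hypothetical usage of the shared fixtures above.
from hexkit.providers.mongodb.testutils import MongoDbFixture


def test_db_starts_empty(mongodb_fixture: MongoDbFixture):
    """Illustrative only: collections are emptied before each test."""
    db = mongodb_fixture.client[mongodb_fixture.config.db_name]
    assert db["datasets"].count_documents({}) == 0
```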
12 changes: 7 additions & 5 deletions tests/fixtures/__init__.py
@@ -41,7 +41,7 @@
 )
 
 from .access import AccessCheckMock
-from .datasets import DATASET_OVERVIEW_EVENT
+from .datasets import DATASET_UPSERTION_EVENT
 
 __all__ = [
     "AUTH_CLAIMS",
@@ -139,15 +139,17 @@ async def fixture_container(
     # publish an event announcing a dataset
     async with get_container(config=config) as container:
         await kafka_fixture.publish_event(
-            payload=DATASET_OVERVIEW_EVENT.dict(),
-            type_="metadata_dataset_overview",
-            key="test_key",
-            topic="metadata",
+            payload=DATASET_UPSERTION_EVENT.dict(),
+            topic=config.dataset_change_event_topic,
+            type_=config.dataset_upsertion_event_type,
+            key="test-key-fixture",
         )
 
         # populate database with published dataset
         event_subscriber = await container.event_subscriber()
+        # wait for event to be submitted and processed
         await asyncio.wait_for(event_subscriber.run(forever=False), timeout=10)
+        await asyncio.sleep(0.25)
 
         # return the configured and wired container
         yield container
10 changes: 8 additions & 2 deletions tests/fixtures/datasets.py
@@ -18,13 +18,14 @@
 
 from ghga_event_schemas.pydantic_ import (
     MetadataDatasetFile,
+    MetadataDatasetID,
     MetadataDatasetOverview,
     MetadataDatasetStage,
 )
 
 from wps.core.models import Dataset, DatasetFile, WorkType
 
-__all__ = ["DATASET", "DATASET_OVERVIEW_EVENT"]
+__all__ = ["DATASET", "DATASET_UPSERTION_EVENT", "DATASET_DELETION_EVENT"]
 
 
 DATASET = Dataset(
@@ -40,7 +41,7 @@
 )
 
 
-DATASET_OVERVIEW_EVENT = MetadataDatasetOverview(
+DATASET_UPSERTION_EVENT = MetadataDatasetOverview(
     accession="some-dataset-id",
     stage=MetadataDatasetStage.DOWNLOAD,
     title="Test dataset 1",
@@ -63,3 +64,8 @@
         ),
     ],
 )
+
+
+DATASET_DELETION_EVENT = MetadataDatasetID(
+    accession="some-dataset-id",
+)
10 changes: 3 additions & 7 deletions tests/test_api.py
@@ -19,12 +19,6 @@
 from fastapi import status
 from ghga_service_commons.api.testing import AsyncTestClient
 from ghga_service_commons.utils.jwt_helpers import decode_and_validate_token
-from hexkit.providers.akafka.testutils import (  # noqa: F401 # pylint: disable=unused-import
-    kafka_fixture,
-)
-from hexkit.providers.mongodb.testutils import (  # noqa: F401 # pylint: disable=unused-import
-    mongodb_fixture,
-)
 from pytest import mark
 from pytest_httpx import HTTPXMock
 
@@ -47,12 +41,14 @@
     "user_public_crypt4gh_key": user_public_crypt4gh_key,
 }
 
+TIMEOUT = 5
+
 
 @mark.asyncio
 async def test_health_check(client: AsyncTestClient):
     """Test that the health check endpoint works."""
 
-    response = await client.get("/health")
+    response = await client.get("/health", timeout=TIMEOUT)
 
     assert response.status_code == status.HTTP_200_OK
     assert response.json() == {"status": "OK"}