Merge remote-tracking branch 'origin/develop' into refact/scopeable-model-base-class

# Conflicts:
#	chord_metadata_service/chord/tests/test_api.py
#	chord_metadata_service/discovery/api_views.py
#	chord_metadata_service/discovery/utils.py
#	chord_metadata_service/experiments/api_views.py
#	chord_metadata_service/phenopackets/api_views.py
davidlougheed committed Oct 31, 2024
2 parents 65cc1f7 + e93107f commit 275524a
Showing 25 changed files with 861 additions and 799 deletions.
56 changes: 56 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,56 @@
name: Create and upload release artifacts
on:
  release:
    types: [published]

jobs:
  release-artifacts:
    runs-on: ubuntu-latest

    permissions:
      contents: write
      repository-projects: write

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Poetry
        run: python -m pip install poetry
      - name: Install dependencies
        run: poetry install
      - name: Make release artifacts
        run: |
          mkdir -p ./dist/schemas
          poetry run ./manage.py schemas phenopacket >> ./dist/schemas/phenopacket_schema.json
          poetry run ./manage.py schemas experiment >> ./dist/schemas/experiment_schema.json
          poetry run ./manage.py schemas discovery >> ./dist/schemas/discovery.json
          pushd ./dist/schemas
          zip -r ../../json-schemas.zip *
      - name: Upload release artifacts
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require("fs");
            // get tag
            const tag = context.ref.replace("refs/tags/", "");
            // get release from tag
            const release = await github.rest.repos.getReleaseByTag({
              owner: context.repo.owner,
              repo: context.repo.repo,
              tag: tag,
            });
            await github.rest.repos.uploadReleaseAsset({
              owner: context.repo.owner,
              repo: context.repo.repo,
              release_id: release.data.id,
              name: "json-schemas.zip",
              // readFileSync is synchronous, so no await is needed here
              data: fs.readFileSync("./json-schemas.zip"),
            });
5 changes: 2 additions & 3 deletions bento.Dockerfile
@@ -1,13 +1,12 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.06.01
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.10.01

SHELL ["/bin/bash", "-c"]

# Install Postgres client for checking if database is ready
# Install Poetry for dependency management and uvicorn to serve the API
RUN apt-get update -y && \
    apt-get install -y postgresql-client && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip install --no-cache-dir "uvicorn[standard]==0.30.1"
+    rm -rf /var/lib/apt/lists/*

# Backwards-compatible with old BentoV2 container layout
WORKDIR /app
2 changes: 1 addition & 1 deletion bento.dev.Dockerfile
@@ -1,4 +1,4 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.06.01
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.10.01

LABEL org.opencontainers.image.description="Local development image for Katsu."
LABEL devcontainer.metadata='[{ \
2 changes: 1 addition & 1 deletion chord_metadata_service/authz/middleware.py
@@ -17,7 +17,7 @@
# be protected by the gateway.
include_pattern_public = (
re.compile(r"^(GET|POST|PUT|DELETE)$"),
re.compile(r"^/api/(projects|datasets|public|public_overview|public_search_fields|public_dataset|public_rules)$"),
re.compile(r"^/api/(projects|datasets|public|public_overview|public_search_fields|public_rules)$"),
)
include_pattern_workflows = (pattern_get, re.compile(r"^(/workflows$|/workflows/)"))
include_pattern_si = (pattern_get, re.compile(r"^/service-info"))
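
The dropped public_dataset path tracks the removal of the public_dataset endpoint from discovery/api_views.py later in this commit. A quick check of the tightened pattern (illustrative only, not part of the diff):

# Illustrative only -- pattern copied verbatim from the updated middleware.
import re

public_paths = re.compile(
    r"^/api/(projects|datasets|public|public_overview|public_search_fields|public_rules)$"
)

assert public_paths.match("/api/public_overview")
assert not public_paths.match("/api/public_dataset")  # no longer publicly included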
11 changes: 11 additions & 0 deletions chord_metadata_service/authz/tests/helpers.py
@@ -91,6 +91,17 @@ def one_no_authz_put(self, url: str, *args, **kwargs):
"""Mocks a single False response from the authorization service and executes a JSON PUT request."""
return self._one_authz_put(False, url, *args, **kwargs)

+    def _one_authz_patch(self, authz_res: bool, url: str, *args, **kwargs):
+        with aioresponses() as m:
+            mock_authz_eval_one_result(m, authz_res)
+            return self.client.patch(url, *args, content_type="application/json", **kwargs)
+
+    def one_authz_patch(self, url: str, *args, **kwargs):
+        """
+        Mocks a single True response from the authorization service and executes a JSON PATCH request.
+        """
+        return self._one_authz_patch(True, url, *args, **kwargs)
+
def _one_authz_delete(self, authz_res: bool, url: str, *args, **kwargs):
with aioresponses() as m:
mock_authz_eval_one_result(m, authz_res)
13 changes: 0 additions & 13 deletions chord_metadata_service/chord/admin.py

This file was deleted.

10 changes: 6 additions & 4 deletions chord_metadata_service/chord/api_views.py
@@ -28,12 +28,13 @@
from chord_metadata_service.resources.serializers import ResourceSerializer
from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, JSONLDDatasetRenderer, RDFDatasetRenderer
from chord_metadata_service.restapi.pagination import LargeResultsSetPagination
+from chord_metadata_service.restapi.utils import response_optionally_as_attachment

from .models import Project, Dataset, ProjectJsonSchema
from .serializers import (
ProjectJsonSchemaSerializer,
ProjectSerializer,
-    DatasetSerializer
+    DatasetSerializer,
)

logger = logging.getLogger(__name__)
@@ -140,7 +141,7 @@ class DatasetViewSet(CHORDPublicModelViewSet):
queryset = Dataset.objects.all().order_by("title")

@action(detail=True, methods=['get'])
-    def dats(self, request, *_args, **_kwargs):
+    def dats(self, request: DrfRequest, *_args, **_kwargs):
"""
Retrieve a specific DATS file for a given dataset.
@@ -152,7 +153,8 @@ def dats(self, request, *_args, **_kwargs):
return not_found(request) # side effect: sets authz done flag

authz.mark_authz_done(request)
-        return Response(dataset.dats_file)
+
+        return response_optionally_as_attachment(request, dataset.dats_file, f"{dataset.identifier}_dats.json")

@action(detail=True, methods=["get"])
def resources(self, request, *_args, **_kwargs):
@@ -249,7 +251,7 @@ async def update(self, request, *args, **kwargs):
return forbidden(request) # side effect: sets authz done flag

# Do not allow datasets to change project
if request.data["project"] != dataset_project_id:
if "project" in request.data and request.data["project"] != dataset_project_id:
return bad_request(request, "Dataset project ID cannot change")

authz.mark_authz_done(request)
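
The response_optionally_as_attachment helper imported above lives in chord_metadata_service/restapi/utils.py and its body is not shown in this diff. A minimal sketch of its presumable behaviour, inferred from the dats() view and test_dats_as_attachment below (hypothetical; the real implementation may differ):

# Hypothetical sketch -- inferred behaviour: only ?attachment=true adds the
# Content-Disposition header; any other value, or no parameter, leaves it off.
from rest_framework.request import Request as DrfRequest
from rest_framework.response import Response


def response_optionally_as_attachment(request: DrfRequest, data, filename: str) -> Response:
    response = Response(data)
    if request.query_params.get("attachment") == "true":
        # Matches the header format asserted in the tests
        response.headers["Content-Disposition"] = f'attachment; filename="{filename}"'
    return response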
28 changes: 28 additions & 0 deletions chord_metadata_service/chord/management/commands/schemas.py
@@ -0,0 +1,28 @@
import json
from typing import Any
from django.core.management.base import BaseCommand, CommandParser

from chord_metadata_service.chord.data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET
from chord_metadata_service.discovery.schemas import DISCOVERY_SCHEMA
from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA

NAME_TO_SCHEMA: dict[str, object] = {
DATA_TYPE_PHENOPACKET: PHENOPACKET_SCHEMA,
DATA_TYPE_EXPERIMENT: EXPERIMENT_SCHEMA,
"discovery": DISCOVERY_SCHEMA,
}


class Command(BaseCommand):
help = """
Compiles and returns a JSON-schema in a single JSON file for artifact.
Use in GitHub Actions in order to publish usable schemas on releases.
"""

def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument("schema", action="store", type=str, choices=NAME_TO_SCHEMA.keys())

def handle(self, *args: Any, **options: Any) -> str | None:
schema = NAME_TO_SCHEMA[options["schema"]]
self.stdout.write(json.dumps(schema, indent=2))
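
As a quick local check (not part of this commit), the new command can also be exercised from Python through Django's call_command, capturing stdout and confirming the output parses as JSON:

# Illustrative only -- assumes the project's Django settings are configured.
import json
from io import StringIO

from django.core.management import call_command

buf = StringIO()
call_command("schemas", "phenopacket", stdout=buf)
schema = json.loads(buf.getvalue())  # raises if the output is not valid JSON
print(sorted(schema.keys())[:5])  # peek at a few top-level schema keys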
69 changes: 54 additions & 15 deletions chord_metadata_service/chord/tests/test_api.py
@@ -184,6 +184,34 @@ def test_dats(self):
response = self.client.get("/api/datasets/does-not-exist/dats")
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)

+    def test_dats_as_attachment(self):
+        payload = {**self.dats_valid_payload, 'dats_file': {}}
+
+        r = self.one_authz_post('/api/datasets', data=json.dumps(payload))
+
+        self.assertEqual(r.status_code, status.HTTP_201_CREATED)
+        dataset_id = Dataset.objects.first().identifier
+
+        subtest_params = [
+            ("?attachment=true", True),
+            ("?attachment=false", False),
+            ("?attachment=", False),
+            ("", False),
+        ]
+
+        for params in subtest_params:
+            with self.subTest(params=params):
+                response = self.client.get(f"/api/datasets/{dataset_id}/dats{params[0]}")
+                self.assertEqual(response.status_code, status.HTTP_200_OK)
+                self.assertDictEqual(response.data, payload['dats_file'])
+                if params[1]:
+                    self.assertEqual(
+                        response.headers["Content-Disposition"],
+                        f"attachment; filename=\"{dataset_id}_dats.json\""
+                    )
+                else:
+                    self.assertNotIn("Content-Disposition", response.headers)
+
def test_resources(self):
resource = {
"id": "NCBITaxon:2023-09-14",
@@ -236,9 +264,17 @@ def setUp(self):

def test_update_dataset(self):
r = self.one_authz_put(f"/api/datasets/{self.dataset.identifier}", json=self.valid_update)
-        assert r.status_code == status.HTTP_200_OK
+        self.assertEqual(r.status_code, status.HTTP_200_OK)
        self.dataset.refresh_from_db()
+        self.assertEqual(self.dataset.title, self.valid_update["title"])
+
+    def test_update_dataset_partial(self):
+        r = self.one_authz_patch(
+            f"/api/datasets/{self.dataset.identifier}", data=json.dumps({"title": self.valid_update["title"]})
+        )
+        self.assertEqual(r.status_code, status.HTTP_200_OK)
+        self.dataset.refresh_from_db()
-        assert self.dataset.title == self.valid_update["title"]
+        self.assertEqual(self.dataset.title, self.valid_update["title"])

def test_update_dataset_changed_project(self):
r = self.one_authz_put(
@@ -248,50 +284,53 @@ def test_update_dataset_changed_project(self):
"project": str(self.project_2.identifier),
})
)
-        assert r.status_code == status.HTTP_400_BAD_REQUEST
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
        res = r.json()
-        assert res["message"] == "Bad Request"
-        assert res["errors"][0]["message"] == "Dataset project ID cannot change"
+        self.assertEqual(res["message"], "Bad Request")
+        self.assertEqual(res["errors"][0]["message"], "Dataset project ID cannot change")

def test_update_dataset_bad_dats_json(self):
r = self.one_authz_put(
f"/api/datasets/{self.dataset.identifier}",
data=json.dumps({**self.valid_update, "dats_file": "asdf"}), # asdf is not JSON
)
-        assert r.status_code == status.HTTP_400_BAD_REQUEST
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
        res = r.json()
-        assert res["message"] == "Bad Request"
-        assert res["errors"][0]["message"] == (
-            "Submitted dataset.dats_file data is not a valid JSON string. Make sure the string value is JSON "
-            "compatible, or submit dats_file as a JSON object."
+        self.assertEqual(res["message"], "Bad Request")
+        self.assertEqual(
+            res["errors"][0]["message"],
+            (
+                "Submitted dataset.dats_file data is not a valid JSON string. Make sure the string value is JSON "
+                "compatible, or submit dats_file as a JSON object."
+            )
        )

def test_update_dataset_forbidden(self):
r = self.one_no_authz_put(f"/api/datasets/{self.dataset.identifier}", json=self.valid_update)
-        assert r.status_code == status.HTTP_403_FORBIDDEN
+        self.assertEqual(r.status_code, status.HTTP_403_FORBIDDEN)

def test_update_dataset_not_found(self):
r = self.one_authz_put(f"/api/datasets/{uuid.uuid4()}", json=self.valid_update)
-        assert r.status_code == status.HTTP_404_NOT_FOUND
+        self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)


class DeleteDatasetTest(AuthzAPITestCase, ProjectTestCase):

def test_delete_dataset(self):
r = self.one_authz_delete(f"/api/datasets/{self.dataset.identifier}")
-        assert r.status_code == status.HTTP_204_NO_CONTENT
+        self.assertEqual(r.status_code, status.HTTP_204_NO_CONTENT)

with self.assertRaises(Dataset.DoesNotExist): # must not exist in DB anymore
self.dataset.refresh_from_db()

def test_delete_dataset_forbidden(self):
r = self.one_no_authz_delete(f"/api/datasets/{self.dataset.identifier}")
-        assert r.status_code == status.HTTP_403_FORBIDDEN
+        self.assertEqual(r.status_code, status.HTTP_403_FORBIDDEN)
self.dataset.refresh_from_db() # must not raise DoesNotExist

def test_delete_dataset_not_found(self):
r = self.client.delete(f"/api/datasets/{uuid.uuid4()}")
-        assert r.status_code == status.HTTP_404_NOT_FOUND
+        self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)


class CreateProjectJsonSchema(AuthzAPITestCaseWithProjectJSON):
2 changes: 1 addition & 1 deletion chord_metadata_service/chord/workflows/metadata.py
@@ -147,7 +147,7 @@ def boolean_input(id_: str, required: bool = True):
wm.WorkflowFileArrayInput(
id="document_files",
required=True,
pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|markdown|mp3|m4a|mp4)$",
pattern=r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|markdown|html|mp3|m4a|mp4)$",
),
],
))
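
A quick sanity check of the broadened document_files pattern (illustrative only, not part of the commit):

# Illustrative only -- pattern copied verbatim from the diff above.
import re

document_pattern = re.compile(
    r"^.*\.(pdf|csv|tsv|txt|docx|xlsx|jpeg|jpg|png|gif|md|markdown|html|mp3|m4a|mp4)$"
)

assert document_pattern.match("report.html")  # newly accepted by this commit
assert document_pattern.match("notes.md")
assert not document_pattern.match("archive.tar.gz")  # still rejected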
39 changes: 4 additions & 35 deletions chord_metadata_service/discovery/api_views.py
@@ -2,7 +2,6 @@

from adrf.decorators import api_view
from bento_lib.responses import errors
-from django.conf import settings
from drf_spectacular.utils import extend_schema, inline_serializer
from functools import partial
from operator import is_not
@@ -12,10 +11,9 @@
from rest_framework.response import Response
from typing import Type

-from .scopeable_model import BaseScopeableModel
-from ..authz.permissions import BentoAllowAny
-from ..chord import data_types as dts, models as cm
-from ..logger import logger
+from chord_metadata_service.authz.permissions import BentoAllowAny
+from chord_metadata_service.chord import data_types as dts
+from chord_metadata_service.logger import logger

from . import responses as dres
from .censorship import get_rules
@@ -24,6 +22,7 @@
from .model_lookups import PUBLIC_MODEL_NAMES_TO_DATA_TYPE, PUBLIC_MODEL_NAMES_TO_MODEL, PublicModelName
from .schemas import DISCOVERY_SCHEMA
from .scope import get_request_discovery_scope
+from .scopeable_model import BaseScopeableModel
from .types import BinWithValue
from .utils import get_discovery_data_type_permissions, get_discovery_field_set_permissions

@@ -207,36 +206,6 @@ async def _get_field_response(field: str) -> dict:
return Response(response)


@api_view(["GET"])
@permission_classes([BentoAllowAny])
async def public_dataset(_request: DrfRequest):
"""
get:
Properties of the datasets
"""

# For now, we don't have any permissions checks for this.
# In the future, we could introduce a view:dataset permission or something.

if not settings.CONFIG_PUBLIC:
return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND)

# Datasets provenance metadata
datasets = cm.Dataset.objects.values(
"title", "description", "contact_info",
"dates", "stored_in", "spatial_coverage",
"types", "privacy", "distributions",
"dimensions", "primary_publications", "citations",
"produced_by", "creators", "licenses",
"acknowledges", "keywords", "version", "dats_file",
"extra_properties", "identifier", "discovery"
)

return Response({
"datasets": datasets
})


@api_view(["GET"])
@permission_classes([BentoAllowAny])
async def discovery_schema(_request: DrfRequest):
(Diffs for the remaining changed files were not loaded.)
