Skip to content

Commit

Permalink
Deep-compare actual vs expected published metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
marksparkza committed Oct 31, 2023
1 parent 9b7b35d commit 98a9e9d
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 28 deletions.
7 changes: 6 additions & 1 deletion odp/catalog/saeon.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,12 @@ def _create_published_metadata(record_model: RecordModel) -> list[PublishedMetad

iso19115_jsonschema = schema_catalog.get_schema(URI(iso19115_schemaobj.uri))
result = iso19115_jsonschema.evaluate(JSON(record_model.metadata))
datacite_metadata = result.output('translation', scheme='saeon/datacite4', ignore_validity=True)
datacite_metadata = result.output(
'translation',
scheme='saeon/datacite4',
ignore_validity=True,
clear_empties=True,
)

published_metadata += [
PublishedMetadataModel(
Expand Down
30 changes: 30 additions & 0 deletions test/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import pathlib
from copy import deepcopy

import odp.schema

Expand All @@ -16,3 +17,32 @@ def iso19115_example():
example_file = schema_dir / 'metadata' / 'saeon' / 'iso19115-example.json'
with open(example_file) as f:
return json.load(f)


def isequal(x, y):
    """Perform a deep comparison, useful for comparing metadata records.

    Values of different types are never equal, so ``0`` does not match
    ``False`` and ``1`` does not match ``1.0``.

    Dicts are equal when they have the same keys and deeply-equal values.

    Lists are compared without regard to element order, but multiplicity
    still counts: the lists must have the same length and every element
    of ``x`` must pair off with a distinct, deeply-equal element of ``y``.

    Any other values are compared with ``==``.

    Neither argument is modified.
    """
    if type(x) is not type(y):
        return False  # avoid `0 == False`, etc

    if isinstance(x, dict):
        return x.keys() == y.keys() and all(isequal(x[k], y[k]) for k in x)

    if isinstance(x, list):
        if len(x) != len(y):
            return False
        # Only this top-level list of candidates is mutated; nested values
        # are just read by the recursive calls, so a shallow copy is enough.
        unmatched = list(y)
        for xi in x:
            for j, yj in enumerate(unmatched):
                if isequal(xi, yj):
                    # consume the match so it cannot pair with another xi
                    del unmatched[j]
                    break
            else:
                # no remaining element of y equals xi
                return False
        return True

    return x == y
95 changes: 68 additions & 27 deletions test/api/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from odp.const import ODPScope
from odp.db import Session
from odp.db.models import Catalog, Tag
from test import datacite4_example, iso19115_example
from test import datacite4_example, isequal, iso19115_example
from test.api import assert_forbidden, assert_new_timestamp, assert_not_found, assert_redirect
from test.factories import CatalogFactory, CollectionTagFactory, RecordFactory, RecordTagFactory

Expand Down Expand Up @@ -197,35 +197,68 @@ def test_redirect_to(
assert_not_found(r)


# Schema URI expected in the API result for each metadata schema id.
schema_uris = {
    'SAEON.DataCite4': 'https://odp.saeon.ac.za/schema/metadata/saeon/datacite4',
    'SAEON.ISO19115': 'https://odp.saeon.ac.za/schema/metadata/saeon/iso19115',
    'SchemaOrg.Dataset': 'https://odp.saeon.ac.za/schema/metadata/schema.org/dataset',
}

# Baseline expected metadata for each metadata schema id.
metadata_examples = {
    'SAEON.DataCite4': datacite4_example(),
    'SAEON.ISO19115': iso19115_example(),
}

# The schema.org example is derived from the ISO19115 example, so it can
# only be added once that entry exists.
metadata_examples['SchemaOrg.Dataset'] = {
    '@context': 'https://schema.org/',
    '@type': 'Dataset',
    'name': metadata_examples['SAEON.ISO19115']['title'],
    'description': metadata_examples['SAEON.ISO19115']['abstract'],
    'license': metadata_examples['SAEON.ISO19115']['constraints'][0]['rightsURI'],
    # 'identifier', 'keywords' and 'url' are dynamic (they depend on the
    # record under test), so they are not part of this static baseline.
}


@pytest.mark.require_scope(ODPScope.CATALOG_READ)
def test_get_published_record(
api, scopes,
static_publishing_data, catalog_id, endpoint,
tag_collection_published, tag_collection_infrastructure,
tag_record_qc, tag_record_retracted,
):
def check_metadata_record(schema_id, deep=True):
def assert_metadata_record(schema_id):
# select the actual metadata record from the API result
metadata_record = next(filter(
lambda m: m['schema_id'] == schema_id, result['metadata_records']
))
uri_id = schema_id.split('.')[1].lower()
assert metadata_record['schema_uri'] == f'https://odp.saeon.ac.za/schema/metadata/saeon/{uri_id}'

if deep:
expected_metadata = datacite4_example() if schema_id == 'SAEON.DataCite4' else iso19115_example()
assert metadata_record['schema_uri'] == schema_uris[schema_id]

# construct the expected metadata
expected_metadata = metadata_examples[schema_id]
if schema_id == 'SchemaOrg.Dataset':
expected_metadata['identifier'] = f'doi:{example_record.doi}' if example_record.doi else None
if has_iso19115:
expected_metadata['keywords'] = [
dk['keyword'] for dk in metadata_examples['SAEON.ISO19115']['descriptiveKeywords']
if dk['keywordType'] in ('general', 'place', 'stratum')
]
else:
expected_metadata['keywords'] = [
s['subject'] for s in metadata_examples['SAEON.DataCite4']['subjects']
]
expected_metadata['url'] = ('http://odp.catalog/mims/'
f'{example_record.doi if example_record.doi else example_record.id}')
else:
if example_record.doi:
expected_metadata |= dict(doi=example_record.doi)
expected_metadata |= {'doi': example_record.doi}
else:
expected_metadata.pop('doi')
assert metadata_record['metadata'] == expected_metadata
expected_metadata.pop('doi', None)

# deep-compare actual vs expected
assert isequal(metadata_record['metadata'], expected_metadata)

authorized = ODPScope.CATALOG_READ in scopes
example_record = create_example_record(
tag_collection_published,
tag_collection_infrastructure,
tag_record_qc,
tag_record_retracted,
)
published = (
tag_collection_published is True and
tag_record_qc is True and
Expand All @@ -234,6 +267,13 @@ def check_metadata_record(schema_id, deep=True):
if catalog_id == 'MIMS':
published = published and tag_collection_infrastructure == 'MIMS'

example_record = create_example_record(
tag_collection_published,
tag_collection_infrastructure,
tag_record_qc,
tag_record_retracted,
)

route = f'/catalog/{catalog_id}/records'
resp_code = 200
if endpoint == 'get':
Expand Down Expand Up @@ -269,19 +309,20 @@ def check_metadata_record(schema_id, deep=True):
assert result['searchable'] is True
assert_new_timestamp(datetime.fromisoformat(result['timestamp']))

if example_record.schema_id == 'SAEON.DataCite4':
assert len(result['metadata_records']) == 1
check_metadata_record('SAEON.DataCite4')
has_datacite = True
has_iso19115 = example_record.schema_id == 'SAEON.ISO19115'
has_jsonld = catalog_id == 'MIMS'

elif example_record.schema_id == 'SAEON.ISO19115':
assert len(result['metadata_records']) == 2
check_metadata_record('SAEON.ISO19115')
# TODO: check why the example translated record does not
# exactly match the dynamically translated one here
check_metadata_record('SAEON.DataCite4', deep=False)
assert len(result['metadata_records']) == has_datacite + has_iso19115 + has_jsonld

else:
assert False
if has_datacite:
assert_metadata_record('SAEON.DataCite4')

if has_iso19115:
assert_metadata_record('SAEON.ISO19115')

if has_jsonld:
assert_metadata_record('SchemaOrg.Dataset')


@pytest.mark.parametrize('schema_id, json_pointer, expected_value', [
Expand Down

0 comments on commit 98a9e9d

Please sign in to comment.