From 07fe80f153a1268feaa9f0c61e6ab3cc53b4a5b3 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 3 Dec 2024 13:11:45 -0500 Subject: [PATCH] BF: do allow for user to specify a proper full name for "species" in metadata Apparently before we were checking for URLs and only for "common names", not proper long names. It is surprising that the problem manifested only now! Addressing user report on Slack --- dandi/metadata/util.py | 6 ++++-- dandi/tests/test_metadata.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dandi/metadata/util.py b/dandi/metadata/util.py index 093823e88..719336fbe 100644 --- a/dandi/metadata/util.py +++ b/dandi/metadata/util.py @@ -472,8 +472,10 @@ def extract_species(metadata: dict) -> models.SpeciesType | None: else: lower_value = value_orig.lower() for common_names, prefix, uri, name in species_map: - if any(key in lower_value for key in common_names) or ( - prefix is not None and lower_value.startswith(prefix) + if ( + lower_value == name.lower() + or any(key in lower_value for key in common_names) + or (prefix is not None and lower_value.startswith(prefix)) ): value_id = uri value = name diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 1de523171..89796ad42 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -513,6 +513,30 @@ def test_species(): } +# all of them should match the same record +# There should be no network access -- just matching records +@pytest.mark.parametrize( + "species", + [ + "mongolian gerbil", + "mongolian jird", + "Mongolian jird", + "http://purl.obolibrary.org/obo/NCBITaxon_10047", + "Meriones unguiculatus", + "Meriones Unguiculatus", + "meriones Unguiculatus", + ], +) +def test_species_all_possible(species: str): + assert extract_species({"species": species}).model_dump( + mode="json", exclude_none=True + ) == { + "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_10047", + "schemaKey": 
"SpeciesType", + "name": "Meriones unguiculatus", + } + + def test_extract_unknown_species(): with pytest.raises(ValueError) as excinfo: extract_species({"species": "mumba-jumba"})