From 33903e757c07f1bc8af86ba041265b809aac0e4a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 3 Dec 2024 13:11:45 -0500 Subject: [PATCH] BF: do allow for user to specify a proper full name for "species" in metadata Apparently before we were checking for URLs and only for "common names", not proper long names. It is surprising that the problem manifested only now then! Addressing user report on Slack --- dandi/metadata/util.py | 6 ++++-- dandi/tests/test_metadata.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dandi/metadata/util.py b/dandi/metadata/util.py index 093823e88..719336fbe 100644 --- a/dandi/metadata/util.py +++ b/dandi/metadata/util.py @@ -472,8 +472,10 @@ def extract_species(metadata: dict) -> models.SpeciesType | None: else: lower_value = value_orig.lower() for common_names, prefix, uri, name in species_map: - if any(key in lower_value for key in common_names) or ( - prefix is not None and lower_value.startswith(prefix) + if ( + lower_value == name.lower() + or any(key in lower_value for key in common_names) + or (prefix is not None and lower_value.startswith(prefix)) ): value_id = uri value = name diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 1de523171..5c20acf0d 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -513,6 +513,30 @@ def test_species(): } +# all of them should match the same record +# There should be no network access -- just matching records +@pytest.mark.parametrize( + "species", + [ + "mongolian gerbil", + "mongolian jird", + "Mongolian jird", + "http://purl.obolibrary.org/obo/NCBITaxon_10047", + "Meriones unguiculatus", + "Meriones Unguiculatus", + "meriones Unguiculatus", + ], +) +def test_species_all_possible(species: str) -> None: + species_rec = extract_species({"species": species}) + assert species_rec + assert species_rec.model_dump(mode="json", exclude_none=True) == { + "identifier": 
"http://purl.obolibrary.org/obo/NCBITaxon_10047", + "schemaKey": "SpeciesType", + "name": "Meriones unguiculatus", + } + + def test_extract_unknown_species(): with pytest.raises(ValueError) as excinfo: extract_species({"species": "mumba-jumba"})