diff --git a/dandi/metadata/util.py b/dandi/metadata/util.py index 093823e88..719336fbe 100644 --- a/dandi/metadata/util.py +++ b/dandi/metadata/util.py @@ -472,8 +472,10 @@ def extract_species(metadata: dict) -> models.SpeciesType | None: else: lower_value = value_orig.lower() for common_names, prefix, uri, name in species_map: - if any(key in lower_value for key in common_names) or ( - prefix is not None and lower_value.startswith(prefix) + if ( + lower_value == name.lower() + or any(key in lower_value for key in common_names) + or (prefix is not None and lower_value.startswith(prefix)) ): value_id = uri value = name diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 1de523171..5c20acf0d 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -513,6 +513,30 @@ def test_species(): } +# All of the species spellings below should match the same record. +# There should be no network access -- just matching of records. +@pytest.mark.parametrize( + "species", + [ + "mongolian gerbil", + "mongolian jird", + "Mongolian jird", + "http://purl.obolibrary.org/obo/NCBITaxon_10047", + "Meriones unguiculatus", + "Meriones Unguiculatus", + "meriones Unguiculatus", + ], +) +def test_species_all_possible(species: str) -> None: + species_rec = extract_species({"species": species}) + assert species_rec + assert species_rec.model_dump(mode="json", exclude_none=True) == { + "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_10047", + "schemaKey": "SpeciesType", + "name": "Meriones unguiculatus", + } + + def test_extract_unknown_species(): with pytest.raises(ValueError) as excinfo: extract_species({"species": "mumba-jumba"})