[ENH] Add support for endpoint that will return nb:Diagnosis options from SNOMED (#228)

* add json prettifier pre-commit hook

* add vocab file for SNOMED CT disorders

* create snomed term-label lookup from vocab file on startup

* add enum option for diagnosis

* generalize get vocab terms crud function + implement for diagnosis path param

* update docstrings

* add tests of lookup file and response contents of diagnosis vocab endpoint

* catch and raise informative warning for network errors

* test graceful handling of network error

* use new snomed terms lookup file for returning labels of available diagnoses

* remove unnecessary else statements
alyssadai authored Nov 27, 2023
1 parent 1c62602 commit 25dd6ae
Showing 9 changed files with 27,336 additions and 27 deletions.
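
For context on what the new endpoint returns, here is a minimal client-side sketch (not part of the commit). It assumes a locally running API at a hypothetical base URL and uses httpx, which the project already depends on; the response shape follows the VocabLabelsResponse model exercised in the tests below.

import httpx

# Hypothetical base URL for a local deployment of the API; adjust as needed.
response = httpx.get("http://localhost:8000/attributes/nb:Diagnosis/vocab")
vocab = response.json()

# Expected keys, per the VocabLabelsResponse model and the tests below
# (actual term IDs and labels come from the bundled SNOMED CT disorder vocabulary):
#   vocab["vocabulary_name"]   -> "SNOMED CT"
#   vocab["namespace_prefix"]  -> "snomed"
#   vocab["namespace_url"]     -> the SNOMED namespace URL from the API's context
#   vocab["term_labels"]       -> {"<SNOMED CT ID>": "<human-readable label>", ...}
print(vocab["vocabulary_name"], len(vocab["term_labels"]))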
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -21,6 +21,10 @@ repos:
    rev: v4.4.0
    hooks:
      - id: check-yaml
+      - id: pretty-format-json
+        args:
+          - "--autofix"
+          - "--no-sort-keys"

  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
12 changes: 6 additions & 6 deletions app/api/crud.py
@@ -275,11 +275,11 @@ async def get_controlled_term_attributes() -> list:
    return results_list


-async def get_term_labels_for_cogatlas(
-    term_labels_path: Path,
+async def get_term_labels_for_vocab(
+    term_labels_path: Path, vocabulary_name: str, namespace_prefix: str
) -> VocabLabelsResponse:
    """
-    Returns the term-label mappings along with the vocabulary namespace details for the Cognitive Atlas Task vocabulary.
+    Returns the term-label mappings along with the vocabulary namespace details for the specified vocabulary.
    Returns
    -------
@@ -288,8 +288,8 @@ async def get_term_labels_for_cogatlas(
    term_labels = util.load_json(term_labels_path)

    return VocabLabelsResponse(
-        vocabulary_name="Cognitive Atlas Tasks",
-        namespace_url=util.CONTEXT["cogatlas"],
-        namespace_prefix="cogatlas",
+        vocabulary_name=vocabulary_name,
+        namespace_url=util.CONTEXT[namespace_prefix],
+        namespace_prefix=namespace_prefix,
        term_labels=term_labels,
    )
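
To illustrate the generalization above: the same CRUD function can now serve any vocabulary with a lookup file, given a vocabulary name and namespace prefix. A hypothetical direct call for the SNOMED CT vocabulary (the router change below passes exactly these arguments) might look like the following sketch, assuming the package is importable as app.api.crud and that a lookup file exists at the (made-up) path shown.

import asyncio
from pathlib import Path

from app.api import crud  # assumed import path, matching app/api/crud.py


async def main():
    # Illustrative only: in the app, the /attributes/{data_element_URI}/vocab router
    # calls this with the lookup path created at startup; the path below is hypothetical.
    response_model = await crud.get_term_labels_for_vocab(
        term_labels_path=Path("/tmp/snomedct_disorder_term_labels.json"),
        vocabulary_name="SNOMED CT",
        namespace_prefix="snomed",
    )
    print(response_model.vocabulary_name, response_model.namespace_prefix)


asyncio.run(main())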
1 change: 1 addition & 0 deletions app/api/models.py
@@ -69,6 +69,7 @@ class DataElementURI(str, Enum):
    """Data model for data element URIs that have available vocabulary lookups."""

    assessment = "nb:Assessment"
+    diagnosis = "nb:Diagnosis"


class VocabLabelsResponse(BaseModel):
18 changes: 15 additions & 3 deletions app/api/routers/attributes.py
@@ -12,9 +12,18 @@ async def get_term_labels_for_vocab(
    data_element_URI: DataElementURI, request: Request
):
    """When a GET request is sent, return a dict containing the name, namespace info, and all term ID-label mappings for the vocabulary of the specified variable."""
-    if data_element_URI is DataElementURI.assessment:
-        return await crud.get_term_labels_for_cogatlas(
-            term_labels_path=request.app.state.cogatlas_term_lookup_path
+    # TODO: If/when more attribute options are supported, consider refactoring to use match/case (see https://peps.python.org/pep-0634/#the-match-statement)
+    if data_element_URI == DataElementURI.assessment:
+        return await crud.get_term_labels_for_vocab(
+            term_labels_path=request.app.state.cogatlas_term_lookup_path,
+            vocabulary_name="Cognitive Atlas Tasks",
+            namespace_prefix="cogatlas",
        )
+    if data_element_URI == DataElementURI.diagnosis:
+        return await crud.get_term_labels_for_vocab(
+            term_labels_path=request.app.state.snomed_term_lookup_path,
+            vocabulary_name="SNOMED CT",
+            namespace_prefix="snomed",
+        )


@@ -28,8 +37,11 @@ async def get_terms(
    """
    term_labels_path = None

+    # TODO: If/when more attribute options are supported, consider refactoring to use match/case (see https://peps.python.org/pep-0634/#the-match-statement)
    if data_element_URI == DataElementURI.assessment:
        term_labels_path = request.app.state.cogatlas_term_lookup_path
+    if data_element_URI == DataElementURI.diagnosis:
+        term_labels_path = request.app.state.snomed_term_lookup_path

    return await crud.get_terms(data_element_URI, term_labels_path)

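
The TODO in the hunks above points at PEP 634 match/case as a possible refactor once more attribute options are supported. A minimal sketch of that idea for the vocab route (hypothetical, not part of this commit, requires Python 3.10+; it reuses the names from the router body above and mirrors the current dispatch exactly, including falling through when no case matches):

# Hypothetical refactor of the if-chain above using structural pattern matching.
match data_element_URI:
    case DataElementURI.assessment:
        return await crud.get_term_labels_for_vocab(
            term_labels_path=request.app.state.cogatlas_term_lookup_path,
            vocabulary_name="Cognitive Atlas Tasks",
            namespace_prefix="cogatlas",
        )
    case DataElementURI.diagnosis:
        return await crud.get_term_labels_for_vocab(
            term_labels_path=request.app.state.snomed_term_lookup_path,
            vocabulary_name="SNOMED CT",
            namespace_prefix="snomed",
        )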
50 changes: 40 additions & 10 deletions app/api/utility.py
@@ -373,26 +373,38 @@ def fetch_and_save_cogatlas(output_path: Path):
    Fetches the Cognitive Atlas vocabulary using its native Task API and writes term ID-label mappings to a temporary lookup file.
    If the API request fails, a backup copy of the vocabulary is used instead.
-    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names).
+    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names.
    Parameters
    ----------
    output_path : Path
        File path to store output vocabulary lookup file.
    """
    api_url = "https://www.cognitiveatlas.org/api/v-alpha/task?format=json"
-    response = httpx.get(url=api_url)

-    if response.is_success:
-        vocab = response.json()
-    else:
+    try:
+        response = httpx.get(url=api_url)
+        if response.is_success:
+            vocab = response.json()
+        else:
+            warnings.warn(
+                f"""
+    The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
+    Details of the response from the source:
+    Status code {response.status_code}
+    {response.reason_phrase}: {response.text}
+    """
+            )
+            # Use backup copy of the raw vocabulary JSON
+            vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
+    except httpx.NetworkError as exc:
        warnings.warn(
            f"""
-    The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
-    Details of the response from the source:
-    Status code {response.status_code}
-    {response.reason_phrase}: {response.text}
+    Fetching of the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source failed due to a network error.
+    The API will default to using a local backup copy of the vocabulary instead.
+    Error: {exc}
    """
        )
        # Use backup copy of the raw vocabulary JSON
@@ -401,3 +413,21 @@ def fetch_and_save_cogatlas(output_path: Path):
    term_labels = {term["id"]: term["name"] for term in vocab}
    with open(output_path, "w") as f:
        f.write(json.dumps(term_labels, indent=2))
+
+
+def create_snomed_term_lookup(output_path: Path):
+    """
+    Reads in a file of disorder terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.
+    Saves a JSON with keys corresponding to SNOMED CT IDs and values corresponding to human-readable term names.
+    Parameters
+    ----------
+    output_path : Path
+        File path to store output vocabulary lookup file.
+    """
+    vocab = load_json(BACKUP_VOCAB_DIR / "snomedct_disorder.json")
+
+    term_labels = {term["sctid"]: term["preferred_name"] for term in vocab}
+    with open(output_path, "w") as f:
+        f.write(json.dumps(term_labels, indent=2))
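
For reference, create_snomed_term_lookup expects the bundled snomedct_disorder.json to be a list of records with "sctid" and "preferred_name" keys. The record below is illustrative only (not taken from the shipped file), but it shows the shape of the resulting lookup:

# Illustrative input record (shape only; the real file ships with the repo as a vocab backup).
example_vocab = [{"sctid": "49049000", "preferred_name": "Parkinson's disease"}]

term_labels = {term["sctid"]: term["preferred_name"] for term in example_vocab}
# term_labels == {"49049000": "Parkinson's disease"}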
5 changes: 5 additions & 0 deletions app/main.py
@@ -97,11 +97,16 @@ async def fetch_vocabularies_to_temp_dir():
    app.state.vocab_dir = TemporaryDirectory()
    app.state.vocab_dir_path = Path(app.state.vocab_dir.name)

+    # TODO: Maybe store these paths in one dictionary on the app instance instead of separate variables?
    app.state.cogatlas_term_lookup_path = (
        app.state.vocab_dir_path / "cogatlas_task_term_labels.json"
    )
+    app.state.snomed_term_lookup_path = (
+        app.state.vocab_dir_path / "snomedct_disorder_term_labels.json"
+    )

    util.fetch_and_save_cogatlas(app.state.cogatlas_term_lookup_path)
+    util.create_snomed_term_lookup(app.state.snomed_term_lookup_path)


@app.on_event("shutdown")
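
The TODO in the hunk above suggests collecting the lookup paths in a single mapping on the app instance; a minimal sketch of that alternative (hypothetical, not part of this commit), keyed by vocabulary namespace prefix:

# Hypothetical replacement for the separate app.state attributes above.
app.state.vocab_lookup_paths = {
    "cogatlas": app.state.vocab_dir_path / "cogatlas_task_term_labels.json",
    "snomed": app.state.vocab_dir_path / "snomedct_disorder_term_labels.json",
}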
34 changes: 33 additions & 1 deletion tests/test_app_events.py
@@ -108,6 +108,16 @@ def test_app_with_set_allowed_origins(
    )


+def test_stored_vocab_lookup_file_created_on_startup(
+    test_app, set_test_credentials
+):
+    """Test that on startup, a non-empty temporary lookup file is created for term ID-label mappings for the locally stored SNOMED CT vocabulary."""
+    with test_app:
+        term_labels_path = test_app.app.state.snomed_term_lookup_path
+        assert term_labels_path.exists()
+        assert term_labels_path.stat().st_size > 0
+
+
def test_external_vocab_is_fetched_on_startup(
    test_app, monkeypatch, set_test_credentials
):
@@ -154,7 +164,7 @@ def test_failed_vocab_fetching_on_startup_raises_warning(
    test_app, monkeypatch, set_test_credentials
):
    """
-    Tests that when a GET request to the Cognitive Atlas API fails (e.g., due to service being unavailable),
+    Tests that when a GET request to the Cognitive Atlas API has a non-success response code (e.g., due to service being unavailable),
    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
    """

@@ -174,3 +184,25 @@ def mock_httpx_get(**kwargs):
        in str(warn.message)
        for warn in w
    )
+
+
+def test_network_error_on_startup_raises_warning(
+    test_app, monkeypatch, set_test_credentials
+):
+    """
+    Tests that when a GET request to the Cognitive Atlas API fails due to a network error (i.e., while issuing the request),
+    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
+    """
+
+    def mock_httpx_get(**kwargs):
+        raise httpx.ConnectError("Some network error")
+
+    monkeypatch.setattr(httpx, "get", mock_httpx_get)
+
+    with pytest.warns(UserWarning) as w:
+        with test_app:
+            assert test_app.app.state.cogatlas_term_lookup_path.exists()
+
+    assert any(
+        "failed due to a network error" in str(warn.message) for warn in w
+    )
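
A note on why this mock exercises the new code path: httpx.ConnectError is a subclass of httpx.NetworkError, so raising it from the patched httpx.get lands in the new except httpx.NetworkError branch of fetch_and_save_cogatlas. A quick illustrative check:

import httpx

# ConnectError is one of the concrete transport failures grouped under NetworkError.
assert issubclass(httpx.ConnectError, httpx.NetworkError)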
29 changes: 22 additions & 7 deletions tests/test_attributes.py
@@ -195,23 +195,38 @@ def mock_httpx_post(**kwargs):
    ]


-def test_get_attribute_vocab(test_app, monkeypatch, set_test_credentials):
+@pytest.mark.parametrize(
+    "data_element_uri, expected_vocab_name, expected_namespace_pfx",
+    [
+        ("nb:Assessment", "Cognitive Atlas Tasks", "cogatlas"),
+        ("nb:Diagnosis", "SNOMED CT", "snomed"),
+    ],
+)
+def test_get_attribute_vocab(
+    test_app,
+    monkeypatch,
+    set_test_credentials,
+    data_element_uri,
+    expected_vocab_name,
+    expected_namespace_pfx,
+):
    """Given a GET request to the /attributes/{data_element_URI}/vocab endpoint, successfully returns a JSON object containing the vocabulary name, namespace info, and term-label mappings."""
    # Mock contents of a temporary term-label lookup file for a vocabulary
    mock_term_labels = {
-        "tsk_p7cabUkVvQPBS": "Generalized Self-Efficacy Scale",
-        "tsk_ccTKYnmv7tOZY": "Verbal Interference Test",
+        "trm_1234": "Generic Vocabulary Term 1",
+        "trm_2345": "Generic Vocabulary Term 2",
    }

    def mock_load_json(path):
        return mock_term_labels

    monkeypatch.setattr(util, "load_json", mock_load_json)
-    response = test_app.get("/attributes/nb:Assessment/vocab")
+    response = test_app.get(f"/attributes/{data_element_uri}/vocab")

    assert response.status_code == 200
    assert response.json() == {
-        "vocabulary_name": "Cognitive Atlas Tasks",
-        "namespace_url": util.CONTEXT["cogatlas"],
-        "namespace_prefix": "cogatlas",
+        "vocabulary_name": expected_vocab_name,
+        "namespace_url": util.CONTEXT[expected_namespace_pfx],
+        "namespace_prefix": expected_namespace_pfx,
        "term_labels": mock_term_labels,
    }