Skip to content

Commit

Permalink
[MNT] Replaced Cognitive Atlas with SNOMED (#397)
Browse files Browse the repository at this point in the history
* Added `snomed_assessment.json`

* Replaced Cognitive Atlas with SNOMED

* Addressed requested changes from PR review
  • Loading branch information
rmanaem authored Jan 7, 2025
1 parent 7509578 commit 6298af7
Show file tree
Hide file tree
Showing 9 changed files with 9,391 additions and 155 deletions.
6 changes: 4 additions & 2 deletions app/api/routers/assessments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ..models import DataElementURI, VocabLabelsResponse
from . import route_factory

EXTERNAL_VOCAB = "cogatlas"
EXTERNAL_VOCAB = "snomed_assessment"
router = APIRouter(prefix="/assessments", tags=["assessments"])

router.add_api_route(
Expand All @@ -17,7 +17,9 @@
router.add_api_route(
path="/vocab",
endpoint=route_factory.create_get_vocab_handler(
external_vocab=EXTERNAL_VOCAB, vocab_name="Cognitive Atlas Tasks"
external_vocab=EXTERNAL_VOCAB,
vocab_name="SNOMED CT Assessment Scale",
namespace_prefix="snomed",
),
methods=["GET"],
response_model=VocabLabelsResponse,
Expand Down
6 changes: 4 additions & 2 deletions app/api/routers/diagnoses.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ..models import DataElementURI, VocabLabelsResponse
from . import route_factory

EXTERNAL_VOCAB = "snomed"
EXTERNAL_VOCAB = "snomed_disorder"
router = APIRouter(prefix="/diagnoses", tags=["diagnoses"])

router.add_api_route(
Expand All @@ -17,7 +17,9 @@
router.add_api_route(
path="/vocab",
endpoint=route_factory.create_get_vocab_handler(
external_vocab=EXTERNAL_VOCAB, vocab_name="SNOMED CT"
external_vocab=EXTERNAL_VOCAB,
vocab_name="SNOMED CT Disorder",
namespace_prefix="snomed",
),
methods=["GET"],
response_model=VocabLabelsResponse,
Expand Down
6 changes: 4 additions & 2 deletions app/api/routers/route_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ async def get_instances(request: Request):
return get_instances


def create_get_vocab_handler(external_vocab: str, vocab_name: str):
def create_get_vocab_handler(
external_vocab: str, vocab_name: str, namespace_prefix: str
):
"""Create the handler function (path function) for the `/vocab` endpoint of an attribute router."""

async def get_vocab(request: Request):
Expand All @@ -36,7 +38,7 @@ async def get_vocab(request: Request):
external_vocab
],
vocabulary_name=vocab_name,
namespace_prefix=external_vocab,
namespace_prefix=namespace_prefix,
)

return get_vocab
50 changes: 10 additions & 40 deletions app/api/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
import json
import os
import textwrap
import warnings
from collections import namedtuple
from pathlib import Path
from typing import Optional

import httpx

# Request constants
EnvVar = namedtuple("EnvVar", ["name", "val"])

Expand Down Expand Up @@ -47,9 +44,7 @@
}

CONTEXT = {
"cogatlas": "https://www.cognitiveatlas.org/task/id/",
"nb": "http://neurobagel.org/vocab/",
"nbg": "http://neurobagel.org/graph/", # TODO: Check if we still need this namespace.
"ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#",
"nidm": "http://purl.org/nidash/nidm#",
"snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/",
Expand Down Expand Up @@ -451,54 +446,29 @@ def load_json(path: Path) -> dict:
return json.load(f)


def fetch_and_save_cogatlas(output_path: Path):
def create_snomed_assessment_lookup(output_path: Path):
"""
Fetches the Cognitive Atlas vocabulary using its native Task API and writes term ID-label mappings to a temporary lookup file.
If the API request fails, a backup copy of the vocabulary is used instead.
Reads in a file of assessment terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.
Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names.
Saves a JSON with keys corresponding to SNOMED IDs and values corresponding to human-readable term names.
Parameters
----------
output_path : Path
File path to store output vocabulary lookup file.
"""
api_url = "https://www.cognitiveatlas.org/api/v-alpha/task?format=json"

try:
response = httpx.get(url=api_url)
if response.is_success:
vocab = response.json()
else:
warnings.warn(
f"""
The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
Details of the response from the source:
Status code {response.status_code}
{response.reason_phrase}: {response.text}
"""
)
# Use backup copy of the raw vocabulary JSON
vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
except httpx.NetworkError as exc:
warnings.warn(
f""""
Fetching of the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source failed due to a network error.
The API will default to using a local backup copy of the vocabulary instead.
Error: {exc}
"""
)
# Use backup copy of the raw vocabulary JSON
vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
vocab = load_json(BACKUP_VOCAB_DIR / "snomed_assessment.json")

term_labels = {
term["identifier"].removeprefix("snomed:"): term["label"]
for term in vocab
}

term_labels = {term["id"]: term["name"] for term in vocab}
with open(output_path, "w") as f:
f.write(json.dumps(term_labels, indent=2))


def create_snomed_term_lookup(output_path: Path):
def create_snomed_disorder_lookup(output_path: Path):
"""
Reads in a file of disorder terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.
Expand Down
14 changes: 9 additions & 5 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,19 @@ async def fetch_vocabularies_to_temp_dir():
app.state.vocab_dir_path = Path(app.state.vocab_dir.name)

app.state.vocab_lookup_paths = {}
app.state.vocab_lookup_paths["cogatlas"] = (
app.state.vocab_dir_path / "cogatlas_task_term_labels.json"
app.state.vocab_lookup_paths["snomed_assessment"] = (
app.state.vocab_dir_path / "snomedct_assessment_term_labels.json"
)
app.state.vocab_lookup_paths["snomed"] = (
app.state.vocab_lookup_paths["snomed_disorder"] = (
app.state.vocab_dir_path / "snomedct_disorder_term_labels.json"
)

util.fetch_and_save_cogatlas(app.state.vocab_lookup_paths["cogatlas"])
util.create_snomed_term_lookup(app.state.vocab_lookup_paths["snomed"])
util.create_snomed_assessment_lookup(
app.state.vocab_lookup_paths["snomed_assessment"]
)
util.create_snomed_disorder_lookup(
app.state.vocab_lookup_paths["snomed_disorder"]
)


@app.on_event("shutdown")
Expand Down
1 change: 0 additions & 1 deletion docs/default_neurobagel_query.rq
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
PREFIX nb: <http://neurobagel.org/vocab/>
PREFIX nbg: <http://neurobagel.org/graph/>
PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
Expand Down
101 changes: 6 additions & 95 deletions tests/test_app_events.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test events occurring on app startup or shutdown."""

import json
import os
import warnings

Expand Down Expand Up @@ -118,107 +117,19 @@ def test_app_with_set_allowed_origins(
)


@pytest.mark.parametrize(
"lookup_file",
["snomed_disorder", "snomed_assessment"],
)
@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
def test_stored_vocab_lookup_file_created_on_startup(
test_app,
set_test_credentials,
disable_auth,
lookup_file,
):
"""Test that on startup, a non-empty temporary lookup file of term ID-label mappings is created for each locally stored SNOMED CT vocabulary (disorders and assessments)."""
with test_app:
term_labels_path = test_app.app.state.vocab_lookup_paths["snomed"]
term_labels_path = test_app.app.state.vocab_lookup_paths[lookup_file]
assert term_labels_path.exists()
assert term_labels_path.stat().st_size > 0


@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
def test_external_vocab_is_fetched_on_startup(
test_app, monkeypatch, set_test_credentials, disable_auth
):
"""
Tests that on startup, a GET request is made to the Cognitive Atlas API and that when the request succeeds,
the term ID-label mappings from the returned vocab are stored in a temporary lookup file.
"""
mock_vocab_json = [
{
"creation_time": 1689609836,
"last_updated": 1689609836,
"name": "Generalized Self-Efficacy Scale",
"definition_text": "The original Generalized Self-Efficacy Scale contains 10 items designed to tap into a global sense of self-efficacy, or belief of an individual in his or her ability (e.g., \u201cI can always solve difficult problems if I try hard enough,\u201d and \u201cI can usually handle whatever comes my way.\u201d) The revised version here includes these 10 items and two, which are repeated and reversed to examine acquiescence bias. Response options range from 1, never true, to 7, always true. Higher scores indicate greater generalized self-efficacy.",
"id": "tsk_p7cabUkVvQPBS",
},
{
"creation_time": 1689610375,
"last_updated": 1689610375,
"name": "Verbal Interference Test",
"definition_text": "The Verbal Interference Test is a behavioral assessment of cognitive regulation. In this task participants are presented with visual word stimuli that appear with incongruent text and color meaning (e.g., the word \u201cRED\u201d printed in blue, the word \u201cBLUE\u201d printed in green, the word \u201cGREEN\u201d printed in red). There are two phases of the task: Name (Part I) and Color (Part II). In the Name phase, participants are asked to identify the meaning of the word (e.g., red is the correct answer for the word \u201cRED\u201d printed in blue). In the Color phase, participants are asked to identify the color in which the word is printed (e.g., blue is the correct answer for the word \u201cRED\u201d printed in blue). This test assesses aspects of inhibition and interference corresponding to those indexed by the Stroop test.",
"id": "tsk_ccTKYnmv7tOZY",
},
]

def mock_httpx_get(**kwargs):
return httpx.Response(status_code=200, json=mock_vocab_json)

monkeypatch.setattr(httpx, "get", mock_httpx_get)

with test_app:
term_labels_path = test_app.app.state.vocab_lookup_paths["cogatlas"]
assert term_labels_path.exists()

with open(term_labels_path, "r") as f:
term_labels = json.load(f)

assert term_labels == {
"tsk_p7cabUkVvQPBS": "Generalized Self-Efficacy Scale",
"tsk_ccTKYnmv7tOZY": "Verbal Interference Test",
}


@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
def test_failed_vocab_fetching_on_startup_raises_warning(
test_app, monkeypatch, set_test_credentials, disable_auth
):
"""
Tests that when a GET request to the Cognitive Atlas API has a non-success response code (e.g., due to service being unavailable),
a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
"""

def mock_httpx_get(**kwargs):
return httpx.Response(
status_code=503, json={}, text="Some error message"
)

monkeypatch.setattr(httpx, "get", mock_httpx_get)

with pytest.warns(UserWarning) as w:
with test_app:
assert test_app.app.state.vocab_lookup_paths["cogatlas"].exists()

assert any(
"unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy"
in str(warn.message)
for warn in w
)


@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
def test_network_error_on_startup_raises_warning(
test_app, monkeypatch, set_test_credentials, disable_auth
):
"""
Tests that when a GET request to the Cognitive Atlas API fails due to a network error (i.e., while issuing the request),
a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
"""

def mock_httpx_get(**kwargs):
raise httpx.ConnectError("Some network error")

monkeypatch.setattr(httpx, "get", mock_httpx_get)

with pytest.warns(UserWarning) as w:
with test_app:
assert test_app.app.state.vocab_lookup_paths["cogatlas"].exists()

assert any(
"failed due to a network error" in str(warn.message) for warn in w
)
16 changes: 8 additions & 8 deletions tests/test_attribute_factory_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,19 @@ def test_get_instances_endpoint_with_vocab_lookup(
{
"termURL": {
"type": "uri",
"value": "https://www.cognitiveatlas.org/task/id/tsk_U9gDp8utahAfO",
"value": "http://purl.bioontology.org/ontology/SNOMEDCT/1284852002",
}
},
{
"termURL": {
"type": "uri",
"value": "https://www.cognitiveatlas.org/task/id/not_found_id",
"value": "http://purl.bioontology.org/ontology/SNOMEDCT/not_found_id",
}
},
{
"termURL": {
"type": "uri",
"value": "https://www.notanatlas.org/task/id/tsk_alz5hjlUXp4WY",
"value": "http://unknownvocab.org/123456789",
}
},
]
Expand All @@ -60,10 +60,10 @@ def mock_httpx_post(**kwargs):
assert response.json() == {
"nb:Assessment": [
{
"TermURL": "cogatlas:tsk_U9gDp8utahAfO",
"Label": "Pittsburgh Stress Battery",
"TermURL": "snomed:1284852002",
"Label": "Numeric Pain Rating Scale",
},
{"TermURL": "cogatlas:not_found_id", "Label": None},
{"TermURL": "snomed:not_found_id", "Label": None},
]
}

Expand Down Expand Up @@ -123,8 +123,8 @@ def mock_httpx_post(**kwargs):
@pytest.mark.parametrize(
"attribute, expected_vocab_name, expected_namespace_pfx",
[
("assessments", "Cognitive Atlas Tasks", "cogatlas"),
("diagnoses", "SNOMED CT", "snomed"),
("assessments", "SNOMED CT Assessment Scale", "snomed"),
("diagnoses", "SNOMED CT Disorder", "snomed"),
],
)
def test_get_vocab_endpoint(
Expand Down
Loading

0 comments on commit 6298af7

Please sign in to comment.