[MNT] Replaced Cognitive Atlas with SNOMED #397

Open · wants to merge 2 commits into base: main
6 changes: 4 additions & 2 deletions app/api/routers/assessments.py
@@ -3,7 +3,7 @@
 from ..models import DataElementURI, VocabLabelsResponse
 from . import route_factory

-EXTERNAL_VOCAB = "cogatlas"
+EXTERNAL_VOCAB = "snomed_assessment"
 router = APIRouter(prefix="/assessments", tags=["assessments"])

 router.add_api_route(
@@ -17,7 +17,9 @@
 router.add_api_route(
     path="/vocab",
     endpoint=route_factory.create_get_vocab_handler(
-        external_vocab=EXTERNAL_VOCAB, vocab_name="Cognitive Atlas Tasks"
+        external_vocab=EXTERNAL_VOCAB,
+        vocab_name="SNOMED Assessment",
+        namespace_prefix="snomed",
     ),
     methods=["GET"],
     response_model=VocabLabelsResponse,
6 changes: 4 additions & 2 deletions app/api/routers/diagnoses.py
@@ -3,7 +3,7 @@
 from ..models import DataElementURI, VocabLabelsResponse
 from . import route_factory

-EXTERNAL_VOCAB = "snomed"
+EXTERNAL_VOCAB = "snomed_disorder"
 router = APIRouter(prefix="/diagnoses", tags=["diagnoses"])

 router.add_api_route(
@@ -17,7 +17,9 @@
 router.add_api_route(
     path="/vocab",
     endpoint=route_factory.create_get_vocab_handler(
-        external_vocab=EXTERNAL_VOCAB, vocab_name="SNOMED CT"
+        external_vocab=EXTERNAL_VOCAB,
+        vocab_name="SNOMED Disorder",
+        namespace_prefix="snomed",
     ),
     methods=["GET"],
     response_model=VocabLabelsResponse,
6 changes: 4 additions & 2 deletions app/api/routers/route_factory.py
@@ -23,7 +23,9 @@ async def get_instances(request: Request):
     return get_instances


-def create_get_vocab_handler(external_vocab: str, vocab_name: str):
+def create_get_vocab_handler(
+    external_vocab: str, vocab_name: str, namespace_prefix: str
+):
     """Create the handler function (path function) for the `/vocab` endpoint of an attribute router."""

     async def get_vocab(request: Request):
@@ -36,7 +38,7 @@ async def get_vocab(request: Request):
                 external_vocab
             ],
             vocabulary_name=vocab_name,
-            namespace_prefix=external_vocab,
+            namespace_prefix=namespace_prefix,
         )

     return get_vocab
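
For context, a sketch of how the two attribute routers now call this factory; it simply mirrors the calls in assessments.py and diagnoses.py above (assuming the app package is importable as laid out in the repo). Both vocabularies share the "snomed" CURIE prefix while pointing at different lookup files, which is why the prefix can no longer be derived from external_vocab:

    # Illustrative only; mirrors the router modules changed in this PR.
    from app.api.routers import route_factory

    assessments_vocab_handler = route_factory.create_get_vocab_handler(
        external_vocab="snomed_assessment",  # key into app.state.vocab_lookup_paths
        vocab_name="SNOMED Assessment",
        namespace_prefix="snomed",  # CURIE prefix used in the response
    )
    diagnoses_vocab_handler = route_factory.create_get_vocab_handler(
        external_vocab="snomed_disorder",
        vocab_name="SNOMED Disorder",
        namespace_prefix="snomed",  # same prefix, different lookup file
    )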
45 changes: 6 additions & 39 deletions app/api/utility.py
@@ -3,13 +3,10 @@
 import json
 import os
 import textwrap
-import warnings
 from collections import namedtuple
 from pathlib import Path
 from typing import Optional

-import httpx
-
 # Request constants
 EnvVar = namedtuple("EnvVar", ["name", "val"])

@@ -47,7 +44,6 @@
 }

 CONTEXT = {
-    "cogatlas": "https://www.cognitiveatlas.org/task/id/",
     "nb": "http://neurobagel.org/vocab/",
     "nbg": "http://neurobagel.org/graph/",  # TODO: Check if we still need this namespace.
     "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#",
@@ -451,54 +447,25 @@ def load_json(path: Path) -> dict:
         return json.load(f)


-def fetch_and_save_cogatlas(output_path: Path):
+def create_snomed_assessment_lookup(output_path: Path):
     """
-    Fetches the Cognitive Atlas vocabulary using its native Task API and writes term ID-label mappings to a temporary lookup file.
-    If the API request fails, a backup copy of the vocabulary is used instead.
+    Reads in a file of assessment terms from the SNOMED vocabulary and writes term ID-label mappings to a temporary lookup file.

-    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names.
+    Saves a JSON with keys corresponding to SNOMED IDs and values corresponding to human-readable term names.

     Parameters
     ----------
     output_path : Path
         File path to store output vocabulary lookup file.
     """
-    api_url = "https://www.cognitiveatlas.org/api/v-alpha/task?format=json"
-
-    try:
-        response = httpx.get(url=api_url)
-        if response.is_success:
-            vocab = response.json()
-        else:
-            warnings.warn(
-                f"""
-                The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
-
-                Details of the response from the source:
-                Status code {response.status_code}
-                {response.reason_phrase}: {response.text}
-                """
-            )
-            # Use backup copy of the raw vocabulary JSON
-            vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
-    except httpx.NetworkError as exc:
-        warnings.warn(
-            f""""
-            Fetching of the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source failed due to a network error.
-            The API will default to using a local backup copy of the vocabulary instead.
-
-            Error: {exc}
-            """
-        )
-        # Use backup copy of the raw vocabulary JSON
-        vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
+    vocab = load_json(BACKUP_VOCAB_DIR / "snomed_assessment.json")

-    term_labels = {term["id"]: term["name"] for term in vocab}
+    term_labels = {term["identifier"][7:]: term["label"] for term in vocab}
     with open(output_path, "w") as f:
         f.write(json.dumps(term_labels, indent=2))


-def create_snomed_term_lookup(output_path: Path):
+def create_snomed_disorder_lookup(output_path: Path):
     """
     Reads in a file of disorder terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.

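
A quick illustration of the new lookup creation (a minimal sketch, not code from this PR): the dict comprehension strips the first seven characters of each term's identifier, so this assumes backup-file entries of the form {"identifier": "snomed:<ID>", "label": "<name>"}; the example term comes from the updated test data further down.

    import json

    # Hypothetical backup-file content for snomed_assessment.json (assumed shape).
    vocab = [
        {"identifier": "snomed:1284852002", "label": "Numeric Pain Rating Scale"},
    ]

    # Same transformation as in create_snomed_assessment_lookup: strip the
    # 7-character "snomed:" prefix and keep the human-readable label.
    term_labels = {term["identifier"][7:]: term["label"] for term in vocab}

    print(json.dumps(term_labels, indent=2))
    # {
    #   "1284852002": "Numeric Pain Rating Scale"
    # }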
14 changes: 9 additions & 5 deletions app/main.py
@@ -124,15 +124,19 @@ async def fetch_vocabularies_to_temp_dir():
     app.state.vocab_dir_path = Path(app.state.vocab_dir.name)

     app.state.vocab_lookup_paths = {}
-    app.state.vocab_lookup_paths["cogatlas"] = (
-        app.state.vocab_dir_path / "cogatlas_task_term_labels.json"
+    app.state.vocab_lookup_paths["snomed_assessment"] = (
+        app.state.vocab_dir_path / "snomedct_assessment_term_labels.json"
     )
-    app.state.vocab_lookup_paths["snomed"] = (
+    app.state.vocab_lookup_paths["snomed_disorder"] = (
         app.state.vocab_dir_path / "snomedct_disorder_term_labels.json"
     )

-    util.fetch_and_save_cogatlas(app.state.vocab_lookup_paths["cogatlas"])
-    util.create_snomed_term_lookup(app.state.vocab_lookup_paths["snomed"])
+    util.create_snomed_assessment_lookup(
+        app.state.vocab_lookup_paths["snomed_assessment"]
+    )
+    util.create_snomed_disorder_lookup(
+        app.state.vocab_lookup_paths["snomed_disorder"]
+    )


 @app.on_event("shutdown")
1 change: 0 additions & 1 deletion docs/default_neurobagel_query.rq
@@ -1,4 +1,3 @@
-PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
 PREFIX nb: <http://neurobagel.org/vocab/>
 PREFIX nbg: <http://neurobagel.org/graph/>
 PREFIX ncit: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
98 changes: 3 additions & 95 deletions tests/test_app_events.py
@@ -1,6 +1,5 @@
 """Test events occurring on app startup or shutdown."""

-import json
 import os
 import warnings

@@ -126,99 +125,8 @@ def test_stored_vocab_lookup_file_created_on_startup(
 ):
     """Test that on startup, a non-empty temporary lookup file is created for term ID-label mappings for the locally stored SNOMED CT vocabulary."""
     with test_app:
-        term_labels_path = test_app.app.state.vocab_lookup_paths["snomed"]
+        term_labels_path = test_app.app.state.vocab_lookup_paths[
+            "snomed_disorder"
+        ]
         assert term_labels_path.exists()
         assert term_labels_path.stat().st_size > 0
-
-
-@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
-def test_external_vocab_is_fetched_on_startup(
-    test_app, monkeypatch, set_test_credentials, disable_auth
-):
-    """
-    Tests that on startup, a GET request is made to the Cognitive Atlas API and that when the request succeeds,
-    the term ID-label mappings from the returned vocab are stored in a temporary lookup file.
-    """
-    mock_vocab_json = [
-        {
-            "creation_time": 1689609836,
-            "last_updated": 1689609836,
-            "name": "Generalized Self-Efficacy Scale",
-            "definition_text": "The original Generalized Self-Efficacy Scale contains 10 items designed to tap into a global sense of self-efficacy, or belief of an individual in his or her ability (e.g., \u201cI can always solve difficult problems if I try hard enough,\u201d and \u201cI can usually handle whatever comes my way.\u201d) The revised version here includes these 10 items and two, which are repeated and reversed to examine acquiescence bias. Response options range from 1, never true, to 7, always true. Higher scores indicate greater generalized self-efficacy.",
-            "id": "tsk_p7cabUkVvQPBS",
-        },
-        {
-            "creation_time": 1689610375,
-            "last_updated": 1689610375,
-            "name": "Verbal Interference Test",
-            "definition_text": "The Verbal Interference Test is a behavioral assessment of cognitive regulation. In this task participants are presented with visual word stimuli that appear with incongruent text and color meaning (e.g., the word \u201cRED\u201d printed in blue, the word \u201cBLUE\u201d printed in green, the word \u201cGREEN\u201d printed in red). There are two phases of the task: Name (Part I) and Color (Part II). In the Name phase, participants are asked to identify the meaning of the word (e.g., red is the correct answer for the word \u201cRED\u201d printed in blue). In the Color phase, participants are asked to identify the color in which the word is printed (e.g., blue is the correct answer for the word \u201cRED\u201d printed in blue). This test assesses aspects of inhibition and interference corresponding to those indexed by the Stroop test.",
-            "id": "tsk_ccTKYnmv7tOZY",
-        },
-    ]
-
-    def mock_httpx_get(**kwargs):
-        return httpx.Response(status_code=200, json=mock_vocab_json)
-
-    monkeypatch.setattr(httpx, "get", mock_httpx_get)
-
-    with test_app:
-        term_labels_path = test_app.app.state.vocab_lookup_paths["cogatlas"]
-        assert term_labels_path.exists()
-
-        with open(term_labels_path, "r") as f:
-            term_labels = json.load(f)
-
-        assert term_labels == {
-            "tsk_p7cabUkVvQPBS": "Generalized Self-Efficacy Scale",
-            "tsk_ccTKYnmv7tOZY": "Verbal Interference Test",
-        }
-
-
-@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
-def test_failed_vocab_fetching_on_startup_raises_warning(
-    test_app, monkeypatch, set_test_credentials, disable_auth
-):
-    """
-    Tests that when a GET request to the Cognitive Atlas API has a non-success response code (e.g., due to service being unavailable),
-    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
-    """
-
-    def mock_httpx_get(**kwargs):
-        return httpx.Response(
-            status_code=503, json={}, text="Some error message"
-        )
-
-    monkeypatch.setattr(httpx, "get", mock_httpx_get)
-
-    with pytest.warns(UserWarning) as w:
-        with test_app:
-            assert test_app.app.state.vocab_lookup_paths["cogatlas"].exists()
-
-    assert any(
-        "unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy"
-        in str(warn.message)
-        for warn in w
-    )
-
-
-@pytest.mark.filterwarnings("ignore:.*NB_API_ALLOWED_ORIGINS")
-def test_network_error_on_startup_raises_warning(
-    test_app, monkeypatch, set_test_credentials, disable_auth
-):
-    """
-    Tests that when a GET request to the Cognitive Atlas API fails due to a network error (i.e., while issuing the request),
-    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
-    """
-
-    def mock_httpx_get(**kwargs):
-        raise httpx.ConnectError("Some network error")
-
-    monkeypatch.setattr(httpx, "get", mock_httpx_get)
-
-    with pytest.warns(UserWarning) as w:
-        with test_app:
-            assert test_app.app.state.vocab_lookup_paths["cogatlas"].exists()
-
-    assert any(
-        "failed due to a network error" in str(warn.message) for warn in w
-    )
16 changes: 8 additions & 8 deletions tests/test_attribute_factory_routes.py
@@ -26,19 +26,19 @@ def test_get_instances_endpoint_with_vocab_lookup(
         {
             "termURL": {
                 "type": "uri",
-                "value": "https://www.cognitiveatlas.org/task/id/tsk_U9gDp8utahAfO",
+                "value": "http://purl.bioontology.org/ontology/SNOMEDCT/1284852002",
             }
         },
         {
             "termURL": {
                 "type": "uri",
-                "value": "https://www.cognitiveatlas.org/task/id/not_found_id",
+                "value": "http://purl.bioontology.org/ontology/SNOMEDCT/not_found_id",
             }
         },
         {
             "termURL": {
                 "type": "uri",
-                "value": "https://www.notanatlas.org/task/id/tsk_alz5hjlUXp4WY",
+                "value": "http://notpurl.bioontology.org/ontology/SNOMEDCT/123456789",
            }
         },
     ]
@@ -60,10 +60,10 @@ def mock_httpx_post(**kwargs):
     assert response.json() == {
         "nb:Assessment": [
             {
-                "TermURL": "cogatlas:tsk_U9gDp8utahAfO",
-                "Label": "Pittsburgh Stress Battery",
+                "TermURL": "snomed:1284852002",
+                "Label": "Numeric Pain Rating Scale",
             },
-            {"TermURL": "cogatlas:not_found_id", "Label": None},
+            {"TermURL": "snomed:not_found_id", "Label": None},
         ]
     }

@@ -123,8 +123,8 @@ def mock_httpx_post(**kwargs):
 @pytest.mark.parametrize(
     "attribute, expected_vocab_name, expected_namespace_pfx",
     [
-        ("assessments", "Cognitive Atlas Tasks", "cogatlas"),
-        ("diagnoses", "SNOMED CT", "snomed"),
+        ("assessments", "SNOMED Assessment", "snomed"),
+        ("diagnoses", "SNOMED Disorder", "snomed"),
     ],
 )
 def test_get_vocab_endpoint(
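
For readers skimming the expected values: the updated fixtures exercise the shortening of full SNOMED CT term URIs into "snomed:<ID>" CURIEs. A rough sketch of that mapping, assuming the namespace URL used in the fixtures; the helper below is hypothetical and not the API's actual implementation:

    # Python 3.9+ (uses str.removeprefix).
    SNOMED_NAMESPACE = "http://purl.bioontology.org/ontology/SNOMEDCT/"

    def to_curie(term_url: str, namespace_prefix: str = "snomed") -> str:
        """Shorten a full SNOMED CT term URI to a '<prefix>:<ID>' CURIE."""
        return f"{namespace_prefix}:{term_url.removeprefix(SNOMED_NAMESPACE)}"

    assert to_curie("http://purl.bioontology.org/ontology/SNOMEDCT/1284852002") == "snomed:1284852002"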