[ENH] Add support for endpoint that will return nb:Diagnosis options from SNOMED (#228)

* add json prettifier pre-commit hook

* add vocab file for SNOMED CT disorders

* create snomed term-label lookup from vocab file on startup

* add enum option for diagnosis

* generalize get vocab terms crud function + implement for diagnosis path param

* update docstrings

* add tests of lookup file and response contents of diagnosis vocab endpoint

* catch and raise informative warning for network errors

* test graceful handling of network error

* use new snomed terms lookup file for returning labels of available diagnoses

* remove unnecessary else statements
alyssadai authored Nov 27, 2023
1 parent 1c62602 commit 25dd6ae
Showing 9 changed files with 27,336 additions and 27 deletions.
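
For context on what the new endpoint returns, here is a minimal client-side sketch (not part of the commit). It assumes a locally running API at a hypothetical base URL and uses httpx, which the project already depends on; the response shape follows the VocabLabelsResponse model exercised in the tests below.

import httpx

# Hypothetical base URL for a local deployment of the API; adjust as needed.
response = httpx.get("http://localhost:8000/attributes/nb:Diagnosis/vocab")
vocab = response.json()

# Expected keys, per the VocabLabelsResponse model and the tests below
# (actual term IDs and labels come from the bundled SNOMED CT disorder vocabulary):
#   vocab["vocabulary_name"]   -> "SNOMED CT"
#   vocab["namespace_prefix"]  -> "snomed"
#   vocab["namespace_url"]     -> the SNOMED namespace URL from the API's context
#   vocab["term_labels"]       -> {"<SNOMED CT ID>": "<human-readable label>", ...}
print(vocab["vocabulary_name"], len(vocab["term_labels"]))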
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -21,6 +21,10 @@ repos:
    rev: v4.4.0
    hooks:
      - id: check-yaml
+      - id: pretty-format-json
+        args:
+          - "--autofix"
+          - "--no-sort-keys"

  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
12 changes: 6 additions & 6 deletions app/api/crud.py
@@ -275,11 +275,11 @@ async def get_controlled_term_attributes() -> list:
    return results_list


-async def get_term_labels_for_cogatlas(
-    term_labels_path: Path,
+async def get_term_labels_for_vocab(
+    term_labels_path: Path, vocabulary_name: str, namespace_prefix: str
) -> VocabLabelsResponse:
    """
-    Returns the term-label mappings along with the vocabulary namespace details for the Cognitive Atlas Task vocabulary.
+    Returns the term-label mappings along with the vocabulary namespace details for the specified vocabulary.
    Returns
    -------
@@ -288,8 +288,8 @@ async def get_term_labels_for_cogatlas(
    term_labels = util.load_json(term_labels_path)

    return VocabLabelsResponse(
-        vocabulary_name="Cognitive Atlas Tasks",
-        namespace_url=util.CONTEXT["cogatlas"],
-        namespace_prefix="cogatlas",
+        vocabulary_name=vocabulary_name,
+        namespace_url=util.CONTEXT[namespace_prefix],
+        namespace_prefix=namespace_prefix,
        term_labels=term_labels,
    )
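
To illustrate the generalization above: the same CRUD function can now serve any vocabulary with a lookup file, given a vocabulary name and namespace prefix. A hypothetical direct call for the SNOMED CT vocabulary (the router change below passes exactly these arguments) might look like the following sketch, assuming the package is importable as app.api.crud and that a lookup file exists at the (made-up) path shown.

import asyncio
from pathlib import Path

from app.api import crud  # assumed import path, matching app/api/crud.py


async def main():
    # Illustrative only: in the app, the /attributes/{data_element_URI}/vocab router
    # calls this with the lookup path created at startup; the path below is hypothetical.
    response_model = await crud.get_term_labels_for_vocab(
        term_labels_path=Path("/tmp/snomedct_disorder_term_labels.json"),
        vocabulary_name="SNOMED CT",
        namespace_prefix="snomed",
    )
    print(response_model.vocabulary_name, response_model.namespace_prefix)


asyncio.run(main())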
1 change: 1 addition & 0 deletions app/api/models.py
@@ -69,6 +69,7 @@ class DataElementURI(str, Enum):
    """Data model for data element URIs that have available vocabulary lookups."""

    assessment = "nb:Assessment"
+    diagnosis = "nb:Diagnosis"


class VocabLabelsResponse(BaseModel):
18 changes: 15 additions & 3 deletions app/api/routers/attributes.py
@@ -12,9 +12,18 @@ async def get_term_labels_for_vocab(
    data_element_URI: DataElementURI, request: Request
):
    """When a GET request is sent, return a dict containing the name, namespace info, and all term ID-label mappings for the vocabulary of the specified variable."""
-    if data_element_URI is DataElementURI.assessment:
-        return await crud.get_term_labels_for_cogatlas(
-            term_labels_path=request.app.state.cogatlas_term_lookup_path
+    # TODO: If/when more attribute options are supported, consider refactoring to use match/case (see https://peps.python.org/pep-0634/#the-match-statement)
+    if data_element_URI == DataElementURI.assessment:
+        return await crud.get_term_labels_for_vocab(
+            term_labels_path=request.app.state.cogatlas_term_lookup_path,
+            vocabulary_name="Cognitive Atlas Tasks",
+            namespace_prefix="cogatlas",
        )
+    if data_element_URI == DataElementURI.diagnosis:
+        return await crud.get_term_labels_for_vocab(
+            term_labels_path=request.app.state.snomed_term_lookup_path,
+            vocabulary_name="SNOMED CT",
+            namespace_prefix="snomed",
+        )


@@ -28,8 +37,11 @@ async def get_terms(
    """
    term_labels_path = None

+    # TODO: If/when more attribute options are supported, consider refactoring to use match/case (see https://peps.python.org/pep-0634/#the-match-statement)
    if data_element_URI == DataElementURI.assessment:
        term_labels_path = request.app.state.cogatlas_term_lookup_path
+    if data_element_URI == DataElementURI.diagnosis:
+        term_labels_path = request.app.state.snomed_term_lookup_path

    return await crud.get_terms(data_element_URI, term_labels_path)

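
The TODO in the hunks above points at PEP 634 match/case as a possible refactor once more attribute options are supported. A minimal sketch of that idea for the vocab route (hypothetical, not part of this commit, requires Python 3.10+; it reuses the names from the router body above and mirrors the current dispatch exactly, including falling through when no case matches):

# Hypothetical refactor of the if-chain above using structural pattern matching.
match data_element_URI:
    case DataElementURI.assessment:
        return await crud.get_term_labels_for_vocab(
            term_labels_path=request.app.state.cogatlas_term_lookup_path,
            vocabulary_name="Cognitive Atlas Tasks",
            namespace_prefix="cogatlas",
        )
    case DataElementURI.diagnosis:
        return await crud.get_term_labels_for_vocab(
            term_labels_path=request.app.state.snomed_term_lookup_path,
            vocabulary_name="SNOMED CT",
            namespace_prefix="snomed",
        )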
50 changes: 40 additions & 10 deletions app/api/utility.py
@@ -373,26 +373,38 @@ def fetch_and_save_cogatlas(output_path: Path):
    Fetches the Cognitive Atlas vocabulary using its native Task API and writes term ID-label mappings to a temporary lookup file.
    If the API request fails, a backup copy of the vocabulary is used instead.
-    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names).
+    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names.
    Parameters
    ----------
    output_path : Path
        File path to store output vocabulary lookup file.
    """
    api_url = "https://www.cognitiveatlas.org/api/v-alpha/task?format=json"
-    response = httpx.get(url=api_url)

-    if response.is_success:
-        vocab = response.json()
-    else:
+    try:
+        response = httpx.get(url=api_url)
+        if response.is_success:
+            vocab = response.json()
+        else:
+            warnings.warn(
+                f"""
+    The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
+    Details of the response from the source:
+    Status code {response.status_code}
+    {response.reason_phrase}: {response.text}
+    """
+            )
+            # Use backup copy of the raw vocabulary JSON
+            vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
+    except httpx.NetworkError as exc:
        warnings.warn(
            f"""
-    The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.
-    Details of the response from the source:
-    Status code {response.status_code}
-    {response.reason_phrase}: {response.text}
+    Fetching of the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source failed due to a network error.
+    The API will default to using a local backup copy of the vocabulary instead.
+    Error: {exc}
    """
        )
        # Use backup copy of the raw vocabulary JSON
@@ -401,3 +413,21 @@ def fetch_and_save_cogatlas(output_path: Path):
    term_labels = {term["id"]: term["name"] for term in vocab}
    with open(output_path, "w") as f:
        f.write(json.dumps(term_labels, indent=2))
+
+
+def create_snomed_term_lookup(output_path: Path):
+    """
+    Reads in a file of disorder terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.
+    Saves a JSON with keys corresponding to SNOMED CT IDs and values corresponding to human-readable term names.
+    Parameters
+    ----------
+    output_path : Path
+        File path to store output vocabulary lookup file.
+    """
+    vocab = load_json(BACKUP_VOCAB_DIR / "snomedct_disorder.json")
+
+    term_labels = {term["sctid"]: term["preferred_name"] for term in vocab}
+    with open(output_path, "w") as f:
+        f.write(json.dumps(term_labels, indent=2))
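
For reference, create_snomed_term_lookup expects the bundled snomedct_disorder.json to be a list of records with "sctid" and "preferred_name" keys. The record below is illustrative only (not taken from the shipped file), but it shows the shape of the resulting lookup:

# Illustrative input record (shape only; the real file ships with the repo as a vocab backup).
example_vocab = [{"sctid": "49049000", "preferred_name": "Parkinson's disease"}]

term_labels = {term["sctid"]: term["preferred_name"] for term in example_vocab}
# term_labels == {"49049000": "Parkinson's disease"}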
5 changes: 5 additions & 0 deletions app/main.py
@@ -97,11 +97,16 @@ async def fetch_vocabularies_to_temp_dir():
    app.state.vocab_dir = TemporaryDirectory()
    app.state.vocab_dir_path = Path(app.state.vocab_dir.name)

+    # TODO: Maybe store these paths in one dictionary on the app instance instead of separate variables?
    app.state.cogatlas_term_lookup_path = (
        app.state.vocab_dir_path / "cogatlas_task_term_labels.json"
    )
+    app.state.snomed_term_lookup_path = (
+        app.state.vocab_dir_path / "snomedct_disorder_term_labels.json"
+    )

    util.fetch_and_save_cogatlas(app.state.cogatlas_term_lookup_path)
+    util.create_snomed_term_lookup(app.state.snomed_term_lookup_path)


@app.on_event("shutdown")
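
The TODO in the hunk above suggests collecting the lookup paths in a single mapping on the app instance; a minimal sketch of that alternative (hypothetical, not part of this commit), keyed by vocabulary namespace prefix:

# Hypothetical replacement for the separate app.state attributes above.
app.state.vocab_lookup_paths = {
    "cogatlas": app.state.vocab_dir_path / "cogatlas_task_term_labels.json",
    "snomed": app.state.vocab_dir_path / "snomedct_disorder_term_labels.json",
}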
34 changes: 33 additions & 1 deletion tests/test_app_events.py
@@ -108,6 +108,16 @@ def test_app_with_set_allowed_origins(
    )


+def test_stored_vocab_lookup_file_created_on_startup(
+    test_app, set_test_credentials
+):
+    """Test that on startup, a non-empty temporary lookup file is created for term ID-label mappings for the locally stored SNOMED CT vocabulary."""
+    with test_app:
+        term_labels_path = test_app.app.state.snomed_term_lookup_path
+        assert term_labels_path.exists()
+        assert term_labels_path.stat().st_size > 0
+
+
def test_external_vocab_is_fetched_on_startup(
    test_app, monkeypatch, set_test_credentials
):
@@ -154,7 +164,7 @@ def test_failed_vocab_fetching_on_startup_raises_warning(
    test_app, monkeypatch, set_test_credentials
):
    """
-    Tests that when a GET request to the Cognitive Atlas API fails (e.g., due to service being unavailable),
+    Tests that when a GET request to the Cognitive Atlas API has a non-success response code (e.g., due to service being unavailable),
    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
    """

@@ -174,3 +184,25 @@ def mock_httpx_get(**kwargs):
        in str(warn.message)
        for warn in w
    )
+
+
+def test_network_error_on_startup_raises_warning(
+    test_app, monkeypatch, set_test_credentials
+):
+    """
+    Tests that when a GET request to the Cognitive Atlas API fails due to a network error (i.e., while issuing the request),
+    a warning is raised and that a term label lookup file is still created using a backup copy of the vocab.
+    """
+
+    def mock_httpx_get(**kwargs):
+        raise httpx.ConnectError("Some network error")
+
+    monkeypatch.setattr(httpx, "get", mock_httpx_get)
+
+    with pytest.warns(UserWarning) as w:
+        with test_app:
+            assert test_app.app.state.cogatlas_term_lookup_path.exists()
+
+    assert any(
+        "failed due to a network error" in str(warn.message) for warn in w
+    )
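
A note on why this mock exercises the new code path: httpx.ConnectError is a subclass of httpx.NetworkError, so raising it from the patched httpx.get lands in the new except httpx.NetworkError branch of fetch_and_save_cogatlas. A quick illustrative check:

import httpx

# ConnectError is one of the concrete transport failures grouped under NetworkError.
assert issubclass(httpx.ConnectError, httpx.NetworkError)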
29 changes: 22 additions & 7 deletions tests/test_attributes.py
@@ -195,23 +195,38 @@ def mock_httpx_post(**kwargs):
    ]


-def test_get_attribute_vocab(test_app, monkeypatch, set_test_credentials):
+@pytest.mark.parametrize(
+    "data_element_uri, expected_vocab_name, expected_namespace_pfx",
+    [
+        ("nb:Assessment", "Cognitive Atlas Tasks", "cogatlas"),
+        ("nb:Diagnosis", "SNOMED CT", "snomed"),
+    ],
+)
+def test_get_attribute_vocab(
+    test_app,
+    monkeypatch,
+    set_test_credentials,
+    data_element_uri,
+    expected_vocab_name,
+    expected_namespace_pfx,
+):
    """Given a GET request to the /attributes/{data_element_URI}/vocab endpoint, successfully returns a JSON object containing the vocabulary name, namespace info, and term-label mappings."""
    # Mock contents of a temporary term-label lookup file for a vocabulary
    mock_term_labels = {
-        "tsk_p7cabUkVvQPBS": "Generalized Self-Efficacy Scale",
-        "tsk_ccTKYnmv7tOZY": "Verbal Interference Test",
+        "trm_1234": "Generic Vocabulary Term 1",
+        "trm_2345": "Generic Vocabulary Term 2",
    }

    def mock_load_json(path):
        return mock_term_labels

    monkeypatch.setattr(util, "load_json", mock_load_json)
-    response = test_app.get("/attributes/nb:Assessment/vocab")
+    response = test_app.get(f"/attributes/{data_element_uri}/vocab")

    assert response.status_code == 200
    assert response.json() == {
-        "vocabulary_name": "Cognitive Atlas Tasks",
-        "namespace_url": util.CONTEXT["cogatlas"],
-        "namespace_prefix": "cogatlas",
+        "vocabulary_name": expected_vocab_name,
+        "namespace_url": util.CONTEXT[expected_namespace_pfx],
+        "namespace_prefix": expected_namespace_pfx,
        "term_labels": mock_term_labels,
    }