From f00a1ff599b883a688881e4d67289c93b79aaa2d Mon Sep 17 00:00:00 2001
From: Alyssa Dai
Date: Wed, 4 Oct 2023 11:12:32 -0400
Subject: [PATCH] [REF] Refactor `httpx.post` call to Stardog and context creation (#195)

* refactor Stardog post request into separate function

* create function to handle context generation

* update docstring

* more graph backend-generic function names and docstrings
---
 app/api/crud.py     | 134 ++++++++++++++++++++-------------------
 app/api/utility.py  |  24 ++++----
 tests/test_query.py |   2 +-
 3 files changed, 73 insertions(+), 87 deletions(-)

diff --git a/app/api/crud.py b/app/api/crud.py
index 7f0dd75..c411509 100644
--- a/app/api/crud.py
+++ b/app/api/crud.py
@@ -29,6 +29,48 @@
 ]
 
 
+def post_query_to_graph(query: str, timeout: float = 5.0) -> dict:
+    """
+    Makes a POST request to the graph API to perform a query, using parameters from the environment.
+    Parameters
+    ----------
+    query : str
+        The full SPARQL query string.
+    timeout : float, optional
+        The maximum duration for the request, by default 5.0 seconds.
+
+    Returns
+    -------
+    dict
+        The response from the graph API, encoded as JSON.
+    """
+    try:
+        response = httpx.post(
+            url=util.QUERY_URL,
+            content=query,
+            headers=util.QUERY_HEADER,
+            auth=httpx.BasicAuth(
+                os.environ.get(util.GRAPH_USERNAME.name),
+                os.environ.get(util.GRAPH_PASSWORD.name),
+            ),
+            timeout=timeout,
+        )
+    # Provide more informative error message for a timeout in the connection to the host.
+    except httpx.ConnectTimeout as exc:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Timed out while connecting to the server. You may not be on an authorized network to perform this request.",
+        ) from exc
+
+    if not response.is_success:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"{response.reason_phrase}: {response.text}",
+        )
+
+    return response.json()
+
+
 async def get(
     min_age: float,
     max_age: float,
@@ -40,7 +82,7 @@ async def get(
     image_modal: str,
 ):
     """
-    Makes a POST request to Stardog API using httpx where the payload is a SPARQL query generated by the create_query function.
+    Makes a POST request to the graph API using httpx where the payload is a SPARQL query generated by the create_query function.
 
     Parameters
     ----------
@@ -67,40 +109,20 @@ async def get(
         Response of the POST request.
     """
 
-    try:
-        response = httpx.post(
-            url=util.QUERY_URL,
-            content=util.create_query(
-                return_agg=util.RETURN_AGG.val,
-                age=(min_age, max_age),
-                sex=sex,
-                diagnosis=diagnosis,
-                is_control=is_control,
-                min_num_sessions=min_num_sessions,
-                assessment=assessment,
-                image_modal=image_modal,
-            ),
-            headers=util.QUERY_HEADER,
-            auth=httpx.BasicAuth(
-                os.environ.get(util.GRAPH_USERNAME.name),
-                os.environ.get(util.GRAPH_PASSWORD.name),
-            ),
-            # TODO: Revisit timeout value when query performance is improved
-            timeout=30.0,
-        )
-    except httpx.ConnectTimeout as exc:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Timed out while connecting to the server. "
-            "Please confirm that you are connected to the McGill network and try again.",
-        ) from exc
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
+    results = post_query_to_graph(
+        util.create_query(
+            return_agg=util.RETURN_AGG.val,
+            age=(min_age, max_age),
+            sex=sex,
+            diagnosis=diagnosis,
+            is_control=is_control,
+            min_num_sessions=min_num_sessions,
+            assessment=assessment,
+            image_modal=image_modal,
+        ),
+        # TODO: Revisit timeout value when query performance is improved
+        timeout=30.0,
+    )
 
     # Reformat SPARQL results into more human-readable form
     results_dicts = [
@@ -160,7 +182,7 @@ async def get(
 
 async def get_terms(data_element_URI: str):
     """
-    Makes a POST request to Stardog API using httpx where the payload is a SPARQL query generated by the create_terms_query function.
+    Makes a POST request to the graph API using httpx where the payload is a SPARQL query generated by the create_terms_query function.
 
     Parameters
    ----------
@@ -172,23 +194,7 @@ async def get_terms(data_element_URI: str):
     httpx.response
         Response of the POST request.
     """
-    response = httpx.post(
-        url=util.QUERY_URL,
-        content=util.create_terms_query(data_element_URI),
-        headers=util.QUERY_HEADER,
-        auth=httpx.BasicAuth(
-            os.environ.get(util.GRAPH_USERNAME.name),
-            os.environ.get(util.GRAPH_PASSWORD.name),
-        ),
-    )
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
+    results = post_query_to_graph(util.create_terms_query(data_element_URI))
 
     results_dict = {
         data_element_URI: [
@@ -202,7 +208,7 @@ async def get_terms(data_element_URI: str):
 
 async def get_controlled_term_attributes():
     """
-    Makes a POST query to Stardog API for all Neurobagel classes representing controlled term attributes.
+    Makes a POST query to the graph API for all Neurobagel classes representing controlled term attributes.
 
     Returns
     -------
@@ -210,31 +216,15 @@ async def get_controlled_term_attributes():
         Dictionary with value corresponding to all available controlled term attributes.
     """
     attributes_query = f"""
-    {util.DEFAULT_CONTEXT}
+    {util.create_context()}
 
     SELECT DISTINCT ?attribute
     WHERE {{
         ?attribute rdfs:subClassOf nb:ControlledTerm .
     }}
     """
+    results = post_query_to_graph(attributes_query)
 
-    response = httpx.post(
-        url=util.QUERY_URL,
-        content=attributes_query,
-        headers=util.QUERY_HEADER,
-        auth=httpx.BasicAuth(
-            os.environ.get(util.GRAPH_USERNAME.name),
-            os.environ.get(util.GRAPH_PASSWORD.name),
-        ),
-    )
-
-    if not response.is_success:
-        raise HTTPException(
-            status_code=response.status_code,
-            detail=f"{response.reason_phrase}: {response.text}",
-        )
-
-    results = response.json()
     results_list = [
         util.replace_namespace_uri(result["attribute"]["value"])
         for result in results["results"]["bindings"]
diff --git a/app/api/utility.py b/app/api/utility.py
index 28724d7..1705720 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -1,4 +1,4 @@
-"""Constants for Stardog graph connection and utility functions for writing the SPARQL query."""
 
 import os
 from collections import namedtuple
@@ -37,17 +37,6 @@
     "Accept": "application/sparql-results+json",
 }
 
-# SPARQL query context
-# TODO: Refactor into a function.
-DEFAULT_CONTEXT = """
-PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
-PREFIX nb: <http://neurobagel.org/vocab/>
-PREFIX nbg:
-PREFIX ncit:
-PREFIX nidm:
-PREFIX snomed:
-"""
-
 CONTEXT = {
     "cogatlas": "https://www.cognitiveatlas.org/task/id/",
     "nb": "http://neurobagel.org/vocab/",
@@ -79,6 +68,13 @@ def parse_origins_as_list(allowed_origins: str) -> list:
     return list(allowed_origins.split(" "))
 
 
+def create_context() -> str:
+    """Creates a SPARQL query context string from the CONTEXT dictionary."""
+    return "\n".join(
+        [f"PREFIX {prefix}: <{uri}>" for prefix, uri in CONTEXT.items()]
+    )
+
+
 def create_query(
     return_agg: bool,
     age: Optional[tuple] = (None, None),
@@ -213,7 +209,7 @@ def create_query(
     \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
     """
 
-    return "\n".join([DEFAULT_CONTEXT, query_string])
+    return "\n".join([create_context(), query_string])
 
 
 def create_terms_query(data_element_URI: str) -> str:
@@ -243,7 +239,7 @@ def create_terms_query(data_element_URI: str) -> str:
     }}
     """
 
-    return "\n".join([DEFAULT_CONTEXT, query_string])
+    return "\n".join([create_context(), query_string])
 
 
 def replace_namespace_uri(url: str) -> str:
diff --git a/tests/test_query.py b/tests/test_query.py
index 363b906..bf9e813 100644
--- a/tests/test_query.py
+++ b/tests/test_query.py
@@ -1,4 +1,4 @@
-"""Test API to query subjects from the Stardog graph who match user-specified criteria."""
+"""Test API to query subjects from the graph database who match user-specified criteria."""
 
 import os
 import warnings
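
Note: the snippet below is a minimal, standalone sketch (not part of the patch itself) of what the new create_context() helper produces. The two dictionary entries are copied from the CONTEXT constant visible in the app/api/utility.py hunk above; the full constant also defines nbg, ncit, nidm, and snomed namespaces, which would be rendered the same way.

    # Standalone illustration of the create_context() helper added in app/api/utility.py.
    # The CONTEXT mapping here is a two-entry excerpt of the full constant.
    CONTEXT = {
        "cogatlas": "https://www.cognitiveatlas.org/task/id/",
        "nb": "http://neurobagel.org/vocab/",
    }

    def create_context() -> str:
        """Creates a SPARQL query context string from the CONTEXT dictionary."""
        return "\n".join(
            [f"PREFIX {prefix}: <{uri}>" for prefix, uri in CONTEXT.items()]
        )

    print(create_context())
    # PREFIX cogatlas: <https://www.cognitiveatlas.org/task/id/>
    # PREFIX nb: <http://neurobagel.org/vocab/>

Since every SPARQL request now goes through post_query_to_graph(), authentication, timeout handling, and error translation live in a single place, and a test suite could monkeypatch that one function instead of stubbing httpx.post for each endpoint.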