Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REF] Refactor dataset size request into separate function #235

Merged
merged 3 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions app/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,32 @@ def post_query_to_graph(query: str, timeout: float = 5.0) -> dict:
return response.json()


def query_matching_dataset_sizes(dataset_uuids: list) -> dict:
    """
    Queries the graph for the number of subjects in each dataset in a list of dataset UUIDs.

    The query is built by ``util.create_multidataset_size_query`` and sent with
    ``post_query_to_graph`` (using that function's default timeout).

    Parameters
    ----------
    dataset_uuids : list
        A list of unique dataset UUIDs. It is passed unchanged to
        ``util.create_multidataset_size_query``, so other sequence types
        (e.g. the array returned by ``Series.unique()``) presumably work too
        - confirm against that helper.

    Returns
    -------
    dict
        A dictionary with keys corresponding to the dataset UUIDs and values corresponding to the number of subjects in the dataset.
        Values are converted with ``int``. Each unpacked result row must
        provide the keys ``"dataset_uuid"`` and ``"total_subjects"``.
    """
    # Get the total number of subjects in each dataset that matched the query
    matching_dataset_size_results = post_query_to_graph(
        util.create_multidataset_size_query(dataset_uuids)
    )
    return {
        # The graph response carries the count as a string literal, hence int().
        ds["dataset_uuid"]: int(ds["total_subjects"])
        for ds in util.unpack_http_response_json_to_dicts(
            matching_dataset_size_results
        )
    }


async def get(
min_age: float,
max_age: float,
Expand Down Expand Up @@ -129,18 +155,9 @@ async def get(
util.unpack_http_response_json_to_dicts(results)
).reindex(columns=ATTRIBUTES_ORDER)

# Get the total number of subjects in each dataset that matched the query
matching_dataset_size_results = post_query_to_graph(
util.create_multidataset_size_query(
results_df["dataset_uuid"].unique()
)
matching_dataset_sizes = query_matching_dataset_sizes(
results_df["dataset_uuid"].unique()
)
matching_dataset_sizes = {
ds["dataset_uuid"]: int(ds["total_subjects"])
for ds in util.unpack_http_response_json_to_dicts(
matching_dataset_size_results
)
}

response_obj = []
dataset_cols = ["dataset_uuid", "dataset_name"]
Expand Down
12 changes: 0 additions & 12 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def test_data():
]


# TODO update the test once https://github.com/neurobagel/api/issues/234 is resolved
@pytest.fixture
def mock_post_query_to_graph():
"""Mock post_query_to_graph function that returns toy data containing a dataset with no modalities for testing."""
Expand All @@ -65,7 +64,6 @@ def mockreturn(query, timeout=5.0):
"dataset_portal_uri",
"sub_id",
"image_modal",
"total_subjects",
]
},
"results": {
Expand All @@ -81,11 +79,6 @@ def mockreturn(query, timeout=5.0):
},
"sub_id": {"type": "literal", "value": "sub-ON95534"},
"dataset_name": {"type": "literal", "value": "QPN"},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "200",
},
},
{
"dataset_uuid": {
Expand All @@ -102,11 +95,6 @@ def mockreturn(query, timeout=5.0):
"type": "uri",
"value": "http://purl.org/nidash/nidm#T1Weighted",
},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "200",
},
},
]
},
Expand Down
55 changes: 55 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,67 @@
from app.api import crud


def test_get_subjects_by_query(monkeypatch):
    """Check that a dataset-size graph response is parsed into a ``{dataset_uuid: size}`` dictionary."""
    # Single source of truth: the mocked graph response is derived from the
    # same mapping the parsed result is compared against.
    expected_sizes = {
        "http://neurobagel.org/vocab/ds1234": 70,
        "http://neurobagel.org/vocab/ds2345": 40,
    }

    def make_binding(dataset_uuid, total_subjects):
        """Build one result binding in the shape used by the mocked graph response."""
        return {
            "dataset_uuid": {
                "type": "uri",
                "value": dataset_uuid,
            },
            "total_subjects": {
                "datatype": "http://www.w3.org/2001/XMLSchema#integer",
                "type": "literal",
                # Counts are given as string literals in the mocked response.
                "value": str(total_subjects),
            },
        }

    def mock_post_query_to_graph(query, timeout=5.0):
        bindings = [
            make_binding(uuid, size) for uuid, size in expected_sizes.items()
        ]
        return {
            "head": {"vars": ["dataset_uuid", "total_subjects"]},
            "results": {"bindings": bindings},
        }

    monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)

    parsed_sizes = crud.query_matching_dataset_sizes(list(expected_sizes))
    assert parsed_sizes == expected_sizes


def test_null_modalities(
test_app, test_data, mock_post_query_to_graph, monkeypatch
):
"""Given a response containing a dataset with no recorded modalities, returns an empty list for the imaging modalities."""

def mock_query_matching_dataset_sizes(dataset_uuids):
return {
"http://neurobagel.org/vocab/12345": 200,
}

monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)
monkeypatch.setattr(
crud, "query_matching_dataset_sizes", mock_query_matching_dataset_sizes
)

response = test_app.get("/query/")
assert response.json()[0]["image_modals"] == [
"http://purl.org/nidash/nidm#T1Weighted"
Expand Down