From 5893ded41e4b03e096ea0d8f7c43f368ba78a23a Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Tue, 9 Jul 2024 22:35:25 -0400 Subject: [PATCH 01/18] add implicit OAuth flow + token verification via Google for /query route --- app/api/routers/query.py | 59 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index c5a526a..1b58f4b 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -1,12 +1,48 @@ """Router for query path operations.""" -from fastapi import APIRouter, Depends, Response, status +from fastapi import APIRouter, Depends, HTTPException, Response, status +from fastapi.security import OAuth2 +from fastapi.security.utils import get_authorization_scheme_param +from google.auth.transport import requests +from google.oauth2 import id_token from .. import crud from ..models import CombinedQueryResponse, QueryModel router = APIRouter(prefix="/query", tags=["query"]) +oauth2_scheme = OAuth2( + flows={ + "implicit": { + "authorizationUrl": "https://accounts.google.com/o/oauth2/auth", + } + } +) + +CLIENT_ID = ( + "465352721782-aj7eam9jdu967adj8vd8ckih325k62d5.apps.googleusercontent.com" +) + + +def verify_token(token: str): + """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" + # Adapted from https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#python + try: + # Extract the token from the "Bearer" scheme + # (See https://github.com/tiangolo/fastapi/blob/master/fastapi/security/oauth2.py#L473-L485) + _, param = get_authorization_scheme_param(token) + id_info = id_token.verify_oauth2_token( + param, requests.Request(), CLIENT_ID + ) + # TODO: Remove print statement - just for testing + print("Token verified: ", id_info) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=f"Invalid token: {exc}", + headers={"WWW-Authenticate": "Bearer"}, + ) from exc + # We use the Response parameter below to change the status code of the response while still being able to validate the returned data using the response model. # (see https://fastapi.tiangolo.com/advanced/response-change-status-code/ for more info). @@ -16,9 +52,28 @@ # example responses for different status codes in the OpenAPI docs (less relevant for now since there is only one response model). @router.get("/", response_model=CombinedQueryResponse) async def get_query( - response: Response, query: QueryModel = Depends(QueryModel) + response: Response, + query: QueryModel = Depends(QueryModel), + token: str = Depends(oauth2_scheme), ): """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset.""" + # NOTE: Currently, when the request is unauthenticated (missing or malformed authorization header -> missing token), + # the default response is a 403 Forbidden error. + # This doesn't fully align with HTTP status code conventions: + # - 401 Unauthorized should be used when the client lacks authentication credentials + # - 403 Forbidden should be used when the client has been authenticated but lacks the required permissions + # If we really care about returning a 401 Unauthorized error, we can use auto_error=False + # when creating the OAuth2 object and raise a custom HTTPException. + # See also https://github.com/tiangolo/fastapi/discussions/9130 + # if not token: + # raise HTTPException( + # status_code=status.HTTP_401_UNAUTHORIZED, + # detail="Not authenticated", + # headers={"WWW-Authenticate": "Bearer"}, + # ) + + verify_token(token) + response_dict = await crud.get( query.min_age, query.max_age, From 97d4512ceff9db6122bc23561c0aa2aeb023b1ae Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Tue, 9 Jul 2024 22:36:22 -0400 Subject: [PATCH 02/18] add dependencies for Google auth library --- requirements.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6314e26..ed881d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,17 @@ anyio==3.6.2 attrs==23.1.0 +cachetools==5.3.3 certifi==2023.7.22 cfgv==3.3.1 +charset-normalizer==3.3.2 click==8.1.3 +colorama==0.4.6 coverage==7.0.0 distlib==0.3.6 exceptiongroup==1.0.4 fastapi==0.95.2 filelock==3.8.0 +google-auth==2.32.0 h11==0.14.0 httpcore==0.16.2 httpx==0.23.1 @@ -22,23 +26,28 @@ orjson==3.8.6 packaging==21.3 pandas==1.5.2 platformdirs==2.5.4 -pluggy==1.5.0 +pluggy==1.0.0 pre-commit==2.20.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 pydantic==1.10.2 pyparsing==3.0.9 -pytest==8.2.1 +pytest==7.2.0 pytest-asyncio==0.23.7 python-dateutil==2.8.2 pytz==2023.3.post1 PyYAML==6.0 referencing==0.31.1 +requests==2.31.0 rfc3986==1.5.0 rpds-py==0.13.2 +rsa==4.9 six==1.16.0 sniffio==1.3.0 starlette==0.27.0 toml==0.10.2 tomli==2.0.1 typing_extensions==4.4.0 +urllib3==2.2.0 uvicorn==0.20.0 virtualenv==20.16.7 From 1c14c8c5959036ef39c5516ca567233c3e59c6d2 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Wed, 10 Jul 2024 15:17:03 -0400 Subject: [PATCH 03/18] add note on using OIDC auth class --- app/api/routers/query.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index 1b58f4b..177e443 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -2,6 +2,8 @@ from fastapi import APIRouter, Depends, HTTPException, Response, status from fastapi.security import OAuth2 + +# from fastapi.security import open_id_connect_url from fastapi.security.utils import get_authorization_scheme_param from google.auth.transport import requests from google.oauth2 import id_token @@ -11,6 +13,7 @@ router = APIRouter(prefix="/query", tags=["query"]) +# Adapted from info in https://github.com/tiangolo/fastapi/discussions/9137#discussioncomment-5157382 oauth2_scheme = OAuth2( flows={ "implicit": { @@ -18,6 +21,10 @@ } } ) +# NOTE: Can also explicitly use OpenID Connect because Google supports it - results in the same behavior as the OAuth2 scheme above. +# openid_connect_scheme = open_id_connect_url.OpenIdConnect( +# openIdConnectUrl="https://accounts.google.com/.well-known/openid-configuration" +# ) CLIENT_ID = ( "465352721782-aj7eam9jdu967adj8vd8ckih325k62d5.apps.googleusercontent.com" From 6271938bb13278febe0c0cb9cc29f98685438adf Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Wed, 10 Jul 2024 16:32:45 -0400 Subject: [PATCH 04/18] read CLIENT_ID from env var and error if unset --- app/api/routers/query.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index 177e443..dc51faf 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -1,10 +1,13 @@ """Router for query path operations.""" +import os + from fastapi import APIRouter, Depends, HTTPException, Response, status from fastapi.security import OAuth2 # from fastapi.security import open_id_connect_url from fastapi.security.utils import get_authorization_scheme_param +from google.auth.exceptions import GoogleAuthError from google.auth.transport import requests from google.oauth2 import id_token @@ -26,13 +29,19 @@ # openIdConnectUrl="https://accounts.google.com/.well-known/openid-configuration" # ) -CLIENT_ID = ( - "465352721782-aj7eam9jdu967adj8vd8ckih325k62d5.apps.googleusercontent.com" -) +CLIENT_ID = os.environ.get("NB_QUERY_CLIENT_ID", None) def verify_token(token: str): """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" + # By default, if CLIENT_ID is not provided to verify_oauth2_token, + # Google will simply skip verifying the audience claim of ID tokens. + # This however can be a security risk, so we mandate that CLIENT_ID is set. + if not CLIENT_ID: + raise ValueError( + "Client ID of the Neurobagel query tool must be provided to verify the audience claim of ID tokens. " + "Please set the environment variable NB_QUERY_CLIENT_ID." + ) # Adapted from https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#python try: # Extract the token from the "Bearer" scheme @@ -43,7 +52,7 @@ def verify_token(token: str): ) # TODO: Remove print statement - just for testing print("Token verified: ", id_info) - except ValueError as exc: + except (GoogleAuthError, ValueError) as exc: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=f"Invalid token: {exc}", From 6a8eef599ca442a6a60cc8bd51446b863e02e327 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Wed, 10 Jul 2024 21:37:58 -0400 Subject: [PATCH 05/18] refactor token util into dedicated module --- app/api/routers/query.py | 44 +++++----------------------------------- app/api/security.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 39 deletions(-) create mode 100644 app/api/security.py diff --git a/app/api/routers/query.py b/app/api/routers/query.py index dc51faf..db16d55 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -1,18 +1,14 @@ """Router for query path operations.""" -import os - -from fastapi import APIRouter, Depends, HTTPException, Response, status +from fastapi import APIRouter, Depends, Response, status from fastapi.security import OAuth2 -# from fastapi.security import open_id_connect_url -from fastapi.security.utils import get_authorization_scheme_param -from google.auth.exceptions import GoogleAuthError -from google.auth.transport import requests -from google.oauth2 import id_token - from .. import crud from ..models import CombinedQueryResponse, QueryModel +from ..security import verify_token + +# from fastapi.security import open_id_connect_url + router = APIRouter(prefix="/query", tags=["query"]) @@ -29,36 +25,6 @@ # openIdConnectUrl="https://accounts.google.com/.well-known/openid-configuration" # ) -CLIENT_ID = os.environ.get("NB_QUERY_CLIENT_ID", None) - - -def verify_token(token: str): - """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" - # By default, if CLIENT_ID is not provided to verify_oauth2_token, - # Google will simply skip verifying the audience claim of ID tokens. - # This however can be a security risk, so we mandate that CLIENT_ID is set. - if not CLIENT_ID: - raise ValueError( - "Client ID of the Neurobagel query tool must be provided to verify the audience claim of ID tokens. " - "Please set the environment variable NB_QUERY_CLIENT_ID." - ) - # Adapted from https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#python - try: - # Extract the token from the "Bearer" scheme - # (See https://github.com/tiangolo/fastapi/blob/master/fastapi/security/oauth2.py#L473-L485) - _, param = get_authorization_scheme_param(token) - id_info = id_token.verify_oauth2_token( - param, requests.Request(), CLIENT_ID - ) - # TODO: Remove print statement - just for testing - print("Token verified: ", id_info) - except (GoogleAuthError, ValueError) as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=f"Invalid token: {exc}", - headers={"WWW-Authenticate": "Bearer"}, - ) from exc - # We use the Response parameter below to change the status code of the response while still being able to validate the returned data using the response model. # (see https://fastapi.tiangolo.com/advanced/response-change-status-code/ for more info). diff --git a/app/api/security.py b/app/api/security.py new file mode 100644 index 0000000..9d95d73 --- /dev/null +++ b/app/api/security.py @@ -0,0 +1,37 @@ +import os + +from fastapi import HTTPException, status +from fastapi.security.utils import get_authorization_scheme_param +from google.auth.exceptions import GoogleAuthError +from google.auth.transport import requests +from google.oauth2 import id_token + +CLIENT_ID = os.environ.get("NB_QUERY_CLIENT_ID", None) + + +def verify_token(token: str): + """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" + # By default, if CLIENT_ID is not provided to verify_oauth2_token, + # Google will simply skip verifying the audience claim of ID tokens. + # This however can be a security risk, so we mandate that CLIENT_ID is set. + if not CLIENT_ID: + raise ValueError( + "Client ID of the Neurobagel query tool must be provided to verify the audience claim of ID tokens. " + "Please set the environment variable NB_QUERY_CLIENT_ID." + ) + # Adapted from https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#python + try: + # Extract the token from the "Bearer" scheme + # (See https://github.com/tiangolo/fastapi/blob/master/fastapi/security/oauth2.py#L473-L485) + _, param = get_authorization_scheme_param(token) + id_info = id_token.verify_oauth2_token( + param, requests.Request(), CLIENT_ID + ) + # TODO: Remove print statement - just for testing + print("Token verified: ", id_info) + except (GoogleAuthError, ValueError) as exc: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=f"Invalid token: {exc}", + headers={"WWW-Authenticate": "Bearer"}, + ) from exc From 76c7da114a2352f4a498f12b927abcc5a75ab808 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Wed, 10 Jul 2024 23:33:38 -0400 Subject: [PATCH 06/18] use mocked token & verification in tests --- tests/test_query.py | 52 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/tests/test_query.py b/tests/test_query.py index b731e91..7d17e9c 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -6,6 +6,30 @@ from fastapi import status +@pytest.fixture() +def mock_token(): + """Create a mock token that is well-formed for testing purposes.""" + return "Bearer foo" + + +@pytest.fixture() +def mock_verify_token(): + """Mock a successful token verification that does not raise any exceptions.""" + + def _verify_token(token): + return None + + return _verify_token + + +@pytest.fixture() +def set_mock_verify_token(monkeypatch, mock_verify_token): + """Set the verify_token function to a mock that does not raise any exceptions.""" + monkeypatch.setattr( + "app.api.routers.query.verify_token", mock_verify_token + ) + + @pytest.fixture() def mocked_single_matching_dataset_result(): """Valid aggregate query result for a single matching dataset.""" @@ -29,6 +53,8 @@ def test_partial_node_failure_responses_handled_gracefully( test_app, set_valid_test_federation_nodes, mocked_single_matching_dataset_result, + mock_token, + set_mock_verify_token, caplog, ): """ @@ -50,7 +76,10 @@ async def mock_httpx_get(self, **kwargs): monkeypatch.setattr(httpx.AsyncClient, "get", mock_httpx_get) - response = test_app.get("/query/") + response = test_app.get( + "/query/", + headers={"Authorization": mock_token}, + ) assert response.status_code == status.HTTP_207_MULTI_STATUS assert response.json() == { @@ -104,6 +133,8 @@ def test_partial_node_request_failures_handled_gracefully( test_app, set_valid_test_federation_nodes, mocked_single_matching_dataset_result, + mock_token, + set_mock_verify_token, error_to_raise, expected_node_message, caplog, @@ -123,7 +154,10 @@ async def mock_httpx_get(self, **kwargs): monkeypatch.setattr(httpx.AsyncClient, "get", mock_httpx_get) - response = test_app.get("/query/") + response = test_app.get( + "/query/", + headers={"Authorization": mock_token}, + ) assert response.status_code == status.HTTP_207_MULTI_STATUS @@ -153,6 +187,8 @@ def test_all_nodes_failure_handled_gracefully( monkeypatch, test_app, mock_failed_connection_httpx_get, + mock_token, + set_mock_verify_token, set_valid_test_federation_nodes, caplog, ): @@ -164,7 +200,10 @@ def test_all_nodes_failure_handled_gracefully( httpx.AsyncClient, "get", mock_failed_connection_httpx_get ) - response = test_app.get("/query/") + response = test_app.get( + "/query/", + headers={"Authorization": mock_token}, + ) # We expect 3 logs here: one warning for each failed node, and one error for the overall failure assert len(caplog.records) == 3 @@ -186,6 +225,8 @@ def test_all_nodes_success_handled_gracefully( caplog, set_valid_test_federation_nodes, mocked_single_matching_dataset_result, + mock_token, + set_mock_verify_token, ): """ Test that when queries sent to all nodes succeed, the federation API response includes an overall success status and no errors. @@ -201,7 +242,10 @@ async def mock_httpx_get(self, **kwargs): monkeypatch.setattr(httpx.AsyncClient, "get", mock_httpx_get) - response = test_app.get("/query/") + response = test_app.get( + "/query/", + headers={"Authorization": mock_token}, + ) assert response.status_code == status.HTTP_200_OK From e22cc564495376dc87dbfaeb09211b8dc51b0ebf Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 13:01:45 -0400 Subject: [PATCH 07/18] test behaviour for invalid tokens --- tests/test_security.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_security.py diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..4f2c160 --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,42 @@ +import pytest +from fastapi import HTTPException + +from app.api.security import verify_token + + +@pytest.mark.parametrize( + "invalid_token", + ["Bearer faketoken", "Bearer", "faketoken", "fakescheme faketoken"], +) +def test_invalid_token_raises_error(invalid_token): + """Test that an invalid token raises an error from the verification process.""" + with pytest.raises(HTTPException) as exc_info: + verify_token(invalid_token) + + assert exc_info.value.status_code == 401 + assert "Invalid token" in exc_info.value.detail + + +@pytest.mark.parametrize( + "invalid_auth_header", + [{}, {"Authorization": ""}, {"badheader": "badvalue"}], +) +def test_query_fails_with_malformed_auth_header( + test_app, monkeypatch, invalid_auth_header +): + """Test that a request to the /query route fails with a missing or malformed authorization header.""" + + # Mock verify_token function since we don't want to actually verify the token in this test + def mock_verify_token(token): + return None + + monkeypatch.setattr( + "app.api.routers.query.verify_token", mock_verify_token + ) + + response = test_app.get( + "/query/", + headers=invalid_auth_header, + ) + + assert response.status_code == 403 From 05cabd6c16bfe54f756225bc8a9a6420e89abf1a Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 13:04:07 -0400 Subject: [PATCH 08/18] add TODO to check for 'Bearer' --- app/api/security.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/api/security.py b/app/api/security.py index 9d95d73..e94ffe6 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -23,6 +23,7 @@ def verify_token(token: str): try: # Extract the token from the "Bearer" scheme # (See https://github.com/tiangolo/fastapi/blob/master/fastapi/security/oauth2.py#L473-L485) + # TODO: Check also if scheme of token is "Bearer"? _, param = get_authorization_scheme_param(token) id_info = id_token.verify_oauth2_token( param, requests.Request(), CLIENT_ID From 3aef8f07d429c1a7aff5459a3089b727a8412bfd Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 15:46:16 -0400 Subject: [PATCH 09/18] add env var to disable /query auth --- app/api/routers/query.py | 17 ++++++++++++----- app/api/utility.py | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index db16d55..122b262 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -1,9 +1,10 @@ """Router for query path operations.""" -from fastapi import APIRouter, Depends, Response, status +from fastapi import APIRouter, Depends, HTTPException, Response, status from fastapi.security import OAuth2 from .. import crud +from .. import utility as util from ..models import CombinedQueryResponse, QueryModel from ..security import verify_token @@ -18,7 +19,8 @@ "implicit": { "authorizationUrl": "https://accounts.google.com/o/oauth2/auth", } - } + }, + auto_error=False, ) # NOTE: Can also explicitly use OpenID Connect because Google supports it - results in the same behavior as the OAuth2 scheme above. # openid_connect_scheme = open_id_connect_url.OpenIdConnect( @@ -36,7 +38,7 @@ async def get_query( response: Response, query: QueryModel = Depends(QueryModel), - token: str = Depends(oauth2_scheme), + token: str | None = Depends(oauth2_scheme), ): """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset.""" # NOTE: Currently, when the request is unauthenticated (missing or malformed authorization header -> missing token), @@ -53,8 +55,13 @@ async def get_query( # detail="Not authenticated", # headers={"WWW-Authenticate": "Bearer"}, # ) - - verify_token(token) + if util.AUTH_ENABLED: + if token is None: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Not authenticated", + ) + verify_token(token) response_dict = await crud.get( query.min_age, diff --git a/app/api/utility.py b/app/api/utility.py index 9aa2bdd..656be84 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -20,6 +20,8 @@ == "true", ) +AUTH_ENABLED = os.environ.get("NB_AUTH_ENABLED", "True").lower() == "true" + LOCAL_NODE_INDEX_PATH = Path(__file__).parents[2] / "local_nb_nodes.json" # Stores the names and URLs of all Neurobagel nodes known to the API instance, in the form of {node_url: node_name, ...} From 3e1eff6124ec0d002cd865762549e11fdd367074 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 16:12:08 -0400 Subject: [PATCH 10/18] check CLIENT_ID on app startup --- app/api/routers/query.py | 5 ++--- app/api/security.py | 11 ++++++++--- app/api/utility.py | 2 -- app/main.py | 2 ++ 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index 122b262..4d6aa9e 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -3,8 +3,7 @@ from fastapi import APIRouter, Depends, HTTPException, Response, status from fastapi.security import OAuth2 -from .. import crud -from .. import utility as util +from .. import crud, security from ..models import CombinedQueryResponse, QueryModel from ..security import verify_token @@ -55,7 +54,7 @@ async def get_query( # detail="Not authenticated", # headers={"WWW-Authenticate": "Bearer"}, # ) - if util.AUTH_ENABLED: + if security.AUTH_ENABLED: if token is None: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, diff --git a/app/api/security.py b/app/api/security.py index e94ffe6..8d795cb 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -6,19 +6,24 @@ from google.auth.transport import requests from google.oauth2 import id_token +AUTH_ENABLED = os.environ.get("NB_AUTH_ENABLED", "True").lower() == "true" CLIENT_ID = os.environ.get("NB_QUERY_CLIENT_ID", None) -def verify_token(token: str): - """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" +def check_client_id(): + """Check if the CLIENT_ID environment variable is set.""" # By default, if CLIENT_ID is not provided to verify_oauth2_token, # Google will simply skip verifying the audience claim of ID tokens. # This however can be a security risk, so we mandate that CLIENT_ID is set. - if not CLIENT_ID: + if AUTH_ENABLED and CLIENT_ID is None: raise ValueError( "Client ID of the Neurobagel query tool must be provided to verify the audience claim of ID tokens. " "Please set the environment variable NB_QUERY_CLIENT_ID." ) + + +def verify_token(token: str): + """Verify the Google ID token. Raise an HTTPException if the token is invalid.""" # Adapted from https://developers.google.com/identity/gsi/web/guides/verify-google-id-token#python try: # Extract the token from the "Bearer" scheme diff --git a/app/api/utility.py b/app/api/utility.py index 656be84..9aa2bdd 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -20,8 +20,6 @@ == "true", ) -AUTH_ENABLED = os.environ.get("NB_AUTH_ENABLED", "True").lower() == "true" - LOCAL_NODE_INDEX_PATH = Path(__file__).parents[2] / "local_nb_nodes.json" # Stores the names and URLs of all Neurobagel nodes known to the API instance, in the form of {node_url: node_name, ...} diff --git a/app/main.py b/app/main.py index e88281a..6e6de54 100644 --- a/app/main.py +++ b/app/main.py @@ -11,6 +11,7 @@ from .api import utility as util from .api.routers import attributes, nodes, query +from .api.security import check_client_id logger = logging.getLogger("nb-f-API") stdout_handler = logging.StreamHandler() @@ -26,6 +27,7 @@ async def lifespan(app: FastAPI): """ Collect and store locally defined and public node details for federation upon startup and clears the index upon shutdown. """ + check_client_id() await util.create_federation_node_index() yield util.FEDERATION_NODES.clear() From 2817684bd55760a6aec1d87315511ea300903dc3 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 16:34:42 -0400 Subject: [PATCH 11/18] disable auth for tests of node fetching --- tests/test_nodes.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 3f9506f..e7e06a4 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -4,6 +4,15 @@ from app.api import utility as util +@pytest.fixture +def disable_auth(monkeypatch): + """ + Disable the authentication requirement for the API to skip startup checks + (for when the tested route does not require authentication). + """ + monkeypatch.setattr("app.api.security.AUTH_ENABLED", False) + + @pytest.mark.parametrize( "local_nodes", [ @@ -14,7 +23,9 @@ }, ], ) -def test_nodes_discovery_endpoint(test_app, monkeypatch, local_nodes): +def test_nodes_discovery_endpoint( + test_app, monkeypatch, local_nodes, disable_auth +): """Test that a federation node index is correctly created from locally set and remote node lists.""" def mock_parse_nodes_as_dict(path): @@ -59,7 +70,7 @@ def mock_httpx_get(**kwargs): def test_failed_public_nodes_fetching_raises_warning( - test_app, monkeypatch, caplog + test_app, monkeypatch, disable_auth, caplog ): """Test that when request for remote list of public nodes fails, an informative warning is raised and the federation node index only includes local nodes.""" @@ -95,7 +106,7 @@ def mock_httpx_get(**kwargs): assert warn_substr in caplog.text -def test_unset_local_nodes_raises_warning(test_app, monkeypatch): +def test_unset_local_nodes_raises_warning(test_app, monkeypatch, disable_auth): """Test that when no local nodes are set, an informative warning is raised and the federation node index only includes remote nodes.""" def mock_parse_nodes_as_dict(path): @@ -166,7 +177,9 @@ def test_missing_local_nodes_file_does_not_raise_error(tmp_path): assert util.parse_nodes_as_dict(expected_file_path) == {} -def test_no_available_nodes_raises_error(monkeypatch, test_app, caplog): +def test_no_available_nodes_raises_error( + monkeypatch, test_app, disable_auth, caplog +): """Test that when no local or remote nodes are available, an informative error is raised.""" def mock_parse_nodes_as_dict(path): From c4b2eae079f91b30ce10ecc15ec074f58dcfbb96 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 16:54:35 -0400 Subject: [PATCH 12/18] test startup check for CLIENT_ID --- app/api/security.py | 4 ++-- tests/test_security.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/app/api/security.py b/app/api/security.py index 8d795cb..2ccb7bf 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -17,8 +17,8 @@ def check_client_id(): # This however can be a security risk, so we mandate that CLIENT_ID is set. if AUTH_ENABLED and CLIENT_ID is None: raise ValueError( - "Client ID of the Neurobagel query tool must be provided to verify the audience claim of ID tokens. " - "Please set the environment variable NB_QUERY_CLIENT_ID." + "Authentication has been enabled (NB_AUTH_ENABLED) but the environment variable NB_QUERY_CLIENT_ID is not set. " + "Please set NB_QUERY_CLIENT_ID to the Neurobagel query tool client ID, to verify the audience claim of ID tokens." ) diff --git a/tests/test_security.py b/tests/test_security.py index 4f2c160..aed090e 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -4,6 +4,32 @@ from app.api.security import verify_token +def test_missing_client_id_raises_error_when_auth_enabled( + monkeypatch, test_app +): + """Test that a missing client ID raises an error on startup when authentication is enabled.""" + # We're using what should be default values of CLIENT_ID and AUTH_ENABLED here + # (if the corresponding environment variables are unset), + # but we set the values explicitly here for clarity + monkeypatch.setattr("app.api.security.CLIENT_ID", None) + monkeypatch.setattr("app.api.security.AUTH_ENABLED", True) + + with pytest.raises(ValueError) as exc_info: + with test_app: + pass + + assert "NB_QUERY_CLIENT_ID is not set" in str(exc_info.value) + + +def test_missing_client_id_ignored_when_auth_disabled(monkeypatch, test_app): + """Test that a missing client ID does not raise an error when authentication is disabled.""" + monkeypatch.setattr("app.api.security.CLIENT_ID", None) + monkeypatch.setattr("app.api.security.AUTH_ENABLED", False) + + with test_app: + pass + + @pytest.mark.parametrize( "invalid_token", ["Bearer faketoken", "Bearer", "faketoken", "fakescheme faketoken"], From 7c99272cbe59f752ece98c7058c7609f4691fc32 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 17:30:27 -0400 Subject: [PATCH 13/18] refactor tests; add test of query when auth is disabled --- tests/conftest.py | 9 +++++++++ tests/test_nodes.py | 9 --------- tests/test_query.py | 23 +++++++++++++++++++++++ tests/test_security.py | 4 ++++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a8b4b5f..01564b8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,15 @@ def test_app(): yield client +@pytest.fixture +def disable_auth(monkeypatch): + """ + Disable the authentication requirement for the API to skip startup checks + (for when the tested route does not require authentication). + """ + monkeypatch.setattr("app.api.security.AUTH_ENABLED", False) + + @pytest.fixture(scope="function") def set_valid_test_federation_nodes(monkeypatch): """Set two correctly formatted federation nodes for a test function (mocks the result of reading/parsing available public and local nodes on startup).""" diff --git a/tests/test_nodes.py b/tests/test_nodes.py index e7e06a4..56eb42d 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -4,15 +4,6 @@ from app.api import utility as util -@pytest.fixture -def disable_auth(monkeypatch): - """ - Disable the authentication requirement for the API to skip startup checks - (for when the tested route does not require authentication). - """ - monkeypatch.setattr("app.api.security.AUTH_ENABLED", False) - - @pytest.mark.parametrize( "local_nodes", [ diff --git a/tests/test_query.py b/tests/test_query.py index 7d17e9c..c94dd13 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -254,3 +254,26 @@ async def mock_httpx_get(self, **kwargs): assert response["errors"] == [] assert len(response["responses"]) == 2 assert "Requests to all nodes succeeded (2/2)" in caplog.text + + +def test_query_without_token_succeeds_when_auth_disabled( + monkeypatch, + test_app, + set_valid_test_federation_nodes, + mocked_single_matching_dataset_result, + disable_auth, +): + """ + Test that when authentication is disabled, a federated query request without a token succeeds. + """ + + async def mock_httpx_get(self, **kwargs): + return httpx.Response( + status_code=200, json=[mocked_single_matching_dataset_result] + ) + + monkeypatch.setattr(httpx.AsyncClient, "get", mock_httpx_get) + + response = test_app.get("/query/") + + assert response.status_code == status.HTTP_200_OK diff --git a/tests/test_security.py b/tests/test_security.py index aed090e..79359d2 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -21,6 +21,10 @@ def test_missing_client_id_raises_error_when_auth_enabled( assert "NB_QUERY_CLIENT_ID is not set" in str(exc_info.value) +# Ignore startup warning that is unrelated to the current test +@pytest.mark.filterwarnings( + "ignore:No local Neurobagel nodes defined or found" +) def test_missing_client_id_ignored_when_auth_disabled(monkeypatch, test_app): """Test that a missing client ID does not raise an error when authentication is disabled.""" monkeypatch.setattr("app.api.security.CLIENT_ID", None) From 01b855400aa02edc1a48727d70dc3af284ea6f99 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 17:39:21 -0400 Subject: [PATCH 14/18] rename auth env var --- app/api/security.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/api/security.py b/app/api/security.py index 2ccb7bf..6573164 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -6,7 +6,7 @@ from google.auth.transport import requests from google.oauth2 import id_token -AUTH_ENABLED = os.environ.get("NB_AUTH_ENABLED", "True").lower() == "true" +AUTH_ENABLED = os.environ.get("NB_ENABLE_AUTH", "True").lower() == "true" CLIENT_ID = os.environ.get("NB_QUERY_CLIENT_ID", None) @@ -17,7 +17,7 @@ def check_client_id(): # This however can be a security risk, so we mandate that CLIENT_ID is set. if AUTH_ENABLED and CLIENT_ID is None: raise ValueError( - "Authentication has been enabled (NB_AUTH_ENABLED) but the environment variable NB_QUERY_CLIENT_ID is not set. " + "Authentication has been enabled (NB_ENABLE_AUTH) but the environment variable NB_QUERY_CLIENT_ID is not set. " "Please set NB_QUERY_CLIENT_ID to the Neurobagel query tool client ID, to verify the audience claim of ID tokens." ) From 88043c97cbfa13809e7e966dd22ddcd16949f402 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 11 Jul 2024 17:43:19 -0400 Subject: [PATCH 15/18] add comment --- app/api/routers/query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index 4d6aa9e..d6f0e5e 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -19,6 +19,7 @@ "authorizationUrl": "https://accounts.google.com/o/oauth2/auth", } }, + # Don't automatically error out when request is not authenticated, to support optional authentication auto_error=False, ) # NOTE: Can also explicitly use OpenID Connect because Google supports it - results in the same behavior as the OAuth2 scheme above. From 6c62e3707ebcc676ecc913210ce120b23ef53abc Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sun, 14 Jul 2024 13:47:04 -0400 Subject: [PATCH 16/18] improve clarity of comments and messages Co-authored-by: Sebastian Urchs --- app/api/security.py | 2 +- tests/test_security.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/api/security.py b/app/api/security.py index 6573164..2e62011 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -18,7 +18,7 @@ def check_client_id(): if AUTH_ENABLED and CLIENT_ID is None: raise ValueError( "Authentication has been enabled (NB_ENABLE_AUTH) but the environment variable NB_QUERY_CLIENT_ID is not set. " - "Please set NB_QUERY_CLIENT_ID to the Neurobagel query tool client ID, to verify the audience claim of ID tokens." + "Please set NB_QUERY_CLIENT_ID to the Google client ID for your Neurobagel query tool deployment, to verify the audience claim of ID tokens." ) diff --git a/tests/test_security.py b/tests/test_security.py index 79359d2..71eb45d 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -51,10 +51,10 @@ def test_invalid_token_raises_error(invalid_token): "invalid_auth_header", [{}, {"Authorization": ""}, {"badheader": "badvalue"}], ) -def test_query_fails_with_malformed_auth_header( +def test_query_with_malformed_auth_header_fails( test_app, monkeypatch, invalid_auth_header ): - """Test that a request to the /query route fails with a missing or malformed authorization header.""" + """Test that a request to the /query route with a missing or malformed authorization header, fails .""" # Mock verify_token function since we don't want to actually verify the token in this test def mock_verify_token(token): From 1e120c05cae1690c2c386522c7e75b7d86178f24 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sun, 14 Jul 2024 13:49:08 -0400 Subject: [PATCH 17/18] update comment --- app/api/security.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/security.py b/app/api/security.py index 2e62011..cb159a7 100644 --- a/app/api/security.py +++ b/app/api/security.py @@ -33,7 +33,7 @@ def verify_token(token: str): id_info = id_token.verify_oauth2_token( param, requests.Request(), CLIENT_ID ) - # TODO: Remove print statement - just for testing + # TODO: Remove print statement or turn into logging print("Token verified: ", id_info) except (GoogleAuthError, ValueError) as exc: raise HTTPException( From 7e036fb2fbfe6199fe30fb4a7c589978da7718ad Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sun, 14 Jul 2024 13:59:46 -0400 Subject: [PATCH 18/18] refactor mocked verify_token definition --- tests/conftest.py | 18 ++++++++++++++++++ tests/test_query.py | 18 ------------------ tests/test_security.py | 10 +--------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 01564b8..3d67611 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,24 @@ def disable_auth(monkeypatch): monkeypatch.setattr("app.api.security.AUTH_ENABLED", False) +@pytest.fixture() +def mock_verify_token(): + """Mock a successful token verification that does not raise any exceptions.""" + + def _verify_token(token): + return None + + return _verify_token + + +@pytest.fixture() +def set_mock_verify_token(monkeypatch, mock_verify_token): + """Set the verify_token function to a mock that does not raise any exceptions.""" + monkeypatch.setattr( + "app.api.routers.query.verify_token", mock_verify_token + ) + + @pytest.fixture(scope="function") def set_valid_test_federation_nodes(monkeypatch): """Set two correctly formatted federation nodes for a test function (mocks the result of reading/parsing available public and local nodes on startup).""" diff --git a/tests/test_query.py b/tests/test_query.py index c94dd13..7693592 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -12,24 +12,6 @@ def mock_token(): return "Bearer foo" -@pytest.fixture() -def mock_verify_token(): - """Mock a successful token verification that does not raise any exceptions.""" - - def _verify_token(token): - return None - - return _verify_token - - -@pytest.fixture() -def set_mock_verify_token(monkeypatch, mock_verify_token): - """Set the verify_token function to a mock that does not raise any exceptions.""" - monkeypatch.setattr( - "app.api.routers.query.verify_token", mock_verify_token - ) - - @pytest.fixture() def mocked_single_matching_dataset_result(): """Valid aggregate query result for a single matching dataset.""" diff --git a/tests/test_security.py b/tests/test_security.py index 71eb45d..b657168 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -52,18 +52,10 @@ def test_invalid_token_raises_error(invalid_token): [{}, {"Authorization": ""}, {"badheader": "badvalue"}], ) def test_query_with_malformed_auth_header_fails( - test_app, monkeypatch, invalid_auth_header + test_app, set_mock_verify_token, invalid_auth_header ): """Test that a request to the /query route with a missing or malformed authorization header, fails .""" - # Mock verify_token function since we don't want to actually verify the token in this test - def mock_verify_token(token): - return None - - monkeypatch.setattr( - "app.api.routers.query.verify_token", mock_verify_token - ) - response = test_app.get( "/query/", headers=invalid_auth_header,