From 9e5c2ef315c6b10edd6696687e0fb96a937dbeb6 Mon Sep 17 00:00:00 2001 From: Casper Welzel Andersen Date: Mon, 16 Aug 2021 17:49:44 +0200 Subject: [PATCH] Return GET /search as OPTIMADE or not Add possibility to return responses from `GET /search` as an OPTIMADE entry-listing response or as the "standard" gateway query response. In doing this, some things have been optimized. Mainly the logic surrounding retrieving and naming databases in the `POST /search` endpoint. In order to return a query response as a valid OPTIMADE response, a method has been added to the `QueryResource` model, which works similarly to `GET /gateways//structures`, but working from the basis of the finished query. It updates the entry `id`s by prepending the `provider/database/` name, making the `id`s unique. Extra safety has been added to the creation of gateway resources, as the `resource_factory()` now expects a pre-treating `GatewayCreate` entity, where there are no "unknown" database `id`s in the `database_ids` attribute. Unknown meaning that all `database_ids` must be represented in the `databases` attribute. --- optimade_gateway/models/gateways.py | 4 +- optimade_gateway/models/queries.py | 92 ++++++++++++++++++ optimade_gateway/queries/params.py | 15 +++ optimade_gateway/queries/perform.py | 30 +++++- optimade_gateway/queries/process.py | 20 +++- optimade_gateway/routers/databases.py | 4 +- .../routers/gateway/structures.py | 35 +++++-- optimade_gateway/routers/queries.py | 10 +- optimade_gateway/routers/search.py | 97 +++++++++++++------ optimade_gateway/routers/utils.py | 21 +++- 10 files changed, 280 insertions(+), 48 deletions(-) diff --git a/optimade_gateway/models/gateways.py b/optimade_gateway/models/gateways.py index d2412e05..ec3372d0 100644 --- a/optimade_gateway/models/gateways.py +++ b/optimade_gateway/models/gateways.py @@ -140,7 +140,9 @@ class GatewayCreate(EntryResourceCreate, GatewayResourceAttributes): @root_validator def specify_databases(cls, values: dict) -> dict: - """Either `database_ids` or `databases` must be non-empty""" + """Either `database_ids` or `databases` must be non-empty. + Both together is also fine. + """ if not any(values.get(field) for field in ("database_ids", "databases")): raise ValueError("Either 'database_ids' or 'databases' MUST be specified") return values diff --git a/optimade_gateway/models/queries.py b/optimade_gateway/models/queries.py index 49fd14d2..81677d57 100644 --- a/optimade_gateway/models/queries.py +++ b/optimade_gateway/models/queries.py @@ -1,11 +1,14 @@ """Pydantic models/schemas for the Queries resource""" from enum import Enum from typing import Any, Dict, List, Optional, Union +import urllib.parse import warnings from optimade.models import ( EntryResource, EntryResourceAttributes, + EntryResponseMany, + ErrorResponse, OptimadeError, ReferenceResource, ReferenceResponseMany, @@ -19,6 +22,7 @@ from optimade.models.utils import StrictField from optimade.server.query_params import EntryListingQueryParams from pydantic import BaseModel, EmailStr, Field, validator +from starlette.datastructures import URL as StarletteURL from optimade_gateway.models.resources import EntryResourceCreate from optimade_gateway.warnings import SortNotSupported @@ -210,6 +214,94 @@ class QueryResource(EntryResource): ) attributes: QueryResourceAttributes + async def response_as_optimade( + self, + url: Optional[ + Union[urllib.parse.ParseResult, urllib.parse.SplitResult, StarletteURL, str] + ] = None, + ) -> Union[EntryResponseMany, ErrorResponse]: + """Return `attributes.response` as a valid OPTIMADE entry listing response. + + Note, this method disregards the state of the query and will simply return the query results + as they currently are (if there are any at all). + + Parameters: + url: Optionally, update the `meta.query.representation` value with this. + + Returns: + A valid OPTIMADE entry-listing response according to the + [OPTIMADE specification](https://github.com/Materials-Consortia/OPTIMADE/blob/master/optimade.rst#entry-listing-endpoints) + or an error response, if errors were returned or occurred during the query. + + """ + from copy import deepcopy + from optimade.server.routers.utils import meta_values + + async def _update_id( + entry_: Union[EntryResource, Dict[str, Any]], database_provider_: str + ) -> Union[EntryResource, Dict[str, Any]]: + """Internal utility function to prepend the entries' `id` with `provider/database/`.""" + if isinstance(entry_, dict): + _entry = deepcopy(entry_) + _entry["id"] = f"{database_provider_}/{entry_['id']}" + else: + _entry = entry_.copy(deep=True) + _entry.id = f"{database_provider_}/{entry_.id}" + return _entry + + if not self.attributes.response: + # The query as not yet been initiated + return ErrorResponse( + errors=[ + { + "detail": ( + "Can not return as a valid OPTIMADE response as the query has not yet " + "been initialized." + ), + "id": "OPTIMADE_GATEWAY_QUERY_NOT_INITIALIZED", + } + ], + meta=meta_values( + url=url or f"/queries/{self.id}?", + data_returned=0, + data_available=0, + more_data_available=False, + ), + ) + + meta_ = self.attributes.response.meta + if url: + meta_ = meta_.dict(exclude_unset=True) + for repeated_key in ( + "query", + "api_version", + "time_stamp", + "provider", + "implementation", + ): + meta_.pop(repeated_key, None) + meta_ = meta_values(url=url, **meta_) + + # Error response + if self.attributes.response.errors: + return ErrorResponse( + errors=self.attributes.response.errors, + meta=meta_, + ) + + # Data response + results = [] + for database_provider, entries in self.attributes.response.data.items(): + results.extend( + [await _update_id(entry, database_provider) for entry in entries] + ) + + return self.attributes.endpoint.get_response_model()( + data=results, + meta=meta_, + links=self.attributes.response.links, + ) + class QueryCreate(EntryResourceCreate, QueryResourceAttributes): """Model for creating new Query resources in the MongoDB""" diff --git a/optimade_gateway/queries/params.py b/optimade_gateway/queries/params.py index 96abc6b1..3c4ee24f 100644 --- a/optimade_gateway/queries/params.py +++ b/optimade_gateway/queries/params.py @@ -35,6 +35,11 @@ class in `optimade´, which defines the standard entry listing endpoint query pa time, a redirection will still be performed, but to a zero-results page, which can be refreshed to get the finished query (once it has finished). + as_optimade (bool): Return the response as a standard OPTIMADE entry listing endpoint + response. Otherwise, the response will be based on the + [`QueriesResponseSingle`][optimade_gateway.models.responses.QueriesResponseSingle] + model. + """ def __init__( @@ -72,8 +77,18 @@ def __init__( "which can be refreshed to get the finished query (once it has finished)." ), ), + as_optimade: bool = Query( + False, + description=( + "Return the response as a standard OPTIMADE entry listing endpoint response. " + "Otherwise, the response will be based on the " + "[`QueriesResponseSingle`][optimade_gateway.models.responses.QueriesResponseSingle]" + " model." + ), + ) ) -> None: self.database_ids = database_ids self.optimade_urls = optimade_urls self.endpoint = endpoint self.timeout = timeout + self.as_optimade = as_optimade diff --git a/optimade_gateway/queries/perform.py b/optimade_gateway/queries/perform.py index 3176fc55..e4ff7c42 100644 --- a/optimade_gateway/queries/perform.py +++ b/optimade_gateway/queries/perform.py @@ -288,6 +288,34 @@ def db_find( try: response = ErrorResponse(**response) except ValidationError as exc: + # If it's an error and `meta` is missing, it is not a valid OPTIMADE response, + # but this happens a lot, and is therefore worth having an edge-case for. + if "errors" in response: + errors = list(response["errors"]) + errors.append( + { + "detail": ( + f"Could not pass response from {url} as either a " + f"{response_model.__name__!r} or 'ErrorResponse'. " + f"ValidationError: {exc}" + ), + "id": "OPTIMADE_GATEWAY_DB_FINDS_MANY_VALIDATIONERRORS", + } + ) + return ( + ErrorResponse( + errors=errors, + meta={ + "query": { + "representation": f"/{endpoint.strip('/')}?{query_params}" + }, + "api_version": __api_version__, + "more_data_available": False, + }, + ), + get_resource_attribute(database, "id"), + ) + return ( ErrorResponse( errors=[ @@ -297,7 +325,7 @@ def db_find( f"{response_model.__name__!r} or 'ErrorResponse'. " f"ValidationError: {exc}" ), - "id": "OPTIMADE_GATEWAY_DB_FIND_MANY_VALIDATIONERROR", + "id": "OPTIMADE_GATEWAY_DB_FINDS_MANY_VALIDATIONERRORS", } ], meta={ diff --git a/optimade_gateway/queries/process.py b/optimade_gateway/queries/process.py index 03eac1ce..6691afea 100644 --- a/optimade_gateway/queries/process.py +++ b/optimade_gateway/queries/process.py @@ -10,6 +10,7 @@ OptimadeError, ) +from optimade_gateway.common.config import CONFIG from optimade_gateway.common.utils import get_resource_attribute from optimade_gateway.models import GatewayResource, QueryResource from optimade_gateway.queries.utils import update_query @@ -75,10 +76,21 @@ async def process_db_response( meta_error = error.meta.dict() meta_error.update( { - "optimade_gateway": { - "gateway": gateway, - "source_database_id": database_id, - } + f"_{CONFIG.provider.prefix}_source_gateway": { + "id": gateway.id, + "type": gateway.type, + "links": {"self": gateway.links.self}, + }, + f"_{CONFIG.provider.prefix}_source_database": { + "id": database_id, + "type": "links", + "links": { + "self": ( + str(gateway.links.self).split("gateways")[0] + + f"databases/{database_id}" + ) + }, + }, } ) error.meta = Meta(**meta_error) diff --git a/optimade_gateway/routers/databases.py b/optimade_gateway/routers/databases.py index a00e9cd9..92afe273 100644 --- a/optimade_gateway/routers/databases.py +++ b/optimade_gateway/routers/databases.py @@ -128,7 +128,9 @@ async def get_database( ) = await DATABASES_COLLECTION.find(params=params) if fields or include_fields and result is not None: - result = handle_response_fields(result, fields, include_fields)[0] + result = handle_response_fields(result, fields, include_fields) + + result = result[0] if data_returned else None return DatabasesResponseSingle( links=ToplevelLinks(next=None), diff --git a/optimade_gateway/routers/gateway/structures.py b/optimade_gateway/routers/gateway/structures.py index 6e6d8b85..1e802184 100644 --- a/optimade_gateway/routers/gateway/structures.py +++ b/optimade_gateway/routers/gateway/structures.py @@ -86,8 +86,14 @@ async def get_structures( if isinstance(gateway_response, ErrorResponse): for error in gateway_response.errors: if error.status: - response.status_code = int(error.status) - break + for part in error.status.split(" "): + try: + response.status_code = int(part) + break + except ValueError: + pass + if response.status_code and response.status_code >= 300: + break else: response.status_code = 500 return gateway_response @@ -134,6 +140,7 @@ async def get_single_structure( Example: `GET /gateways/some_gateway/structures/some_database/some_structure`. """ + from optimade_gateway.common.config import CONFIG from optimade_gateway.models import GatewayResource from optimade_gateway.queries import db_find from optimade_gateway.routers.utils import get_valid_resource @@ -182,10 +189,16 @@ async def get_single_structure( meta = error.meta.dict() meta.update( { - "optimade_gateway": { - "gateway": gateway, - "source_database_id": database.id, - } + f"_{CONFIG.provider.prefix}_source_gateway": { + "id": gateway.id, + "type": gateway.type, + "links": {"self": gateway.links.self}, + }, + f"_{CONFIG.provider.prefix}_source_database": { + "id": database.id, + "type": database.type, + "links": {"self": database.links.self}, + }, } ) error.meta = Meta(**meta) @@ -239,8 +252,14 @@ async def get_single_structure( if isinstance(gateway_response, ErrorResponse): for error in errors or gateway_response.errors: if error.status: - response.status_code = int(error.status) - break + for part in error.status.split(" "): + try: + response.status_code = int(part) + break + except ValueError: + pass + if response.status_code and response.status_code >= 300: + break else: response.status_code = 500 diff --git a/optimade_gateway/routers/queries.py b/optimade_gateway/routers/queries.py index 9cbd7cfa..3d04e30c 100644 --- a/optimade_gateway/routers/queries.py +++ b/optimade_gateway/routers/queries.py @@ -136,8 +136,14 @@ async def get_query( if query.attributes.response.errors: for error in query.attributes.response.errors: if error.status: - response.status_code = int(error.status) - break + for part in error.status.split(" "): + try: + response.status_code = int(part) + break + except ValueError: + pass + if response.status_code and response.status_code >= 300: + break else: response.status_code = 500 diff --git a/optimade_gateway/routers/search.py b/optimade_gateway/routers/search.py index 8fc97e8d..08706795 100644 --- a/optimade_gateway/routers/search.py +++ b/optimade_gateway/routers/search.py @@ -11,13 +11,13 @@ from fastapi import ( APIRouter, Depends, - HTTPException, Request, Response, status, ) from fastapi.responses import RedirectResponse -from optimade.server.exceptions import BadRequest +from optimade.models.responses import EntryResponseMany, ErrorResponse +from optimade.server.exceptions import BadRequest, InternalServerError from optimade.models import ( LinksResource, LinksResourceAttributes, @@ -109,35 +109,60 @@ async def post_search(request: Request, search: Search) -> QueriesResponseSingle # Ensure all URLs are `pydantic.AnyUrl`s if not all([isinstance(_, AnyUrl) for _ in base_urls]): - raise HTTPException( - status_code=500, - detail="Could unexpectedly not get all base URLs as `pydantic.AnyUrl`s.", + raise InternalServerError( + "Could unexpectedly not get all base URLs as `pydantic.AnyUrl`s." ) - gateway = GatewayCreate( - databases=[ - LinksResource( - id=( - f"{url.user + '@' if url.user else ''}{url.host}" - f"{':' + url.port if url.port else ''}" - f"{url.path.rstrip('/') if url.path else ''}" - ).replace(".", "__"), - type="links", - attributes=LinksResourceAttributes( - name=( + databases = await DATABASES_COLLECTION.get_multiple( + filter={"base_url": {"$in": await clean_python_types(base_urls)}} + ) + if len(databases) == len(base_urls): + # At this point it is expected that the list of databases in `databases` + # is a complete set of databases requested. + gateway = GatewayCreate(databases=databases) + elif len(databases) < len(base_urls): + # There are unregistered databases + current_base_urls = set( + [ + get_resource_attribute(database, "attributes.base_url") + for database in databases + ] + ) + databases.extend( + [ + LinksResource( + id=( f"{url.user + '@' if url.user else ''}{url.host}" f"{':' + url.port if url.port else ''}" f"{url.path.rstrip('/') if url.path else ''}" + ).replace(".", "__"), + type="links", + attributes=LinksResourceAttributes( + name=( + f"{url.user + '@' if url.user else ''}{url.host}" + f"{':' + url.port if url.port else ''}" + f"{url.path.rstrip('/') if url.path else ''}" + ), + description="", + base_url=url, + link_type=LinkType.CHILD, + homepage=None, ), - description="", - base_url=url, - link_type=LinkType.CHILD, - homepage=None, - ), - ) - for url in base_urls - ] - ) + ) + for url in base_urls - current_base_urls + ] + ) + else: + LOGGER.error( + "Found more database entries in MongoDB than then number of passed base URLs. " + "This suggests ambiguity in the base URLs of databases stored in MongoDB.\n" + " base_urls: %s\n databases %s", + base_urls, + databases, + ) + raise InternalServerError("Unambiguous base URLs. See logs for more details.") + + gateway = GatewayCreate(databases=databases) gateway, created = await resource_factory(gateway) if created: @@ -172,7 +197,7 @@ async def post_search(request: Request, search: Search) -> QueriesResponseSingle @ROUTER.get( "/search", - response_model=QueriesResponseSingle, + response_model=Union[QueriesResponseSingle, EntryResponseMany, ErrorResponse], response_model_exclude_defaults=False, response_model_exclude_none=False, response_model_exclude_unset=True, @@ -184,7 +209,7 @@ async def get_search( response: Response, search_params: SearchQueryParams = Depends(), entry_params: EntryListingQueryParams = Depends(), -) -> Union[QueriesResponseSingle, RedirectResponse]: +) -> Union[QueriesResponseSingle, EntryResponseMany, ErrorResponse, RedirectResponse]: """`GET /search` Coordinate a new OPTIMADE query in multiple databases through a gateway: @@ -202,7 +227,10 @@ async def get_search( If the timeout time is reached and the query has not yet finished, the user is redirected to the specific URL for the query. - In the future, this might introduce a mode to return a response as a standard OPTIMADE response. + If the `as_optimade` query parameter is `True`, the response will be parseable as a standard + OPTIMADE entry listing endpoint like, e.g., `/structures`. + For more information see the + [OPTIMADE specification](https://github.com/Materials-Consortia/OPTIMADE/blob/master/optimade.rst#entry-listing-endpoints). """ from time import time @@ -245,11 +273,20 @@ async def get_search( if query.attributes.response.errors: for error in query.attributes.response.errors: if error.status: - response.status_code = int(error.status) - break + for part in error.status.split(" "): + try: + response.status_code = int(part) + break + except ValueError: + pass + if response.status_code and response.status_code >= 300: + break else: response.status_code = 500 + if search_params.as_optimade: + return await query.response_as_optimade(url=request.url) + return QueriesResponseSingle( links=ToplevelLinks(next=None), data=query, diff --git a/optimade_gateway/routers/utils.py b/optimade_gateway/routers/utils.py index 7518e8f0..ace50719 100644 --- a/optimade_gateway/routers/utils.py +++ b/optimade_gateway/routers/utils.py @@ -149,6 +149,13 @@ async def resource_factory( [`databases.attributes.base_url`](https://www.optimade.org/optimade-python-tools/api_reference/models/links/#optimade.models.links.LinksResourceAttributes.base_url) element values, when compared with the `create_resource`. + !!! important + The `database_ids` attribute **must not** contain values that are not also included in the + `databases` attribute, in the form of the IDs for the individual databases. + If this should be the case an + [`OptimadeGatewayError`][optimade_gateway.common.exceptions.OptimadeGatewayError] will be + thrown. + === "Queries" The `gateway_id`, `query_parameters`, and `endpoint` fields are collectively considered to define uniqueness for a [`QueryResource`][optimade_gateway.models.queries.QueryResource] @@ -195,10 +202,22 @@ async def resource_factory( GATEWAYS_COLLECTION as RESOURCE_COLLECTION, ) + # One MUST have taken care of database_ids prior to calling `resource_factory()` + database_attr_ids = set([_.id for _ in create_resource.databases or []]) + unknown_ids = set() + for database_id in create_resource.database_ids: + if database_id not in database_attr_ids: + unknown_ids.add(database_id) + if unknown_ids: + raise OptimadeGatewayError( + "When using `resource_factory()` for `GatewayCreate`, `database_ids` MUST not " + f"include unknown IDs. Passed unknown IDs: {unknown_ids}" + ) + mongo_query = { "databases": {"$size": len(create_resource.databases)}, "databases.attributes.base_url": { - "$all": [_.attributes.base_url for _ in create_resource.databases] + "$all": [_.attributes.base_url for _ in create_resource.databases or []] }, } elif isinstance(create_resource, QueryCreate):