From 2e5f7e332a0b049f00a4cf77d56dcf79ed871d1d Mon Sep 17 00:00:00 2001 From: Byron Himes Date: Thu, 31 Aug 2023 14:30:03 +0200 Subject: [PATCH] Static keys for facet options (GSI-333) (#14) * Add FacetOption model * Update query to return desired structure Replace the _id field from the groupby with value Remove sort-by-count for options since it's not even used by frontend * Add a ValidationError to queryhandler This will handle cases where there's a document doesn't have all facets * Update endpoint with new error * Update tests * Bump version from 0.2.1 -> 0.3.0 due to API change --------- Co-authored-by: TheByronHimes --- README.md | 6 +++--- mass/__init__.py | 2 +- mass/adapters/inbound/fastapi_/routes.py | 2 +- mass/adapters/outbound/utils.py | 17 +++------------ mass/core/models.py | 11 +++++++--- mass/core/query_handler.py | 6 +++++- mass/ports/inbound/query_handler.py | 10 +++++++++ openapi.yaml | 22 ++++++++++++++++--- tests/test_resources.py | 27 +++++++++++++++++------- 9 files changed, 69 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 9ef4a0b..a29a000 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,13 @@ We recommend using the provided Docker container. A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/mass): ```bash -docker pull ghga/mass:0.2.1 +docker pull ghga/mass:0.3.0 ``` Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile): ```bash # Execute in the repo's root dir: -docker build -t ghga/mass:0.2.1 . +docker build -t ghga/mass:0.3.0 . ``` For production-ready deployment, we recommend using Kubernetes, however, @@ -46,7 +46,7 @@ for simple use cases, you could execute the service using docker on a single server: ```bash # The entrypoint is preconfigured: -docker run -p 8080:8080 ghga/mass:0.2.1 --help +docker run -p 8080:8080 ghga/mass:0.3.0 --help ``` If you prefer not to use containers, you may install the service from source: diff --git a/mass/__init__.py b/mass/__init__.py index 14f9020..857a430 100644 --- a/mass/__init__.py +++ b/mass/__init__.py @@ -15,4 +15,4 @@ """A service for searching metadata artifacts and filtering results.""" -__version__ = "0.2.1" +__version__ = "0.3.0" diff --git a/mass/adapters/inbound/fastapi_/routes.py b/mass/adapters/inbound/fastapi_/routes.py index f1aac9c..b23dbbd 100644 --- a/mass/adapters/inbound/fastapi_/routes.py +++ b/mass/adapters/inbound/fastapi_/routes.py @@ -76,7 +76,7 @@ async def search( detail="The specified class name is invalid. See " + "/rpc/search-options for a list of valid class names.", ) from err - except query_handler.SearchError as err: + except (query_handler.SearchError, query_handler.ValidationError) as err: raise HTTPException( status_code=500, detail="An error occurred during search operation" ) from err diff --git a/mass/adapters/outbound/utils.py b/mass/adapters/outbound/utils.py index 2fc4a18..9aa91f5 100644 --- a/mass/adapters/outbound/utils.py +++ b/mass/adapters/outbound/utils.py @@ -71,7 +71,8 @@ def pipeline_apply_facets( "count": {"$sum": 1}, } }, - {"$sort": {"count": -1}}, # sort by descending order + {"$addFields": {"value": "$_id"}}, # rename "_id" to "value" on each option + {"$unset": "_id"}, ] # this is the total number of hits, but pagination can mean only a few are returned @@ -102,19 +103,7 @@ def pipeline_project(*, facet_fields: list[models.FacetLabel]) -> JsonObject: # add a segment for each facet to summarize the options for facet in facet_fields: segment["facets"].append( - { - "key": facet.key, - "name": facet.name, - "options": { - "$arrayToObject": { - "$map": { - "input": f"${facet.name}", - "as": "temp", - "in": {"k": "$$temp._id", "v": "$$temp.count"}, - } - } - }, - } + {"key": facet.key, "name": facet.name, "options": f"${facet.name}"} ) return {"$project": segment} diff --git a/mass/core/models.py b/mass/core/models.py index 5f27f78..d039c88 100644 --- a/mass/core/models.py +++ b/mass/core/models.py @@ -14,8 +14,6 @@ # limitations under the License. """Defines dataclasses for holding business-logic data""" -from collections import OrderedDict - from hexkit.custom_types import JsonObject from pydantic import BaseModel, Field @@ -27,10 +25,17 @@ class FacetLabel(BaseModel): name: str = Field(default="", description="The user-friendly name for the facet") +class FacetOption(BaseModel): + """Represents the format for an option for a facet""" + + value: str = Field(..., description="The text value of the facet option") + count: int = Field(..., description="The number of results matching the facet") + + class Facet(FacetLabel): """Represents a facet's key, name, and the discovered options for the facet""" - options: OrderedDict[str, int] = Field( + options: list[FacetOption] = Field( ..., description="The list of options for the facet" ) diff --git a/mass/core/query_handler.py b/mass/core/query_handler.py index 79991a6..245de5c 100644 --- a/mass/core/query_handler.py +++ b/mass/core/query_handler.py @@ -18,6 +18,7 @@ from hexkit.custom_types import JsonObject from hexkit.providers.mongodb.provider import ResourceNotFoundError +from pydantic import ValidationError from mass.config import SearchableClassesConfig from mass.core import models @@ -92,6 +93,9 @@ async def handle_query( except AggregationError as exc: raise self.SearchError() from exc - query_results = models.QueryResults(**aggregator_results) + try: + query_results = models.QueryResults(**aggregator_results) + except ValidationError as err: + raise self.ValidationError() from err return query_results diff --git a/mass/ports/inbound/query_handler.py b/mass/ports/inbound/query_handler.py index 5930076..1e7fe17 100644 --- a/mass/ports/inbound/query_handler.py +++ b/mass/ports/inbound/query_handler.py @@ -48,6 +48,15 @@ def __init__(self, resource_id: str): + "found in the database." ) + class ValidationError(RuntimeError): + """Raised when the aggregator results don't pass the model validation""" + + def __init__(self): + super().__init__( + "A subset of the query results does not conform to the expected results " + + "model schema." + ) + @abstractmethod async def delete_resource(self, *, resource_id: str, class_name: str): """Delete resource with given ID and class name from the database @@ -75,6 +84,7 @@ async def handle_query( ClassNotConfiguredError - when the class_name parameter does not match any configured class SearchError - when the search operation fails + ValidationError - when the results are malformed and fail model validation """ ... diff --git a/openapi.yaml b/openapi.yaml index 2a160e7..465c20f 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -14,11 +14,11 @@ components: title: Name type: string options: - additionalProperties: - type: integer description: The list of options for the facet + items: + $ref: '#/components/schemas/FacetOption' title: Options - type: object + type: array required: - key - options @@ -40,6 +40,22 @@ components: - key title: FacetLabel type: object + FacetOption: + description: Represents the format for an option for a facet + properties: + count: + description: The number of results matching the facet + title: Count + type: integer + value: + description: The text value of the facet option + title: Value + type: string + required: + - value + - count + title: FacetOption + type: object Filter: description: Represents a filter used to refine results properties: diff --git a/tests/test_resources.py b/tests/test_resources.py index facf6b9..f324e5e 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -96,16 +96,27 @@ async def test_facets_returned(joint_fixture: JointFixture): assert facet.key in facet_key_to_name assert facet.name == facet_key_to_name[facet.key] if facet.key == "category": - assert len(facet.options) == 1 - assert facet.options["hotel"] == 2 + hotel_options = [x for x in facet.options if x.value == "hotel"] + assert len(hotel_options) == 1 + assert hotel_options[0].count == 2 elif facet.key == "field1": - assert len(facet.options) == 2 - assert facet.options["Miami"] == 1 - assert facet.options["Denver"] == 1 + miami_options = [x for x in facet.options if x.value == "Miami"] + assert len(miami_options) == 1 + assert miami_options[0].count == 1 + + denver_options = [x for x in facet.options if x.value == "Denver"] + assert len(denver_options) == 1 + assert denver_options[0].count == 1 else: assert len(facet.options) == 2 - assert facet.options["piano"] == 1 - assert facet.options["kitchen"] == 1 + + piano_options = [x for x in facet.options if x.value == "piano"] + assert len(piano_options) == 1 + assert piano_options[0].count == 1 + + kitchen_options = [x for x in facet.options if x.value == "kitchen"] + assert len(kitchen_options) == 1 + assert kitchen_options[0].count == 1 @pytest.mark.asyncio @@ -222,7 +233,7 @@ async def test_error_from_malformed_resource(joint_fixture: JointFixture): await query_handler.load_resource(resource=resource, class_name="DatasetEmbedded") - with pytest.raises(query_handler.SearchError): + with pytest.raises(query_handler.ValidationError): await query_handler.handle_query( class_name="DatasetEmbedded", query="", filters=[] )