Skip to content

Commit

Permalink
Static keys for facet options (GSI-333) (#14)
Browse files Browse the repository at this point in the history
* Add FacetOption model

* Update query to return desired structure

Replace the _id field from the groupby with value

Remove sort-by-count for options since it's not even used by frontend

* Add a ValidationError to queryhandler

This will handle cases where there's a document doesn't have all facets

* Update endpoint with new error

* Update tests

* Bump version from 0.2.1 -> 0.3.0 due to API change

---------

Co-authored-by: TheByronHimes <[email protected]>
  • Loading branch information
TheByronHimes and TheByronHimes authored Aug 31, 2023
1 parent 0ad243d commit 2e5f7e3
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 34 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,21 @@ We recommend using the provided Docker container.

A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/mass):
```bash
docker pull ghga/mass:0.2.1
docker pull ghga/mass:0.3.0
```

Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile):
```bash
# Execute in the repo's root dir:
docker build -t ghga/mass:0.2.1 .
docker build -t ghga/mass:0.3.0 .
```

For production-ready deployment, we recommend using Kubernetes, however,
for simple use cases, you could execute the service using docker
on a single server:
```bash
# The entrypoint is preconfigured:
docker run -p 8080:8080 ghga/mass:0.2.1 --help
docker run -p 8080:8080 ghga/mass:0.3.0 --help
```

If you prefer not to use containers, you may install the service from source:
Expand Down
2 changes: 1 addition & 1 deletion mass/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

"""A service for searching metadata artifacts and filtering results."""

__version__ = "0.2.1"
__version__ = "0.3.0"
2 changes: 1 addition & 1 deletion mass/adapters/inbound/fastapi_/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ async def search(
detail="The specified class name is invalid. See "
+ "/rpc/search-options for a list of valid class names.",
) from err
except query_handler.SearchError as err:
except (query_handler.SearchError, query_handler.ValidationError) as err:
raise HTTPException(
status_code=500, detail="An error occurred during search operation"
) from err
Expand Down
17 changes: 3 additions & 14 deletions mass/adapters/outbound/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def pipeline_apply_facets(
"count": {"$sum": 1},
}
},
{"$sort": {"count": -1}}, # sort by descending order
{"$addFields": {"value": "$_id"}}, # rename "_id" to "value" on each option
{"$unset": "_id"},
]

# this is the total number of hits, but pagination can mean only a few are returned
Expand Down Expand Up @@ -102,19 +103,7 @@ def pipeline_project(*, facet_fields: list[models.FacetLabel]) -> JsonObject:
# add a segment for each facet to summarize the options
for facet in facet_fields:
segment["facets"].append(
{
"key": facet.key,
"name": facet.name,
"options": {
"$arrayToObject": {
"$map": {
"input": f"${facet.name}",
"as": "temp",
"in": {"k": "$$temp._id", "v": "$$temp.count"},
}
}
},
}
{"key": facet.key, "name": facet.name, "options": f"${facet.name}"}
)
return {"$project": segment}

Expand Down
11 changes: 8 additions & 3 deletions mass/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
# limitations under the License.

"""Defines dataclasses for holding business-logic data"""
from collections import OrderedDict

from hexkit.custom_types import JsonObject
from pydantic import BaseModel, Field

Expand All @@ -27,10 +25,17 @@ class FacetLabel(BaseModel):
name: str = Field(default="", description="The user-friendly name for the facet")


class FacetOption(BaseModel):
"""Represents the format for an option for a facet"""

value: str = Field(..., description="The text value of the facet option")
count: int = Field(..., description="The number of results matching the facet")


class Facet(FacetLabel):
"""Represents a facet's key, name, and the discovered options for the facet"""

options: OrderedDict[str, int] = Field(
options: list[FacetOption] = Field(
..., description="The list of options for the facet"
)

Expand Down
6 changes: 5 additions & 1 deletion mass/core/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from hexkit.custom_types import JsonObject
from hexkit.providers.mongodb.provider import ResourceNotFoundError
from pydantic import ValidationError

from mass.config import SearchableClassesConfig
from mass.core import models
Expand Down Expand Up @@ -92,6 +93,9 @@ async def handle_query(
except AggregationError as exc:
raise self.SearchError() from exc

query_results = models.QueryResults(**aggregator_results)
try:
query_results = models.QueryResults(**aggregator_results)
except ValidationError as err:
raise self.ValidationError() from err

return query_results
10 changes: 10 additions & 0 deletions mass/ports/inbound/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def __init__(self, resource_id: str):
+ "found in the database."
)

class ValidationError(RuntimeError):
"""Raised when the aggregator results don't pass the model validation"""

def __init__(self):
super().__init__(
"A subset of the query results does not conform to the expected results "
+ "model schema."
)

@abstractmethod
async def delete_resource(self, *, resource_id: str, class_name: str):
"""Delete resource with given ID and class name from the database
Expand Down Expand Up @@ -75,6 +84,7 @@ async def handle_query(
ClassNotConfiguredError - when the class_name parameter does not
match any configured class
SearchError - when the search operation fails
ValidationError - when the results are malformed and fail model validation
"""
...

Expand Down
22 changes: 19 additions & 3 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ components:
title: Name
type: string
options:
additionalProperties:
type: integer
description: The list of options for the facet
items:
$ref: '#/components/schemas/FacetOption'
title: Options
type: object
type: array
required:
- key
- options
Expand All @@ -40,6 +40,22 @@ components:
- key
title: FacetLabel
type: object
FacetOption:
description: Represents the format for an option for a facet
properties:
count:
description: The number of results matching the facet
title: Count
type: integer
value:
description: The text value of the facet option
title: Value
type: string
required:
- value
- count
title: FacetOption
type: object
Filter:
description: Represents a filter used to refine results
properties:
Expand Down
27 changes: 19 additions & 8 deletions tests/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,27 @@ async def test_facets_returned(joint_fixture: JointFixture):
assert facet.key in facet_key_to_name
assert facet.name == facet_key_to_name[facet.key]
if facet.key == "category":
assert len(facet.options) == 1
assert facet.options["hotel"] == 2
hotel_options = [x for x in facet.options if x.value == "hotel"]
assert len(hotel_options) == 1
assert hotel_options[0].count == 2
elif facet.key == "field1":
assert len(facet.options) == 2
assert facet.options["Miami"] == 1
assert facet.options["Denver"] == 1
miami_options = [x for x in facet.options if x.value == "Miami"]
assert len(miami_options) == 1
assert miami_options[0].count == 1

denver_options = [x for x in facet.options if x.value == "Denver"]
assert len(denver_options) == 1
assert denver_options[0].count == 1
else:
assert len(facet.options) == 2
assert facet.options["piano"] == 1
assert facet.options["kitchen"] == 1

piano_options = [x for x in facet.options if x.value == "piano"]
assert len(piano_options) == 1
assert piano_options[0].count == 1

kitchen_options = [x for x in facet.options if x.value == "kitchen"]
assert len(kitchen_options) == 1
assert kitchen_options[0].count == 1


@pytest.mark.asyncio
Expand Down Expand Up @@ -222,7 +233,7 @@ async def test_error_from_malformed_resource(joint_fixture: JointFixture):

await query_handler.load_resource(resource=resource, class_name="DatasetEmbedded")

with pytest.raises(query_handler.SearchError):
with pytest.raises(query_handler.ValidationError):
await query_handler.handle_query(
class_name="DatasetEmbedded", query="", filters=[]
)
Expand Down

0 comments on commit 2e5f7e3

Please sign in to comment.