Skip to content

Commit

Permalink
Calculate the proper counts for facets (#32)
Browse files Browse the repository at this point in the history
* Calculate the proper counts for facets

* Bump version
  • Loading branch information
Cito authored Aug 8, 2024
1 parent 2a8c48a commit f6dc92d
Show file tree
Hide file tree
Showing 19 changed files with 269 additions and 138 deletions.
2 changes: 1 addition & 1 deletion .pyproject_generation/pyproject_custom.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "mass"
version = "3.0.2"
version = "3.0.3"
description = "Metadata Artifact Search Service - A service for searching metadata artifacts and filtering results."
dependencies = [
"typer>=0.12",
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,21 @@ We recommend using the provided Docker container.

A pre-built version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/mass):
```bash
docker pull ghga/mass:3.0.2
docker pull ghga/mass:3.0.3
```

Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile):
```bash
# Execute in the repo's root dir:
docker build -t ghga/mass:3.0.2 .
docker build -t ghga/mass:3.0.3 .
```

For production-ready deployment, we recommend using Kubernetes. However,
for simple use cases, you could execute the service using docker
on a single server:
```bash
# The entrypoint is preconfigured:
docker run -p 8080:8080 ghga/mass:3.0.2 --help
docker run -p 8080:8080 ghga/mass:3.0.3 --help
```

If you prefer not to use containers, you may install the service from source:
Expand Down
38 changes: 19 additions & 19 deletions lock/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -970,25 +970,25 @@ urllib3==2.2.2 \
# docker
# requests
# testcontainers
uv==0.2.33 \
--hash=sha256:02ed3b62049ea1f40404d33a02a69d3808f3b0e001e5565938804ca76beafbc4 \
--hash=sha256:181ccdb22058465c6690dca22e506fec234dcae5bcbe6389fd5330971910250e \
--hash=sha256:2fe685e73f198b2630e08e89ece0d858d58646a038a6d9cb2b06126dcca856d1 \
--hash=sha256:37924a3b502117fd74b1ddf08e9288b397da7895dd8cad46005422eefffe6e88 \
--hash=sha256:42b65bbf78b5186a40ea4423fab030fb01c9354432a7c0a3b5db67a3f4e246c5 \
--hash=sha256:48cfdb8efd237eb00086b8f0d0dc7281e517fd8afb55f698538087379bf45a8d \
--hash=sha256:676231a93001db051ecf98cb380f2d48d3f6b95add66ff4546073e30911a737a \
--hash=sha256:714351e10f27e41052897e26cd4acfe66e35250903fdc20f762d29461cf3ec4a \
--hash=sha256:73031edf35195289f02f6f1a603c512b57c8f921cb62fd442dbb63fd2a77c801 \
--hash=sha256:744eb9743e4b850af5de9f3c727d84a60a763ae0f4f5183dcdfa8a065879694d \
--hash=sha256:86f6237102deedbb17201804eb821833c5bad3f551f16f2695ae2b85e9f066de \
--hash=sha256:8eba96cbff1bc492c270e143235b39cfbe6dddebd842228ea14124d6b7d944e8 \
--hash=sha256:90b74796ce75594e63345c8e090fbac832a8f6db876691ae2b57b0b8d6011559 \
--hash=sha256:93c45d07ab440c03f2796540d646c34e58b4707feebfb9f70ded1306830408b0 \
--hash=sha256:ace6cb8383203fdfeaf8dbbc1ecb3bb945e040ca10558e233b63c84af82f6636 \
--hash=sha256:dbe497a1a16be9569d42cf4a7562e14bb3c3d9b33cc65e59095f1c3f8ab983df \
--hash=sha256:ede51de6795f9571b182c104d6078690c3a10b3fbe6fcf414b2e38c8d394e575 \
--hash=sha256:fb6f282ac92fbc05e82fa3a93e6515ad5b044e8c845ba16d815b5889799eebd1
uv==0.2.34 \
--hash=sha256:0a6a9a15adde8ef1faa7ac1289fcf622c5ca630d74a5274281bb149e85205105 \
--hash=sha256:0c8e746b674cc854113077859cab794ec92ea75d572544b8e6c73298a86183b8 \
--hash=sha256:102c033cd23c89cbb42b18be376ce3cf66123b308cae48718a6197764d1c41b3 \
--hash=sha256:19f2ff55b467eea0c53c394108188ba4989d25e634eb0362115e8137d49e47a2 \
--hash=sha256:31623af3f21fd5faf9e9640d3f22c181218477af3c817198eef313ee6c33164d \
--hash=sha256:3b6354f1bb83a2db56c1f062206bb0e697d31244f4ff419a6ff6114016527e45 \
--hash=sha256:4ce15beeba44e4ea052d83c89eb4ea3586dfd68bab039c5cdf44b90fbfc5698d \
--hash=sha256:5a40d5365e2790537005903470d31bc4c0fcde08f3f3969a4eefcdcded7c7965 \
--hash=sha256:5ff72659a05f02f80180c85b9803c69cc6cc66da74478cf2b1516e929b9ac8a0 \
--hash=sha256:6e10be9666ba9572ad76e0dcf0cae41c821b4475be0e3963a0a5911838037a96 \
--hash=sha256:8f79f157e4eed3beff69b13bf4b0b3549b2f1b8a22c4559bb29ab2f3599fcd0b \
--hash=sha256:cdcf3f28748a252aee5308312a6d265b42dd1033079b6b49ea2c548e95bd3341 \
--hash=sha256:d3b74ccc24f25519507cb337cb8b4d5bfc6f0e6476ff0d30bb7c62c9a0c9dd10 \
--hash=sha256:da1ff6e1125e068794ef6dfbd9f209fad11c8882c01e47add6cd524e1772c292 \
--hash=sha256:f30194d3c50446003033ec5ce65ddcfe6961aadafacd8ff6b958a4bc4596003b \
--hash=sha256:f6bdae3db4deb200d1ca4607f51659747c335cce6678209969e0e196167b2760 \
--hash=sha256:fb62e0e338b6fae4b9ef7fdbc2a82235ecdee3797f82f6eecbfc61c998e41701 \
--hash=sha256:fbe5730caed03dc4aa8dd3aa9f2317df82396b0913a79b1a2793d7595e65478e
# via -r lock/requirements-dev-template.in
uvicorn==0.29.0 \
--hash=sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de \
Expand Down
2 changes: 1 addition & 1 deletion openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ info:
name: Apache 2.0
summary: A service for searching metadata artifacts and filtering results.
title: Metadata Artifact Search Service
version: 3.0.2
version: 3.0.3
openapi: 3.1.0
paths:
/health:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
"Intended Audience :: Developers",
]
name = "mass"
version = "3.0.2"
version = "3.0.3"
description = "Metadata Artifact Search Service - A service for searching metadata artifacts and filtering results."
dependencies = [
"typer>=0.12",
Expand Down
6 changes: 3 additions & 3 deletions src/mass/adapters/outbound/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ def __init__(self, *, collection):
async def aggregate( # noqa: PLR0913, D102
self,
*,
selected_fields: list[models.FieldLabel],
facet_fields: list[models.FieldLabel],
query: str,
filters: list[models.Filter],
facet_fields: list[models.FieldLabel],
selected_fields: list[models.FieldLabel],
sorting_parameters: list[models.SortingParameter],
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter],
) -> JsonObject:
# don't carry out aggregation if the collection is empty
if not await self._collection.find_one():
Expand Down
13 changes: 6 additions & 7 deletions src/mass/adapters/outbound/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,11 @@ def pipeline_facet_sort_and_paginate(
{
"$group": {
"_id": {"$getField": {"field": field, "input": path}},
"count": {"$sum": 1},
"uniqueIds": {"$addToSet": "$_id"},
}
},
{
"$addFields": {"value": "$_id"}
}, # rename "_id" to "value" on each option
{"$match": {"_id": {"$ne": None}}},
{"$addFields": {"value": "$_id", "count": {"$size": "$uniqueIds"}}},
{"$unset": "_id"},
{"$sort": {"value": 1}},
)
Expand Down Expand Up @@ -169,13 +168,13 @@ def pipeline_project(*, facet_fields: list[models.FieldLabel]) -> JsonObject:

def build_pipeline( # noqa: PLR0913
*,
query: str,
filters: list[models.Filter],
facet_fields: list[models.FieldLabel],
selected_fields: list[models.FieldLabel],
query: str,
filters: list[models.Filter],
sorting_parameters: list[models.SortingParameter],
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter],
) -> list[JsonObject]:
"""Build aggregation pipeline based on query"""
pipeline: list[JsonObject] = []
Expand Down
10 changes: 6 additions & 4 deletions src/mass/core/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,19 @@ async def delete_resource(self, *, resource_id: str, class_name: str) -> None:
except ResourceNotFoundError as err:
raise self.ResourceNotFoundError(resource_id=resource_id) from err

async def handle_query( # noqa: D102, PLR0913
async def handle_query( # noqa: C901, D102, PLR0913
self,
*,
class_name: str,
query: str,
filters: list[models.Filter],
query: str = "",
filters: list[models.Filter] | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
) -> models.QueryResults:
# set empty list if not provided
if filters is None:
filters = []
if not sorting_parameters:
if query:
sorting_parameters = [
Expand Down
6 changes: 3 additions & 3 deletions src/mass/ports/inbound/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ async def handle_query( # noqa: PLR0913
self,
*,
class_name: str,
query: str,
filters: list[models.Filter],
query: str = "",
filters: list[models.Filter] | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
) -> models.QueryResults:
"""Processes a query
Expand Down
6 changes: 3 additions & 3 deletions src/mass/ports/outbound/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ class AggregatorPort(ABC):
async def aggregate( # noqa: PLR0913
self,
*,
selected_fields: list[models.FieldLabel],
facet_fields: list[models.FieldLabel],
query: str,
filters: list[models.Filter],
facet_fields: list[models.FieldLabel],
selected_fields: list[models.FieldLabel],
sorting_parameters: list[models.SortingParameter],
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter],
) -> JsonObject:
"""Applies an aggregation pipeline to a mongodb collection"""
...
Expand Down
6 changes: 3 additions & 3 deletions tests/fixtures/joint.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ def recreate_mongodb_indexes(self) -> None:
async def handle_query(
self,
class_name: str,
query: str,
filters: list[models.Filter],
query: str = "",
filters: list[models.Filter] | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
skip: int = 0,
limit: int | None = None,
sorting_parameters: list[models.SortingParameter] | None = None,
) -> models.QueryResults:
"""Handle a query."""
return await self._query_handler.handle_query(
Expand Down
18 changes: 7 additions & 11 deletions tests/fixtures/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,37 +21,29 @@ searchable_classes:
description: Dataset with embedded references.
facetable_fields:
- key: category
name: Category
- key: city
name: Field 1
- key: "object.type"
name: Object Type
- key: object.type
selected_fields:
- key: id_
name: ID
- key: type
name: Location Type
- key: "object.type"
name: Object Type
name: Location Type
- key: object.type
EmptyCollection:
description: An empty collection to test the index creation.
facetable_fields:
- key: fun_fact
name: Fun Fact
selected_fields: []
SortingTests:
description: Data for testing sorting functionality.
facetable_fields:
- key: field
name: Field
selected_fields: []
RelevanceTests:
description: Data for testing sorting by relevance.
facetable_fields:
- key: field
name: Field
- key: data
name: Data
selected_fields: []
FilteringTests:
description: Data for testing filtering using single and multi-valued fields.
Expand All @@ -61,6 +53,10 @@ searchable_classes:
name: Food
- key: friends.name
name: Friend
- key: items.type
name: Item
- key: items.color
name: Item color
- key: special.features.fur.color
name: Fur color
selected_fields:
Expand Down
69 changes: 69 additions & 0 deletions tests/fixtures/test_data/FilteringTests.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@
}
],
"id_": "1",
"items": [
{
"color": "gold",
"type": "coin"
},
{
"color": "silver",
"type": "pistol"
},
{
"color": "red",
"type": "shirt"
}
],
"name": "Jack",
"special": {
"features": [
Expand Down Expand Up @@ -42,6 +56,20 @@
}
],
"id_": "2",
"items": [
{
"color": "pink",
"type": "collar"
},
{
"color": "pink",
"type": "shirt"
},
{
"color": "pink",
"type": "bowl"
}
],
"name": "Bruiser",
"special": {
"features": [
Expand Down Expand Up @@ -71,6 +99,32 @@
}
],
"id_": "3",
"items": [
{
"color": "blue",
"type": "collar"
},
{
"color": "green",
"type": "collar"
},
{
"color": "brown",
"type": "collar"
},
{
"color": "gold",
"type": "collar"
},
{
"color": "blue",
"type": "shirt"
},
{
"color": "white",
"type": "bowl"
}
],
"name": "Lady",
"special": {
"features": [
Expand Down Expand Up @@ -109,6 +163,20 @@
}
],
"id_": "4",
"items": [
{
"color": "red",
"type": "bowl"
},
{
"color": "white",
"type": "bowl"
},
{
"color": "yellow",
"type": "bowl"
}
],
"name": "Garfield",
"special": {
"features": [
Expand Down Expand Up @@ -144,6 +212,7 @@
}
],
"id_": "5",
"items": [],
"name": "Flipper",
"special": {
"features": [
Expand Down
Loading

0 comments on commit f6dc92d

Please sign in to comment.