Skip to content

Commit

Permalink
Add parameters for sorting options (GSI-329) (#15)
Browse files Browse the repository at this point in the history
* Add sorting spec parameters

* Pass sorting parameters through service chain

* Apply sorting in pipeline

* Always include id_ in sorting parameters

* Make sorting_parameters optional in QueryHandler

* Rename facet stage and run it every time

* Add validator to prevent duplicate sort fields

* Add tests for the sorting parameters

* Bump version from 0.3.0 -> 0.3.1

* Change intenum to string enum

* Change sort_field/sort_order to field/order

* Update openapi

* Update tests

* Tweak model creation in tests

---------

Co-authored-by: TheByronHimes <[email protected]>
  • Loading branch information
TheByronHimes and TheByronHimes authored Sep 6, 2023
1 parent 2e5f7e3 commit 1598193
Show file tree
Hide file tree
Showing 14 changed files with 338 additions and 15 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,21 @@ We recommend using the provided Docker container.

A pre-built version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/mass):
```bash
docker pull ghga/mass:0.3.0
docker pull ghga/mass:0.3.1
```

Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile):
```bash
# Execute in the repo's root dir:
docker build -t ghga/mass:0.3.0 .
docker build -t ghga/mass:0.3.1 .
```

For production-ready deployment, we recommend using Kubernetes, however,
for simple use cases, you could execute the service using docker
on a single server:
```bash
# The entrypoint is preconfigured:
docker run -p 8080:8080 ghga/mass:0.3.0 --help
docker run -p 8080:8080 ghga/mass:0.3.1 --help
```

If you prefer not to use containers, you may install the service from source:
Expand Down
2 changes: 1 addition & 1 deletion mass/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

"""A service for searching metadata artifacts and filtering results."""

__version__ = "0.3.0"
__version__ = "0.3.1"
16 changes: 14 additions & 2 deletions mass/adapters/inbound/fastapi_/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
"""Models only used by the API"""
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, validator

from mass.core.models import Filter
from mass.core.models import Filter, SortingParameter, SortOrder


class SearchParameters(BaseModel):
Expand All @@ -37,3 +37,15 @@ class SearchParameters(BaseModel):
limit: Optional[int] = Field(
default=None, description="Limit the results to this number"
)
sorting_parameters: list[SortingParameter] = Field(
default=[SortingParameter(field="id_", order=SortOrder.ASCENDING)],
description=("Collection of sorting parameters used to refine search results"),
)

@validator("sorting_parameters")
@classmethod
def no_duplicate_fields(
    cls, parameters: list[SortingParameter]
) -> list[SortingParameter]:
    """Check for duplicate fields in sorting parameters

    Args:
        parameters: The sorting parameters supplied with the search request.

    Returns:
        The unchanged list of sorting parameters.

    Raises:
        ValueError: If two or more sorting parameters refer to the same field.
    """
    all_sort_fields = [param.field for param in parameters]
    if len(set(all_sort_fields)) < len(all_sort_fields):
        raise ValueError("Sorting parameters cannot contain duplicate fields")

    # A pydantic validator must return the value: whatever is returned here is
    # stored on the model. Without this return the field would be set to None
    # and the sorting requested by the client would be silently discarded.
    return parameters
1 change: 1 addition & 0 deletions mass/adapters/inbound/fastapi_/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ async def search(
filters=parameters.filters,
skip=parameters.skip,
limit=parameters.limit,
sorting_parameters=parameters.sorting_parameters,
)
except query_handler.ClassNotConfiguredError as err:
raise HTTPException(
Expand Down
2 changes: 2 additions & 0 deletions mass/adapters/outbound/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ async def aggregate(
facet_fields: list[models.FacetLabel],
skip: int = 0,
limit: Optional[int] = None,
sorting_parameters: list[models.SortingParameter],
) -> JsonObject:
# don't carry out aggregation if the collection is empty
if not await self._collection.find_one():
Expand All @@ -58,6 +59,7 @@ async def aggregate(
facet_fields=facet_fields,
skip=skip,
limit=limit,
sorting_parameters=sorting_parameters,
)

try:
Expand Down
32 changes: 23 additions & 9 deletions mass/adapters/outbound/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

from mass.core import models

SORT_ORDER_CONVERSION = {"ascending": 1, "descending": -1}


def pipeline_match_text_search(*, query: str) -> JsonObject:
"""Build text search segment of aggregation pipeline"""
Expand Down Expand Up @@ -53,8 +55,12 @@ def pipeline_match_filters_stage(*, filters: list[models.Filter]) -> JsonObject:
return {"$match": segment}


def pipeline_apply_facets(
*, facet_fields: list[models.FacetLabel], skip: int, limit: Optional[int] = None
def pipeline_facet_sort_and_paginate(
*,
facet_fields: list[models.FacetLabel],
skip: int,
limit: Optional[int] = None,
sorts: JsonObject,
):
"""Uses a list of facetable property names to build the subquery for faceting"""
segment: dict[str, list[JsonObject]] = {}
Expand All @@ -80,9 +86,9 @@ def pipeline_apply_facets(

# rename the ID field to id_ to match our model, then apply the requested sort
segment["hits"] = [
{"$sort": {"_id": 1}},
{"$addFields": {"id_": "$_id"}},
{"$unset": "_id"},
{"$sort": sorts},
]

# apply skip and limit for pagination
Expand Down Expand Up @@ -115,6 +121,7 @@ def build_pipeline(
facet_fields: list[models.FacetLabel],
skip: int = 0,
limit: Optional[int] = None,
sorting_parameters: list[models.SortingParameter],
) -> list[JsonObject]:
"""Build aggregation pipeline based on query"""
pipeline: list[JsonObject] = []
Expand All @@ -124,18 +131,25 @@ def build_pipeline(
if query:
pipeline.append(pipeline_match_text_search(query=query))

# sort initial results
pipeline.append({"$sort": {"_id": 1}})

# apply filters
if filters:
pipeline.append(pipeline_match_filters_stage(filters=filters))

# turn the sorting parameters into a formatted pipeline $sort
sorts = {}
for param in sorting_parameters:
sort_order = SORT_ORDER_CONVERSION[param.order.value]
sorts[param.field] = sort_order

# define facets from preliminary results and reshape data
if facet_fields:
pipeline.append(
pipeline_apply_facets(facet_fields=facet_fields, skip=skip, limit=limit)
pipeline.append(
pipeline_facet_sort_and_paginate(
facet_fields=facet_fields,
skip=skip,
limit=limit,
sorts=sorts,
)
)

# transform data one more time to match models
pipeline.append(pipeline_project(facet_fields=facet_fields))
Expand Down
21 changes: 21 additions & 0 deletions mass/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# limitations under the License.

"""Defines dataclasses for holding business-logic data"""
from enum import Enum

from hexkit.custom_types import JsonObject
from pydantic import BaseModel, Field

Expand Down Expand Up @@ -71,3 +73,22 @@ class QueryResults(BaseModel):
facets: list[Facet] = Field(default=[], description="Contains the faceted fields")
count: int = Field(default=0, description="The number of results found")
hits: list[Resource] = Field(default=[], description="The search results")


class SortOrder(Enum):
    """Represents the possible sorting orders"""

    # The string values are the literals listed for the SortOrder schema in
    # openapi.yaml and the keys of SORT_ORDER_CONVERSION in
    # mass/adapters/outbound/utils.py (mapped there to 1 and -1).
    ASCENDING = "ascending"
    DESCENDING = "descending"


class SortingParameter(BaseModel):
    """Represents a combination of a field to sort and the sort order"""

    # Required (no default): the name of the field to sort the results by.
    field: str = Field(
        ...,
        description=("Which field to sort results by."),
    )
    # Optional: falls back to ascending order when not specified.
    # NOTE(review): the description below still mentions "sort_field" although
    # the attribute is named `field`; the same text appears in openapi.yaml, so
    # both would need to be updated together.
    order: SortOrder = Field(
        default=SortOrder.ASCENDING, description="Sort order to apply to sort_field"
    )
12 changes: 12 additions & 0 deletions mass/core/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,18 @@ async def handle_query(
filters: list[models.Filter],
skip: int = 0,
limit: Optional[int] = None,
sorting_parameters: Optional[list[models.SortingParameter]] = None,
) -> models.QueryResults:
# set empty list if not provided
if sorting_parameters is None:
sorting_parameters = []

# if id_ is not in sorting_parameters, add to end
if "id_" not in [param.field for param in sorting_parameters]:
sorting_parameters.append(
models.SortingParameter(field="id_", order=models.SortOrder.ASCENDING)
)

# get configured facet fields for given resource class
try:
facet_fields: list[models.FacetLabel] = self._config.searchable_classes[
Expand All @@ -89,6 +100,7 @@ async def handle_query(
facet_fields=facet_fields,
skip=skip,
limit=limit,
sorting_parameters=sorting_parameters,
)
except AggregationError as exc:
raise self.SearchError() from exc
Expand Down
1 change: 1 addition & 0 deletions mass/ports/inbound/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ async def handle_query(
filters: list[models.Filter],
skip: int = 0,
limit: Optional[int] = None,
sorting_parameters: Optional[list[models.SortingParameter]] = None,
) -> models.QueryResults:
"""Processes a query
Expand Down
1 change: 1 addition & 0 deletions mass/ports/outbound/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ async def aggregate(
facet_fields: list[models.FacetLabel],
skip: int = 0,
limit: Optional[int] = None,
sorting_parameters: list[models.SortingParameter],
) -> JsonObject:
"""Applies an aggregation pipeline to a mongodb collection"""
...
Expand Down
31 changes: 31 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@ components:
description: The number of results to skip for pagination
title: Skip
type: integer
sorting_parameters:
default:
- field: id_
order: ascending
description: Collection of sorting parameters used to refine search results
items:
$ref: '#/components/schemas/SortingParameter'
title: Sorting Parameters
type: array
required:
- class_name
title: SearchParameters
Expand All @@ -181,6 +190,28 @@ components:
- facetable_properties
title: SearchableClass
type: object
SortOrder:
description: Represents the possible sorting orders
enum:
- ascending
- descending
title: SortOrder
SortingParameter:
description: Represents a combination of a field to sort and the sort order
properties:
field:
description: Which field to sort results by.
title: Field
type: string
order:
allOf:
- $ref: '#/components/schemas/SortOrder'
default: ascending
description: Sort order to apply to sort_field
required:
- field
title: SortingParameter
type: object
ValidationError:
properties:
loc:
Expand Down
5 changes: 5 additions & 0 deletions tests/fixtures/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ searchable_classes:
facetable_properties:
- key: fun_fact
name: Fun Fact
SortingTests:
description: Data for testing sorting functionality.
facetable_properties:
- key: field
name: Field
resource_change_event_topic: searchable_resources
resource_deletion_event_type: searchable_resource_deleted
resource_upsertion_event_type: searchable_resource_upserted
Expand Down
28 changes: 28 additions & 0 deletions tests/fixtures/test_data/SortingTests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"items": [
{
"field": "some data",
"id_": "i2"
},
{
"field": "some data",
"id_": "i1"
},
{
"field": "some data",
"id_": "i3"
},
{
"field": "some data",
"id_": "i5"
},
{
"field": "some data",
"id_": "i6"
},
{
"field": "some data",
"id_": "i4"
}
]
}
Loading

0 comments on commit 1598193

Please sign in to comment.