Skip to content

Commit

Permalink
Sort by Relevance (GSI-330) (#16)
Browse files Browse the repository at this point in the history
* Fix bug with validator

* Add relevance option for sort order

* Make relevance the default sort

* Fix wrong comparison operator in test

* Add test config for data to test relevance sorting

* Add test data

* Change sorting_parameters default to [ ]

* Catch empty list as well as None in QH

* Add tests for relevance

* Fix/replace flawed sorter

* Bump version from 0.3.1 -> 0.3.2

* Add return type to SearchParameters validator

---------

Co-authored-by: TheByronHimes <[email protected]>
  • Loading branch information
TheByronHimes and TheByronHimes authored Sep 13, 2023
1 parent 1598193 commit eedd7fb
Show file tree
Hide file tree
Showing 11 changed files with 372 additions and 35 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,21 @@ We recommend using the provided Docker container.

A pre-built version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/mass):
```bash
docker pull ghga/mass:0.3.1
docker pull ghga/mass:0.3.2
```

Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile):
```bash
# Execute in the repo's root dir:
docker build -t ghga/mass:0.3.1 .
docker build -t ghga/mass:0.3.2 .
```

For production-ready deployment, we recommend using Kubernetes. However,
for simple use cases, you could execute the service using Docker
on a single server:
```bash
# The entrypoint is preconfigured:
docker run -p 8080:8080 ghga/mass:0.3.1 --help
docker run -p 8080:8080 ghga/mass:0.3.2 --help
```

If you prefer not to use containers, you may install the service from source:
Expand Down
2 changes: 1 addition & 1 deletion mass/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

"""A service for searching metadata artifacts and filtering results."""

__version__ = "0.3.1"
__version__ = "0.3.2"
9 changes: 6 additions & 3 deletions mass/adapters/inbound/fastapi_/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from pydantic import BaseModel, Field, validator

from mass.core.models import Filter, SortingParameter, SortOrder
from mass.core.models import Filter, SortingParameter


class SearchParameters(BaseModel):
Expand All @@ -38,14 +38,17 @@ class SearchParameters(BaseModel):
default=None, description="Limit the results to this number"
)
sorting_parameters: list[SortingParameter] = Field(
default=[SortingParameter(field="id_", order=SortOrder.ASCENDING)],
default=[],
description=("Collection of sorting parameters used to refine search results"),
)

@validator("sorting_parameters")
@classmethod
def no_duplicate_fields(cls, parameters: list[SortingParameter]):
def no_duplicate_fields(
cls, parameters: list[SortingParameter]
) -> list[SortingParameter]:
"""Check for duplicate fields in sorting parameters"""
all_sort_fields = [param.field for param in parameters]
if len(set(all_sort_fields)) < len(all_sort_fields):
raise ValueError("Sorting parameters cannot contain duplicate fields")
return parameters
12 changes: 8 additions & 4 deletions mass/adapters/outbound/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
#

"""Utility functions for building the aggregation pipeline used by query handler"""
from collections import defaultdict
from collections import OrderedDict, defaultdict
from typing import Any, Optional

from hexkit.custom_types import JsonObject

from mass.core import models

SORT_ORDER_CONVERSION = {"ascending": 1, "descending": -1}
SORT_ORDER_CONVERSION: JsonObject = {
"ascending": 1,
"descending": -1,
"relevance": {"$meta": "textScore"},
}


def pipeline_match_text_search(*, query: str) -> JsonObject:
Expand Down Expand Up @@ -60,7 +64,7 @@ def pipeline_facet_sort_and_paginate(
facet_fields: list[models.FacetLabel],
skip: int,
limit: Optional[int] = None,
sorts: JsonObject,
sorts: OrderedDict,
):
"""Uses a list of facetable property names to build the subquery for faceting"""
segment: dict[str, list[JsonObject]] = {}
Expand Down Expand Up @@ -136,7 +140,7 @@ def build_pipeline(
pipeline.append(pipeline_match_filters_stage(filters=filters))

# turn the sorting parameters into a formatted pipeline $sort
sorts = {}
sorts = OrderedDict()
for param in sorting_parameters:
sort_order = SORT_ORDER_CONVERSION[param.order.value]
sorts[param.field] = sort_order
Expand Down
1 change: 1 addition & 0 deletions mass/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class SortOrder(Enum):

ASCENDING = "ascending"
DESCENDING = "descending"
RELEVANCE = "relevance"


class SortingParameter(BaseModel):
Expand Down
11 changes: 9 additions & 2 deletions mass/core/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,15 @@ async def handle_query(
sorting_parameters: Optional[list[models.SortingParameter]] = None,
) -> models.QueryResults:
# if no sorting parameters are provided, sort by relevance when there is a query; otherwise use an empty list
if sorting_parameters is None:
sorting_parameters = []
if not sorting_parameters:
if query:
sorting_parameters = [
models.SortingParameter(
field="query", order=models.SortOrder.RELEVANCE
)
]
else:
sorting_parameters = []

# if id_ is not in sorting_parameters, add to end
if "id_" not in [param.field for param in sorting_parameters]:
Expand Down
5 changes: 2 additions & 3 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,7 @@ components:
title: Skip
type: integer
sorting_parameters:
default:
- field: id_
order: ascending
default: []
description: Collection of sorting parameters used to refine search results
items:
$ref: '#/components/schemas/SortingParameter'
Expand Down Expand Up @@ -195,6 +193,7 @@ components:
enum:
- ascending
- descending
- relevance
title: SortOrder
SortingParameter:
description: Represents a combination of a field to sort and the sort order
Expand Down
7 changes: 7 additions & 0 deletions tests/fixtures/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ searchable_classes:
facetable_properties:
- key: field
name: Field
RelevanceTests:
description: Data for testing sorting by relevance.
facetable_properties:
- key: field
name: Field
- key: data
name: Data
resource_change_event_topic: searchable_resources
resource_deletion_event_type: searchable_resource_deleted
resource_upsertion_event_type: searchable_resource_upserted
Expand Down
29 changes: 29 additions & 0 deletions tests/fixtures/test_data/RelevanceTests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"items": [
{
"data": "test test test test test test",
"field": "alternative",
"id_": "i2"
},
{
"data": "test test test test test",
"field": "same as i1",
"id_": "i4"
},
{
"data": "test test test test",
"field": "same as i4",
"id_": "i1"
},
{
"data": "test test test ",
"field": "some data",
"id_": "i3"
},
{
"data": "test test test test test",
"field": "alternative alternative",
"id_": "i5"
}
]
}
Loading

0 comments on commit eedd7fb

Please sign in to comment.