From 392de9ff42c187e507c09324dce3e1300602cf14 Mon Sep 17 00:00:00 2001 From: WillDotWhite Date: Wed, 17 Apr 2024 15:41:14 +0100 Subject: [PATCH] WIP Migrate query logic to Opensearch --- .../kotlin/com/gmtkgamejam/EnumExtensions.kt | 9 +- .../com/gmtkgamejam/routing/PostRoutes.kt | 113 ++------------- .../com/gmtkgamejam/search/Opensearch.kt | 13 +- .../com/gmtkgamejam/search/SearchParams.kt | 129 ++++++++++++++++++ .../com/gmtkgamejam/services/PostService.kt | 7 + .../home/components/SearchFormWrapper.tsx | 12 +- .../pages/home/components/SortingOptions.tsx | 1 + 7 files changed, 166 insertions(+), 118 deletions(-) create mode 100644 api/src/main/kotlin/com/gmtkgamejam/search/SearchParams.kt diff --git a/api/src/main/kotlin/com/gmtkgamejam/EnumExtensions.kt b/api/src/main/kotlin/com/gmtkgamejam/EnumExtensions.kt index cc1ac2dd..507d9281 100644 --- a/api/src/main/kotlin/com/gmtkgamejam/EnumExtensions.kt +++ b/api/src/main/kotlin/com/gmtkgamejam/EnumExtensions.kt @@ -1,10 +1,5 @@ package com.gmtkgamejam -/** - * Floating function to cast a String to an Enum without throwing an exception - * - * Suggest using with mapNotNull{} where possible - */ -inline fun > enumFromStringSafe(value: String) : A? { - return enumValues().find { s -> s.name == value.uppercase() } +inline fun > enumSetFromInput(commaSeparatedString: String) : Set { + return commaSeparatedString.split(',').filter(String::isNotBlank).map { enumValueOf(it) }.toSet() } \ No newline at end of file diff --git a/api/src/main/kotlin/com/gmtkgamejam/routing/PostRoutes.kt b/api/src/main/kotlin/com/gmtkgamejam/routing/PostRoutes.kt index e42ff8d9..e646e0d5 100644 --- a/api/src/main/kotlin/com/gmtkgamejam/routing/PostRoutes.kt +++ b/api/src/main/kotlin/com/gmtkgamejam/routing/PostRoutes.kt @@ -1,17 +1,14 @@ package com.gmtkgamejam.routing import com.auth0.jwt.JWT -import com.gmtkgamejam.enumFromStringSafe -import com.gmtkgamejam.models.posts.Availability import com.gmtkgamejam.models.posts.PostItem -import com.gmtkgamejam.models.posts.Skills -import com.gmtkgamejam.models.posts.Tools import com.gmtkgamejam.models.posts.dtos.PostItemCreateDto import com.gmtkgamejam.models.posts.dtos.PostItemReportDto import com.gmtkgamejam.models.posts.dtos.PostItemUnableToContactReportDto import com.gmtkgamejam.models.posts.dtos.PostItemUpdateDto import com.gmtkgamejam.respondJSON import com.gmtkgamejam.search.OpenSearch +import com.gmtkgamejam.search.SearchParams import com.gmtkgamejam.services.AuthService import com.gmtkgamejam.services.FavouritesService import com.gmtkgamejam.services.PostService @@ -21,13 +18,9 @@ import io.ktor.server.auth.* import io.ktor.server.request.* import io.ktor.server.response.* import io.ktor.server.routing.* -import org.bson.conversions.Bson -import org.litote.kmongo.* import java.time.LocalDateTime import java.time.format.DateTimeFormatter import kotlin.math.min -import kotlin.reflect.full.memberProperties -import kotlin.text.Regex.Companion.escape fun Application.configurePostRouting() { @@ -38,9 +31,9 @@ fun Application.configurePostRouting() { routing { route("/posts") { get { - val params = call.parameters - - val posts = service.getPosts(and(getFilterFromParameters(params)), getSortFromParameters(params)) + val searchParams = SearchParams(call.parameters) + val postIds = OpenSearch.search(searchParams.query(), searchParams.sort()) + val posts = service.getPostsByOrderedIds(postIds) // Set isFavourite on posts for this user if they're logged in call.request.header("Authorization")?.substring(7) @@ -101,8 +94,6 @@ fun Application.configurePostRouting() { } get("favourites") { - val params = call.parameters - val favourites = authService.getTokenSet(call) ?.let { favouritesService.getFavouritesByUserId(it.discordId) } @@ -111,18 +102,12 @@ fun Application.configurePostRouting() { return@get call.respond(emptyList()) } - val favouritesFilters = mutableListOf() - favourites.postIds.forEach { - favouritesFilters.add(and(PostItem::id eq it, PostItem::deletedAt eq null)) - } + val searchParams = SearchParams(call.parameters) + val postIds = OpenSearch.search(searchParams.query(), searchParams.sort()) + .filter { favourites.postIds.contains(it) } + .toList() - val posts = service.getPosts( - and( - or(favouritesFilters), - and(getFilterFromParameters(params)) - ), - getSortFromParameters(params) - ) + val posts = service.getPostsByOrderedIds(postIds) posts.map { post -> post.isFavourite = true } call.respond(posts) @@ -212,83 +197,3 @@ fun Application.configurePostRouting() { } } } - -fun getFilterFromParameters(params: Parameters): List { - val filters = mutableListOf(PostItem::deletedAt eq null) - - params["description"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&description=` - ?.map { it -> it.trim() } - // The regex is the easiest way to check if a description contains a given substring - ?.forEach { filters.add(PostItem::description regex escape(it).toRegex(RegexOption.IGNORE_CASE)) } - - val skillsPossessedSearchMode = params["skillsPossessedSearchMode"] ?: "and" - params["skillsPossessed"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&skillsPossessed=` - ?.mapNotNull { enumFromStringSafe(it) } - ?.map { PostItem::skillsPossessed contains it } - ?.let { if (skillsPossessedSearchMode == "and") and(it) else or(it) } - ?.let(filters::add) - - val skillsSoughtSearchMode = params["skillsSoughtSearchMode"] ?: "and" - params["skillsSought"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&skillsSought=` - ?.mapNotNull { enumFromStringSafe(it) } - ?.map { PostItem::skillsSought contains it } - ?.let { if (skillsSoughtSearchMode == "and") and(it) else or(it) } - ?.let(filters::add) - - params["tools"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&skillsSought=` - ?.mapNotNull { enumFromStringSafe(it) } - ?.map { PostItem::preferredTools contains it } - ?.let(filters::addAll) - - params["languages"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&languages=` - ?.map { PostItem::languages contains it } - ?.let { filters.add(or(it)) } - - params["availability"]?.split(',') - ?.filter(String::isNotBlank) // Filter out empty `&availability=` - ?.mapNotNull { enumFromStringSafe(it) } - ?.map { PostItem::availability eq it } - // Availabilities are mutually exclusive, so treat it as inclusion search - ?.let { filters.add(or(it)) } - - // If no timezones sent, lack of filters will search all timezones - if (params["timezoneStart"] != null && params["timezoneEnd"] != null) { - val timezoneStart: Int = params["timezoneStart"]!!.toInt() - val timezoneEnd: Int = params["timezoneEnd"]!!.toInt() - - val timezones: MutableList = mutableListOf() - if (timezoneStart == timezoneEnd) { - timezones.add(timezoneStart) - } else if (timezoneStart < timezoneEnd) { - // UTC-2 -> UTC+2 should be: [-2, -1, 0, 1, 2] - timezones.addAll((timezoneStart..timezoneEnd)) - } else { - // UTC+9 -> UTC-9 should be: [9, 10, 11, 12, -12, -11, -10, -9] - timezones.addAll((timezoneStart..12)) - timezones.addAll((-12..timezoneEnd)) - } - - // Add all timezone searches as eq checks - // It's brute force, but easier to confirm - timezones - .map { PostItem::timezoneOffsets contains it } - .let { filters.add(or(it)) } - } - - return filters -} - -fun getSortFromParameters(params: Parameters): Bson { - val sortByFieldName = params["sortBy"] ?: "createdAt" - val sortByField = PostItem::class.memberProperties.first { prop -> prop.name == sortByFieldName } - return when (params["sortDir"].toString()) { - "asc" -> ascending(sortByField) - "desc" -> descending(sortByField) - else -> descending(sortByField) - } -} diff --git a/api/src/main/kotlin/com/gmtkgamejam/search/Opensearch.kt b/api/src/main/kotlin/com/gmtkgamejam/search/Opensearch.kt index 7e9b76f0..87acfe98 100644 --- a/api/src/main/kotlin/com/gmtkgamejam/search/Opensearch.kt +++ b/api/src/main/kotlin/com/gmtkgamejam/search/Opensearch.kt @@ -5,11 +5,11 @@ import com.gmtkgamejam.models.posts.PostItem import org.apache.hc.core5.http.HttpHost import org.opensearch.client.json.jackson.JacksonJsonpMapper import org.opensearch.client.opensearch.OpenSearchClient +import org.opensearch.client.opensearch._types.SortOptions import org.opensearch.client.opensearch._types.query_dsl.Query import org.opensearch.client.opensearch.core.IndexRequest import org.opensearch.client.opensearch.core.SearchRequest import org.opensearch.client.opensearch.core.UpdateRequest -import org.opensearch.client.opensearch.core.search.Hit import org.opensearch.client.transport.OpenSearchTransport import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder @@ -31,16 +31,17 @@ object OpenSearch { /** * Perform a search request against OpenSearch and return the documents in the result */ - fun search(index: String, query: Query): List> { - // If there wasn't a result in the cache (or it was too old), - // call OpenSearch and update the cache with the result + fun search(query: Query, sortOptions: List): List { val searchRequest = SearchRequest.Builder() - .index(index) + .index("posts") .query(query) - .size(10000) + .size(24) + .sort(sortOptions) .build() return client.search(searchRequest, ObjectNode::class.java).hits().hits() + .map { it.source()!!.get("id").asText() } + .toList() } // TODO: Handle response from client.index diff --git a/api/src/main/kotlin/com/gmtkgamejam/search/SearchParams.kt b/api/src/main/kotlin/com/gmtkgamejam/search/SearchParams.kt new file mode 100644 index 00000000..d47d8b7a --- /dev/null +++ b/api/src/main/kotlin/com/gmtkgamejam/search/SearchParams.kt @@ -0,0 +1,129 @@ +package com.gmtkgamejam.search + +import com.gmtkgamejam.enumSetFromInput +import com.gmtkgamejam.models.posts.Availability +import com.gmtkgamejam.models.posts.PostItem +import com.gmtkgamejam.models.posts.Skills +import com.gmtkgamejam.models.posts.Tools +import io.ktor.http.* +import org.opensearch.client.opensearch._types.FieldValue +import org.opensearch.client.opensearch._types.ScoreSort +import org.opensearch.client.opensearch._types.SortOptions +import org.opensearch.client.opensearch._types.SortOrder +import org.opensearch.client.opensearch._types.query_dsl.* + +private fun Set.toFieldValue(): FieldValue = FieldValue.of(this.toString()) +private fun String.toFieldValue(): FieldValue = FieldValue.of(this) + +data class SearchParams( + val sortBy: String, + val sortDir: String, + val description: String?, + val skillsPossessed: Set?, + val skillsSought: Set?, + val tools: Set?, + val languages: Set?, + val availability: Set?, + val timezones: Set?, +) { + constructor(params: Parameters) : this( + sortBy = params["sortBy"] ?: "score", + sortDir = params["sortDir"] ?: "desc", + description = params["description"], + skillsPossessed = params["skillsPossessed"]?.let { enumSetFromInput(it) }, + skillsSought = params["skillsSought"]?.let { enumSetFromInput(it) }, + tools = params["tools"]?.let { enumSetFromInput(it) }, + languages = params["languages"]?.split(',')?.filter(String::isNotBlank)?.toSet(), + availability = params["availability"]?.let { enumSetFromInput(it) }, + timezones = generateTimezones(params) + ) + + private fun matchQuery(params: MatchQuery.Builder.() -> Unit): Query = Query.Builder().match( + MatchQuery.Builder().apply(params).build() + ).build() + + private fun termQuery(params: TermQuery.Builder.() -> Unit): Query = Query.Builder().term( + TermQuery.Builder().apply(params).build() + ).build() + + private fun termsQuery(params: TermsQuery.Builder.() -> Unit): Query = Query.Builder().terms( + TermsQuery.Builder().apply(params).build() + ).build() + + fun query(): Query { + val builder = BoolQuery.Builder() + + /** STEP 1: FILTER ON REQUIRED FIELDS TO ENSURE ALL RESULTS CONTAIN ALL ELEMENTS */ + skillsPossessed?.let { builder.filter(matchQuery { field(PostItem::skillsPossessed.name); query(skillsPossessed.toFieldValue()) }) } + skillsSought?.let { builder.filter(matchQuery { field(PostItem::skillsSought.name); query(skillsSought.toFieldValue()) }) } + languages?.let { builder.filter(matchQuery { field(PostItem::languages.name); query(languages.toFieldValue()) }) } + tools?.let { builder.filter(matchQuery { field(PostItem::preferredTools.name); query(tools.toFieldValue()) }) } + + /** STEP 2: SCORE BASED ON VARIABLE FIELDS */ + // TODO: Add flex without making fields irrelevant + description?.isNotBlank()?.let { + builder.must(matchQuery { + field(PostItem::description.name) + query(description.toFieldValue()) + fuzziness("2.0") + }) + } + + timezones?.let { + builder.must(termsQuery { + field(PostItem::timezoneOffsets.name) + terms { it.value(timezones.map { tz -> FieldValue.of(tz.toString()) }) } + }) + } + + // All other fields are term queries because we don't want any analysis done - it's all exact matches + availability?.let { + builder.should(termQuery { + field(PostItem::availability.name) + value(availability.toFieldValue()) + }) + } + + return builder.build().toQuery() + } + + private fun sortOptions(params: SortOptions.Builder.() -> Unit): SortOptions = + SortOptions.Builder().apply(params).build() + + fun sort(): List { + val sortOrder = if (sortDir == "asc") SortOrder.Asc else SortOrder.Desc + val primarySort = when(sortBy) { + "score" -> sortOptions { score { ScoreSort.Builder().order(sortOrder) } } + "size" -> sortOptions { field { it.field(sortBy); it.order(sortOrder) } } + else -> sortOptions { field { it.field("$sortBy.keyword"); it.order(sortOrder) } } + } + + // Secondary sort functions as a deterministic tiebreaker when multiple docs have same score + val secondarySort = sortOptions { field { it.field("id.keyword"); it.order(sortOrder)} } + return listOf(primarySort, secondarySort) + } + + companion object { + fun generateTimezones(params: Parameters): Set? { + if (params["timezoneStart"] == null || params["timezoneEnd"] == null) { + return null + } + + val timezoneStart = params["timezoneStart"]!!.toInt() + val timezoneEnd = params["timezoneEnd"]!!.toInt() + val timezones: MutableSet = mutableSetOf() + if (timezoneStart == timezoneEnd) { + timezones.add(timezoneStart) + } else if (timezoneStart < timezoneEnd) { + // UTC-2 -> UTC+2 should be: [-2, -1, 0, 1, 2] + timezones.addAll((timezoneStart..timezoneEnd)) + } else { + // UTC+9 -> UTC-9 should be: [9, 10, 11, 12, -12, -11, -10, -9] + timezones.addAll((timezoneStart..12)) + timezones.addAll((-12..timezoneEnd)) + } + + return timezones + } + } +} diff --git a/api/src/main/kotlin/com/gmtkgamejam/services/PostService.kt b/api/src/main/kotlin/com/gmtkgamejam/services/PostService.kt index 2080fccc..4f91e661 100644 --- a/api/src/main/kotlin/com/gmtkgamejam/services/PostService.kt +++ b/api/src/main/kotlin/com/gmtkgamejam/services/PostService.kt @@ -8,6 +8,7 @@ import org.bson.conversions.Bson import org.koin.core.component.KoinComponent import org.koin.core.component.inject import org.litote.kmongo.* +import org.litote.kmongo.MongoOperator.`in` import java.time.LocalDateTime import java.time.format.DateTimeFormatter @@ -63,5 +64,11 @@ class PostService : KoinComponent { col.updateOne(PostItem::id eq postItem.id, postItem) } + fun getPostsByOrderedIds(ids: List): List { + // IDs need to be an array of strings (either using " or '), but Kotlin defaults to an array of numbers + val formattedIds = ids.joinToString(separator = "', '", prefix = "['", postfix = "']") + return col.find("""{id: {$`in`: $formattedIds}}}""").toList().sortedBy { result -> ids.indexOf(result.id) } + } + } diff --git a/ui/src/pages/home/components/SearchFormWrapper.tsx b/ui/src/pages/home/components/SearchFormWrapper.tsx index f7926792..92752c26 100644 --- a/ui/src/pages/home/components/SearchFormWrapper.tsx +++ b/ui/src/pages/home/components/SearchFormWrapper.tsx @@ -5,17 +5,25 @@ import {SearchForm} from "./SearchForm.tsx"; import {FormikSearchFormParameters} from "../models/FormikSearchFormParameters.ts"; import {removeEmpty} from "../../../utils.ts" import debounce from "just-debounce-it"; +import {useState} from "react"; + +type BookmarkParameter = { + bookmarked: boolean; +} + +type SubmissionParameters = SearchParameters | BookmarkParameter export const SearchFormWrapper: React.FC<{ searchParams: URLSearchParams, setSearchParams: (value: any) => void }> = ({searchParams, setSearchParams}) => { + const [onlyBookmarked, _] = useState(!!searchParams.get("bookmarked") || false) const initialFormValues: SearchParameters = searchParametersFromQueryString(searchParams) const onSubmitForm = (values: any) => { // Remove the empty fields, so we don't clutter up the query string with &a=&b=... - const formattedValues: Partial = removeEmpty(values) + const formattedValues: Partial = removeEmpty(values) // If we only have one timezone flag set, don't send either in query string if (!values['timezoneStart'] || !values['timezoneEnd']) { @@ -23,6 +31,8 @@ export const SearchFormWrapper: React.FC<{ delete formattedValues.timezoneEnd } + if (onlyBookmarked) formattedValues.bookmarked = true + // @ts-ignore setSearchParams(formattedValues) } diff --git a/ui/src/pages/home/components/SortingOptions.tsx b/ui/src/pages/home/components/SortingOptions.tsx index 6e7e5437..1171d346 100644 --- a/ui/src/pages/home/components/SortingOptions.tsx +++ b/ui/src/pages/home/components/SortingOptions.tsx @@ -3,6 +3,7 @@ import {Field} from "formik"; import CustomSelect, {CustomSelectOption} from "./common/CustomSelect.tsx"; const sortBy: CustomSelectOption[] = [ + {label: "Relevance", value: "score"}, {label: "Team Size", value: "size"}, {label: "Date Created", value: "createdAt"}, {label: "Last Updated", value: "updatedAt"},