diff --git a/src/arboretum-client.ts b/src/arboretum-client.ts index ec479d2..3f75a14 100644 --- a/src/arboretum-client.ts +++ b/src/arboretum-client.ts @@ -137,11 +137,24 @@ export type ArboretumPageT = ArboretumPageBaseT & { additionalFields?: { [key: string]: any }; }; +export type OrphanedArboretumPageT = Omit< + ArboretumPageT, + "path" | "totalDirectChildrenCount" | "ancestors" | "children" +>; + +export type OrphanedArboretumPageSearchResultT = OrphanedArboretumPageT & { + searchScore: number; +}; + export type ArboretumPageNodeT = | ArboretumPageT | ArboretumRedirectT | ArboretumAliasT; +export type ArboretumPageSearchResultNodeT = ArboretumPageNodeT & { + searchScore: number; +}; + export type OptionsT = { withChildren?: boolean; withAncestors?: boolean }; export type ArboretumClientOptionsT = Pick< @@ -187,7 +200,7 @@ export type ArboretumClientT = { phrase: string, localeCode?: string, limit?: number - ) => Array; + ) => Array; status: () => { lastUpdatedAt: string; pagesCount: number; @@ -200,13 +213,10 @@ export type ArboretumClientT = { limit?: number; skip?: number; localeCode?: string; - }) => Either< - string, - Array< - Omit< - ArboretumPageT, - "path" | "totalDirectChildrenCount" | "ancestors" | "children" - > - > - >; + }) => Either>; + searchOrphanedPages: ( + phrase: string, + localeCode?: string, + limit?: number + ) => Array; }; diff --git a/src/impl/arboretum-client.impl.ts b/src/impl/arboretum-client.impl.ts index 7c467e2..20212c8 100644 --- a/src/impl/arboretum-client.impl.ts +++ b/src/impl/arboretum-client.impl.ts @@ -5,7 +5,6 @@ import { ArboretumClientContentfulConfigOptionsT, ArboretumClientConfigT, ArboretumClientT, - ArboretumClientOptions, ArboretumClientOptionsT, } from "../arboretum-client"; import { @@ -20,7 +19,6 @@ import { pagesByTagId } from "./sitemap/methods/pages-by-tag-id"; import { pagesByIds } from "./sitemap/methods/pages-by-ids"; import { pagesByPaths } from "./sitemap/methods/pages-by-paths"; import { regenerate } from "./sitemap/methods/regenerate"; -import { search } from "./sitemap/methods/search"; import { status } from "./sitemap/methods/status"; import { Either, right } from "../utils/fp-utils"; import { pages } from "./sitemap/methods/pages"; @@ -29,6 +27,8 @@ import { cachedData } from "./sitemap/methods/cached-data"; import { buildPagesByTagEagerly } from "./sitemap/helpers/build-pages-by-tag-eagerly"; import { homePage } from "./sitemap/methods/home-page"; import { orphanedPages } from "./sitemap/methods/orphaned-pages"; +import { searchOrphanedPages } from "./sitemap/methods/search-orphaned-pages"; +import { search } from "./sitemap/methods/search"; const pageTagIdPrefix = "page"; const pageHomeTagId = `${pageTagIdPrefix}Home`; @@ -208,6 +208,7 @@ export const createArboretumClient = async ( status: status(ctx), cachedData: cachedData(ctx), orphanedPages: orphanedPages(ctx), + searchOrphanedPages: searchOrphanedPages(ctx), }, warnings: dataE.right.warnings, }; diff --git a/src/impl/constants.ts b/src/impl/constants.ts new file mode 100644 index 0000000..013e8f8 --- /dev/null +++ b/src/impl/constants.ts @@ -0,0 +1,5 @@ +export const CONSTANTS = { + search: { + minSearchScore: 0.4, + }, +}; diff --git a/src/impl/sitemap/methods/search-orphaned-pages.ts b/src/impl/sitemap/methods/search-orphaned-pages.ts new file mode 100644 index 0000000..37eaa69 --- /dev/null +++ b/src/impl/sitemap/methods/search-orphaned-pages.ts @@ -0,0 +1,44 @@ +import { + ArboretumClientT, + OrphanedArboretumPageT, +} from "../../../arboretum-client"; +import { phraseSimilarity } from "../../../utils/phrase-similarity"; +import { ArboretumClientCtx } from "../../arboretum-client.impl"; +import { CONSTANTS } from "../../constants"; +import { orphanedPages } from "./orphaned-pages"; + +const orphanedPagePhraseSimilarity = ( + page: Pick, + phrase: string +): number => { + const slugSimilarity = phraseSimilarity(phrase, page.slug); + const idSimilarity = phraseSimilarity(phrase, page.id); + const titleSimilarity = page.title ? phraseSimilarity(phrase, page.title) : 0; + return Math.max(slugSimilarity, idSimilarity, titleSimilarity); +}; + +export const searchOrphanedPages = + ( + ctx: Pick< + ArboretumClientCtx, + "data" | "sitemap" | "options" | "pageHomeTagId" + > + ): ArboretumClientT["searchOrphanedPages"] => + (phrase, localeCode, limit) => { + const pages = orphanedPages(ctx)({ localeCode }); + + if (pages._tag === "Right") { + return pages.right + .map((page) => { + const searchScore = orphanedPagePhraseSimilarity(page, phrase); + return { ...page, searchScore }; + }) + .filter( + ({ searchScore }) => searchScore >= CONSTANTS.search.minSearchScore + ) + .sort((a, b) => b.searchScore - a.searchScore) + .slice(0, limit); + } else { + return []; + } + }; diff --git a/src/impl/sitemap/methods/search.ts b/src/impl/sitemap/methods/search.ts index fe082fd..72d8906 100644 --- a/src/impl/sitemap/methods/search.ts +++ b/src/impl/sitemap/methods/search.ts @@ -1,91 +1,39 @@ import { ArboretumClientT, - ArboretumPageNodeT, -} from '../../../arboretum-client'; -import { stringSimilarity } from '../../../utils/string-similarity'; -import { - ArboretumClientCtx, - LocalizedSitemapT, - PageT, - RedirectT, -} from '../../arboretum-client.impl'; -import { localizedSitemapFromCacheOrBuildEff } from '../helpers/build-localized-sitemap'; -import { toArboretumPageWithMissingData } from '../adapters/to-arboretum-page-with-missing-data-adapter'; -import { redirectToArboretumPage } from '../adapters/redirect-to-arboretum-page-adapter'; - -const minPhraseSimilarity = 0.4; -const defaultLimit = 20; - -type PageSearchResultT = { phraseSimilarity: number; page: ArboretumPageNodeT }; + ArboretumPageT, + ArboretumRedirectT, +} from "../../../arboretum-client"; +import { phraseSimilarity } from "../../../utils/phrase-similarity"; +import { ArboretumClientCtx } from "../../arboretum-client.impl"; +import { CONSTANTS } from "../../constants"; +import { pages } from "./pages"; const pagePhraseSimilarity = ( - page: PageT, - path: string, - phrase: string, + page: Pick, + phrase: string ): number => { - const slugSimilarity = stringSimilarity(phrase, page.slug); - const pathSimilarity = stringSimilarity(phrase, path); - const idSimilarity = stringSimilarity(phrase, page.sys.id); - return Math.max(...[slugSimilarity, pathSimilarity, idSimilarity]); + const slugSimilarity = phraseSimilarity(phrase, page.slug); + const pathSimilarity = phraseSimilarity(phrase, page.path); + const idSimilarity = phraseSimilarity(phrase, page.id); + const titleSimilarity = page.title ? phraseSimilarity(phrase, page.title) : 0; + return Math.max( + slugSimilarity, + pathSimilarity, + idSimilarity, + titleSimilarity + ); }; const redirectPhraseSimilarity = ( - redirect: RedirectT, - phrase: string, + redirect: Pick, + phrase: string ): number => { - const pathSimilarity = stringSimilarity(phrase, redirect.path); - const idSimilarity = stringSimilarity(phrase, redirect.sys.id); - return Math.max(...[pathSimilarity, idSimilarity]); -}; - -const localizedRecursiveSearch = ( - localeCode: string, - localizedSitemap: LocalizedSitemapT, - phrase: string, - parentPath: string, - currentPage: PageT | RedirectT, -): Array => { - const getPath = (page: PageT) => - page.sys.id === localizedSitemap.root.sys.id - ? parentPath - : parentPath + '/' + page.slug; - const phraseSimilarity = - currentPage.type === 'page' - ? pagePhraseSimilarity(currentPage, getPath(currentPage), phrase) - : redirectPhraseSimilarity(currentPage, phrase); - - const childrenResults = - currentPage.type === 'page' - ? currentPage.childPages.flatMap(({ sys: { id } }) => { - const childPage = localizedSitemap.sitemap.get(id); - const path = getPath(currentPage); - return childPage - ? localizedRecursiveSearch( - localeCode, - localizedSitemap, - phrase, - path, - childPage, - ) - : []; - }) - : []; - const res: Array = []; - if (phraseSimilarity >= minPhraseSimilarity) { - res.push({ - phraseSimilarity, - page: - currentPage.type === 'page' - ? toArboretumPageWithMissingData(localeCode)( - currentPage, - undefined, - undefined, - ) - : redirectToArboretumPage(localeCode)(currentPage), - }); - } - res.push(...childrenResults); - return res; + const pathSimilarity = phraseSimilarity(phrase, redirect.path); + const idSimilarity = phraseSimilarity(phrase, redirect.id); + const titleSimilarity = redirect.title + ? phraseSimilarity(phrase, redirect.title) + : 0; + return Math.max(pathSimilarity, idSimilarity, titleSimilarity); }; // Primitive implementation that can be inefficient for large sitemaps @@ -93,33 +41,27 @@ export const search = ( ctx: Pick< ArboretumClientCtx, - 'data' | 'sitemap' | 'options' | 'pageHomeTagId' - >, - ): ArboretumClientT['search'] => + "data" | "sitemap" | "options" | "pageHomeTagId" + > + ): ArboretumClientT["search"] => (phrase, localeCode, limit) => { - return [...ctx.data.locales.values()] - .filter(locale => (localeCode ? locale.code === localeCode : true)) - .flatMap(locale => { - const sitemap = localizedSitemapFromCacheOrBuildEff(ctx, locale); + const allPages = pages(ctx)({ localeCode }); - const homePage = - sitemap._tag === 'Right' - ? sitemap.right.sitemap.get(sitemap.right.root.sys.id) - : undefined; - - if (sitemap._tag === 'Right' && homePage) { - return localizedRecursiveSearch( - locale.code, - sitemap.right, - phrase, - `/${locale.code}`, - homePage, - ); - } else { - return []; - } - }) - .sort((a, b) => b.phraseSimilarity - a.phraseSimilarity) - .slice(0, limit || defaultLimit) - .map(({ page }) => page); + if (allPages._tag === "Right") { + return allPages.right + .map((page) => { + const searchScore = + page.type === "page" + ? pagePhraseSimilarity(page, phrase) + : redirectPhraseSimilarity(page, phrase); + return { ...page, searchScore }; + }) + .filter( + ({ searchScore }) => searchScore >= CONSTANTS.search.minSearchScore + ) + .sort((a, b) => b.searchScore - a.searchScore) + .slice(0, limit); + } else { + return []; + } }; diff --git a/src/utils/phrase-similarity.ts b/src/utils/phrase-similarity.ts new file mode 100644 index 0000000..916f5c3 --- /dev/null +++ b/src/utils/phrase-similarity.ts @@ -0,0 +1,7 @@ +import { stringSimilarity } from "./string-similarity"; + +export const phraseSimilarity = (phrase: string, value: string): number => { + const s1 = stringSimilarity(phrase, value); + const s2 = stringSimilarity(phrase, value.slice(0, phrase.length)); + return s1 > s2 ? s1 : s2; +}; diff --git a/src/utils/string-similarity.ts b/src/utils/string-similarity.ts index e8a1363..c2b761e 100644 --- a/src/utils/string-similarity.ts +++ b/src/utils/string-similarity.ts @@ -9,10 +9,10 @@ export function stringSimilarity( str1: string, str2: string, - gramSize: number = 2, + gramSize: number = 2 ): number { function getNGrams(s: string, len: number) { - s = ' '.repeat(len - 1) + s.toLowerCase() + ' '.repeat(len - 1); + s = " ".repeat(len - 1) + s.toLowerCase() + " ".repeat(len - 1); let v = new Array(s.length - len + 1); for (let i = 0; i < v.length; i++) { v[i] = s.slice(i, i + len);