Skip to content

Commit

Permalink
Add searchOrphanedPages method, improve search method, add searchScor…
Browse files Browse the repository at this point in the history
…e param to search results
  • Loading branch information
Dacjan committed Oct 18, 2024
1 parent 248bbf0 commit 9c5f91d
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 120 deletions.
30 changes: 20 additions & 10 deletions src/arboretum-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,24 @@ export type ArboretumPageT = ArboretumPageBaseT & {
additionalFields?: { [key: string]: any };
};

/**
 * Shape of an orphaned page: an `ArboretumPageT` with the `path`,
 * `totalDirectChildrenCount`, `ancestors` and `children` fields omitted.
 */
export type OrphanedArboretumPageT = Omit<
ArboretumPageT,
"path" | "totalDirectChildrenCount" | "ancestors" | "children"
>;

/**
 * An orphaned page annotated with the numeric `searchScore` it received for a
 * search phrase. Element type of the array returned by `searchOrphanedPages`.
 */
export type OrphanedArboretumPageSearchResultT = OrphanedArboretumPageT & {
searchScore: number;
};

/** A node of any kind: a page, a redirect or an alias. */
export type ArboretumPageNodeT =
| ArboretumPageT
| ArboretumRedirectT
| ArboretumAliasT;

/**
 * Any page node annotated with the numeric `searchScore` it received for a
 * search phrase. Element type of the array returned by `search`.
 */
export type ArboretumPageSearchResultNodeT = ArboretumPageNodeT & {
searchScore: number;
};

/**
 * Optional boolean flags accepted by client methods.
 * NOTE(review): presumably they control whether `children` / `ancestors` are
 * populated on returned pages — confirm against the methods that take them.
 */
export type OptionsT = { withChildren?: boolean; withAncestors?: boolean };

export type ArboretumClientOptionsT = Pick<
Expand Down Expand Up @@ -187,7 +200,7 @@ export type ArboretumClientT = {
phrase: string,
localeCode?: string,
limit?: number
) => Array<ArboretumPageNodeT>;
) => Array<ArboretumPageSearchResultNodeT>;
status: () => {
lastUpdatedAt: string;
pagesCount: number;
Expand All @@ -200,13 +213,10 @@ export type ArboretumClientT = {
limit?: number;
skip?: number;
localeCode?: string;
}) => Either<
string,
Array<
Omit<
ArboretumPageT,
"path" | "totalDirectChildrenCount" | "ancestors" | "children"
>
>
>;
}) => Either<string, Array<OrphanedArboretumPageT>>;
searchOrphanedPages: (
phrase: string,
localeCode?: string,
limit?: number
) => Array<OrphanedArboretumPageSearchResultT>;
};
5 changes: 3 additions & 2 deletions src/impl/arboretum-client.impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
ArboretumClientContentfulConfigOptionsT,
ArboretumClientConfigT,
ArboretumClientT,
ArboretumClientOptions,
ArboretumClientOptionsT,
} from "../arboretum-client";
import {
Expand All @@ -20,7 +19,6 @@ import { pagesByTagId } from "./sitemap/methods/pages-by-tag-id";
import { pagesByIds } from "./sitemap/methods/pages-by-ids";
import { pagesByPaths } from "./sitemap/methods/pages-by-paths";
import { regenerate } from "./sitemap/methods/regenerate";
import { search } from "./sitemap/methods/search";
import { status } from "./sitemap/methods/status";
import { Either, right } from "../utils/fp-utils";
import { pages } from "./sitemap/methods/pages";
Expand All @@ -29,6 +27,8 @@ import { cachedData } from "./sitemap/methods/cached-data";
import { buildPagesByTagEagerly } from "./sitemap/helpers/build-pages-by-tag-eagerly";
import { homePage } from "./sitemap/methods/home-page";
import { orphanedPages } from "./sitemap/methods/orphaned-pages";
import { searchOrphanedPages } from "./sitemap/methods/search-orphaned-pages";
import { search } from "./sitemap/methods/search";

const pageTagIdPrefix = "page";
const pageHomeTagId = `${pageTagIdPrefix}Home`;
Expand Down Expand Up @@ -208,6 +208,7 @@ export const createArboretumClient = async (
status: status(ctx),
cachedData: cachedData(ctx),
orphanedPages: orphanedPages(ctx),
searchOrphanedPages: searchOrphanedPages(ctx),
},
warnings: dataE.right.warnings,
};
Expand Down
5 changes: 5 additions & 0 deletions src/impl/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/** Constant values shared by the implementation modules. */
export const CONSTANTS = {
search: {
// Lowest `searchScore` a result may have and still be returned; the search
// methods filter out every candidate scoring below this value.
minSearchScore: 0.4,
},
};
44 changes: 44 additions & 0 deletions src/impl/sitemap/methods/search-orphaned-pages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import {
ArboretumClientT,
OrphanedArboretumPageT,
} from "../../../arboretum-client";
import { phraseSimilarity } from "../../../utils/phrase-similarity";
import { ArboretumClientCtx } from "../../arboretum-client.impl";
import { CONSTANTS } from "../../constants";
import { orphanedPages } from "./orphaned-pages";

/**
 * Scores how well `phrase` matches an orphaned page.
 *
 * The phrase is compared against the page's slug, id and (when present) title;
 * the best of those scores wins. A missing or empty title contributes 0.
 *
 * @param page - The slug, id and optional title of the page being scored.
 * @param phrase - The search phrase.
 * @returns The highest similarity found among the compared fields.
 */
const orphanedPagePhraseSimilarity = (
  page: Pick<OrphanedArboretumPageT, "slug" | "id" | "title">,
  phrase: string
): number => {
  const scores = [
    phraseSimilarity(phrase, page.slug),
    phraseSimilarity(phrase, page.id),
    page.title ? phraseSimilarity(phrase, page.title) : 0,
  ];
  return Math.max(...scores);
};

/**
 * Builds the `searchOrphanedPages` client method for the given context.
 *
 * The returned function fetches the orphaned pages for `localeCode`, scores
 * each one against `phrase`, discards those scoring below
 * `CONSTANTS.search.minSearchScore`, orders the rest by descending score and
 * keeps at most `limit` of them. When `limit` is omitted every match is
 * returned. If the orphaned pages cannot be obtained (a non-`Right` result),
 * an empty array is returned.
 *
 * @param ctx - The slice of the client context needed to list orphaned pages.
 * @returns A function `(phrase, localeCode, limit)` yielding scored pages.
 */
export const searchOrphanedPages =
  (
    ctx: Pick<
      ArboretumClientCtx,
      "data" | "sitemap" | "options" | "pageHomeTagId"
    >
  ): ArboretumClientT["searchOrphanedPages"] =>
  (phrase, localeCode, limit) => {
    const orphaned = orphanedPages(ctx)({ localeCode });

    // Nothing to search when the orphaned pages could not be listed.
    if (orphaned._tag !== "Right") {
      return [];
    }

    const scored = orphaned.right.map((page) => ({
      ...page,
      searchScore: orphanedPagePhraseSimilarity(page, phrase),
    }));

    const matches = scored.filter(
      (candidate) => candidate.searchScore >= CONSTANTS.search.minSearchScore
    );

    // `matches` is a fresh array produced by `filter`, so sorting in place is safe.
    matches.sort((left, right) => right.searchScore - left.searchScore);

    return matches.slice(0, limit);
  };
154 changes: 48 additions & 106 deletions src/impl/sitemap/methods/search.ts
Original file line number Diff line number Diff line change
@@ -1,125 +1,67 @@
import {
ArboretumClientT,
ArboretumPageNodeT,
} from '../../../arboretum-client';
import { stringSimilarity } from '../../../utils/string-similarity';
import {
ArboretumClientCtx,
LocalizedSitemapT,
PageT,
RedirectT,
} from '../../arboretum-client.impl';
import { localizedSitemapFromCacheOrBuildEff } from '../helpers/build-localized-sitemap';
import { toArboretumPageWithMissingData } from '../adapters/to-arboretum-page-with-missing-data-adapter';
import { redirectToArboretumPage } from '../adapters/redirect-to-arboretum-page-adapter';

const minPhraseSimilarity = 0.4;
const defaultLimit = 20;

type PageSearchResultT = { phraseSimilarity: number; page: ArboretumPageNodeT };
ArboretumPageT,
ArboretumRedirectT,
} from "../../../arboretum-client";
import { phraseSimilarity } from "../../../utils/phrase-similarity";
import { ArboretumClientCtx } from "../../arboretum-client.impl";
import { CONSTANTS } from "../../constants";
import { pages } from "./pages";

const pagePhraseSimilarity = (
page: PageT,
path: string,
phrase: string,
page: Pick<ArboretumPageT, "id" | "slug" | "path" | "title">,
phrase: string
): number => {
const slugSimilarity = stringSimilarity(phrase, page.slug);
const pathSimilarity = stringSimilarity(phrase, path);
const idSimilarity = stringSimilarity(phrase, page.sys.id);
return Math.max(...[slugSimilarity, pathSimilarity, idSimilarity]);
const slugSimilarity = phraseSimilarity(phrase, page.slug);
const pathSimilarity = phraseSimilarity(phrase, page.path);
const idSimilarity = phraseSimilarity(phrase, page.id);
const titleSimilarity = page.title ? phraseSimilarity(phrase, page.title) : 0;
return Math.max(
slugSimilarity,
pathSimilarity,
idSimilarity,
titleSimilarity
);
};

const redirectPhraseSimilarity = (
redirect: RedirectT,
phrase: string,
redirect: Pick<ArboretumRedirectT, "id" | "path" | "title">,
phrase: string
): number => {
const pathSimilarity = stringSimilarity(phrase, redirect.path);
const idSimilarity = stringSimilarity(phrase, redirect.sys.id);
return Math.max(...[pathSimilarity, idSimilarity]);
};

const localizedRecursiveSearch = (
localeCode: string,
localizedSitemap: LocalizedSitemapT,
phrase: string,
parentPath: string,
currentPage: PageT | RedirectT,
): Array<PageSearchResultT> => {
const getPath = (page: PageT) =>
page.sys.id === localizedSitemap.root.sys.id
? parentPath
: parentPath + '/' + page.slug;
const phraseSimilarity =
currentPage.type === 'page'
? pagePhraseSimilarity(currentPage, getPath(currentPage), phrase)
: redirectPhraseSimilarity(currentPage, phrase);

const childrenResults =
currentPage.type === 'page'
? currentPage.childPages.flatMap(({ sys: { id } }) => {
const childPage = localizedSitemap.sitemap.get(id);
const path = getPath(currentPage);
return childPage
? localizedRecursiveSearch(
localeCode,
localizedSitemap,
phrase,
path,
childPage,
)
: [];
})
: [];
const res: Array<PageSearchResultT> = [];
if (phraseSimilarity >= minPhraseSimilarity) {
res.push({
phraseSimilarity,
page:
currentPage.type === 'page'
? toArboretumPageWithMissingData(localeCode)(
currentPage,
undefined,
undefined,
)
: redirectToArboretumPage(localeCode)(currentPage),
});
}
res.push(...childrenResults);
return res;
const pathSimilarity = phraseSimilarity(phrase, redirect.path);
const idSimilarity = phraseSimilarity(phrase, redirect.id);
const titleSimilarity = redirect.title
? phraseSimilarity(phrase, redirect.title)
: 0;
return Math.max(pathSimilarity, idSimilarity, titleSimilarity);
};

// Primitive implementation that can be inefficient for large sitemaps
export const search =
(
ctx: Pick<
ArboretumClientCtx,
'data' | 'sitemap' | 'options' | 'pageHomeTagId'
>,
): ArboretumClientT['search'] =>
"data" | "sitemap" | "options" | "pageHomeTagId"
>
): ArboretumClientT["search"] =>
(phrase, localeCode, limit) => {
return [...ctx.data.locales.values()]
.filter(locale => (localeCode ? locale.code === localeCode : true))
.flatMap(locale => {
const sitemap = localizedSitemapFromCacheOrBuildEff(ctx, locale);
const allPages = pages(ctx)({ localeCode });

const homePage =
sitemap._tag === 'Right'
? sitemap.right.sitemap.get(sitemap.right.root.sys.id)
: undefined;

if (sitemap._tag === 'Right' && homePage) {
return localizedRecursiveSearch(
locale.code,
sitemap.right,
phrase,
`/${locale.code}`,
homePage,
);
} else {
return [];
}
})
.sort((a, b) => b.phraseSimilarity - a.phraseSimilarity)
.slice(0, limit || defaultLimit)
.map(({ page }) => page);
if (allPages._tag === "Right") {
return allPages.right
.map((page) => {
const searchScore =
page.type === "page"
? pagePhraseSimilarity(page, phrase)
: redirectPhraseSimilarity(page, phrase);
return { ...page, searchScore };
})
.filter(
({ searchScore }) => searchScore >= CONSTANTS.search.minSearchScore
)
.sort((a, b) => b.searchScore - a.searchScore)
.slice(0, limit);
} else {
return [];
}
};
7 changes: 7 additions & 0 deletions src/utils/phrase-similarity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { stringSimilarity } from "./string-similarity";

/**
 * Rates how well `phrase` matches `value`.
 *
 * Two `stringSimilarity` comparisons are made: one against the whole `value`
 * and one against only its first `phrase.length` characters (presumably so a
 * phrase matching the start of a longer value still scores well). The larger
 * of the two scores is returned.
 *
 * @param phrase - The search phrase.
 * @param value - The text the phrase is compared with.
 * @returns The higher of the full-value and prefix similarity scores.
 */
export const phraseSimilarity = (phrase: string, value: string): number => {
  const prefix = value.slice(0, phrase.length);
  const fullScore = stringSimilarity(phrase, value);
  const prefixScore = stringSimilarity(phrase, prefix);
  if (fullScore > prefixScore) {
    return fullScore;
  }
  return prefixScore;
};
4 changes: 2 additions & 2 deletions src/utils/string-similarity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
export function stringSimilarity(
str1: string,
str2: string,
gramSize: number = 2,
gramSize: number = 2
): number {
function getNGrams(s: string, len: number) {
s = ' '.repeat(len - 1) + s.toLowerCase() + ' '.repeat(len - 1);
s = " ".repeat(len - 1) + s.toLowerCase() + " ".repeat(len - 1);
let v = new Array(s.length - len + 1);
for (let i = 0; i < v.length; i++) {
v[i] = s.slice(i, i + len);
Expand Down

0 comments on commit 9c5f91d

Please sign in to comment.