From 8f3e62d9b3e57e9d699552bcce583fca8eaecd47 Mon Sep 17 00:00:00 2001 From: Caroline <4971715+carolineBda@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:08:46 +0100 Subject: [PATCH] fix(glossary): exclure les titres et sous-titres provenant des contribs (#1316) --- .../glossary/__tests__/glossary.test.ts | 16 ++++++++- .../ingester/glossary/explodeGlossaryTerms.ts | 35 +++++++------------ 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/targets/export-elasticsearch/src/ingester/glossary/__tests__/glossary.test.ts b/targets/export-elasticsearch/src/ingester/glossary/__tests__/glossary.test.ts index ad284c26b..73feb719c 100644 --- a/targets/export-elasticsearch/src/ingester/glossary/__tests__/glossary.test.ts +++ b/targets/export-elasticsearch/src/ingester/glossary/__tests__/glossary.test.ts @@ -66,7 +66,21 @@ describe("Glossary", () => { expect(addGlossary(markdown)).toEqual(markdown); }); - test("should not replace html property for cc word", () => { + test("should not add webcomponent tooltip in a span tag with class \"title\"", () => { + const htmlContent = `L'indemnité de fin de contrat n'est pas due dans les cas suivants`; + expect(addGlossary(htmlContent)).toEqual( + `L'indemnité de fin de contrat n'est pas due dans les cas suivants` + ); + }); + + test("should not add webcomponent tooltip in a span tag with class \"sub-title\"", () => { + const htmlContent = `L'indemnité de fin de contrat n'est pas due dans les cas suivants`; + expect(addGlossary(htmlContent)).toEqual( + `L'indemnité de fin de contrat n'est pas due dans les cas suivants` + ); + }); + + test("should not replace within tag attributes", () => { const htmlContent = '

voici une convention collective et un web component mais aussi dispositions, ceci est un test

'; expect(addGlossary(htmlContent)).toEqual( diff --git a/targets/export-elasticsearch/src/ingester/glossary/explodeGlossaryTerms.ts b/targets/export-elasticsearch/src/ingester/glossary/explodeGlossaryTerms.ts index 2e9c3fffd..038d05bac 100644 --- a/targets/export-elasticsearch/src/ingester/glossary/explodeGlossaryTerms.ts +++ b/targets/export-elasticsearch/src/ingester/glossary/explodeGlossaryTerms.ts @@ -2,16 +2,17 @@ import type { Glossary, Term } from "../types"; import type { GlossaryTerms } from "./types"; const conventionMatchers = - "[C|c]onventions? [C|c]ollectives?|[A|a]ccords? de [B|b]ranches?|[D|d]ispositions? [C|c]onventionnelles?"; + "[Cc]onventions? [Cc]ollectives?|[Aa]ccords? de [Bb]ranches?|[Dd]ispositions? [Cc]onventionnelles?"; const startWordBreaks = `(?<=^| |\\.|,|'|>|\\()`; const endWordBreaks = `(?= |\\.|,|'|$|<|\\))`; -const endAnchorOmit = `(?![^<]*|[^<]*|[^<]*|[^<]*)`; +const startAnchorOmit = `(?[^<]*)`; +const endAnchorOmit = `(?![^<]*(?:|||))`; const tagAttributeOmit = `(?<=(^|>)[^><]*)`; -const startTag = `${tagAttributeOmit}${startWordBreaks}`; +const startTag = `${tagAttributeOmit}${startAnchorOmit}${startWordBreaks}`; const endTag = `${endWordBreaks}${endAnchorOmit}`; export const explodeGlossaryTerms = (glossary: Glossary): GlossaryTerms[] => { @@ -23,26 +24,14 @@ export const explodeGlossaryTerms = (glossary: Glossary): GlossaryTerms[] => { return glossaryTerms; }; - +const regexSpecialChars = ["(", ")"]; const escapeRegexSpecialChars = (term: string) => { - const regexSpecialChars = [ - ".", - "+", - "*", - "?", - "^", - "$", - "(", - ")", - "[", - "]", - "{", - "}", - "|", - ]; - return regexSpecialChars.reduce((term: string, specialChar: string) => { - return term.replace(new RegExp(`\\${specialChar}`), `\\${specialChar}`); - }, term); + return regexSpecialChars.reduce( + (escapedTerm: string, specialChar: string) => { + return escapedTerm.replace(specialChar, `\\${specialChar}`); + }, + term + ); }; const explodeTerm = (term: Term): GlossaryTerms[] => { @@ -72,7 +61,7 @@ const regexCapital = (term: string) => { const firstCharUp = firstChar.toUpperCase(); const firstCharRegex = firstCharLow !== firstCharUp - ? `[${firstCharLow}|${firstCharUp}]` + ? `[${firstCharLow}${firstCharUp}]` : firstCharLow; return `${ regexString ? `${regexString} ` : ""