From ef064de97a703cbda9542a6175cd3921690b6610 Mon Sep 17 00:00:00 2001 From: 3y3 <3y3@ya.ru> Date: Tue, 3 Sep 2024 02:36:02 +0300 Subject: [PATCH] fix: Fix heading anchors handling --- src/consumer/index.ts | 21 +-- src/consumer/split.ts | 32 ++-- src/consumer/utils.ts | 156 ++++++++++++++++-- .../__snapshots__/index.spec.ts.snap | 119 +++++++++---- src/integration/index.spec.ts | 8 +- 5 files changed, 258 insertions(+), 78 deletions(-) diff --git a/src/consumer/index.ts b/src/consumer/index.ts index 01a810f..cfaf6ca 100644 --- a/src/consumer/index.ts +++ b/src/consumer/index.ts @@ -1,5 +1,3 @@ -import {token} from 'src/utils'; - import {dropUselessTokens, eruler, gobble} from './utils'; import {split} from './split'; import {CriticalProcessingError} from './error'; @@ -115,27 +113,10 @@ export class Consumer { }; consume = (part: Token[], past?: string) => { - let [before, tokens, after] = dropUselessTokens(part); - - if (!this.compact && tokens.length) { - [before, tokens, after] = [[], part, []]; - } + const [before, tokens, after] = dropUselessTokens(part, !this.compact); this.drop(before); - if (tokens.length === 1) { - // If single contentful token is something like liquid variable - // then this token is useless for translation. - if (tokens[0].type === 'liquid') { - after = tokens.concat(after); - tokens = []; - } else if (tokens[0].type !== 'text') { - tokens[0] = token('text', { - content: tokens[0].content, - }); - } - } - if (tokens.length) { // erule has side effects and can modify tokens content // so we need to generate xliff only after original content replacement diff --git a/src/consumer/split.ts b/src/consumer/split.ts index 53bc394..78eceb6 100644 --- a/src/consumer/split.ts +++ b/src/consumer/split.ts @@ -3,30 +3,38 @@ import {sentenize} from '@diplodoc/sentenizer'; import {token} from 'src/utils'; import {mtre} from 'src/symbols'; -import {eruler, firstContentful, gobble, lastContentful} from './utils'; +import {eruler, gobble, head, splitByContent, tail} from './utils'; const hasContent = (token: Token) => token.content || (token.markup && !token.skip); export function trim(part: Token[]) { - const [first, iFirst] = firstContentful(part); - if (first) { - part[iFirst] = token(first.type, { + const [before, tokens, after] = splitByContent(part); + + if (!tokens.length) { + return part; + } + + const first = head(tokens) as Token; + head( + tokens, + token(first.type, { ...first, content: first.content.trimStart(), generated: (first.generated || '') + '|trimStart', - }); - } + }), + ); - const [last, iLast] = lastContentful(part); - if (last) { - part[iLast] = token(last.type, { + const last = tail(tokens) as Token; + tail( + tokens, + token(last.type, { ...last, content: last.content.trimEnd(), generated: (last.generated || '') + '|trimEnd', - }); - } + }), + ); - return part; + return [...before, ...tokens, ...after]; } function exclude(content: string, tokens: Token[]) { diff --git a/src/consumer/utils.ts b/src/consumer/utils.ts index ef5c4c6..032d0bb 100644 --- a/src/consumer/utils.ts +++ b/src/consumer/utils.ts @@ -62,28 +62,152 @@ export const gobble: Gobbler = (content, [start, end], token, i) => { return [-1, -1]; }; -const reflink = (token: Token) => token.reflink; -const isContentful = (token: Token) => !reflink(token) && token.content.replace(mtre, '')?.trim(); +function isContentful(token: Token) { + return Boolean(token.content.replace(mtre, '')?.trim()); +} -export const firstContentful = (tokens: Token[]): [null | Token, number] => { - const index = tokens.findIndex(isContentful); +function isTranslatable(token: Token) { + return Boolean(isContentful(token) && token.type !== 'liquid'); +} - return index > -1 ? [tokens[index], index] : [null, -1]; -}; -export const lastContentful = (tokens: Token[]): [null | Token, number] => { - // @ts-ignore - const index = tokens.findLastIndex(isContentful); +export function dropUselessTokens(tokens: Token[], accurate = false) { + if (accurate) { + const grouped = groupUselessTokens(tokens); + + if (grouped) { + return splitByContent(grouped, isTranslatable); + } + } - return index > -1 ? [tokens[index], index] : [null, -1]; + return splitByContent(tokens, isTranslatable); +} + +type TokenGroup = { + role: string; + type: string; + child: (Token | TokenGroup)[]; + parent?: TokenGroup; }; -export function dropUselessTokens(tokens: Token[]) { - const [, first] = firstContentful(tokens); - const [, last] = lastContentful(tokens); +export function head(tokens: (TokenGroup | Token)[], value?: TokenGroup | Token) { + if (value) { + tokens[0] = value; + } + + return tokens[0]; +} + +export function tail(tokens: (TokenGroup | Token)[], value?: TokenGroup | Token) { + if (value) { + tokens[tokens.length - 1] = value; + } + + return tokens[tokens.length - 1]; +} + +function matchGroup(token: Token) { + const match = /(.*?)_(open|close)/.exec(token.type); + + return match + ? { + type: match[1], + kind: match[2], + } + : null; +} + +function isGroup(token: Token | TokenGroup): token is TokenGroup { + return 'role' in token && token.role === 'group'; +} + +function groupUselessTokens(tokens: Token[]): (Token | TokenGroup)[] | null { + const tree = {role: 'group', type: 'root', child: []}; + + let group: TokenGroup = tree; + for (const token of tokens) { + const match = matchGroup(token); + if (match) { + if (match.kind === 'open') { + group.child.push( + (group = { + role: 'group', + type: match.type, + child: [token], + parent: group, + }), + ); + } else if (group.type === match.type) { + group.child.push(token); + group = group.parent as TokenGroup; + } else { + return null; + } + } else { + group.child.push(token); + } + } + + return tree.child; +} + +export function splitByContent(grouped: (Token | TokenGroup)[], hasContent = isContentful) { + const before: Token[] = []; + const content: Token[] = []; + const after: Token[] = []; + + let contentful = false; + let action = shift; + // shift -> pop -> end + while (action) { + action = action(); + } + + return contentful ? [before, content, after] : [before.concat(content), [], after]; + + // consumes all useless tokens before content + function shift() { + const token = head(grouped); + if (!token || isGroup(token) || isContentful(token)) { + return pop; + } + + before.push(grouped.shift() as Token); - if (first === -1) { - return [tokens, [], []]; + return shift; } - return [tokens.slice(0, first), tokens.slice(first, last + 1), tokens.slice(last + 1)]; + // consumes all useless tokens after content + function pop() { + const token = tail(grouped); + if (!token || isGroup(token) || isContentful(token)) { + return end; + } + + after.unshift(grouped.pop() as Token); + + return pop; + } + + // ungroup grouped content + // counts if content is really useful + function end() { + const token = grouped.shift(); + if (!token) { + return; + } + + if (isGroup(token)) { + grouped.unshift(...token.child); + + return end; + } + + if (hasContent(token)) { + contentful = true; + } + + content.push(token as Token); + + return end; + } } diff --git a/src/integration/__snapshots__/index.spec.ts.snap b/src/integration/__snapshots__/index.spec.ts.snap index 59baa19..613a824 100644 --- a/src/integration/__snapshots__/index.spec.ts.snap +++ b/src/integration/__snapshots__/index.spec.ts.snap @@ -226,7 +226,10 @@ exports[`integration computes valid sentenses: skeleton expr 1`] = ` %%%2_s-3%%% %%%3_s-5%%%" `; -exports[`integration computes valid sentenses: skeleton main 1`] = `"%%%0%%%%%%1%%% %%%2%%%"`; +exports[`integration computes valid sentenses: skeleton main 1`] = ` +"%%%0%%% +%%%1%%% %%%2%%%" +`; exports[`integration computes valid sentenses: xliff expr 1`] = ` " @@ -263,8 +266,7 @@ exports[`integration computes valid sentenses: xliff main 1`] = ` - Записывает или удаляет элементы из таблиц. + Записывает или удаляет элементы из таблиц. Один вызов может записать до 16 Мб данных, что может включать до 25 запросов на размещение или удаление. @@ -281,6 +283,7 @@ exports[`integration handles   in lists: skeleton expr 1`] = `"- %%%1%%%"`; exports[`integration handles   in lists: skeleton main 1`] = ` "- %%%0%%% +   %%%1%%%" `; @@ -313,8 +316,7 @@ exports[`integration handles   in lists: xliff main 1`] = ` - Some text  + Some text Some other text @@ -387,9 +389,11 @@ exports[`integration handles blockquotes: skeleton expr 1`] = ` `; exports[`integration handles blockquotes: skeleton main 1`] = ` -"> %%%0%%%> %%%1%%% +"> %%%0%%% +> %%%1%%% > -> > %%%2%%%> > %%%3%%% +> > %%%2%%% +> > %%%3%%% > > %%%4%%%" `; @@ -416,15 +420,13 @@ exports[`integration handles blockquotes: xliff main 1`] = ` - 1 + 1 2 - 3 + 3 4 @@ -587,7 +589,10 @@ exports[`integration handles empty image in list: skeleton expr 1`] = ` ![](../_assets/stat-segment.png)" `; -exports[`integration handles empty image in list: skeleton main 1`] = `"1. %%%0%%% ![](../_assets/stat-segment.png)"`; +exports[`integration handles empty image in list: skeleton main 1`] = ` +"1. %%%0%%% + ![](../_assets/stat-segment.png)" +`; exports[`integration handles empty image in list: xliff expr 1`] = ` " @@ -618,8 +623,7 @@ exports[`integration handles empty image in list: xliff main 1`] = ` - A. + A. @@ -744,9 +748,63 @@ exports[`integration handles empty links: xliff main 1`] = ` " `; +exports[`integration handles heading anchors: skeleton expr 1`] = ` +"# %%%1%%% + +## %%%2%%%" +`; + +exports[`integration handles heading anchors: skeleton main 1`] = ` +"# %%%0%%% + +## %%%1%%% {#heading_2}" +`; + +exports[`integration handles heading anchors: xliff expr 1`] = ` +" + + +
+ + + +
+ + + Заголовок 1 + + + Заголовок 2 + + +
+
" +`; + +exports[`integration handles heading anchors: xliff main 1`] = ` +" + + +
+ + + +
+ + + Заголовок 1 + + + Заголовок 2 + + +
+
" +`; + exports[`integration handles html line breaks: skeleton expr 1`] = `"%%%1_s-1%%%
%%%2_s-3%%%"`; -exports[`integration handles html line breaks: skeleton main 1`] = `"%%%0%%%
%%%1%%%"`; +exports[`integration handles html line breaks: skeleton main 1`] = `"%%%0%%%

%%%1%%%"`; exports[`integration handles html line breaks: xliff expr 1`] = ` " @@ -782,7 +840,7 @@ exports[`integration handles html line breaks: xliff main 1`] = ` |Moes | Matter +\\" id=\\"g-test\\"/>Moes | Matter Moes Matter @@ -1133,7 +1191,7 @@ exports[`integration handles link with image with variables in title: xliff expr - ![Image](index-mini.md "Title . And what?" x100)Title . And what? + Title . And what?Title . And what? @@ -1157,10 +1215,13 @@ exports[`integration handles link with image with variables in title: xliff main And what? - ![Image](index-mini.md "Title . + Title . - And what?" x100) + And what? + + + Image @@ -1406,7 +1467,8 @@ exports[`integration handles self closing html tags: skeleton expr 1`] = ` exports[`integration handles self closing html tags: skeleton main 1`] = ` "# %%%0%%% -%%%1%%%%%%2%%% +%%%1%%% +%%%2%%% @@ -1459,8 +1521,7 @@ exports[`integration handles self closing html tags: xliff main 1`] = ` Header
- A a. + A a. B b. @@ -1533,7 +1594,8 @@ exports[`integration handles terms: skeleton expr 1`] = ` exports[`integration handles terms: skeleton main 1`] = ` "%%%0%%% -[*term]: %%%1%%%%%%2%%%" +[*term]: %%%1%%% +%%%2%%%" `; exports[`integration handles terms: xliff expr 1`] = ` @@ -1574,8 +1636,7 @@ exports[`integration handles terms: xliff main 1`] = ` Some term - Some multiline term. + Some multiline term. Here. @@ -1649,7 +1710,8 @@ exports[`integration handles wrond ordered terms: skeleton main 1`] = ` "%%%0%%% [*term]: - %%%1%%% %%%2%%%" + %%%1%%% + %%%2%%%" `; exports[`integration handles wrond ordered terms: xliff expr 1`] = ` @@ -1690,8 +1752,7 @@ exports[`integration handles wrond ordered terms: xliff main 1`] = ` Some term - Some multiline term. + Some multiline term. Here. diff --git a/src/integration/index.spec.ts b/src/integration/index.spec.ts index 7c4f3d2..ac74dc2 100644 --- a/src/integration/index.spec.ts +++ b/src/integration/index.spec.ts @@ -274,7 +274,7 @@ test('handles image with variables in title')` `; test('handles link with image with variables in title')` - [![Image](index-mini.md "Title {{product-name-short.station-mini-old}}. And what?" x100)](index-mini.md "Title {{product-name-short.station-mini-old}}. And what?") + [![Image](index-mini.md "Title {{product-name-short.station-mini-old}}. And what?" =x100)](index-mini.md "Title {{product-name-short.station-mini-old}}. And what?") `; test('handles empty link in list')` @@ -339,3 +339,9 @@ test('handles html line breaks')` [**Moes** | Matter](https://link.html?sku_id=12000038372920468)

Moes Matter | `; + +test('handles heading anchors')` + # Заголовок 1 + + ## Заголовок 2 {#heading_2} +`;