fix: Fix heading anchors handling

diplodoc-platform · Sep 2, 2024 · ef064de
1 parent 9f7859d
commit ef064de
Show file tree

Hide file tree

Showing 5 changed files with 258 additions and 78 deletions.
diff --git a/src/consumer/index.ts b/src/consumer/index.ts
@@ -1,5 +1,3 @@
-import {token} from 'src/utils';
-
 import {dropUselessTokens, eruler, gobble} from './utils';
 import {split} from './split';
 import {CriticalProcessingError} from './error';
@@ -115,27 +113,10 @@ export class Consumer {
     };
 
     consume = (part: Token[], past?: string) => {
-        let [before, tokens, after] = dropUselessTokens(part);
-
-        if (!this.compact && tokens.length) {
-            [before, tokens, after] = [[], part, []];
-        }
+        const [before, tokens, after] = dropUselessTokens(part, !this.compact);
 
         this.drop(before);
 
-        if (tokens.length === 1) {
-            // If single contentful token is something like liquid variable
-            // then this token is useless for translation.
-            if (tokens[0].type === 'liquid') {
-                after = tokens.concat(after);
-                tokens = [];
-            } else if (tokens[0].type !== 'text') {
-                tokens[0] = token('text', {
-                    content: tokens[0].content,
-                });
-            }
-        }
-
         if (tokens.length) {
             // eruler has side effects and can modify the tokens' content,
             // so we need to generate xliff only after the original content is replaced

diff --git a/src/consumer/split.ts b/src/consumer/split.ts
@@ -3,30 +3,38 @@ import {sentenize} from '@diplodoc/sentenizer';
 import {token} from 'src/utils';
 import {mtre} from 'src/symbols';
 
-import {eruler, firstContentful, gobble, lastContentful} from './utils';
+import {eruler, gobble, head, splitByContent, tail} from './utils';
 
 const hasContent = (token: Token) => token.content || (token.markup && !token.skip);
 
 export function trim(part: Token[]) {
-    const [first, iFirst] = firstContentful(part);
-    if (first) {
-        part[iFirst] = token(first.type, {
+    const [before, tokens, after] = splitByContent(part);
+
+    if (!tokens.length) {
+        return part;
+    }
+
+    const first = head(tokens) as Token;
+    head(
+        tokens,
+        token(first.type, {
             ...first,
             content: first.content.trimStart(),
             generated: (first.generated || '') + '|trimStart',
-        });
-    }
+        }),
+    );
 
-    const [last, iLast] = lastContentful(part);
-    if (last) {
-        part[iLast] = token(last.type, {
+    const last = tail(tokens) as Token;
+    tail(
+        tokens,
+        token(last.type, {
             ...last,
             content: last.content.trimEnd(),
             generated: (last.generated || '') + '|trimEnd',
-        });
-    }
+        }),
+    );
 
-    return part;
+    return [...before, ...tokens, ...after];
 }
 
 function exclude(content: string, tokens: Token[]) {

diff --git a/src/consumer/utils.ts b/src/consumer/utils.ts
@@ -62,28 +62,152 @@ export const gobble: Gobbler<Token> = (content, [start, end], token, i) => {
     return [-1, -1];
 };
 
-const reflink = (token: Token) => token.reflink;
-const isContentful = (token: Token) => !reflink(token) && token.content.replace(mtre, '')?.trim();
+function isContentful(token: Token) {
+    return Boolean(token.content.replace(mtre, '')?.trim());
+}
 
-export const firstContentful = (tokens: Token[]): [null | Token, number] => {
-    const index = tokens.findIndex(isContentful);
+function isTranslatable(token: Token) {
+    return Boolean(isContentful(token) && token.type !== 'liquid');
+}
 
-    return index > -1 ? [tokens[index], index] : [null, -1];
-};
-export const lastContentful = (tokens: Token[]): [null | Token, number] => {
-    // @ts-ignore
-    const index = tokens.findLastIndex(isContentful);
+export function dropUselessTokens(tokens: Token[], accurate = false) {
+    if (accurate) {
+        const grouped = groupUselessTokens(tokens);
+
+        if (grouped) {
+            return splitByContent(grouped, isTranslatable);
+        }
+    }
 
-    return index > -1 ? [tokens[index], index] : [null, -1];
+    return splitByContent(tokens, isTranslatable);
+}
+
+type TokenGroup = {
+    role: string;
+    type: string;
+    child: (Token | TokenGroup)[];
+    parent?: TokenGroup;
 };
 
-export function dropUselessTokens(tokens: Token[]) {
-    const [, first] = firstContentful(tokens);
-    const [, last] = lastContentful(tokens);
+export function head(tokens: (TokenGroup | Token)[], value?: TokenGroup | Token) {
+    if (value) {
+        tokens[0] = value;
+    }
+
+    return tokens[0];
+}
+
+export function tail(tokens: (TokenGroup | Token)[], value?: TokenGroup | Token) {
+    if (value) {
+        tokens[tokens.length - 1] = value;
+    }
+
+    return tokens[tokens.length - 1];
+}
+
+function matchGroup(token: Token) {
+    const match = /(.*?)_(open|close)/.exec(token.type);
+
+    return match
+        ? {
+              type: match[1],
+              kind: match[2],
+          }
+        : null;
+}
+
+function isGroup(token: Token | TokenGroup): token is TokenGroup {
+    return 'role' in token && token.role === 'group';
+}
+
+function groupUselessTokens(tokens: Token[]): (Token | TokenGroup)[] | null {
+    const tree = {role: 'group', type: 'root', child: []};
+
+    let group: TokenGroup = tree;
+    for (const token of tokens) {
+        const match = matchGroup(token);
+        if (match) {
+            if (match.kind === 'open') {
+                group.child.push(
+                    (group = {
+                        role: 'group',
+                        type: match.type,
+                        child: [token],
+                        parent: group,
+                    }),
+                );
+            } else if (group.type === match.type) {
+                group.child.push(token);
+                group = group.parent as TokenGroup;
+            } else {
+                return null;
+            }
+        } else {
+            group.child.push(token);
+        }
+    }
+
+    return tree.child;
+}
+
+export function splitByContent(grouped: (Token | TokenGroup)[], hasContent = isContentful) {
+    const before: Token[] = [];
+    const content: Token[] = [];
+    const after: Token[] = [];
+
+    let contentful = false;
+    let action = shift;
+    // shift -> pop -> end
+    while (action) {
+        action = action();
+    }
+
+    return contentful ? [before, content, after] : [before.concat(content), [], after];
+
+    // consumes all useless tokens before content
+    function shift() {
+        const token = head(grouped);
+        if (!token || isGroup(token) || isContentful(token)) {
+            return pop;
+        }
+
+        before.push(grouped.shift() as Token);
 
-    if (first === -1) {
-        return [tokens, [], []];
+        return shift;
     }
 
-    return [tokens.slice(0, first), tokens.slice(first, last + 1), tokens.slice(last + 1)];
+    // consumes all useless tokens after content
+    function pop() {
+        const token = tail(grouped);
+        if (!token || isGroup(token) || isContentful(token)) {
+            return end;
+        }
+
+        after.unshift(grouped.pop() as Token);
+
+        return pop;
+    }
+
+    // flattens grouped tokens back into a plain token list
+    // and records whether any of them is really useful content
+    function end() {
+        const token = grouped.shift();
+        if (!token) {
+            return;
+        }
+
+        if (isGroup(token)) {
+            grouped.unshift(...token.child);
+
+            return end;
+        }
+
+        if (hasContent(token)) {
+            contentful = true;
+        }
+
+        content.push(token as Token);
+
+        return end;
+    }
 }