diff --git a/src/experiment/adapter/adapter.ts b/src/experiment/adapter/adapter.ts index 4c0852a..98b7997 100644 --- a/src/experiment/adapter/adapter.ts +++ b/src/experiment/adapter/adapter.ts @@ -9,6 +9,7 @@ import {translate} from 'src/experiment/translate'; export function extract(content: string, options: ExtractOptions): ExtractOutput { const {xliff, skeleton} = transform(content, { + compact: options.compact, showTokens: options.showTokens, showMap: options.showMap, showRawMap: options.showRawMap, diff --git a/src/experiment/adapter/types.ts b/src/experiment/adapter/types.ts index b876cfe..ec9b2b7 100644 --- a/src/experiment/adapter/types.ts +++ b/src/experiment/adapter/types.ts @@ -3,6 +3,7 @@ import languages from '@cospired/i18n-iso-languages'; import {Xliff} from 'src/experiment/xliff/xliff'; import {TransformOptions} from 'src/experiment/transform'; +import {TranslateOptions} from 'src/experiment/translate'; const languagesList = languages.langs(); @@ -31,9 +32,7 @@ export type ExtractOutput = { xliff: Xliff; }; -export type ComposeOptions = Experiment & { - useSource?: boolean; -}; +export type ComposeOptions = Experiment & TranslateOptions; export interface ComposeOutput { document: string; diff --git a/src/experiment/transform.ts b/src/experiment/transform.ts index f7c6258..b915c49 100644 --- a/src/experiment/transform.ts +++ b/src/experiment/transform.ts @@ -38,7 +38,7 @@ import { tokenizeYaml, variableReplace, } from './utils'; -import {applySegmentation} from './utils/segmentation'; +import {applySegmentation, trimInlineToken} from './utils/segmentation'; import {buildXliff, markTokens, prepareInlineToken} from './xliff/builder'; /* eslint-disable no-console */ @@ -49,6 +49,7 @@ const SHOW_RAW_MAP = false; const SHOW_VARS = false; export interface TransformOptions { + compact?: boolean; showTokens?: boolean; showMap?: boolean; showRawMap?: boolean; @@ -61,6 +62,7 @@ export function transform(content: string, options?: TransformOptions) { showMap = SHOW_MAP, showRawMap = SHOW_RAW_MAP, showVars = SHOW_VARS, + compact, } = options || {}; let mdData = content; @@ -172,21 +174,47 @@ export function transform(content: string, options?: TransformOptions) { return false; }); + eachTokens(allTokens, (token) => { + if (token.type === 'inline') { + if (!token.children) return false; + + const {yamlToken} = getExtraToken(tokenExtraMap, token); + const targetToken = yamlToken ?? token; + const data = yamlToken ? yamlToken.content : mdData; + + targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data); + return true; + } + return false; + }); + + eachTokens(allTokens, (token, _idx, tokens) => { + if (token.type === 'inline' && !token.attrGet('yaml')) { + const trimmedTokens = trimInlineToken(mdData, token, tokenExtraMap); + if (trimmedTokens.length > 1) { + const pos = tokens.indexOf(token); + if (pos === -1) { + throw new Error('Token not found for trimming'); + } + tokens.splice(pos, 1, ...trimmedTokens); + } + return true; + } + return false; + }); + const replaceParts: ReplacePart[] = []; const typeAction = { inline(token: Token) { - const extraToken = getExtraToken(tokenExtraMap, token); if (!token.children) return false; + const extraToken = getExtraToken(tokenExtraMap, token); const {yamlToken} = extraToken; const targetToken = yamlToken ?? token; - const data = yamlToken ? yamlToken.content : mdData; - - targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data); const hasText = someTokens( - targetToken.children, + targetToken.children ?? [], (tokenLocal) => tokenLocal.type === 'text' && !passSymbols.test(tokenLocal.content), ); @@ -195,7 +223,7 @@ export function transform(content: string, options?: TransformOptions) { const id = `${replaceParts.length + 1}${postfix}`; replaceParts.push({...extraToken, token, id}); - markTokens(targetToken.children, tokenExtraMap); + markTokens(targetToken.children ?? [], tokenExtraMap); } return true; }, @@ -233,7 +261,7 @@ export function transform(content: string, options?: TransformOptions) { }); } - const xliff = buildXliff(replaceParts, tokenExtraMap, mdData); + const xliff = buildXliff(replaceParts, tokenExtraMap, mdData, compact); return {skeleton: outMd, variables: variableTextMap, xliff}; } diff --git a/src/experiment/translate.ts b/src/experiment/translate.ts index 7c8c9f3..2c55954 100644 --- a/src/experiment/translate.ts +++ b/src/experiment/translate.ts @@ -1,18 +1,27 @@ import yaml from 'js-yaml'; -import xmlParser, {XmlParserElementChildNode, XmlParserElementNode} from 'xml-parser-xo'; - -import {ComposeOptions} from 'src/experiment/adapter/types'; +import xmlParser, { + XmlParserElementChildNode, + XmlParserElementNode, + XmlParserResult, +} from 'xml-parser-xo'; import {YamlQuotingTypeQuote} from './constants'; import {unescapeXmlText} from './xliff/utils'; /* eslint-disable no-console */ -export function translate(xliffData: string, skeletonData: string, options?: ComposeOptions) { - const {useSource} = options ?? {}; - const xliff = xmlParser(xliffData, { - strictMode: true, - }); +export type TranslateOptions = { + useSource?: boolean; + parsedXliff?: XmlParserResult; +}; + +export function translate(xliffData: string, skeletonData: string, options?: TranslateOptions) { + const {useSource, parsedXliff} = options ?? {}; + const xliff = + parsedXliff ?? + xmlParser(xliffData, { + strictMode: true, + }); const externalFileElement = findNodeByNamePath(xliff.root, [ 'header', @@ -128,7 +137,7 @@ function nodeToString(node: XmlParserElementChildNode): string { throw new Error(`Unsupported node type: ${node.type}`); } -function getAttr(node: XmlParserElementNode, attr: string) { +export function getAttr(node: XmlParserElementNode, attr: string) { let value = node.attributes[attr]; if (typeof value !== 'undefined') { value = unescapeXmlText(value); @@ -155,7 +164,7 @@ function findNodeByName(rooNode: XmlParserElementChildNode, name: string) { return next(rooNode); } -function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) { +export function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) { let lastResult: XmlParserElementNode | undefined; let cursor: XmlParserElementChildNode = node; for (let i = 0, len = names.length; i < len; i++) { diff --git a/src/experiment/utils/segmentation.ts b/src/experiment/utils/segmentation.ts index 346760a..3faf7ab 100644 --- a/src/experiment/utils/segmentation.ts +++ b/src/experiment/utils/segmentation.ts @@ -87,7 +87,92 @@ export function applySegmentation( return inlineTokens; } -function splitInlineToken( +export function trimInlineToken(mdData: string, inlineToken: Token, tokenExtraMap: TokenExtraMap) { + const result = []; + let children; + let remainingPart = inlineToken; + + const inheritPostfix = function (newToken: Token, prevToken: Token) { + const idPostfix = prevToken.attrGet('idPostfix'); + if (idPostfix) { + newToken.attrSet('idPostfix', idPostfix); + } + }; + + const canTrimToken = (token: Token) => { + return ['html_inline', 'attr_anchor', 'softbreak', 'liquid_operator'].includes(token.type); + }; + + children = (remainingPart.children ?? []).slice(0); + let leftPoint: number | undefined; + for (let i = 0, len = children.length; i < len; i++) { + const token = children[i]; + if (canTrimToken(token)) { + const extraToken = getExtraToken(tokenExtraMap, token); + leftPoint = extraToken.end; + } else { + if (token.type === 'text' && leftPoint) { + const offset = token.content.length - token.content.trimStart().length; + if (offset > 0) { + leftPoint += offset; + } + } + break; + } + } + if (leftPoint) { + const [leftInlineToken, rightInlineToken] = splitInlineToken( + mdData, + remainingPart, + leftPoint, + tokenExtraMap, + ); + if (leftInlineToken.content.length) { + result.push(leftInlineToken); + } + inheritPostfix(rightInlineToken, inlineToken); + remainingPart = rightInlineToken; + } + + children = (remainingPart.children ?? []).slice(0); + let rightPoint: number | undefined; + for (let i = children.length - 1; i >= 0; i--) { + const token = children[i]; + if (canTrimToken(token)) { + const extraToken = getExtraToken(tokenExtraMap, token); + rightPoint = extraToken.start; + } else { + if (token.type === 'text' && rightPoint) { + const offset = token.content.length - token.content.trimEnd().length; + if (offset > 0) { + rightPoint -= offset; + } + } + break; + } + } + if (rightPoint) { + const [leftInlineToken, rightInlineToken] = splitInlineToken( + mdData, + remainingPart, + rightPoint, + tokenExtraMap, + ); + if (leftInlineToken.content.length) { + inheritPostfix(leftInlineToken, remainingPart); + result.push(leftInlineToken); + } + remainingPart = rightInlineToken; + } + + if (remainingPart.content.length) { + result.push(remainingPart); + } + + return result; +} + +export function splitInlineToken( mdData: string, inlineToken: Token, point: number, diff --git a/src/experiment/xliff/builder.ts b/src/experiment/xliff/builder.ts index bea8287..e977a5a 100644 --- a/src/experiment/xliff/builder.ts +++ b/src/experiment/xliff/builder.ts @@ -21,16 +21,17 @@ export const buildXliff = ( replaceParts: ReplacePart[], tokenExtraMap: TokenExtraMap, mdData: string, + compact = false, ) => { - const xliff = new Xliff(); + const xliff = new Xliff({compact}); replaceParts.forEach(({token, id, yamlToken}) => { let transUnit; if (yamlToken) { const data = yamlToken.content; - transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data); + transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data, compact); } else { - transUnit = buildTransUnit(id, token, tokenExtraMap, mdData); + transUnit = buildTransUnit(id, token, tokenExtraMap, mdData, compact); } xliff.appendTransUnit(transUnit); }); @@ -43,10 +44,11 @@ function buildTransUnit( inlineToken: Token, tokenExtraMap: TokenExtraMap, mdData: string, + compact: boolean, ) { const transUnit = new TransUnitElement(transUnitId); - const source = new SourceElement(); + const source = new SourceElement({compact}); transUnit.appendElement(source); let level = 0; @@ -64,10 +66,15 @@ function buildTransUnit( const [, name, state] = typeM; if (state === 'open') { const element = new GElement(id, raw); - if (name in tokenNameCType) { - element.setAttr('ctype', tokenNameCType[name as keyof typeof tokenNameCType]); + if (!compact) { + if (name in tokenNameCType) { + element.setAttr( + 'ctype', + tokenNameCType[name as keyof typeof tokenNameCType], + ); + } + element.setAttr('x-type', name); } - element.setAttr('x-type', name); element.setAttr('x-begin', raw); levelToken.push(element); @@ -96,7 +103,9 @@ function buildTransUnit( }); } else if (raw.length > 0) { const element = new XElement(id, raw); - element.setAttr('x-type', token.type); + if (!compact) { + element.setAttr('x-type', token.type); + } parentElement.appendElement(element); } }); diff --git a/src/experiment/xliff/elements/BXElement.ts b/src/experiment/xliff/elements/BXElement.ts deleted file mode 100644 index b8171a8..0000000 --- a/src/experiment/xliff/elements/BXElement.ts +++ /dev/null @@ -1,19 +0,0 @@ -import {BaseElement} from './BaseElement'; - -export class BXElement extends BaseElement { - tag = 'bx'; - equivText: string; - - constructor(id: string, equivText: string) { - super(); - - this.setAttr('id', id); - this.equivText = equivText; - } - - toString() { - return super.toString({ - 'equiv-text': this.equivText, - }); - } -} diff --git a/src/experiment/xliff/elements/BaseElement.ts b/src/experiment/xliff/elements/BaseElement.ts index 49465cf..8836005 100644 --- a/src/experiment/xliff/elements/BaseElement.ts +++ b/src/experiment/xliff/elements/BaseElement.ts @@ -3,6 +3,7 @@ import {attributesToString} from '../utils'; export class BaseElement { declare tag: string; children: BaseElement[] = []; + compact?: boolean; attrs: Record = {}; diff --git a/src/experiment/xliff/elements/EXElement.ts b/src/experiment/xliff/elements/EXElement.ts deleted file mode 100644 index 948b5e6..0000000 --- a/src/experiment/xliff/elements/EXElement.ts +++ /dev/null @@ -1,19 +0,0 @@ -import {BaseElement} from './BaseElement'; - -export class EXElement extends BaseElement { - tag = 'ex'; - equivText: string; - - constructor(id: string, equivText: string) { - super(); - - this.setAttr('id', id); - this.equivText = equivText; - } - - toString() { - return super.toString({ - 'equiv-text': this.equivText, - }); - } -} diff --git a/src/experiment/xliff/elements/GElement.ts b/src/experiment/xliff/elements/GElement.ts index 46e983f..1ae4a07 100644 --- a/src/experiment/xliff/elements/GElement.ts +++ b/src/experiment/xliff/elements/GElement.ts @@ -13,7 +13,7 @@ export class GElement extends BaseElement { toString() { return super.toString({ - 'equiv-text': this.equivText, + 'equiv-text': this.compact ? undefined : this.equivText, }); } } diff --git a/src/experiment/xliff/elements/SourceElement.ts b/src/experiment/xliff/elements/SourceElement.ts index 2b47f0a..a71026e 100644 --- a/src/experiment/xliff/elements/SourceElement.ts +++ b/src/experiment/xliff/elements/SourceElement.ts @@ -3,9 +3,11 @@ import {BaseElement} from './BaseElement'; export class SourceElement extends BaseElement { tag = 'source'; - constructor() { + constructor({compact} = {compact: false}) { super(); - - this.setAttr('xml:space', 'preserve'); + this.compact = compact; + if (!this.compact) { + this.setAttr('xml:space', 'preserve'); + } } } diff --git a/src/experiment/xliff/xliff.ts b/src/experiment/xliff/xliff.ts index 050042d..4715714 100644 --- a/src/experiment/xliff/xliff.ts +++ b/src/experiment/xliff/xliff.ts @@ -5,6 +5,7 @@ import {attributesToString} from './utils'; interface XliffProps { datatype?: string; + compact?: boolean; } export class Xliff { @@ -13,11 +14,13 @@ export class Xliff { sourceLanguage = ''; datatype: string; skeletonFile?: string; + compact?: boolean; transUnits: TransUnitElement[] = []; - constructor({datatype = 'markdown'}: XliffProps = {}) { + constructor({datatype = 'markdown', compact}: XliffProps = {}) { this.datatype = datatype; + this.compact = compact; } setFile(path: string) { @@ -54,20 +57,22 @@ export class Xliff { const data = ` -
- ${ - this.skeletonFile - ? ` - - ` - : '' - } -
+ ${ + this.compact + ? '' + : `
${ + this.skeletonFile + ? ` + +` + : '' + }
` + } ${this.transUnits.map((unit) => unit.toString()).join('\n')} diff --git a/src/integration/__snapshots__/index.spec.ts.snap b/src/integration/__snapshots__/index.spec.ts.snap index 613a824..42b8073 100644 --- a/src/integration/__snapshots__/index.spec.ts.snap +++ b/src/integration/__snapshots__/index.spec.ts.snap @@ -400,6 +400,7 @@ exports[`integration handles blockquotes: skeleton main 1`] = ` exports[`integration handles blockquotes: xliff expr 1`] = ` Xliff { + "compact": false, "datatype": "markdown", "file": "file.ext", "skeletonFile": "file.skl", @@ -642,6 +643,7 @@ exports[`integration handles empty images: skeleton main 1`] = ` exports[`integration handles empty images: xliff expr 1`] = ` Xliff { + "compact": false, "datatype": "markdown", "file": "file.ext", "skeletonFile": "file.skl", @@ -723,6 +725,7 @@ exports[`integration handles empty links: skeleton main 1`] = ` exports[`integration handles empty links: xliff expr 1`] = ` Xliff { + "compact": false, "datatype": "markdown", "file": "file.ext", "skeletonFile": "file.skl", @@ -751,7 +754,7 @@ exports[`integration handles empty links: xliff main 1`] = ` exports[`integration handles heading anchors: skeleton expr 1`] = ` "# %%%1%%% -## %%%2%%%" +## %%%2%%% {#heading_2}" `; exports[`integration handles heading anchors: skeleton main 1`] = ` @@ -774,7 +777,7 @@ exports[`integration handles heading anchors: xliff expr 1`] = ` Заголовок 1 - Заголовок 2 + Заголовок 2 @@ -802,7 +805,7 @@ exports[`integration handles heading anchors: xliff main 1`] = `
" `; -exports[`integration handles html line breaks: skeleton expr 1`] = `"%%%1_s-1%%%
%%%2_s-3%%%"`; +exports[`integration handles html line breaks: skeleton expr 1`] = `"%%%1_s-1%%%

%%%2_s-3%%%"`; exports[`integration handles html line breaks: skeleton main 1`] = `"%%%0%%%

%%%1%%%"`; @@ -817,7 +820,7 @@ exports[`integration handles html line breaks: xliff expr 1`] = ` - |Moes | Matter + |Moes | Matter Moes Matter| @@ -1357,7 +1360,9 @@ exports[`integration handles links with brackets: xliff main 1`] = ` exports[`integration handles liquid in html attributes: skeleton expr 1`] = ` "# %%%1%%% -%%%2%%%" + +%%%2%%% +" `; exports[`integration handles liquid in html attributes: skeleton main 1`] = ` @@ -1382,7 +1387,7 @@ exports[`integration handles liquid in html attributes: xliff expr 1`] = ` Variable in href attribute - Button + Button @@ -1421,6 +1426,7 @@ exports[`integration handles multiline inline code: skeleton main 1`] = `"%%%0%% exports[`integration handles multiline inline code: xliff expr 1`] = ` Xliff { + "compact": false, "datatype": "markdown", "file": "file.ext", "skeletonFile": "file.skl", @@ -1559,6 +1565,7 @@ exports[`integration handles single variable as content: skeleton main 1`] = ` exports[`integration handles single variable as content: xliff expr 1`] = ` Xliff { + "compact": false, "datatype": "markdown", "file": "file.ext", "skeletonFile": "file.skl", diff --git a/src/integration/index.spec.ts b/src/integration/index.spec.ts index ac74dc2..4626065 100644 --- a/src/integration/index.spec.ts +++ b/src/integration/index.spec.ts @@ -37,6 +37,7 @@ const test = (() => { function expr() { const {xliff, skeleton} = extract(markdown, { + compact: false, originalFile: 'file.ext', skeletonFile: 'file.skl', source: {