diff --git a/src/experiment/adapter/adapter.ts b/src/experiment/adapter/adapter.ts
index 4c0852a..98b7997 100644
--- a/src/experiment/adapter/adapter.ts
+++ b/src/experiment/adapter/adapter.ts
@@ -9,6 +9,7 @@ import {translate} from 'src/experiment/translate';
export function extract(content: string, options: ExtractOptions): ExtractOutput {
const {xliff, skeleton} = transform(content, {
+ compact: options.compact,
showTokens: options.showTokens,
showMap: options.showMap,
showRawMap: options.showRawMap,
diff --git a/src/experiment/adapter/types.ts b/src/experiment/adapter/types.ts
index b876cfe..ec9b2b7 100644
--- a/src/experiment/adapter/types.ts
+++ b/src/experiment/adapter/types.ts
@@ -3,6 +3,7 @@ import languages from '@cospired/i18n-iso-languages';
import {Xliff} from 'src/experiment/xliff/xliff';
import {TransformOptions} from 'src/experiment/transform';
+import {TranslateOptions} from 'src/experiment/translate';
const languagesList = languages.langs();
@@ -31,9 +32,7 @@ export type ExtractOutput = {
xliff: Xliff;
};
-export type ComposeOptions = Experiment & {
- useSource?: boolean;
-};
+/**
+ * Intersection of `Experiment` with `TranslateOptions`, the options type
+ * accepted by `translate` from 'src/experiment/translate'.
+ */
+export type ComposeOptions = Experiment & TranslateOptions;
export interface ComposeOutput {
document: string;
diff --git a/src/experiment/transform.ts b/src/experiment/transform.ts
index f7c6258..b915c49 100644
--- a/src/experiment/transform.ts
+++ b/src/experiment/transform.ts
@@ -38,7 +38,7 @@ import {
tokenizeYaml,
variableReplace,
} from './utils';
-import {applySegmentation} from './utils/segmentation';
+import {applySegmentation, trimInlineToken} from './utils/segmentation';
import {buildXliff, markTokens, prepareInlineToken} from './xliff/builder';
/* eslint-disable no-console */
@@ -49,6 +49,7 @@ const SHOW_RAW_MAP = false;
const SHOW_VARS = false;
export interface TransformOptions {
+ compact?: boolean;
showTokens?: boolean;
showMap?: boolean;
showRawMap?: boolean;
@@ -61,6 +62,7 @@ export function transform(content: string, options?: TransformOptions) {
showMap = SHOW_MAP,
showRawMap = SHOW_RAW_MAP,
showVars = SHOW_VARS,
+ compact,
} = options || {};
let mdData = content;
@@ -172,21 +174,47 @@ export function transform(content: string, options?: TransformOptions) {
return false;
});
+ eachTokens(allTokens, (token) => {
+ if (token.type === 'inline') {
+ if (!token.children) return false;
+
+ const {yamlToken} = getExtraToken(tokenExtraMap, token);
+ const targetToken = yamlToken ?? token;
+ const data = yamlToken ? yamlToken.content : mdData;
+
+ targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data);
+ return true;
+ }
+ return false;
+ });
+
+ eachTokens(allTokens, (token, _idx, tokens) => {
+ if (token.type === 'inline' && !token.attrGet('yaml')) {
+ const trimmedTokens = trimInlineToken(mdData, token, tokenExtraMap);
+ if (trimmedTokens.length > 1) {
+ const pos = tokens.indexOf(token);
+ if (pos === -1) {
+ throw new Error('Token not found for trimming');
+ }
+ tokens.splice(pos, 1, ...trimmedTokens);
+ }
+ return true;
+ }
+ return false;
+ });
+
const replaceParts: ReplacePart[] = [];
const typeAction = {
inline(token: Token) {
- const extraToken = getExtraToken(tokenExtraMap, token);
if (!token.children) return false;
+ const extraToken = getExtraToken(tokenExtraMap, token);
const {yamlToken} = extraToken;
const targetToken = yamlToken ?? token;
- const data = yamlToken ? yamlToken.content : mdData;
-
- targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data);
const hasText = someTokens(
- targetToken.children,
+ targetToken.children ?? [],
(tokenLocal) => tokenLocal.type === 'text' && !passSymbols.test(tokenLocal.content),
);
@@ -195,7 +223,7 @@ export function transform(content: string, options?: TransformOptions) {
const id = `${replaceParts.length + 1}${postfix}`;
replaceParts.push({...extraToken, token, id});
- markTokens(targetToken.children, tokenExtraMap);
+ markTokens(targetToken.children ?? [], tokenExtraMap);
}
return true;
},
@@ -233,7 +261,7 @@ export function transform(content: string, options?: TransformOptions) {
});
}
- const xliff = buildXliff(replaceParts, tokenExtraMap, mdData);
+ const xliff = buildXliff(replaceParts, tokenExtraMap, mdData, compact);
return {skeleton: outMd, variables: variableTextMap, xliff};
}
diff --git a/src/experiment/translate.ts b/src/experiment/translate.ts
index 7c8c9f3..2c55954 100644
--- a/src/experiment/translate.ts
+++ b/src/experiment/translate.ts
@@ -1,18 +1,27 @@
import yaml from 'js-yaml';
-import xmlParser, {XmlParserElementChildNode, XmlParserElementNode} from 'xml-parser-xo';
-
-import {ComposeOptions} from 'src/experiment/adapter/types';
+import xmlParser, {
+ XmlParserElementChildNode,
+ XmlParserElementNode,
+ XmlParserResult,
+} from 'xml-parser-xo';
import {YamlQuotingTypeQuote} from './constants';
import {unescapeXmlText} from './xliff/utils';
/* eslint-disable no-console */
-export function translate(xliffData: string, skeletonData: string, options?: ComposeOptions) {
- const {useSource} = options ?? {};
- const xliff = xmlParser(xliffData, {
- strictMode: true,
- });
+/**
+ * Optional settings for `translate`.
+ */
+export type TranslateOptions = {
+ // NOTE(review): presumably selects source text over target text when composing;
+ // the code that reads this flag is outside the visible hunk - confirm.
+ useSource?: boolean;
+ // Already parsed XLIFF document. When provided, `translate` uses it as is
+ // and does not run `xmlParser` over the `xliffData` string.
+ parsedXliff?: XmlParserResult;
+};
+
+export function translate(xliffData: string, skeletonData: string, options?: TranslateOptions) {
+ const {useSource, parsedXliff} = options ?? {};
+ const xliff =
+ parsedXliff ??
+ xmlParser(xliffData, {
+ strictMode: true,
+ });
const externalFileElement = findNodeByNamePath(xliff.root, [
'header',
@@ -128,7 +137,7 @@ function nodeToString(node: XmlParserElementChildNode): string {
throw new Error(`Unsupported node type: ${node.type}`);
}
-function getAttr(node: XmlParserElementNode, attr: string) {
+export function getAttr(node: XmlParserElementNode, attr: string) {
let value = node.attributes[attr];
if (typeof value !== 'undefined') {
value = unescapeXmlText(value);
@@ -155,7 +164,7 @@ function findNodeByName(rooNode: XmlParserElementChildNode, name: string) {
return next(rooNode);
}
-function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) {
+export function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) {
let lastResult: XmlParserElementNode | undefined;
let cursor: XmlParserElementChildNode = node;
for (let i = 0, len = names.length; i < len; i++) {
diff --git a/src/experiment/utils/segmentation.ts b/src/experiment/utils/segmentation.ts
index 346760a..3faf7ab 100644
--- a/src/experiment/utils/segmentation.ts
+++ b/src/experiment/utils/segmentation.ts
@@ -87,7 +87,92 @@ export function applySegmentation(
return inlineTokens;
}
-function splitInlineToken(
+/**
+ * Separates leading and trailing runs of trimmable children from an inline token.
+ *
+ * A child is trimmable when its type is `html_inline`, `attr_anchor`, `softbreak`
+ * or `liquid_operator`. The leading run ends at the `end` offset of its last
+ * trimmable child; the trailing run begins at the `start` offset of its first one.
+ * When the child right next to a run is a `text` token, its whitespace facing the
+ * run is added to the run. The token is then cut with `splitInlineToken` at the
+ * resulting offsets.
+ *
+ * @param mdData - passed through to `splitInlineToken` unchanged.
+ * @param inlineToken - inline token whose `children` are inspected.
+ * @param tokenExtraMap - map used by `getExtraToken` to look up child offsets.
+ * @returns the parts with non-empty `content`, in order: leading run, middle,
+ *   trailing run. If nothing is cut, the only element is `inlineToken` itself
+ *   (or the array is empty when its `content` is empty).
+ */
+export function trimInlineToken(mdData: string, inlineToken: Token, tokenExtraMap: TokenExtraMap) {
+    const trimmableTypes = ['html_inline', 'attr_anchor', 'softbreak', 'liquid_operator'];
+    const isTrimmable = (child: Token) => trimmableTypes.includes(child.type);
+
+    // Copies the `idPostfix` attribute from `donor` to `target` when the donor has one.
+    const copyPostfix = (target: Token, donor: Token) => {
+        const postfix = donor.attrGet('idPostfix');
+        if (postfix) {
+            target.attrSet('idPostfix', postfix);
+        }
+    };
+
+    const parts: Token[] = [];
+    let rest = inlineToken;
+
+    // Walk from the start: remember where the last leading trimmable child ends.
+    let cutAfter: number | undefined;
+    for (const child of rest.children ?? []) {
+        if (isTrimmable(child)) {
+            cutAfter = getExtraToken(tokenExtraMap, child).end;
+            continue;
+        }
+        if (child.type === 'text' && cutAfter) {
+            // Leading whitespace of the following text goes to the cut-off part.
+            cutAfter += child.content.length - child.content.trimStart().length;
+        }
+        break;
+    }
+    if (cutAfter) {
+        const [head, tail] = splitInlineToken(mdData, rest, cutAfter, tokenExtraMap);
+        if (head.content.length) {
+            parts.push(head);
+        }
+        // The part that is processed further keeps the original `idPostfix`.
+        copyPostfix(tail, inlineToken);
+        rest = tail;
+    }
+
+    // Walk from the end: remember where the first trailing trimmable child starts.
+    let cutBefore: number | undefined;
+    for (const child of [...(rest.children ?? [])].reverse()) {
+        if (isTrimmable(child)) {
+            cutBefore = getExtraToken(tokenExtraMap, child).start;
+            continue;
+        }
+        if (child.type === 'text' && cutBefore) {
+            // Trailing whitespace of the preceding text goes to the cut-off part.
+            cutBefore -= child.content.length - child.content.trimEnd().length;
+        }
+        break;
+    }
+    if (cutBefore) {
+        const [body, tail] = splitInlineToken(mdData, rest, cutBefore, tokenExtraMap);
+        if (body.content.length) {
+            copyPostfix(body, rest);
+            parts.push(body);
+        }
+        rest = tail;
+    }
+
+    if (rest.content.length) {
+        parts.push(rest);
+    }
+
+    return parts;
+}
+
+export function splitInlineToken(
mdData: string,
inlineToken: Token,
point: number,
diff --git a/src/experiment/xliff/builder.ts b/src/experiment/xliff/builder.ts
index bea8287..e977a5a 100644
--- a/src/experiment/xliff/builder.ts
+++ b/src/experiment/xliff/builder.ts
@@ -21,16 +21,17 @@ export const buildXliff = (
replaceParts: ReplacePart[],
tokenExtraMap: TokenExtraMap,
mdData: string,
+ compact = false,
) => {
- const xliff = new Xliff();
+ const xliff = new Xliff({compact});
replaceParts.forEach(({token, id, yamlToken}) => {
let transUnit;
if (yamlToken) {
const data = yamlToken.content;
- transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data);
+ transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data, compact);
} else {
- transUnit = buildTransUnit(id, token, tokenExtraMap, mdData);
+ transUnit = buildTransUnit(id, token, tokenExtraMap, mdData, compact);
}
xliff.appendTransUnit(transUnit);
});
@@ -43,10 +44,11 @@ function buildTransUnit(
inlineToken: Token,
tokenExtraMap: TokenExtraMap,
mdData: string,
+ compact: boolean,
) {
const transUnit = new TransUnitElement(transUnitId);
- const source = new SourceElement();
+ const source = new SourceElement({compact});
transUnit.appendElement(source);
let level = 0;
@@ -64,10 +66,15 @@ function buildTransUnit(
const [, name, state] = typeM;
if (state === 'open') {
const element = new GElement(id, raw);
- if (name in tokenNameCType) {
- element.setAttr('ctype', tokenNameCType[name as keyof typeof tokenNameCType]);
+ if (!compact) {
+ if (name in tokenNameCType) {
+ element.setAttr(
+ 'ctype',
+ tokenNameCType[name as keyof typeof tokenNameCType],
+ );
+ }
+ element.setAttr('x-type', name);
}
- element.setAttr('x-type', name);
element.setAttr('x-begin', raw);
levelToken.push(element);
@@ -96,7 +103,9 @@ function buildTransUnit(
});
} else if (raw.length > 0) {
const element = new XElement(id, raw);
- element.setAttr('x-type', token.type);
+ if (!compact) {
+ element.setAttr('x-type', token.type);
+ }
parentElement.appendElement(element);
}
});
diff --git a/src/experiment/xliff/elements/BXElement.ts b/src/experiment/xliff/elements/BXElement.ts
deleted file mode 100644
index b8171a8..0000000
--- a/src/experiment/xliff/elements/BXElement.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import {BaseElement} from './BaseElement';
-
-export class BXElement extends BaseElement {
- tag = 'bx';
- equivText: string;
-
- constructor(id: string, equivText: string) {
- super();
-
- this.setAttr('id', id);
- this.equivText = equivText;
- }
-
- toString() {
- return super.toString({
- 'equiv-text': this.equivText,
- });
- }
-}
diff --git a/src/experiment/xliff/elements/BaseElement.ts b/src/experiment/xliff/elements/BaseElement.ts
index 49465cf..8836005 100644
--- a/src/experiment/xliff/elements/BaseElement.ts
+++ b/src/experiment/xliff/elements/BaseElement.ts
@@ -3,6 +3,7 @@ import {attributesToString} from '../utils';
export class BaseElement {
declare tag: string;
children: BaseElement[] = [];
+ compact?: boolean;
attrs: Record