Skip to content

Commit

Permalink
add trim and compact
Browse files Browse the repository at this point in the history
  • Loading branch information
Feverqwe committed Sep 16, 2024
1 parent deac480 commit f7b4d12
Show file tree
Hide file tree
Showing 14 changed files with 198 additions and 89 deletions.
1 change: 1 addition & 0 deletions src/experiment/adapter/adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {translate} from 'src/experiment/translate';

export function extract(content: string, options: ExtractOptions): ExtractOutput {
const {xliff, skeleton} = transform(content, {
compact: options.compact,
showTokens: options.showTokens,
showMap: options.showMap,
showRawMap: options.showRawMap,
Expand Down
5 changes: 2 additions & 3 deletions src/experiment/adapter/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import languages from '@cospired/i18n-iso-languages';

import {Xliff} from 'src/experiment/xliff/xliff';
import {TransformOptions} from 'src/experiment/transform';
import {TranslateOptions} from 'src/experiment/translate';

const languagesList = languages.langs();

Expand Down Expand Up @@ -31,9 +32,7 @@ export type ExtractOutput = {
xliff: Xliff;
};

export type ComposeOptions = Experiment & {
useSource?: boolean;
};
export type ComposeOptions = Experiment & TranslateOptions;

export interface ComposeOutput {
document: string;
Expand Down
44 changes: 36 additions & 8 deletions src/experiment/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import {
tokenizeYaml,
variableReplace,
} from './utils';
import {applySegmentation} from './utils/segmentation';
import {applySegmentation, trimInlineToken} from './utils/segmentation';
import {buildXliff, markTokens, prepareInlineToken} from './xliff/builder';

/* eslint-disable no-console */
Expand All @@ -49,6 +49,7 @@ const SHOW_RAW_MAP = false;
const SHOW_VARS = false;

export interface TransformOptions {
compact?: boolean;
showTokens?: boolean;
showMap?: boolean;
showRawMap?: boolean;
Expand All @@ -61,6 +62,7 @@ export function transform(content: string, options?: TransformOptions) {
showMap = SHOW_MAP,
showRawMap = SHOW_RAW_MAP,
showVars = SHOW_VARS,
compact,
} = options || {};

let mdData = content;
Expand Down Expand Up @@ -172,21 +174,47 @@ export function transform(content: string, options?: TransformOptions) {
return false;
});

eachTokens(allTokens, (token) => {
if (token.type === 'inline') {
if (!token.children) return false;

const {yamlToken} = getExtraToken(tokenExtraMap, token);
const targetToken = yamlToken ?? token;
const data = yamlToken ? yamlToken.content : mdData;

targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data);
return true;
}
return false;
});

eachTokens(allTokens, (token, _idx, tokens) => {
if (token.type === 'inline' && !token.attrGet('yaml')) {
const trimmedTokens = trimInlineToken(mdData, token, tokenExtraMap);
if (trimmedTokens.length > 1) {
const pos = tokens.indexOf(token);
if (pos === -1) {
throw new Error('Token not found for trimming');
}
tokens.splice(pos, 1, ...trimmedTokens);
}
return true;
}
return false;
});

const replaceParts: ReplacePart[] = [];

const typeAction = {
inline(token: Token) {
const extraToken = getExtraToken(tokenExtraMap, token);
if (!token.children) return false;

const extraToken = getExtraToken(tokenExtraMap, token);
const {yamlToken} = extraToken;
const targetToken = yamlToken ?? token;
const data = yamlToken ? yamlToken.content : mdData;

targetToken.children = prepareInlineToken(targetToken, tokenExtraMap, data);

const hasText = someTokens(
targetToken.children,
targetToken.children ?? [],
(tokenLocal) => tokenLocal.type === 'text' && !passSymbols.test(tokenLocal.content),
);

Expand All @@ -195,7 +223,7 @@ export function transform(content: string, options?: TransformOptions) {
const id = `${replaceParts.length + 1}${postfix}`;
replaceParts.push({...extraToken, token, id});

markTokens(targetToken.children, tokenExtraMap);
markTokens(targetToken.children ?? [], tokenExtraMap);
}
return true;
},
Expand Down Expand Up @@ -233,7 +261,7 @@ export function transform(content: string, options?: TransformOptions) {
});
}

const xliff = buildXliff(replaceParts, tokenExtraMap, mdData);
const xliff = buildXliff(replaceParts, tokenExtraMap, mdData, compact);

return {skeleton: outMd, variables: variableTextMap, xliff};
}
29 changes: 19 additions & 10 deletions src/experiment/translate.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
import yaml from 'js-yaml';
import xmlParser, {XmlParserElementChildNode, XmlParserElementNode} from 'xml-parser-xo';

import {ComposeOptions} from 'src/experiment/adapter/types';
import xmlParser, {
XmlParserElementChildNode,
XmlParserElementNode,
XmlParserResult,
} from 'xml-parser-xo';

import {YamlQuotingTypeQuote} from './constants';
import {unescapeXmlText} from './xliff/utils';

/* eslint-disable no-console */

export function translate(xliffData: string, skeletonData: string, options?: ComposeOptions) {
const {useSource} = options ?? {};
const xliff = xmlParser(xliffData, {
strictMode: true,
});
/**
 * Options accepted by `translate`.
 */
export type TranslateOptions = {
// NOTE(review): presumably makes the composer take text from the source side of the XLIFF;
// the place where this flag is consumed is not visible here — confirm.
useSource?: boolean;
// Already parsed XLIFF document. When provided, `translate` uses it as is instead of
// running `xmlParser` over the `xliffData` string.
parsedXliff?: XmlParserResult;
};

export function translate(xliffData: string, skeletonData: string, options?: TranslateOptions) {
const {useSource, parsedXliff} = options ?? {};
const xliff =
parsedXliff ??
xmlParser(xliffData, {
strictMode: true,
});

const externalFileElement = findNodeByNamePath(xliff.root, [
'header',
Expand Down Expand Up @@ -128,7 +137,7 @@ function nodeToString(node: XmlParserElementChildNode): string {
throw new Error(`Unsupported node type: ${node.type}`);
}

function getAttr(node: XmlParserElementNode, attr: string) {
export function getAttr(node: XmlParserElementNode, attr: string) {
let value = node.attributes[attr];
if (typeof value !== 'undefined') {
value = unescapeXmlText(value);
Expand All @@ -155,7 +164,7 @@ function findNodeByName(rooNode: XmlParserElementChildNode, name: string) {
return next(rooNode);
}

function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) {
export function findNodeByNamePath(node: XmlParserElementChildNode, names: string[]) {
let lastResult: XmlParserElementNode | undefined;
let cursor: XmlParserElementChildNode = node;
for (let i = 0, len = names.length; i < len; i++) {
Expand Down
87 changes: 86 additions & 1 deletion src/experiment/utils/segmentation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,92 @@ export function applySegmentation(
return inlineTokens;
}

function splitInlineToken(
/**
 * Cuts non-text children off both edges of an inline token.
 *
 * A run of `html_inline`, `attr_anchor`, `softbreak` or `liquid_operator` children at the very
 * beginning or the very end of `inlineToken` is separated from the rest with
 * `splitInlineToken`. When the child next to such a run is a `text` token, its adjacent
 * whitespace is moved to the cut-off side as well.
 *
 * @param mdData - source string, forwarded to `splitInlineToken`
 * @param inlineToken - inline token whose edges are examined
 * @param tokenExtraMap - map handed to `getExtraToken` to read `start`/`end` of a child
 * @returns the pieces in the order they were produced (leading piece, middle piece, last piece);
 * pieces whose `content` is empty are left out
 */
export function trimInlineToken(mdData: string, inlineToken: Token, tokenExtraMap: TokenExtraMap) {
    const trimmableTypes = ['html_inline', 'attr_anchor', 'softbreak', 'liquid_operator'];
    const isTrimmable = (child: Token) => trimmableTypes.includes(child.type);

    // Carries the `idPostfix` attribute over so the translatable piece keeps the original id.
    const copyIdPostfix = (from: Token, to: Token) => {
        const value = from.attrGet('idPostfix');
        if (value) {
            to.attrSet('idPostfix', value);
        }
    };

    const parts: Token[] = [];
    let rest = inlineToken;

    // Walk forward over the leading trimmable run and remember where it ends.
    let headEnd: number | undefined;
    for (const child of (rest.children ?? []).slice(0)) {
        if (isTrimmable(child)) {
            headEnd = getExtraToken(tokenExtraMap, child).end;
            continue;
        }
        if (child.type === 'text' && headEnd) {
            // Leading whitespace of the first text child goes to the cut-off side.
            const leadingWhitespace = child.content.length - child.content.trimStart().length;
            if (leadingWhitespace > 0) {
                headEnd += leadingWhitespace;
            }
        }
        break;
    }

    if (headEnd) {
        const [head, tail] = splitInlineToken(mdData, rest, headEnd, tokenExtraMap);
        if (head.content.length) {
            parts.push(head);
        }
        copyIdPostfix(inlineToken, tail);
        rest = tail;
    }

    // Walk backward over the trailing trimmable run and remember where it starts.
    let tailStart: number | undefined;
    for (const child of (rest.children ?? []).slice(0).reverse()) {
        if (isTrimmable(child)) {
            tailStart = getExtraToken(tokenExtraMap, child).start;
            continue;
        }
        if (child.type === 'text' && tailStart) {
            // Trailing whitespace of the last text child goes to the cut-off side.
            const trailingWhitespace = child.content.length - child.content.trimEnd().length;
            if (trailingWhitespace > 0) {
                tailStart -= trailingWhitespace;
            }
        }
        break;
    }

    if (tailStart) {
        const [body, tail] = splitInlineToken(mdData, rest, tailStart, tokenExtraMap);
        if (body.content.length) {
            copyIdPostfix(rest, body);
            parts.push(body);
        }
        rest = tail;
    }

    if (rest.content.length) {
        parts.push(rest);
    }

    return parts;
}

export function splitInlineToken(
mdData: string,
inlineToken: Token,
point: number,
Expand Down
25 changes: 17 additions & 8 deletions src/experiment/xliff/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,17 @@ export const buildXliff = (
replaceParts: ReplacePart[],
tokenExtraMap: TokenExtraMap,
mdData: string,
compact = false,
) => {
const xliff = new Xliff();
const xliff = new Xliff({compact});

replaceParts.forEach(({token, id, yamlToken}) => {
let transUnit;
if (yamlToken) {
const data = yamlToken.content;
transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data);
transUnit = buildTransUnit(id, yamlToken, tokenExtraMap, data, compact);
} else {
transUnit = buildTransUnit(id, token, tokenExtraMap, mdData);
transUnit = buildTransUnit(id, token, tokenExtraMap, mdData, compact);
}
xliff.appendTransUnit(transUnit);
});
Expand All @@ -43,10 +44,11 @@ function buildTransUnit(
inlineToken: Token,
tokenExtraMap: TokenExtraMap,
mdData: string,
compact: boolean,
) {
const transUnit = new TransUnitElement(transUnitId);

const source = new SourceElement();
const source = new SourceElement({compact});
transUnit.appendElement(source);

let level = 0;
Expand All @@ -64,10 +66,15 @@ function buildTransUnit(
const [, name, state] = typeM;
if (state === 'open') {
const element = new GElement(id, raw);
if (name in tokenNameCType) {
element.setAttr('ctype', tokenNameCType[name as keyof typeof tokenNameCType]);
if (!compact) {
if (name in tokenNameCType) {
element.setAttr(
'ctype',
tokenNameCType[name as keyof typeof tokenNameCType],
);
}
element.setAttr('x-type', name);
}
element.setAttr('x-type', name);
element.setAttr('x-begin', raw);

levelToken.push(element);
Expand Down Expand Up @@ -96,7 +103,9 @@ function buildTransUnit(
});
} else if (raw.length > 0) {
const element = new XElement(id, raw);
element.setAttr('x-type', token.type);
if (!compact) {
element.setAttr('x-type', token.type);
}
parentElement.appendElement(element);
}
});
Expand Down
19 changes: 0 additions & 19 deletions src/experiment/xliff/elements/BXElement.ts

This file was deleted.

1 change: 1 addition & 0 deletions src/experiment/xliff/elements/BaseElement.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {attributesToString} from '../utils';
export class BaseElement {
declare tag: string;
children: BaseElement[] = [];
compact?: boolean;

attrs: Record<string, string> = {};

Expand Down
19 changes: 0 additions & 19 deletions src/experiment/xliff/elements/EXElement.ts

This file was deleted.

2 changes: 1 addition & 1 deletion src/experiment/xliff/elements/GElement.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export class GElement extends BaseElement {

/**
 * Serializes the element by delegating to the parent class `toString`, passing an object
 * keyed by `equiv-text`.
 */
toString() {
return super.toString({
// NOTE(review): the two `'equiv-text'` entries below look like the removed and the added
// line of one change ("1 addition & 1 deletion"); presumably only the conditional one is
// meant to remain — confirm.
'equiv-text': this.equivText,
// When `compact` is set the value is `undefined` instead of `this.equivText`.
'equiv-text': this.compact ? undefined : this.equivText,
});
}
}
Loading

0 comments on commit f7b4d12

Please sign in to comment.