import * as g from "./glossUtils";

// Generated by peggy v. 1.2.0 (ts-pegjs plugin v. 1.2.2 )
//
// https://peggyjs.org/ https://github.com/metadevpro/ts-pegjs
//
// NOTE: this file is generator output. The manual corrections marked "FIX"
// below must also be applied to the grammar / generator template, otherwise
// they are lost the next time the parser is regenerated.

("use strict");

/** A single position in the parsed input. */
export interface IFilePosition {
  /** Zero-based character offset into the input string. */
  offset: number;
  /** Line number as computed by the parser's position cache. */
  line: number;
  /** Column number as computed by the parser's position cache. */
  column: number;
}

/** A start/end span of the input, tagged with the grammar source label. */
export interface IFileRange {
  start: IFilePosition;
  end: IFilePosition;
  /** Copied from `options.grammarSource` by the parser. */
  source: string;
}

/** Expectation of an exact literal string. */
export interface ILiteralExpectation {
  type: "literal";
  text: string;
  ignoreCase: boolean;
}

/**
 * Parts of a character class: either a single character (string) or a
 * two-element `[from, to]` range.
 *
 * FIX: the element type argument was missing (`extends Array {}` does not
 * compile because `Array` requires a type argument).
 */
export interface IClassParts extends Array<string | IClassParts> {}

/** Expectation of a character class such as `[a-z]`. */
export interface IClassExpectation {
  type: "class";
  parts: IClassParts;
  inverted: boolean;
  ignoreCase: boolean;
}

/** Expectation of any single character. */
export interface IAnyExpectation {
  type: "any";
}

/** Expectation of the end of the input. */
export interface IEndExpectation {
  type: "end";
}

/** Expectation described by a human-readable rule label. */
export interface IOtherExpectation {
  type: "other";
  description: string;
}

/** Everything the parser can report as "expected" at a failure position. */
export type Expectation =
  | ILiteralExpectation
  | IClassExpectation
  | IAnyExpectation
  | IEndExpectation
  | IOtherExpectation;

/**
 * Right-pads `str` up to `targetLength` characters using `padString`.
 *
 * @param str - The string to pad.
 * @param targetLength - Desired total length; `str` is returned unchanged
 *   when it is already longer than this.
 * @param padString - Padding text, repeated as needed; an empty value falls
 *   back to a single space.
 * @returns The padded string.
 */
function peg$padEnd(str: string, targetLength: number, padString: string) {
  padString = padString || " ";
  if (str.length > targetLength) {
    return str;
  }
  targetLength -= str.length;
  padString += padString.repeat(targetLength);
  return str + padString.slice(0, targetLength);
}

/**
 * Error thrown by the generated parser. Carries the expectations collected at
 * the furthest failure position, the text that was found there, and the
 * location of the failure.
 */
export class SyntaxError extends Error {
  /**
   * Builds the `Expected … but … found.` message.
   *
   * @param expected - Expectations recorded at the failure position.
   * @param found - The offending text, or `null`/empty for end of input.
   * @returns The formatted message.
   */
  public static buildMessage(expected: Expectation[], found: string | null) {
    function hex(ch: string): string {
      return ch.charCodeAt(0).toString(16).toUpperCase();
    }

    // Escapes a string for display inside double quotes.
    function literalEscape(s: string): string {
      return s
        .replace(/\\/g, "\\\\")
        .replace(/"/g, '\\"')
        .replace(/\0/g, "\\0")
        .replace(/\t/g, "\\t")
        .replace(/\n/g, "\\n")
        .replace(/\r/g, "\\r")
        .replace(/[\x00-\x0F]/g, (ch) => "\\x0" + hex(ch))
        .replace(/[\x10-\x1F\x7F-\x9F]/g, (ch) => "\\x" + hex(ch));
    }

    // Escapes a string for display inside a `[...]` character class.
    function classEscape(s: string): string {
      return s
        .replace(/\\/g, "\\\\")
        .replace(/\]/g, "\\]")
        .replace(/\^/g, "\\^")
        .replace(/-/g, "\\-")
        .replace(/\0/g, "\\0")
        .replace(/\t/g, "\\t")
        .replace(/\n/g, "\\n")
        .replace(/\r/g, "\\r")
        .replace(/[\x00-\x0F]/g, (ch) => "\\x0" + hex(ch))
        .replace(/[\x10-\x1F\x7F-\x9F]/g, (ch) => "\\x" + hex(ch));
    }

    function describeExpectation(expectation: Expectation) {
      switch (expectation.type) {
        case "literal":
          return '"' + literalEscape(expectation.text) + '"';
        case "class": {
          const escapedParts = expectation.parts.map((part) => {
            return Array.isArray(part)
              ? classEscape(part[0] as string) +
                  "-" +
                  classEscape(part[1] as string)
              : classEscape(part);
          });

          // FIX: concatenating the array itself stringified it with commas
          // (e.g. "[a-z,A-Z,_]"); join the parts explicitly instead.
          return (
            "[" +
            (expectation.inverted ? "^" : "") +
            escapedParts.join("") +
            "]"
          );
        }
        case "any":
          return "any character";
        case "end":
          return "end of input";
        case "other":
          return expectation.description;
      }
    }

    function describeExpected(expected1: Expectation[]) {
      const descriptions = expected1.map(describeExpectation);
      let i: number;
      let j: number;

      descriptions.sort();

      // Drop adjacent duplicates in place (the list is sorted).
      if (descriptions.length > 0) {
        for (i = 1, j = 1; i < descriptions.length; i++) {
          if (descriptions[i - 1] !== descriptions[i]) {
            descriptions[j] = descriptions[i];
            j++;
          }
        }
        descriptions.length = j;
      }

      switch (descriptions.length) {
        case 1:
          return descriptions[0];

        case 2:
          return descriptions[0] + " or " + descriptions[1];

        default:
          return (
            descriptions.slice(0, -1).join(", ") +
            ", or " +
            descriptions[descriptions.length - 1]
          );
      }
    }

    function describeFound(found1: string | null) {
      return found1 ? '"' + literalEscape(found1) + '"' : "end of input";
    }

    return (
      "Expected " +
      describeExpected(expected) +
      " but " +
      describeFound(found) +
      " found."
    );
  }

  public message: string;
  public expected: Expectation[];
  public found: string | null;
  public location: IFileRange;
  public name: string;

  /**
   * @param message - Human-readable message (see `buildMessage`).
   * @param expected - Expectations at the failure position.
   * @param found - Text found at the failure position.
   * @param location - Span of the input the error refers to.
   */
  constructor(
    message: string,
    expected: Expectation[],
    found: string | null,
    location: IFileRange
  ) {
    super();
    this.message = message;
    this.expected = expected;
    this.found = found;
    this.location = location;
    this.name = "SyntaxError";

    // Re-attach the prototype so `instanceof SyntaxError` holds even when the
    // class is down-levelled to ES5-style inheritance.
    if (typeof (Object as any).setPrototypeOf === "function") {
      (Object as any).setPrototypeOf(this, SyntaxError.prototype);
    } else {
      (this as any).__proto__ = SyntaxError.prototype;
    }
    if (typeof (Error as any).captureStackTrace === "function") {
      (Error as any).captureStackTrace(this, SyntaxError);
    }
  }

  /**
   * Renders the error with a source excerpt and a caret underline.
   *
   * @param sources - Candidate source texts; the entry whose `source` equals
   *   `this.location.source` is used for the excerpt.
   * @returns `Error: <message>` followed by either the excerpt or, when no
   *   matching source text is supplied, an ` at source:line:column` suffix.
   */
  format(sources: { source: string; text: string }[]): string {
    let str = "Error: " + this.message;
    if (this.location) {
      let src: string[] | null = null;
      let k;
      for (k = 0; k < sources.length; k++) {
        if (sources[k].source === this.location.source) {
          src = sources[k].text.split(/\r\n|\n|\r/g);
          break;
        }
      }
      let s = this.location.start;
      let loc = this.location.source + ":" + s.line + ":" + s.column;
      if (src) {
        let e = this.location.end;
        let filler = peg$padEnd("", s.line.toString().length, " ");
        let line = src[s.line - 1];
        // Underline to the end column when the range stays on one line,
        // otherwise to the end of the first line.
        let last = s.line === e.line ? e.column : line.length + 1;
        str +=
          "\n --> " +
          loc +
          "\n" +
          filler +
          " |\n" +
          s.line +
          " | " +
          line +
          "\n" +
          filler +
          " | " +
          peg$padEnd("", s.column - 1, " ") +
          peg$padEnd("", last - s.column, "^");
      } else {
        str += "\n at " + loc;
      }
    }
    return str;
  }
}

// Entry point of the generated parser. Only the opening of this function
// (option defaults and the grammar's action/expectation constants up to
// peg$c51) is part of this span; the body continues below.
function peg$parse(input: string, options?: IParseOptions) {
  options = options !== undefined ? options : {};

  // Sentinel returned by rule functions on failure; compared by identity.
  // FIX: restored the missing type argument (`Readonly` alone does not compile).
  const peg$FAILED: Readonly<any> = {};
  const peg$source = options.grammarSource;

  const peg$startRuleFunctions: { [id: string]: any } = {
    Expression: peg$parseExpression,
  };
  let peg$startRuleFunction: () => any = peg$parseExpression;

  // Expression: wraps the list of parsed groups in a GlossDocument.
  const peg$c0 = function (groups: any): any {
    return new g.GlossDocument(groups);
  };
  // SingleCharacterGloss (recursive form): prepends the new segment to the
  // already-built gloss and re-stamps its position from the current match.
  const peg$c1 = function (newSegment: any, base: any): any {
    base.segments.unshift(newSegment);
    base.column = location().start.column;
    base.line = location().start.line;
    base.location = String([base.line, base.column]);
    return base;
  };
  // SingleCharacterGloss (base form): lemma segment plus optional inflection.
  const peg$c2 = function (segment: any, inflection: any): any {
    const segments = [segment];
    if (inflection) segments.push(inflection);
    return new g.CharacterGloss({
      segments,
      location: [location().start.line, location().start.column],
    });
  };
  const peg$c3 = "(";
  const peg$c4 = peg$literalExpectation("(", false);
  const peg$c5 = ")";
  const peg$c6 = peg$literalExpectation(")", false);
  // SingleCharacterGlossWithIdiomatic: attaches the parenthesised segments.
  // NOTE(review): the console.log is a diagnostic only; the amend() call
  // below still throws when `gloss.amend` is missing.
  const peg$c7 = function (gloss: any, idiomatic: any): any {
    if (!gloss.amend) console.log("no amend", { gloss });
    gloss.amend({ idiomatic });
    return gloss;
  };
  const peg$c8 = "-";
  const peg$c9 = peg$literalExpectation("-", false);
  // CompoundContinuation: passes the gloss through (the "-" is discarded).
  const peg$c10 = function (gloss: any): any {
    return gloss;
  };
  // CompoundGloss: links every component to the compound's location and
  // moves the last component's idiomatic reading onto the compound itself.
  const peg$c11 = function (
    firstComponents: any,
    lastComponentGloss: any
  ): any {
    const components = [...firstComponents, lastComponentGloss];
    const glossLocation: [number, number] = [
      location().start.line,
      location().start.column,
    ];
    // FIX: removed a leftover debugging probe that threw
    // `Error("kk it's parser")` whenever a segment's text contained "tire",
    // which made valid input such as "tired" or "entire" unparseable.
    for (const component of components) {
      component.attachToCompound(glossLocation);
    }
    const meaning = lastComponentGloss.idiomatic;
    lastComponentGloss.amend({ idiomatic: null });
    return new g.CompoundGloss({
      location: glossLocation,
      meaning,
      components,
    });
  };
  // GlossOrderingUnit: numbered compound.
  const peg$c12 = function (number: any, prePadding: any, gloss: any): any {
    gloss.components[0].amend({ prePadding });
    gloss.number = number;
    return new g.GlossGroup({
      characters: [...gloss.components],
      compounds: { [gloss.location]: gloss },
    });
  };
  // GlossOrderingUnit: numbered single gloss (number coerced with unary +).
  const peg$c13 = function (number: any, prePadding: any, gloss: any): any {
    return new g.GlossGroup({
      characters: [gloss.amend({ number: +number, prePadding })],
    });
  };
  // GlossOrderingUnit: unnumbered compound.
  const peg$c14 = function (gloss: any): any {
    return new g.GlossGroup({
      characters: [...gloss.components],
      compounds: { [gloss.location]: gloss },
    });
  };
  // GlossOrderingUnit: unnumbered single gloss.
  const peg$c15 = function (gloss: any): any {
    return new g.GlossGroup({
      characters: [gloss],
    });
  };
  // GlossGroup: unit followed by optional post-padding and end punctuation.
  const peg$c16 = function (baseGroup: any, postPadding: any, ep: any): any {
    baseGroup.endPunctuation = ep;
    baseGroup.characters[baseGroup.characters.length - 1].amend({
      postPadding,
    });
    return baseGroup;
  };
  // GlossGroup: merges the following group into `baseGroup`. When padding is
  // present it is attached to the previous character if the next character
  // (or its compound) is numbered, otherwise to the next character.
  const peg$c17 = function (
    baseGroup: any,
    w: any,
    postPadding: any,
    groupToIntegrate: any
  ): any {
    const lastChar = baseGroup.characters[baseGroup.characters.length - 1];
    const nextChar = groupToIntegrate.characters[0];
    if (postPadding) {
      if (
        nextChar.number != null ||
        groupToIntegrate.compounds[nextChar.compoundLocation]?.number != null
      ) {
        lastChar.amend({
          postPadding:
            (lastChar.postPadding || "") +
            w +
            (postPadding || "") +
            (groupToIntegrate.prePadding || ""),
        });
      } else {
        nextChar.amend({
          prePadding:
            w +
            (postPadding || "") +
            (groupToIntegrate.prePadding || "") +
            (nextChar.prePadding || ""),
        });
      }
    }

    for (const c of groupToIntegrate.characters) baseGroup.characters.push(c);
    Object.assign(baseGroup.compounds, groupToIntegrate.compounds);
    baseGroup.endPunctuation = groupToIntegrate.endPunctuation;
    return baseGroup;
  };
  // GlossGroup: leading pre-padding is prefixed onto the group's own.
  const peg$c18 = function (prePadding: any, baseGroup: any): any {
    baseGroup.prePadding = prePadding + (baseGroup.prePadding || "");
    return baseGroup;
  };
  // PrePadding: optional quote + filler (trailing whitespace `w` is dropped).
  const peg$c19 = function (pre: any, filler: any, w: any): any {
    return (pre || "") + filler;
  };
  // PostPadding: filler + optional quote (leading whitespace `w` is dropped).
  const peg$c20 = function (w: any, filler: any, post: any): any {
    return filler + (post || "");
  };
  const peg$c21 = peg$otherExpectation("pre-padding punctuation");
  const peg$c22 = /^[ ]/;
  const peg$c23 = peg$classExpectation([" "], false, false);
  const peg$c24 = '"';
  const peg$c25 = peg$literalExpectation('"', false);
  const peg$c26 = peg$otherExpectation("quote");
  const peg$c27 = "[";
  const peg$c28 = peg$literalExpectation("[", false);
  const peg$c29 = "]";
  const peg$c30 = peg$literalExpectation("]", false);
  // TranslationFiller: returns the bracketed text without the brackets.
  const peg$c31 = function (chars: any): any {
    return chars;
  };
  const peg$c32 = peg$otherExpectation("identifier characters");
  const peg$c33 = "\\";
  const peg$c34 = peg$literalExpectation("\\", false);
  const peg$c35 = peg$anyExpectation();
  const peg$c36 = /^[a-zA-Z'*_~]/;
  const peg$c37 = peg$classExpectation(
    [["a", "z"], ["A", "Z"], "'", "*", "_", "~"],
    false,
    false
  );
  const peg$c38 = /^[a-zA-Z'*_~\-]/;
  const peg$c39 = peg$classExpectation(
    [["a", "z"], ["A", "Z"], "'", "*", "_", "~", "-"],
    false,
    false
  );
  const peg$c40 = /^[a-zA-Z'*_~\- ]/;
  const peg$c41 = peg$classExpectation(
    [["a", "z"], ["A", "Z"], "'", "*", "_", "~", "-", " "],
    false,
    false
  );
  // Builds a lemma segment stamped with the column where the match started.
  const peg$c42 = function (x: any): any {
    return new g.GlossSegment({
      role: "LemmaComponent",
      text: x,
      column: location().start.column,
    });
  };
  // Builds an inflection segment stamped with the column where it started.
  const peg$c43 = function (chars: any): any {
    return new g.GlossSegment({
      role: "Inflection",
      text: chars,
      column: location().start.column,
    });
  };
  const peg$c44 = ":";
  const peg$c45 = peg$literalExpectation(":", false);
  const peg$c46 = function (): any {
    return "";
  };
  const peg$c47 = peg$otherExpectation("space");
  const peg$c48 = " ";
  const peg$c49 = peg$literalExpectation(" ", false);
  const peg$c50 = peg$otherExpectation("end punctuation");
  const peg$c51 = /^[.!?,;\n\r]/;
// ---------------------------------------------------------------------------
// Interior of peg$parse (continued): remaining expectation constants, mutable
// parser state, the helper functions exposed to grammar actions, and the rule
// functions from Expression through GlossGroup.
//
// Conventions used by every rule function below:
//   * s0 holds the rule's result (or the saved start position while a
//     sequence is being tried); s1..sN hold the results of sub-expressions.
//   * peg$FAILED is the failure sentinel; on failure the rule restores
//     peg$currPos to where the attempt started.
//   * peg$savedPos is set just before an action (peg$cN) runs so that
//     text()/location() inside the action cover the matched span.
// ---------------------------------------------------------------------------
const peg$c52 = peg$classExpectation(
  [".", "!", "?", ",", ";", "\n", "\r"],
  false,
  false
);
const peg$c53 = "--";
const peg$c54 = peg$literalExpectation("--", false);
const peg$c55 = /^[1-9]/;
const peg$c56 = peg$classExpectation([["1", "9"]], false, false);
const peg$c57 = peg$otherExpectation("mandatorywhitespace");

// Current read position in `input`.
let peg$currPos = 0;
// Start of the span reported by text()/location() to the running action.
let peg$savedPos = 0;
// Sparse cache of line/column details indexed by input offset; offset 0 is
// line 1, column 1.
const peg$posDetailsCache = [{ line: 1, column: 1 }];
// Furthest position at which a failure was recorded, and what was expected
// there (used to build the final error message).
let peg$maxFailPos = 0;
let peg$maxFailExpected: Expectation[] = [];
// While > 0, failures are not recorded by the rule functions.
let peg$silentFails = 0;

let peg$result;

// Select the start rule requested by the caller, rejecting unknown names.
if (options.startRule !== undefined) {
  if (!(options.startRule in peg$startRuleFunctions)) {
    throw new Error(
      "Can't start parsing from rule \"" + options.startRule + '".'
    );
  }

  peg$startRuleFunction = peg$startRuleFunctions[options.startRule];
}

/** Returns the input text matched by the expression whose action is running. */
function text(): string {
  return input.substring(peg$savedPos, peg$currPos);
}

/** Returns the location (start/end/source) of the currently matched span. */
function location(): IFileRange {
  return peg$computeLocation(peg$savedPos, peg$currPos);
}

/**
 * Throws a structured SyntaxError stating that `description` was expected.
 * `location1` defaults to the currently matched span.
 */
function expected(description: string, location1?: IFileRange) {
  location1 =
    location1 !== undefined
      ? location1
      : peg$computeLocation(peg$savedPos, peg$currPos);

  throw peg$buildStructuredError(
    [peg$otherExpectation(description)],
    input.substring(peg$savedPos, peg$currPos),
    location1
  );
}

/**
 * Throws a SyntaxError carrying a free-form `message`.
 * `location1` defaults to the currently matched span.
 */
function error(message: string, location1?: IFileRange) {
  location1 =
    location1 !== undefined
      ? location1
      : peg$computeLocation(peg$savedPos, peg$currPos);

  throw peg$buildSimpleError(message, location1);
}

/** Creates a "literal" expectation record. */
function peg$literalExpectation(
  text1: string,
  ignoreCase: boolean
): ILiteralExpectation {
  return { type: "literal", text: text1, ignoreCase: ignoreCase };
}

/** Creates a "class" (character class) expectation record. */
function peg$classExpectation(
  parts: IClassParts,
  inverted: boolean,
  ignoreCase: boolean
): IClassExpectation {
  return {
    type: "class",
    parts: parts,
    inverted: inverted,
    ignoreCase: ignoreCase,
  };
}

/** Creates an "any character" expectation record. */
function peg$anyExpectation(): IAnyExpectation {
  return { type: "any" };
}

/** Creates an "end of input" expectation record. */
function peg$endExpectation(): IEndExpectation {
  return { type: "end" };
}

/** Creates an expectation record described by a rule label. */
function peg$otherExpectation(description: string): IOtherExpectation {
  return { type: "other", description: description };
}

/**
 * Returns the line/column for input offset `pos`, memoised in
 * peg$posDetailsCache. On a cache miss it walks forward from the nearest
 * earlier cached offset, starting a new line after each "\n" (char code 10).
 */
function peg$computePosDetails(pos: number) {
  let details = peg$posDetailsCache[pos];
  let p;

  if (details) {
    return details;
  } else {
    // Find the closest cached offset before `pos` (offset 0 always exists).
    p = pos - 1;
    while (!peg$posDetailsCache[p]) {
      p--;
    }

    // Copy so the cached entry for `p` is not mutated by the walk below.
    details = peg$posDetailsCache[p];
    details = {
      line: details.line,
      column: details.column,
    };

    while (p < pos) {
      if (input.charCodeAt(p) === 10) {
        details.line++;
        details.column = 1;
      } else {
        details.column++;
      }

      p++;
    }

    peg$posDetailsCache[pos] = details;

    return details;
  }
}

/** Builds an IFileRange for the offsets [startPos, endPos]. */
function peg$computeLocation(startPos: number, endPos: number): IFileRange {
  const startPosDetails = peg$computePosDetails(startPos);
  const endPosDetails = peg$computePosDetails(endPos);

  return {
    source: peg$source,
    start: {
      offset: startPos,
      line: startPosDetails.line,
      column: startPosDetails.column,
    },
    end: {
      offset: endPos,
      line: endPosDetails.line,
      column: endPosDetails.column,
    },
  };
}

/**
 * Records a failed expectation. Only failures at the furthest position seen
 * so far are kept: earlier ones are ignored, and reaching a new furthest
 * position resets the list.
 */
function peg$fail(expected1: Expectation) {
  if (peg$currPos < peg$maxFailPos) {
    return;
  }

  if (peg$currPos > peg$maxFailPos) {
    peg$maxFailPos = peg$currPos;
    peg$maxFailExpected = [];
  }

  peg$maxFailExpected.push(expected1);
}

/** Creates a SyntaxError with a free-form message and no expectations. */
function peg$buildSimpleError(message: string, location1: IFileRange) {
  return new SyntaxError(message, [], "", location1);
}

/** Creates a SyntaxError whose message is derived from the expectations. */
function peg$buildStructuredError(
  expected1: Expectation[],
  found: string | null,
  location1: IFileRange
) {
  return new SyntaxError(
    SyntaxError.buildMessage(expected1, found),
    expected1,
    found,
    location1
  );
}

/**
 * Expression <- GlossGroup+
 * On success the collected groups are passed to action peg$c0.
 */
function peg$parseExpression(): any {
  let s0, s1, s2;

  s0 = peg$currPos;
  s1 = [];
  s2 = peg$parseGlossGroup();
  if ((s2 as any) !== peg$FAILED) {
    while ((s2 as any) !== peg$FAILED) {
      s1.push(s2);
      s2 = peg$parseGlossGroup();
    }
  } else {
    s1 = peg$FAILED;
  }
  if ((s1 as any) !== peg$FAILED) {
    peg$savedPos = s0;
    s1 = peg$c0(s1);
  }
  s0 = s1;

  return s0;
}

/**
 * SingleCharacterGloss
 *   <- (Inflection / GlossLemmaSegment) SingleCharacterGloss   -> peg$c1
 *    / GlossLemmaSegment Inflection?                           -> peg$c2
 * The first alternative is right-recursive; the second is the base case.
 */
function peg$parseSingleCharacterGloss(): any {
  let s0, s1, s2;

  s0 = peg$currPos;
  s1 = peg$parseInflection();
  if ((s1 as any) === peg$FAILED) {
    s1 = peg$parseGlossLemmaSegment();
  }
  if ((s1 as any) !== peg$FAILED) {
    s2 = peg$parseSingleCharacterGloss();
    if ((s2 as any) !== peg$FAILED) {
      peg$savedPos = s0;
      s1 = peg$c1(s1, s2);
      s0 = s1;
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }
  if ((s0 as any) === peg$FAILED) {
    s0 = peg$currPos;
    s1 = peg$parseGlossLemmaSegment();
    if ((s1 as any) !== peg$FAILED) {
      s2 = peg$parseInflection();
      // Optional: a missing inflection is represented as null.
      if ((s2 as any) === peg$FAILED) {
        s2 = null;
      }
      if ((s2 as any) !== peg$FAILED) {
        peg$savedPos = s0;
        s1 = peg$c2(s1, s2);
        s0 = s1;
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  }

  return s0;
}

/**
 * SingleCharacterGlossWithIdiomatic
 *   <- SingleCharacterGloss ___ "(" IdiomaticGlossSegment+ ")"   -> peg$c7
 * peg$parse___ is defined outside this span (presumably a whitespace rule;
 * confirm against the grammar).
 */
function peg$parseSingleCharacterGlossWithIdiomatic(): any {
  let s0, s1, s2, s3, s4, s5;

  s0 = peg$currPos;
  s1 = peg$parseSingleCharacterGloss();
  if ((s1 as any) !== peg$FAILED) {
    s2 = peg$parse___();
    if ((s2 as any) !== peg$FAILED) {
      // 40 is the char code of "(".
      if (input.charCodeAt(peg$currPos) === 40) {
        s3 = peg$c3;
        peg$currPos++;
      } else {
        s3 = peg$FAILED;
        if (peg$silentFails === 0) {
          peg$fail(peg$c4);
        }
      }
      if ((s3 as any) !== peg$FAILED) {
        s4 = [];
        s5 = peg$parseIdiomaticGlossSegment();
        if ((s5 as any) !== peg$FAILED) {
          while ((s5 as any) !== peg$FAILED) {
            s4.push(s5);
            s5 = peg$parseIdiomaticGlossSegment();
          }
        } else {
          s4 = peg$FAILED;
        }
        if ((s4 as any) !== peg$FAILED) {
          // 41 is the char code of ")".
          if (input.charCodeAt(peg$currPos) === 41) {
            s5 = peg$c5;
            peg$currPos++;
          } else {
            s5 = peg$FAILED;
            if (peg$silentFails === 0) {
              peg$fail(peg$c6);
            }
          }
          if ((s5 as any) !== peg$FAILED) {
            peg$savedPos = s0;
            s1 = peg$c7(s1, s4);
            s0 = s1;
          } else {
            peg$currPos = s0;
            s0 = peg$FAILED;
          }
        } else {
          peg$currPos = s0;
          s0 = peg$FAILED;
        }
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }

  return s0;
}

/**
 * CompoundContinuation <- SingleCharacterGloss "-"   -> peg$c10
 */
function peg$parseCompoundContinuation(): any {
  let s0, s1, s2;

  s0 = peg$currPos;
  s1 = peg$parseSingleCharacterGloss();
  if ((s1 as any) !== peg$FAILED) {
    // 45 is the char code of "-".
    if (input.charCodeAt(peg$currPos) === 45) {
      s2 = peg$c8;
      peg$currPos++;
    } else {
      s2 = peg$FAILED;
      if (peg$silentFails === 0) {
        peg$fail(peg$c9);
      }
    }
    if ((s2 as any) !== peg$FAILED) {
      peg$savedPos = s0;
      s1 = peg$c10(s1);
      s0 = s1;
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }

  return s0;
}

/**
 * CompoundGloss
 *   <- CompoundContinuation+ SingleCharacterGlossWithIdiomatic   -> peg$c11
 */
function peg$parseCompoundGloss(): any {
  let s0, s1, s2;

  s0 = peg$currPos;
  s1 = [];
  s2 = peg$parseCompoundContinuation();
  if ((s2 as any) !== peg$FAILED) {
    while ((s2 as any) !== peg$FAILED) {
      s1.push(s2);
      s2 = peg$parseCompoundContinuation();
    }
  } else {
    s1 = peg$FAILED;
  }
  if ((s1 as any) !== peg$FAILED) {
    s2 = peg$parseSingleCharacterGlossWithIdiomatic();
    if ((s2 as any) !== peg$FAILED) {
      peg$savedPos = s0;
      s1 = peg$c11(s1, s2);
      s0 = s1;
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }

  return s0;
}

/**
 * GlossOrderingUnit — ordered choice, first match wins:
 *   1. Number PrePadding? CompoundGloss                       -> peg$c12
 *   2. Number PrePadding? SingleCharacterGlossWithIdiomatic   -> peg$c13
 *   3. Number PrePadding? SingleCharacterGloss                -> peg$c13
 *   4. CompoundGloss                                          -> peg$c14
 *   5. SingleCharacterGlossWithIdiomatic                      -> peg$c15
 *   6. SingleCharacterGloss                                   -> peg$c15
 * Each alternative restarts from the position saved in s0.
 */
function peg$parseGlossOrderingUnit(): any {
  let s0, s1, s2, s3;

  // Alternative 1
  s0 = peg$currPos;
  s1 = peg$parseNumber();
  if ((s1 as any) !== peg$FAILED) {
    s2 = peg$parsePrePadding();
    if ((s2 as any) === peg$FAILED) {
      s2 = null;
    }
    if ((s2 as any) !== peg$FAILED) {
      s3 = peg$parseCompoundGloss();
      if ((s3 as any) !== peg$FAILED) {
        peg$savedPos = s0;
        s1 = peg$c12(s1, s2, s3);
        s0 = s1;
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }
  if ((s0 as any) === peg$FAILED) {
    // Alternative 2
    s0 = peg$currPos;
    s1 = peg$parseNumber();
    if ((s1 as any) !== peg$FAILED) {
      s2 = peg$parsePrePadding();
      if ((s2 as any) === peg$FAILED) {
        s2 = null;
      }
      if ((s2 as any) !== peg$FAILED) {
        s3 = peg$parseSingleCharacterGlossWithIdiomatic();
        if ((s3 as any) !== peg$FAILED) {
          peg$savedPos = s0;
          s1 = peg$c13(s1, s2, s3);
          s0 = s1;
        } else {
          peg$currPos = s0;
          s0 = peg$FAILED;
        }
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
    if ((s0 as any) === peg$FAILED) {
      // Alternative 3
      s0 = peg$currPos;
      s1 = peg$parseNumber();
      if ((s1 as any) !== peg$FAILED) {
        s2 = peg$parsePrePadding();
        if ((s2 as any) === peg$FAILED) {
          s2 = null;
        }
        if ((s2 as any) !== peg$FAILED) {
          s3 = peg$parseSingleCharacterGloss();
          if ((s3 as any) !== peg$FAILED) {
            peg$savedPos = s0;
            s1 = peg$c13(s1, s2, s3);
            s0 = s1;
          } else {
            peg$currPos = s0;
            s0 = peg$FAILED;
          }
        } else {
          peg$currPos = s0;
          s0 = peg$FAILED;
        }
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
      if ((s0 as any) === peg$FAILED) {
        // Alternative 4
        s0 = peg$currPos;
        s1 = peg$parseCompoundGloss();
        if ((s1 as any) !== peg$FAILED) {
          peg$savedPos = s0;
          s1 = peg$c14(s1);
        }
        s0 = s1;
        if ((s0 as any) === peg$FAILED) {
          // Alternative 5
          s0 = peg$currPos;
          s1 = peg$parseSingleCharacterGlossWithIdiomatic();
          if ((s1 as any) !== peg$FAILED) {
            peg$savedPos = s0;
            s1 = peg$c15(s1);
          }
          s0 = s1;
          if ((s0 as any) === peg$FAILED) {
            // Alternative 6
            s0 = peg$currPos;
            s1 = peg$parseSingleCharacterGloss();
            if ((s1 as any) !== peg$FAILED) {
              peg$savedPos = s0;
              s1 = peg$c15(s1);
            }
            s0 = s1;
          }
        }
      }
    }
  }

  return s0;
}

/**
 * GlossGroup — ordered choice, first match wins:
 *   1. GlossOrderingUnit PostPadding? EndPunctuation      -> peg$c16
 *   2. GlossOrderingUnit ___ PrePadding? GlossGroup       -> peg$c17
 *   3. PrePadding GlossGroup                              -> peg$c18
 * Alternatives 2 and 3 recurse into GlossGroup; the parsed group is merged
 * into / decorated onto the leading unit by the action.
 */
function peg$parseGlossGroup(): any {
  let s0, s1, s2, s3, s4;

  // Alternative 1
  s0 = peg$currPos;
  s1 = peg$parseGlossOrderingUnit();
  if ((s1 as any) !== peg$FAILED) {
    s2 = peg$parsePostPadding();
    if ((s2 as any) === peg$FAILED) {
      s2 = null;
    }
    if ((s2 as any) !== peg$FAILED) {
      s3 = peg$parseEndPunctuation();
      if ((s3 as any) !== peg$FAILED) {
        peg$savedPos = s0;
        s1 = peg$c16(s1, s2, s3);
        s0 = s1;
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
  } else {
    peg$currPos = s0;
    s0 = peg$FAILED;
  }
  if ((s0 as any) === peg$FAILED) {
    // Alternative 2
    s0 = peg$currPos;
    s1 = peg$parseGlossOrderingUnit();
    if ((s1 as any) !== peg$FAILED) {
      s2 = peg$parse___();
      if ((s2 as any) !== peg$FAILED) {
        s3 = peg$parsePrePadding();
        if ((s3 as any) === peg$FAILED) {
          s3 = null;
        }
        if ((s3 as any) !== peg$FAILED) {
          s4 = peg$parseGlossGroup();
          if ((s4 as any) !== peg$FAILED) {
            peg$savedPos = s0;
            s1 = peg$c17(s1, s2, s3, s4);
            s0 = s1;
          } else {
            peg$currPos = s0;
            s0 = peg$FAILED;
          }
        } else {
          peg$currPos = s0;
          s0 = peg$FAILED;
        }
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    } else {
      peg$currPos = s0;
      s0 = peg$FAILED;
    }
    if ((s0 as any) === peg$FAILED) {
      // Alternative 3
      s0 = peg$currPos;
      s1 = peg$parsePrePadding();
      if ((s1 as any) !== peg$FAILED) {
        s2 = peg$parseGlossGroup();
        if ((s2 as any) !== peg$FAILED) {
          peg$savedPos = s0;
          s1 = peg$c18(s1, s2);
          s0 = s1;
        } else {
          peg$currPos = s0;
          s0 = peg$FAILED;
        }
      } else {
        peg$currPos = s0;
        s0 = peg$FAILED;
      }
    }
  }

  return s0;
}
+ function peg$parsePrePadding(): any { + let s0, s1, s2, s3; + + s0 = peg$currPos; + s1 = peg$parseQuote(); + if ((s1 as any) === peg$FAILED) { + s1 = null; + } + if ((s1 as any) !== peg$FAILED) { + s2 = peg$parseTranslationFiller(); + if ((s2 as any) !== peg$FAILED) { + s3 = peg$parse___(); + if ((s3 as any) !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$c19(s1, s2, s3); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + if ((s0 as any) === peg$FAILED) { + s0 = peg$parsePrePaddingPunctuation(); + } + + return s0; + } + + function peg$parsePostPadding(): any { + let s0, s1, s2, s3; + + s0 = peg$currPos; + s1 = peg$parse___(); + if ((s1 as any) !== peg$FAILED) { + s2 = peg$parseTranslationFiller(); + if ((s2 as any) !== peg$FAILED) { + s3 = peg$parseQuote(); + if ((s3 as any) === peg$FAILED) { + s3 = null; + } + if ((s3 as any) !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$c20(s1, s2, s3); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + if ((s0 as any) === peg$FAILED) { + s0 = peg$parsePostPaddingPunctuation(); + } + + return s0; + } + + function peg$parsePrePaddingPunctuation(): any { + let s0, s1, s2, s3; + + peg$silentFails++; + s0 = peg$currPos; + s1 = peg$currPos; + s2 = []; + if (peg$c22.test(input.charAt(peg$currPos))) { + s3 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c23); + } + } + if ((s3 as any) !== peg$FAILED) { + while ((s3 as any) !== peg$FAILED) { + s2.push(s3); + if (peg$c22.test(input.charAt(peg$currPos))) { + s3 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c23); + } + } + } + } else { + s2 = peg$FAILED; + } + if ((s2 as any) !== peg$FAILED) 
{ + if (input.charCodeAt(peg$currPos) === 34) { + s3 = peg$c24; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c25); + } + } + if ((s3 as any) === peg$FAILED) { + s3 = null; + } + if ((s3 as any) !== peg$FAILED) { + s2 = [s2, s3]; + s1 = s2; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if ((s1 as any) !== peg$FAILED) { + s0 = input.substring(s0, peg$currPos); + } else { + s0 = s1; + } + if ((s0 as any) === peg$FAILED) { + s0 = peg$parseQuote(); + } + peg$silentFails--; + if ((s0 as any) === peg$FAILED) { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c21); + } + } + + return s0; + } + + function peg$parsePostPaddingPunctuation(): any { + let s0, s1, s2, s3, s4; + + peg$silentFails++; + s0 = peg$currPos; + s1 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 34) { + s2 = peg$c24; + peg$currPos++; + } else { + s2 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c25); + } + } + if ((s2 as any) === peg$FAILED) { + s2 = null; + } + if ((s2 as any) !== peg$FAILED) { + s3 = []; + if (peg$c22.test(input.charAt(peg$currPos))) { + s4 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c23); + } + } + if ((s4 as any) !== peg$FAILED) { + while ((s4 as any) !== peg$FAILED) { + s3.push(s4); + if (peg$c22.test(input.charAt(peg$currPos))) { + s4 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c23); + } + } + } + } else { + s3 = peg$FAILED; + } + if ((s3 as any) !== peg$FAILED) { + s2 = [s2, s3]; + s1 = s2; + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + } else { + peg$currPos = s1; + s1 = peg$FAILED; + } + if ((s1 as any) !== peg$FAILED) { + s0 = input.substring(s0, peg$currPos); + } else { + s0 = s1; + } + peg$silentFails--; + if ((s0 as any) === peg$FAILED) { + s1 = peg$FAILED; + if 
(peg$silentFails === 0) { + peg$fail(peg$c21); + } + } + + return s0; + } + + function peg$parseQuote(): any { + let s0, s1; + + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 34) { + s0 = peg$c24; + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c25); + } + } + peg$silentFails--; + if ((s0 as any) === peg$FAILED) { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c26); + } + } + + return s0; + } + + function peg$parseTranslationFiller(): any { + let s0, s1, s2, s3; + + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 91) { + s1 = peg$c27; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c28); + } + } + if ((s1 as any) !== peg$FAILED) { + s2 = peg$parseIdentifierCharactersAllowingSpaces(); + if ((s2 as any) !== peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 93) { + s3 = peg$c29; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c30); + } + } + if ((s3 as any) !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$c31(s2); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + + function peg$parseGlossSegment(): any { + let s0; + + s0 = peg$parseGlossLemmaSegment(); + if ((s0 as any) === peg$FAILED) { + s0 = peg$parseInflection(); + } + + return s0; + } + + function peg$parseIdiomaticGlossSegment(): any { + let s0; + + s0 = peg$parseGlossLemmaSegmentAllowingHyphens(); + if ((s0 as any) === peg$FAILED) { + s0 = peg$parseInflection(); + } + + return s0; + } + + function peg$parseIdentifierCharacters(): any { + let s0, s1, s2, s3, s4; + + peg$silentFails++; + s0 = peg$currPos; + s1 = []; + s2 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 92) { + s3 = peg$c33; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c34); 
+ } + } + if ((s3 as any) !== peg$FAILED) { + if (input.length > peg$currPos) { + s4 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c35); + } + } + if ((s4 as any) !== peg$FAILED) { + s3 = [s3, s4]; + s2 = s3; + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + if ((s2 as any) === peg$FAILED) { + s2 = peg$currPos; + if (peg$c36.test(input.charAt(peg$currPos))) { + s3 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c37); + } + } + if ((s3 as any) !== peg$FAILED) { + s2 = input.substring(s2, peg$currPos); + } else { + s2 = s3; + } + } + if ((s2 as any) !== peg$FAILED) { + while ((s2 as any) !== peg$FAILED) { + s1.push(s2); + s2 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 92) { + s3 = peg$c33; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c34); + } + } + if ((s3 as any) !== peg$FAILED) { + if (input.length > peg$currPos) { + s4 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c35); + } + } + if ((s4 as any) !== peg$FAILED) { + s3 = [s3, s4]; + s2 = s3; + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + } else { + peg$currPos = s2; + s2 = peg$FAILED; + } + if ((s2 as any) === peg$FAILED) { + s2 = peg$currPos; + if (peg$c36.test(input.charAt(peg$currPos))) { + s3 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c37); + } + } + if ((s3 as any) !== peg$FAILED) { + s2 = input.substring(s2, peg$currPos); + } else { + s2 = s3; + } + } + } + } else { + s1 = peg$FAILED; + } + if ((s1 as any) !== peg$FAILED) { + s0 = input.substring(s0, peg$currPos); + } else { + s0 = s1; + } + peg$silentFails--; + if ((s0 as any) === peg$FAILED) { + s1 = peg$FAILED; + if 
/**
 * Generated PEG rule: consumes one or more "units" and yields the matched
 * input text, or `peg$FAILED` when not even one unit matches.
 *
 * A unit is either a backslash (char code 92) followed by any single
 * character, or a single character accepted by the `peg$c38` class.
 */
function peg$parseIdentifierCharactersAllowingHyphens(): any {
  const ruleStart = peg$currPos;

  // Tries to match exactly one unit at the current position. Failure
  // expectations are reported at the same positions as the unrolled form:
  // peg$c34 where the backslash was expected, peg$c35 just after a trailing
  // backslash, peg$c39 where the class character was expected.
  const matchUnit = (): any => {
    const unitStart = peg$currPos;

    if (input.charCodeAt(peg$currPos) === 92) {
      peg$currPos++;
      if (input.length > peg$currPos) {
        const escaped = input.charAt(peg$currPos);
        peg$currPos++;
        return [peg$c33, escaped];
      }
      if (peg$silentFails === 0) {
        peg$fail(peg$c35);
      }
    } else if (peg$silentFails === 0) {
      peg$fail(peg$c34);
    }
    // First alternative failed: rewind before trying the character class.
    peg$currPos = unitStart;

    if (peg$c38.test(input.charAt(peg$currPos))) {
      peg$currPos++;
      return input.substring(unitStart, peg$currPos);
    }
    if (peg$silentFails === 0) {
      peg$fail(peg$c39);
    }
    return peg$FAILED;
  };

  let unit: any = matchUnit();
  if (unit === peg$FAILED) {
    return peg$FAILED;
  }

  const units: any[] = [];
  do {
    units.push(unit);
    unit = matchUnit();
  } while (unit !== peg$FAILED);

  // The rule's value is the raw text it covered, not the list of units.
  return input.substring(ruleStart, peg$currPos);
}
/**
 * Generated PEG rule: parses identifier characters and, on success, passes
 * them to the `peg$c42` action with `peg$savedPos` set to the rule's start.
 * Returns `peg$FAILED` unchanged when the identifier characters do not match.
 */
function peg$parseGlossLemmaSegment(): any {
  const ruleStart = peg$currPos;
  const characters = peg$parseIdentifierCharacters();

  if ((characters as any) === peg$FAILED) {
    return characters;
  }

  peg$savedPos = ruleStart;
  return peg$c42(characters);
}
(peg$silentFails === 0) { + peg$fail(peg$c28); + } + } + if ((s1 as any) !== peg$FAILED) { + s2 = peg$currPos; + s3 = peg$currPos; + s4 = []; + if (input.charCodeAt(peg$currPos) === 45) { + s5 = peg$c8; + peg$currPos++; + } else { + s5 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c9); + } + } + if ((s5 as any) !== peg$FAILED) { + while ((s5 as any) !== peg$FAILED) { + s4.push(s5); + if (input.charCodeAt(peg$currPos) === 45) { + s5 = peg$c8; + peg$currPos++; + } else { + s5 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c9); + } + } + } + } else { + s4 = peg$FAILED; + } + if ((s4 as any) !== peg$FAILED) { + s5 = peg$parseIdentifierCharacters(); + if ((s5 as any) === peg$FAILED) { + s5 = null; + } + if ((s5 as any) !== peg$FAILED) { + s4 = [s4, s5]; + s3 = s4; + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + if ((s3 as any) !== peg$FAILED) { + s2 = input.substring(s2, peg$currPos); + } else { + s2 = s3; + } + if ((s2 as any) !== peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 93) { + s3 = peg$c29; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c30); + } + } + if ((s3 as any) !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$c43(s2); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + if ((s0 as any) === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 91) { + s1 = peg$c27; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c28); + } + } + if ((s1 as any) !== peg$FAILED) { + s2 = peg$currPos; + s3 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 58) { + s4 = peg$c44; + peg$currPos++; + } else { + s4 = peg$FAILED; + if (peg$silentFails === 0) { + peg$fail(peg$c45); + } + } + if ((s4 as any) !== peg$FAILED) { + s5 = 
/**
 * Generated PEG rule with two alternatives:
 *
 * 1. zero or more Space, one or more NonSpaceEndPunctuation, zero or more
 *    Space; the value is the matched input text;
 * 2. a negative lookahead for "any character" (i.e. nothing left to read);
 *    the value is whatever the `peg$c46` action returns.
 *
 * Returns `peg$FAILED` when neither alternative matches.
 */
function peg$parseEndPunctuation(): any {
  const ruleStart = peg$currPos;

  const consumeSpaces = (): void => {
    while ((peg$parseSpace() as any) !== peg$FAILED) {
      // keep consuming
    }
  };

  consumeSpaces();
  if ((peg$parseNonSpaceEndPunctuation() as any) !== peg$FAILED) {
    while ((peg$parseNonSpaceEndPunctuation() as any) !== peg$FAILED) {
      // keep consuming
    }
    consumeSpaces();
    return input.substring(ruleStart, peg$currPos);
  }
  // No punctuation found: give back any leading spaces.
  peg$currPos = ruleStart;

  // Negative lookahead: succeeds only when no character remains. The
  // position is never advanced, and expectations are silenced while probing.
  peg$silentFails++;
  const hasMoreInput = input.length > peg$currPos;
  if (!hasMoreInput && peg$silentFails === 0) {
    peg$fail(peg$c35);
  }
  peg$silentFails--;

  if (hasMoreInput) {
    return peg$FAILED;
  }

  peg$savedPos = ruleStart;
  return peg$c46();
}
/**
 * Generated PEG rule: consumes one or more space characters (char code 32)
 * and returns the consumed text, or `peg$FAILED` if there is no space at the
 * current position.
 *
 * The per-character expectation (`peg$c49`) is reported under an incremented
 * `peg$silentFails`; the rule-level expectation (`peg$c57`) is reported after
 * the counter is restored.
 */
function peg$parse___(): any {
  const ruleStart = peg$currPos;

  peg$silentFails++;
  while (input.charCodeAt(peg$currPos) === 32) {
    peg$currPos++;
  }
  // The attempt that ended the run is the one that records the expectation.
  if (peg$silentFails === 0) {
    peg$fail(peg$c49);
  }
  peg$silentFails--;

  if (peg$currPos === ruleStart) {
    if (peg$silentFails === 0) {
      peg$fail(peg$c57);
    }
    return peg$FAILED;
  }

  return input.substring(ruleStart, peg$currPos);
}
/**
 * Type guard telling whether a translation element is a gloss, i.e. its
 * `type` is `"CharacterGloss"` or `"CompoundGloss"`.
 *
 * Always returns a real boolean: a missing element yields `false` (the
 * previous `el && …` form leaked `undefined` to callers).
 *
 * @param el - element to inspect; may be absent, e.g. when looking one
 *   position past the end of a translation array.
 * @returns `true` only for character or compound gloss elements.
 */
function isGloss(
  el?: TranslationElement
): el is Extract<
  TranslationElement,
  { type: "CharacterGloss" | "CompoundGloss" }
> {
  return (
    el != null && (el.type === "CharacterGloss" || el.type === "CompoundGloss")
  );
}
/**
 * Element mapper that renders an element as plain text: the leading space
 * followed by the text of every (already capitalised) segment, concatenated
 * in order. The element itself and its index are not consulted.
 */
const getTranslationElementText: MapTranslationElementFn<string> = (
  _element,
  leadingSpace,
  capitalizedTranslation
) => {
  const segmentTexts = capitalizedTranslation.map(({ text }) => text);
  const body = segmentTexts.join("");
  return leadingSpace + body;
};
[ + { + ...acc[acc.length - 1], + text: acc[acc.length - 1].text.replace(/[ \t]+$/, ""), + }, + ] + : []), + { + ...segment, + text: segment.text.slice(1), + }, + ]; + } + return [...acc, segment]; + }, + [] as TranslationElementSegment[] + ); + + const leadingSpace = + !previousElement || + currentElement.type === "EndPunctuation" || + /^[~ ]/.test(currentElementText) || + /(--|\n)/.test(previousElementText) + ? "" + : previousElementText.endsWith(" ") + ? "" + : " "; + + const shouldDeleteCurrentTrailingSpace = + nextElement?.type === "EndPunctuation" || + (isGloss(nextElement) && /^[\s~]/.test(nextElement.segments[0].text)); + + const lastOfCurrentElementSegmentsWithoutTilde = + currentElementSegmentsWithoutTilde[ + currentElementSegmentsWithoutTilde.length - 1 + ]; + const currentElementSegmentsBeforeCapitalization = + shouldDeleteCurrentTrailingSpace + ? [ + ...currentElementSegmentsWithoutTilde.slice(0, -1), + ...(lastOfCurrentElementSegmentsWithoutTilde + ? [ + { + ...lastOfCurrentElementSegmentsWithoutTilde, + text: lastOfCurrentElementSegmentsWithoutTilde.text.replace( + /[ \t]+$/g, + "" + ), + }, + ] + : []), + ] + : currentElementSegmentsWithoutTilde; + const toCapitalize = doCapitalize + ? currentElementSegmentsBeforeCapitalization.findIndex( + (segment) => segment.text.search(/[a-z]/i) !== -1 + ) + : -1; + + const currentElementSegmentsAfterCapitalization = + toCapitalize !== -1 + ? 
/**
 * A parsed gloss: an ordered list of groups plus document-wide lookups.
 *
 * On construction every group is told the document index of its first
 * character, a flat `characters` list is built (one entry per character, in
 * group order), and the groups' compound tables are merged into `compounds`.
 */
export class GlossDocument {
  groups: GlossGroup[];
  characters: DocumentCharacter[];
  compounds: { [location: string]: CompoundGloss };

  constructor(groups: GlossGroup[]) {
    this.groups = groups;
    const { characters, compounds } =
      this._initializeCharactersAndCompounds(groups);
    this.compounds = compounds;
    this.characters = characters;
  }

  /**
   * Builds the flat character list and the merged compound table.
   *
   * Mutates `groups`: each group is registered with the running count of
   * characters that precede it in the document.
   */
  _initializeCharactersAndCompounds(groups: GlossGroup[]) {
    let runningCharacterIndexStartForGroup = 0;
    const compounds: { [location: string]: CompoundGloss } = {};

    const characters = groups.flatMap((group) => {
      Object.assign(compounds, group.compounds);
      group.registerInDocument(runningCharacterIndexStartForGroup);
      runningCharacterIndexStartForGroup += group.characters.length;

      return group.characters.map((character, i) => ({
        indexInDocument: i + group.firstCharacterIndexInDocument,
        character,
        group,
      }));
    });

    return { characters, compounds };
  }

  /** Looks up a compound by its location key; `undefined` when unknown. */
  getCompound(location: string) {
    return this.compounds[location];
  }

  /** Returns the document character at `index`, or `null` if out of range. */
  getCharacter(index: number): DocumentCharacter | null {
    return this.characters[index] || null;
  }

  /**
   * Returns the document character at position `groupCharacterIndex` within
   * `group`; `undefined` when the resulting document index is out of range.
   */
  getGroupCharacter(group: GlossGroup, groupCharacterIndex: number) {
    return this.characters[
      group.firstCharacterIndexInDocument + groupCharacterIndex
    ];
  }

  /**
   * Calls `callback` for every character of every group, in order, and
   * concatenates the returned arrays. `index` is the character's position
   * inside its group, not inside the document.
   */
  flatmapCharacters<T>(
    callback: (
      gloss: CharacterGloss,
      index: number,
      group: GlossGroup,
      groupIndex: number
    ) => T[]
  ): T[] {
    return this.groups.flatMap((group, gi) =>
      group.characters.flatMap((c, ci) => callback(c, ci, group, gi))
    );
  }

  /**
   * Splits the groups into lines and maps each line through `callback`.
   *
   * A line ends at a group whose end punctuation contains a newline, and at
   * the last group of the document. Array results are flattened one level;
   * any other result is appended as a single item.
   */
  flatmapLines<T>(
    callback: (line: GlossGroup[], lineIndex: number) => T[] | T
  ): T[] {
    const result: T[] = [];
    const lastGroupIndex = this.groups.length - 1;
    let nextLine: GlossGroup[] = [];
    let lineIndex = 0;

    this.groups.forEach((group, groupIndex) => {
      nextLine.push(group);

      // endPunctuation is optional; a group without it never ends a line on
      // its own (only by being the last group).
      const endsWithNewline = (group.endPunctuation ?? "").includes("\n");
      if (groupIndex !== lastGroupIndex && !endsWithNewline) return;

      const next = callback(nextLine, lineIndex);
      if (Array.isArray(next)) {
        result.push(...next);
      } else {
        result.push(next);
      }
      nextLine = [];
      lineIndex += 1;
    });

    return result;
  }

  /**
   * Flattens the document into an ordered list of translation elements
   * (group padding, character/compound glosses, end punctuation) and returns
   * it together with a renderer for that list.
   *
   * Ordering rules implemented here:
   * - A compound is emitted once, at its first component character; its
   *   remaining component characters emit nothing.
   * - A gloss carrying a truthy `number` is held back. When a gloss numbered
   *   1 arrives, everything held back is emitted in ascending number order.
   * - Numbered glosses still held back after the last group are emitted at
   *   the very end (ascending) instead of being silently discarded.
   *
   * @throws Error naming the character's column when a gloss cannot be
   *   converted to a translation element.
   */
  getTranslation() {
    const translation: TranslationElement[] = [];
    let pendingNumberedGlosses: {
      glossNumber: number;
      element: TranslationElement;
    }[] = [];
    const resolvedCompounds = new Set<CompoundGloss>();

    const flushPendingNumberedGlosses = () => {
      pendingNumberedGlosses.sort((a, b) => a.glossNumber - b.glossNumber);
      for (const pending of pendingNumberedGlosses) {
        translation.push(pending.element);
      }
      pendingNumberedGlosses = [];
    };

    for (const group of this.groups) {
      if (group.prePadding) {
        translation.push({
          type: "Padding",
          segments: [
            {
              type: "Padding",
              text: convertUnderscoresAndBackslashes(group.prePadding),
            },
          ],
          position: "Pre",
          core: "Group",
        });
      }

      // Group-relative index of the next character not yet covered by an
      // emitted gloss; it jumps over a whole compound when one is emitted.
      let i = 0;
      for (const character of group.characters) {
        const groupCharacterIndex = i;
        const compoundContainingCharacter = character.compoundLocation
          ? group.compounds[character.compoundLocation]
          : null;
        const glossIsUnresolved =
          !compoundContainingCharacter ||
          !resolvedCompounds.has(compoundContainingCharacter);
        if (!glossIsUnresolved) {
          // A later component of a compound that was already emitted.
          continue;
        }
        if (compoundContainingCharacter) {
          resolvedCompounds.add(compoundContainingCharacter);
        }

        const currentGlossCharactersCount = compoundContainingCharacter
          ? compoundContainingCharacter.components.length
          : 1;
        i += currentGlossCharactersCount;

        const unresolvedGloss = compoundContainingCharacter || character;
        const groupCharacter = this.getGroupCharacter(
          group,
          groupCharacterIndex
        );
        const startIndex = groupCharacter?.indexInDocument ?? -1;
        const endIndex = startIndex + currentGlossCharactersCount - 1;

        let element: TranslationElement;
        try {
          element = toTranslationElement(unresolvedGloss, [
            startIndex,
            endIndex,
          ]);
        } catch (err) {
          console.error(err);
          throw new Error(
            `Problem displaying character at column ${character.column}: ${err}`
          );
        }

        if (unresolvedGloss.number) {
          pendingNumberedGlosses.push({
            glossNumber: unresolvedGloss.number,
            element,
          });
          if (unresolvedGloss.number === 1) {
            flushPendingNumberedGlosses();
          }
        } else {
          translation.push(element);
        }
      }

      if (group.postPadding) {
        translation.push({
          type: "Padding",
          segments: [
            {
              type: "Padding",
              text: convertUnderscoresAndBackslashes(group.postPadding),
            },
          ],
          position: "Post",
          core: "Group",
        });
      }

      if (group.endPunctuation) {
        translation.push({
          type: "EndPunctuation",
          segments: [
            {
              type: "EndPunctuation",
              text: convertUnderscoresAndBackslashes(group.endPunctuation),
            },
          ],
        });
      }
    }

    // A numbered run that never reached 1 would otherwise vanish from the
    // output; surface it so the problem is visible.
    if (pendingNumberedGlosses.length > 0) {
      flushPendingNumberedGlosses();
    }

    return {
      translation,
      renderTranslation: getRenderTranslation(translation),
    };
  }
}
/**
 * Converts the three parts of a displayed gloss into translation segments.
 *
 * The core always produces a "GlossComponent" segment; `pre` and `post`
 * each produce a "Padding" segment only when they are non-empty.
 */
function glossToTranslationElementSegments({
  pre,
  core,
  post,
}: {
  pre: string;
  core: string;
  post: string;
}) {
  const paddingSegment = (text: string) => ({
    type: "Padding" as const,
    text,
  });

  return [
    ...(pre ? [paddingSegment(pre)] : []),
    { type: "GlossComponent" as const, text: core },
    ...(post ? [paddingSegment(post)] : []),
  ];
}
/**
 * Upper-cases the first cased letter of `string`, leaving everything before
 * and after it untouched.
 *
 * "Cased letter" means a Unicode lowercase, uppercase or titlecase letter,
 * so a word starting with an accented letter is capitalised on that letter
 * ("émigré" -> "Émigré") rather than on the first ASCII letter after it.
 * Caseless characters (digits, punctuation, CJK, ...) are skipped.
 *
 * @param string - text to capitalise.
 * @returns the text with its first cased letter upper-cased, or the input
 *   unchanged when it contains no cased letter.
 */
function capitalizeFirstLetter(string: string) {
  // The `u` flag makes the match code-point aware, so a letter outside the
  // BMP is captured whole rather than as half a surrogate pair.
  const firstLetter = /[\p{Ll}\p{Lu}\p{Lt}]/u.exec(string);
  if (!firstLetter) {
    return string;
  }

  const letter = firstLetter[0];
  const letterIndex = firstLetter.index;
  return (
    string.slice(0, letterIndex) +
    letter.toUpperCase() +
    string.slice(letterIndex + letter.length)
  );
}
const popover = usePopover(); const [popoverChar, setChar] = useState(null); @@ -50,26 +69,64 @@ export function ChineseWithPopover({ const id = `text-${char}-${i}`; + const characterIndexInLine = + segmentStartingCharacterIndexInLine != null + ? segmentStartingCharacterIndexInLine + glossIndex + : null; + const characterIsHighlighted = + highlightedCharactersRange != null && + characterIndexInLine != null && + characterIndexInLine >= + highlightedCharactersRange.startCharacterIndex && + characterIndexInLine <= highlightedCharactersRange.endCharacterIndex; const entries = vocab[char]; if (!entries?.length) { return ( - + {char} ); } - const enGloss = gloss?.[glossIndex]?.replace(/_/g, " ") || null; + const characterGloss = + characterIndexInLine != null + ? gloss?.characters[characterIndexInLine] + : null; + const glossLemma = characterGloss?.character.getLemma() || null; - const soleEntry = entries.length === 1 ? entries[0] : null; + const highlightRange = characterGloss?.character.compoundLocation + ? { + startCharacterIndex: gloss!.characters.findIndex( + (c) => + c.character === + gloss!.getCompound(characterGloss.character.compoundLocation!) + .components[0] + ), - const matchingEntry = enGloss - ? findEntryMatchingEnKeywords(entries, [enGloss]) + endCharacterIndex: gloss!.characters.findIndex( + (c) => + c.character === + gloss!.getCompound(characterGloss.character.compoundLocation!) + .components[ + gloss!.getCompound( + characterGloss.character.compoundLocation! + ).components.length - 1 + ] + ), + } + : { + startCharacterIndex: characterIndexInLine!, + endCharacterIndex: characterIndexInLine!, + }; + + const soleEntry = entries.length === 1 ? entries[0] : null; + const matchingEntry = glossLemma + ? 
findEntryMatchingEnKeywords(entries, [glossLemma]) : null; let rubyText: string | null = null; - if (displayOptions.ruby === "en") rubyText = enGloss; + if (displayOptions.ruby === "en" && glossLemma) rubyText = glossLemma; else rubyText = displayOptions?.ruby && @@ -79,11 +136,15 @@ export function ChineseWithPopover({ ] : null; - const className = `relative cursor:pointer hover:bg-yellow-400/40`; + const className = `relative cursor:pointer hovers:bg-yellow-400/40 ${ + characterIsHighlighted ? "bg-blue-400/40" : "" + }`; return ( { popover.refs.setReference(e.currentTarget); setChar(char); - setCharGloss(enGloss); + setCharGloss(glossLemma); }, + onMouseEnter: setHighlightedCharactersRange + ? () => setHighlightedCharactersRange(highlightRange) + : undefined, })} + onMouseLeave={ + setHighlightedCharactersRange + ? () => setHighlightedCharactersRange?.(null) + : undefined + } > {char} ); })} - {popover.open && - popoverChar && - vocab[popoverChar] && - PopoverDictionaryContent(popover, popoverChar, vocab, popoverCharGloss)} + {popover.open && popoverChar && vocab[popoverChar] && ( + + )} ); } -function PopoverDictionaryContent( - popover: ReturnType, - popoverChar: string, - vocab: PassageVocab, - enGloss: string | null -) { +function PopoverDictionaryContent({ + popover, + popoverChar, + vocab, + enGloss, +}: { + popover: ReturnType; + popoverChar: string; + vocab: PassageVocab; + enGloss: string | null; +}) { return ( diff --git a/src/app/texts/[textId]/TextPage.tsx b/src/app/texts/[textId]/TextPage.tsx index 611f830..3139775 100644 --- a/src/app/texts/[textId]/TextPage.tsx +++ b/src/app/texts/[textId]/TextPage.tsx @@ -3,9 +3,15 @@ import Markdown from "markdown-to-jsx"; import markdownCss from "./markdown.module.css"; import { Passage, PassageVocab } from "../Passage"; -import { ChineseWithPopover, DisplayOptions } from "./ChineseWithPopover"; -import { useEffect, useRef, useState } from "react"; +import { + ChineseWithPopover, + DisplayOptions, + 
LATEST_DISPLAY_OPTIONS_VERSION, +} from "./ChineseWithPopover"; +import { ReactNode, useEffect, useRef, useState } from "react"; import { normalizeText } from "./punctuation"; +import { parseGloss } from "./parseGloss"; +import { GlossDocument, TranslationElement } from "@/app/glossUtils"; export default function TextPage({ text, @@ -16,6 +22,25 @@ export default function TextPage({ }) { const notesWithHeadings: { id: string; heading: string }[] = []; const [displayOptions, setDisplayOptions] = useDisplayOptions(); + const [highlightedCharactersRange, setHighlightedCharactersRange] = useState<{ + lineIndex: number; + startCharacterIndex: number; + endCharacterIndex: number; + } | null>(null); + const getSetHoveredCharacterLocation = + (lineIndex: number) => + ( + characterRange: { + startCharacterIndex: number; + endCharacterIndex: number; + } | null + ) => { + console.log(characterRange); + setHighlightedCharactersRange( + characterRange === null ? null : { lineIndex, ...characterRange } + ); + }; + return (
@@ -94,29 +119,49 @@ export default function TextPage({ label="none" />
-
- - setDisplayOptions((opts) => ({ - ...opts, - translation: !opts.translation, - })) - } - /> - +
+ + + setDisplayOptions((opts) => ({ ...opts, translation: "gloss" })) + } + /> + + + + + setDisplayOptions((opts) => ({ + ...opts, + translation: "idiomatic", + })) + } + /> + +
- {text.lines.map((line, i) => { - const lineGloss = line.gloss - ?.replaceAll(/\([^)]+\)/g, "") - .split(/ +|-/); + {text.lines.map((line, lineIndex) => { + const glossText = line.gloss?.replaceAll(/^`|`$/g, "") || null; + + const gloss = parseGloss(glossText); + let charactersProcessed = 0; const chineseSegments = line.chinese .split(/(?={)|(?<=})/) .reduce((segments, segment) => { @@ -126,62 +171,122 @@ export default function TextPage({ if (noteId) notesWithHeadings.push({ id: noteId, heading: text }); const normalizedText = normalizeText(text); - const glossedSoFar = segments.reduce( - (acc, { normalizedText }) => acc + normalizedText.length, - 0 - ); - const gloss = - lineGloss?.slice( - glossedSoFar, - glossedSoFar + normalizedText.length - ) || null; segments.push({ noteId, text, normalizedText, - gloss, + gloss: gloss.result || null, + segmentStartingCharacterIndexInLine: charactersProcessed, }); + charactersProcessed += normalizedText.length; + return segments; - }, [] as { noteId: string | null; text: string; normalizedText: string; gloss: string[] | null }[]); + }, [] as { noteId: string | null; text: string; normalizedText: string; gloss: GlossDocument | null; segmentStartingCharacterIndexInLine: number }[]); return ( -
- {chineseSegments.map(({ noteId, text, gloss }, segmentIndex) => { - return ( -
- + {chineseSegments.map( + ( + { noteId, text, gloss, segmentStartingCharacterIndexInLine }, + segmentIndex + ) => { + return ( +
- - - {noteId && ( - - [{noteId}] - - )} -
- ); - })} - {displayOptions.translation && ( + +
+ {noteId && ( + + [{noteId}] + + )} +
+ ); + } + )} + {(!gloss.result || + displayOptions.translation === "idiomatic") && (
{toCurlyQuotes(line.english)}
)} + {displayOptions.translation === "gloss" && gloss.result && ( +
+ {gloss.result + .getTranslation() + ?.renderTranslation({ + combineElements: (base, addition) => + base.concat(addition), + mapTranslationElementFn: ( + element, + leadingSpace, + capitalizedTranslation, + translationElementIndex + ) => { + const highlightTargetRange = + (element.type === "CharacterGloss" && { + startCharacterIndex: element.characterIndex, + endCharacterIndex: element.characterIndex, + }) || + (element.type === "CompoundGloss" && { + startCharacterIndex: element.characterIndexes[0], + endCharacterIndex: element.characterIndexes.at(-1)!, + }); + + const highlighted = + highlightedCharactersRange && + lineIndex === highlightedCharactersRange.lineIndex && + highlightTargetRange && + highlightTargetRange.startCharacterIndex >= + highlightedCharactersRange.startCharacterIndex && + highlightTargetRange.endCharacterIndex <= + highlightedCharactersRange.endCharacterIndex; + return ( + + ); + }, + base: [] as ReactNode[], + }) || ""} +
+ )}
); })} @@ -229,15 +334,91 @@ export default function TextPage({ ); } -function useDisplayOptions() { - const [displayOptions, setDisplayOptions] = useState(() => - globalThis.window && localStorage.getItem("displayOptions") - ? JSON.parse(localStorage.getItem("displayOptions") as string) - : { - ruby: "vi", - translation: true, +function GlossElement({ + translationElementIndex, + element, + leadingSpace, + capitalizedTranslation, + highlighted, + setHoveredCharacterLocation, + highlightTargetRange, +}: { + translationElementIndex: number; + element: TranslationElement; + leadingSpace: string; + capitalizedTranslation: { + type: "Padding" | "EndPunctuation" | "GlossComponent"; + text: string; + }[]; + highlighted: boolean; + setHoveredCharacterLocation: ( + characterRange: { + startCharacterIndex: number; + endCharacterIndex: number; + } | null + ) => void; + highlightTargetRange: { + startCharacterIndex: number; + endCharacterIndex: number; + } | null; +}) { + return ( + + {leadingSpace} + {capitalizedTranslation.map((segment, segmentIndex) => { + if (segment.type === "Padding" || segment.type === "EndPunctuation") { + return ( + + {segment.text} + + ); } + return ( + { + setHoveredCharacterLocation({ + startCharacterIndex: + highlightTargetRange.startCharacterIndex, + endCharacterIndex: + highlightTargetRange?.endCharacterIndex, + }); + } + : undefined + } + onMouseLeave={() => { + setHoveredCharacterLocation(null); + }} + > + {segment.text} + + ); + })} + ); +} + +function useDisplayOptions() { + const [displayOptions, setDisplayOptions] = useState(() => { + const storedString = + globalThis.window && localStorage.getItem("displayOptions"); + const parsed = storedString ? 
JSON.parse(storedString) : null; + if (parsed?.version === LATEST_DISPLAY_OPTIONS_VERSION) { + return parsed; + } + return { + ruby: "vi", + translation: "gloss", + version: LATEST_DISPLAY_OPTIONS_VERSION, + }; + }); const initialized = useRef(false); useEffect(() => { if (!initialized.current) { @@ -263,7 +444,7 @@ function NotesChinese({ children: string; vocab: PassageVocab; displayOptions: DisplayOptions; - gloss: string[] | null; + gloss: GlossDocument | null; }) { return ( diff --git a/src/app/texts/[textId]/parseGloss.tsx b/src/app/texts/[textId]/parseGloss.tsx new file mode 100644 index 0000000..9421c99 --- /dev/null +++ b/src/app/texts/[textId]/parseGloss.tsx @@ -0,0 +1,13 @@ +"use client"; +import { parse } from "@/app/glossParser"; +import { GlossDocument } from "@/app/glossUtils"; + +export function parseGloss(glossText: string | null) { + if (!glossText) return { ok: true, result: null }; + try { + return { ok: true, result: parse(glossText) as GlossDocument }; + } catch (e) { + console.error("Error parsing gloss", e); + return { ok: false, result: null, error: e }; + } +} diff --git a/texts/brandt-ch01-1.passage.md b/texts/brandt-ch01-1.passage.md index 0971807..ced57c2 100644 --- a/texts/brandt-ch01-1.passage.md +++ b/texts/brandt-ch01-1.passage.md @@ -8,39 +8,39 @@ 吉凶 Lucky and Bad Omens -auspicious unlucky +auspicious[:lucky] [and] unlucky[:bad_omens] 有鴉{a:集庭樹}。{b:引頸而鳴}。 There were (some) crows (who) flocked together (in a) court-yard (upon a) tree and cawed (lit. sang) stretching (their) necks. -be crow flock_together courtyard tree stretch neck and sing +`be[:there_were] [some] crow[s] [who] flock[ed]_together [in a] courtyard [upon a] tree [and] 2stretch 4[their] neck[s] 3and[:~ing] 1sing (caw[ed]).` {c:兒叱之}。 (A) boy hooted at them. -boy hoot_at them +`[A] boy hoot[ed]_at them.` 父曰。是何害。 (His) father asked (lit. said), "What (is) the harm (in) this?" 
-father speak this what harm +`[His] father speak[:asked], 2[in] this "w[-W]hat 1[is the] harm?"` 兒曰。常聞人言。鵲鳴吉。鴉鳴凶。 (The) boy said, "(I) have often heard people say (that when a) magpie chatters, (it brings) good luck, (and when a) crow caws, (it brings) bad luck. -boy speak frequent hear person say magpie sing auspicious crow sing unlucky +`[The] boy speak[:said], "[I have] frequent[:often] hear[d] person[:people] say [that when a] magpie sing[:chatters], [it is] auspicious[:good_luck], [and when a] crow sing[:caws], [it is] unlucky[:bad_luck].` 今{d:鳴者}{e:鴉也}。故叱之。 Today the crows have been cawing, therefore I have hooted at them." -now sing that_which crow final_particle therefore hoot_at them +`now[:Today] 4[are] sing[ing] 3that_which[:that] 2[the] crow[s] 1final_particle[:it_is], therefore [I have] hoot[ed]_at them."` 父曰。{f:人之智識}。{g:遠勝於鳥}。 The father said, "Man's knowledge and experience (are) much higher than (those of) birds, -father speak man of wisdom-know (knowledge_and_experience) distant excel than bird +`[The] father speak[:said], "m[-~M]an of[:~'s] wisdom-know (knowledge_and_experience) [are] distant[:much] excel[:higher] than [those of] bird[s].` 尚不能預知吉凶。 Yet he cannot foresee good luck and ill luck. -still not be_able beforehand-know (foresee) auspicious unlucky +`still[:Yet] [he] 2[~]not 1be_able[:can] beforehand-know (foresee) auspicious[:good_luck] [and] unlucky[:ill_luck].` 而況鳥乎。 Still more the birds are not able to do it." -and-still_more (still_more) bird exclamatory_particle +`and-still_more (s[-S]till_more) [the] bird[s] [are not able to do it] exclamatory_particle (~\!) [\"]` ---