Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

USX implementations in node and web modules #263

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6f1c264
Node: Start with toUSX() with xmldom library
kavitharaju Oct 16, 2024
b00a1ab
Node: Start with USXGenerator class, constructor and Id node
kavitharaju Oct 16, 2024
4f4e452
Node: Implement chapter, verse, text etc methods in USX Generation
kavitharaju Oct 16, 2024
9ef34c5
Node: Implement content paragraph node in USX Generation
kavitharaju Oct 16, 2024
f7881eb
Node: Implement Notes conversion to USX
kavitharaju Oct 18, 2024
6c0e185
Node: Implement char nodes and attributes conversion to USX
kavitharaju Oct 18, 2024
03f16ff
Node: Implement esb, cat, ref etc and generic parastyle markers in US…
kavitharaju Oct 18, 2024
c9986d2
Node: Implement milestone and table nodes in USX generation
kavitharaju Oct 18, 2024
dd0505b
Node: Make verse nodes empty and not carrying the text in USX generation
kavitharaju Oct 18, 2024
da2b383
Node: Return xmldom element instead of string after USX generation
kavitharaju Oct 18, 2024
f167e95
Node: Fix issue of not adding node to xml tree before processing chil…
kavitharaju Oct 18, 2024
d4e4d4d
Node: Add verse end node at chapter end
kavitharaju Oct 18, 2024
b6e90f9
Node: More minor fixes in USX generation
kavitharaju Oct 18, 2024
7a3757a
Node: Use @xmldom/xmldom instead of xmldom and xml2js
kavitharaju Oct 18, 2024
061dfc4
Node: Add tests for errorless usfm-usx conversion checks
kavitharaju Oct 18, 2024
3141ad1
Node: Switch back to xmldom for speed
kavitharaju Oct 18, 2024
90ee92e
Node: Exclude usfm(version) node in USX
kavitharaju Oct 18, 2024
8143a77
Node: Fix issue with numbered markers
kavitharaju Oct 18, 2024
a4568fa
Node: Fix marker usage instead of style
kavitharaju Oct 18, 2024
4557449
Node: Fix issues of pi style value
kavitharaju Oct 18, 2024
afbf86e
Node: Fix the similar issue with numbered marker fiun in USJ generation
kavitharaju Oct 18, 2024
0ebc4a6
Node: Keep ref marker not as char in USX generation
kavitharaju Oct 18, 2024
04bfc89
Node: tests for checking all markers in generated USX
kavitharaju Oct 18, 2024
95a5e92
Node: Minor fix in ref handling
kavitharaju Oct 18, 2024
f1d16fa
Node: Allow tests to run in parallel
kavitharaju Oct 18, 2024
3ef16d2
Web: Replicate toUSX() implementation as in Node
kavitharaju Oct 18, 2024
533b702
Web: Add tests for toUSX() conversion as in Node
kavitharaju Oct 18, 2024
83daaa1
Node: Fix issue in error handling
kavitharaju Oct 18, 2024
9735ede
Node: Implement USFM Generation from USX and fromUsx initialization o…
kavitharaju Oct 19, 2024
7f1d2ce
Node: Add tests for roundtripping USFM via USX
kavitharaju Oct 19, 2024
0afce59
Web: Implement USFM Generation from USX and fromUsx initialization of…
kavitharaju Oct 19, 2024
6689d2b
Web: Add tests for roundtripping USFM via USX
kavitharaju Oct 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions node-usfm-parser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"module": "./dist/es/index.mjs",
"scripts": {
"build": "parcel build ./src/index.js",
"test": "mocha --timeout 40000"
"test": "mocha --timeout 40000 --parallel"
},
"repository": {
"type": "git",
Expand All @@ -27,13 +27,14 @@
],
"dependencies": {
"tree-sitter": "0.21.1",
"tree-sitter-usfm3": "file:../tree-sitter-usfm3"
"tree-sitter-usfm3": "file:../tree-sitter-usfm3",
"xmldom": "^0.6.0",
"xpath": "^0.0.34"
},
"devDependencies": {
"ajv": "^8.17.1",
"glob": "^11.0.0",
"mocha": "^10.7.3",
"parcel": "^2.12.0",
"xml2js": "^0.6.2"
"parcel": "^2.12.0"
}
}
123 changes: 123 additions & 0 deletions node-usfm-parser/src/usfmGenerator.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
const { NO_USFM_USJ_TYPES, CLOSING_USJ_TYPES, NON_ATTRIB_USJ_KEYS, NO_NEWLINE_USJ_TYPES } = require("./utils/types");
const { NON_ATTRIB_USX_KEYS, NO_NEWLINE_USX_TYPES } = require("./utils/types");
const { DOMParser } = require('xmldom');

class USFMGenerator {
constructor() {
this.usfmString = "";
Expand Down Expand Up @@ -73,6 +76,126 @@ class USFMGenerator {
}
return this.usfmString;
}

usxToUsfm(xmlObj, nested=false) {
// Check if xmlObj is a string
// if (typeof xmlObj === 'string') {
// // this.usfmString += xmlObj;
// return;
// }

const objType = xmlObj.tagName;
let marker = null;
let usfmAttributes = [];

if (['verse', 'chapter'].includes(objType) && xmlObj.hasAttribute('eid')) {
return;
}

if (!NO_NEWLINE_USX_TYPES.includes(objType)) {
this.usfmString += '\n';
}

if (objType === 'optbreak') {
if (this.usfmString !== '' && !['\n', '\r', ' ', '\t'].includes(this.usfmString.slice(-1))) {
this.usfmString += ' ';
}
this.usfmString += '// ';
}

if (xmlObj.hasAttribute('style')) {
marker = xmlObj.getAttribute('style');
if (nested && objType === 'char' && !['xt', 'fv', 'ref'].includes(marker)) {
marker = `+${marker}`;
}
this.usfmString += `\\${marker} `;
} else if (objType === 'ref') {
marker = 'ref'
this.usfmString += `\\${marker} `;
}

if (xmlObj.hasAttribute('code')) {
this.usfmString += xmlObj.getAttribute('code');
}

if (xmlObj.hasAttribute('number')) {
this.usfmString += `${xmlObj.getAttribute('number')} `;
}

if (xmlObj.hasAttribute('caller')) {
this.usfmString += `${xmlObj.getAttribute('caller')} `;
}

if (xmlObj.hasAttribute('altnumber')) {
if (objType === 'verse') {
this.usfmString += `\\va ${xmlObj.getAttribute('altnumber')}\\va*`;
} else if (objType === 'chapter') {
this.usfmString += `\n\\ca ${xmlObj.getAttribute('altnumber')}\\ca*`;
}
}

if (xmlObj.hasAttribute('pubnumber')) {
if (objType === 'verse') {
this.usfmString += `\\vp ${xmlObj.getAttribute('pubnumber')}\\vp*`;
} else if (objType === 'chapter') {
this.usfmString += `\n\\cp ${xmlObj.getAttribute('pubnumber')}`;
}
}

if (xmlObj.hasAttribute('category')) {
this.usfmString += `\n\\cat ${xmlObj.getAttribute('category')} \\cat*`;
}

const children = Array.from(xmlObj.childNodes);
for (const child of children) {
if (child.nodeType === 1) { // Check if child is an element node
if (objType === 'char') {
this.usxToUsfm(child, true);
} else {
this.usxToUsfm(child, false);
}
}
if (child.nodeType === 3 && child.nodeValue.trim()) { // Check if child is a text node with content
if (this.usfmString !== '' && !['\n', '\r', ' ', '\t'].includes(this.usfmString.slice(-1))) {
this.usfmString += ' ';
}
this.usfmString += child.nodeValue.trim();
}
}

const attributes = Array.from(xmlObj.attributes);
for (const attrNode of attributes) {
let key = attrNode.name;
let val = attrNode.value.replace(/"/g, '');
if (key === 'file' && objType === 'figure') {
usfmAttributes.push(`src="${val}"`);
} else if (!NON_ATTRIB_USX_KEYS.includes(key)) {
usfmAttributes.push(`${key}="${val}"`);
}
if (['sid', 'eid'].includes(key) && objType === 'ms') {
usfmAttributes.push(`${key}="${val}"`);
}
}

if (usfmAttributes.length > 0) {
this.usfmString += '|';
this.usfmString += usfmAttributes.join(' ');
}

if ((xmlObj.hasAttribute('closed') && xmlObj.getAttribute('closed') === 'true')
|| CLOSING_USJ_TYPES.includes(objType)
|| usfmAttributes.length > 0) {
if (objType === 'ms') {
this.usfmString += '\\*';
} else {
this.usfmString += `\\${marker}*`;
}
}

if (objType === 'sidebar') {
this.usfmString += '\n\\esbe\n';
}
}
}

exports.USFMGenerator = USFMGenerator;
77 changes: 70 additions & 7 deletions node-usfm-parser/src/usfmParser.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
const Parser = require('tree-sitter');
const assert = require('assert');

const {USFMGenerator} = require("./usfmGenerator");
const {USJGenerator} = require("./usjGenerator");
const {ListGenerator} = require("./listGenerator");
const {USXGenerator} = require("./usxGenerator")
const { includeMarkersInUsj, excludeMarkersInUsj, Filter } = require("./filters.js");
const USFM3 = require('tree-sitter-usfm3');
const { Query } = Parser;
Expand Down Expand Up @@ -39,7 +41,7 @@ Only one of USFM, USJ or USX is supported in one object.`)
this.usfm = this.convertUSJToUSFM()
} else if (fromUsx !== null) {
this.usx = fromUsx;
// this.usfm = this.convertUSXToUSFM()
this.usfm = this.convertUSXToUSFM()
}
this.parser = null;
this.initializeParser();
Expand All @@ -48,6 +50,7 @@ Only one of USFM, USJ or USX is supported in one object.`)
this.errors = [];
this.warnings = [];
this.parseUSFM();

}
initializeParser() {
this.parser = new Parser();
Expand Down Expand Up @@ -135,6 +138,33 @@ Only one of USFM, USJ or USX is supported in one object.`)
return outputUSFM;
}

convertUSXToUSFM() {
try {
assert(1 <= this.usx.nodeType && this.usx.nodeType <= 12 ,
'Input must be an instance of xmldom Document or Element'
);
if (this.usx.tagName !== "usx") {
assert(this.usx.getElementsByTagName('usx').length === 1,
'Expects a <usx> node. Refer docs: https://docs.usfm.bible/usfm/3.1/syntax.html#_usx_usfm_xml');

this.usx = this.usx.getElementsByTagName('usx')[0]
}
// assert(this.usx.childNodes[0].tagName === 'book', "<book> expected as first element in <usx>")

} catch(err) {
throw new Error("USX not in expected format. "+err.message)
}
try {
const usfmGen = new USFMGenerator()
usfmGen.usxToUsfm(this.usx);
// console.log(usfmGen.usfmString)
return usfmGen.usfmString;
} catch(err) {
let message = "Unable to do the conversion from USX to USFM. ";
throw new Error(message, { cause: err });
}
}

convertUSFMToUSJ(
excludeMarkers = null,
includeMarkers = null,
Expand Down Expand Up @@ -187,9 +217,9 @@ Only one of USFM, USJ or USX is supported in one object.`)
/* Uses the toJSON function and converts JSON to CSV
To be re-implemented to work with the flat JSON schema */

if (!ignoreErrors && this.errors && this.errors.length > 0) {
const errStr = this.errors.map(err => err.join(":")).join("\n\t");
throw new Error(`Errors present:\n\t${errStr}\nUse ignoreErrors=true to generate output despite errors`);
if (!ignoreErrors && this.errors.length > 0) {
let errorString = this.errors.join("\n\t");
throw new Error(`Errors present:\n\t${errorString}\nUse ignoreErrors=true to generate output despite errors`);
}

try {
Expand All @@ -201,15 +231,48 @@ Only one of USFM, USJ or USX is supported in one object.`)

} catch (exe) {
let message = "Unable to do the conversion. ";
if (this.errors && this.errors.length > 0) {
const errStr = this.errors.map(err => err.join(":")).join("\n\t");
message += `Could be due to an error in the USFM\n\t${errStr}`;
if (this.errors.length > 0) {
let errorString = this.errors.join("\n\t");
message += `Could be due to an error in the USFM\n\t${errorString}`;
}
throw new Error(message, { cause: exe });
}

}

toUSX(ignoreErrors = false) {
/* Convert the syntax_tree to the XML format (USX) */

if (!ignoreErrors && this.errors.length > 0) {
let errorString = this.errors.join("\n\t");
throw new Error(`Errors present:\n\t${errorString}\nUse ignoreErrors=true to generate output despite errors`);
}
let xmlContent = null;

try {
// Initialize the USX generator (assuming the constructor is already implemented in JS)
const usxGenerator = new USXGenerator(USFM3,
this.usfm);

// Process the syntax tree and convert to USX format
usxGenerator.node2Usx(this.syntaxTree, usxGenerator.xmlRootNode);

// xmlContent = usxSerializer.serializeToString(usxGenerator.xmlRootNode);
xmlContent = usxGenerator.xmlRootNode;
} catch (exe) {
let message = "Unable to do the conversion. ";
if (this.errors.length > 0) {
let errorString = this.errors.join("\n\t");
message += `Could be due to an error in the USFM\n\t${errorString}`;
}
throw new Error(message, { cause: exe });
}

// Return the generated XML structure (in JSON format)
return xmlContent;
}


}


Expand Down
22 changes: 11 additions & 11 deletions node-usfm-parser/src/usjGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -418,21 +418,21 @@ class USJGenerator {
let style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex);
if (style.startsWith("\\")) {
style = style.replace("\\", "").trim();
} else {
style = node.type;
// } else {
// style = node.type;
}

// console.log(node.children.length, node.children[0].type, node.children[1].type)
let childrenRangeStart = 1;
if (
node.children.length > 1 &&
node.children[1].type.startsWith("numbered")
) {
const numNode = node.children[1];
const num = this.usfm.substring(numNode.startIndex, numNode.endIndex);
style += num;
childrenRangeStart = 2;
}
// if (
// node.children.length > 1 &&
// node.children[1].type.startsWith("numbered")
// ) {
// const numNode = node.children[1];
// const num = this.usfm.substring(numNode.startIndex, numNode.endIndex);
// style += num;
// childrenRangeStart = 2;
// }
const paraJsonObj = { type: "para", marker: style, content: [] };
parentJsonObj.content.push(paraJsonObj);

Expand Down
Loading
Loading