Skip to content

Commit

Permalink
React support with seperate usfm-grammar-web (#256)
Browse files Browse the repository at this point in the history
* Bundle tree-sitter.js instead of keeping web-tree-sitter dependency

* Fix the parser init to use input wasm path

* Keep a seperate usfm-grammar-web

* keep a separate usfm-grammar for node w/o using wasm files

* Test publish another alpha.8 version of usfm-grammar at npm
  • Loading branch information
kavitharaju authored Aug 29, 2024
1 parent f0e73d5 commit 90263b6
Show file tree
Hide file tree
Showing 35 changed files with 1,395 additions and 102 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ tree-sitter-usfm3/src/
*/dist/*
*/src/usfm_grammar.egg-info/*
**/my-languages.so
**/wheelhouse/
**/wheelhouse/
js-usfm-parser/src/web-tree-sitter/tree-sitter.js
js-usfm-parser/src/web-tree-sitter/tree-sitter.wasm
29 changes: 18 additions & 11 deletions docs/Dev_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
## How to build the binary for python module?

First compile the grammar
```
```bash
cd tree-sitter-usfm3
export PATH=$PATH:./node_modules/.bin
tree-sitter generate
tree-sitter test
```
To use the grammar module still in development from within the py-usfm-grammar module
```
```bash
cd py-usfm-parser
source ENV-dev/bin/activate
pip install ../tree-sitter-usfm3
Expand All @@ -21,7 +21,7 @@ To make the changes reflect automatically `pip install -e ../tree-sitter-usfm3`.
## How to change version number in files?

In python module,
```
```bash
cd usfm-grammar
source py-usfm-parser/ENV-dev/bin/activate
bumpversion --new-version 3.0.0-alpha.28 num
Expand All @@ -31,7 +31,7 @@ The github action is configured to automatically build and publish to PyPI and N

## Run tests
To check Syntax trees in Grammar module
```
```bash
cd tree-sitter-usfm3
export PATH=$PATH:./node_modules/.bin
tree-sitter generate
Expand All @@ -40,7 +40,7 @@ tree-sitter test

In python module alone

```
```bash
cd py-usfm-parser
python -m pytest -n auto

Expand All @@ -52,26 +52,33 @@ pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc
## How to build and publish JS module for local Development

First compile the grammar and get the wasm file
```
```bash
cd tree-sitter-usfm3
export PATH=$PATH:./node_modules/.bin
tree-sitter generate
cp tree-sitter-usfm.wasm ../js-usfm-parser/
cd ..
```
After npm install, copy the `tree-sitter.wasm` file from `node_modules/web=tree-sitter` to the `js-usfm-parser` folder to include it with the npm packaging.

After npm install, copy the `tree-sitter.js` file from `node_modules/web-tree-sitter` to the `js-usfm-parser/src/web-tree-sitter` folder to include it in the bundle. Also copy the `tree-sitter.wasm` file to `js-usfm-parser/` to be included in the npm packaging.

Build the code base generating both cjs and esm versions of the same code base. The configs are in `.babelrc` file. Upon running the commands two folders `dist/cjs/` and `dist/esm` would be created.
```bash
cd js-usfm-parser/
npm install .
cp node_modules/web-tree-sitter/tree-sitter.js src/web-tree-sitter/
cp node_modules/web-tree-sitter/tree-sitter.wasm ./

```
cd ../js-usfm-parser

Build the code base, generating both cjs and esm versions of it. This uses Parcel, and its configuration lives in `package.json` (`main`, `module`, `source`, etc.). Running the commands below creates two folders, `dist/cjs/` and `dist/esm`.

```bash
rm -fr ./dist
npm run build
```

Use a local publishing registry for local development and testing

```
```bash
npm install -g verdaccio # need not do again if done once
verdaccio # runs a server at localhost:4873
touch .npmrc
Expand Down
36 changes: 36 additions & 0 deletions docs/react-usage.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
## How to use the usfm-grammar npm package from React

To use the library from a React app, a few extra steps are required.
1. The modules `fs`, `path` and `process` used by tree-sitter are not required when running in the front end, but bundling them may cause issues. Hence, use `react-app-rewired` and add the following settings to the `config-overrides.js` file:

```javascript
const { override } = require('customize-cra');

module.exports = override(
config => {
config.resolve.fallback = {
fs: false,
path: false,
process: false
};
return config;
},
);
```
2. When initializing the `USFMParser` class use the links to required wasm files as shown below:
```javascript
import React, { useEffect } from 'react';
import { USFMParser } from 'usfm-grammar';

function App() {
...
useEffect(() => {
const initParser = async () => {
await USFMParser.init("https://cdn.jsdelivr.net/npm/usfm-grammar@<version>/tree-sitter-usfm.wasm",
"https://cdn.jsdelivr.net/npm/usfm-grammar@<version>/tree-sitter.wasm");
};
initParser();
}, []);
...
}
```
10 changes: 0 additions & 10 deletions js-usfm-parser/.babelrc

This file was deleted.

4 changes: 0 additions & 4 deletions js-usfm-parser/.prettierignore

This file was deleted.

24 changes: 0 additions & 24 deletions js-usfm-parser/pnpm-lock.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions js-usfm-parser/prettierrc.json

This file was deleted.

File renamed without changes.
29 changes: 13 additions & 16 deletions js-usfm-parser/README.md → node-usfm-parser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,26 @@ npm install usfm-grammar
Here's how you can use USFM Grammar in your JavaScript/TypeScript projects:

```javascript
import pkg from 'usfm-grammar';
const USFMParser = pkg.USFMParser;

(async () => {
await USFMParser.init();
const usfmParser = new USFMParser()
const output = usfmParser.usfmToUsj('\\id GEN\n\\c 1\n\\p\n\\v 1 In the begining..\\v 2 more text')
console.log({ output })
const usfm = usfmParser.usjToUsfm(output)
console.log({ usfm })
})();
const { USFMParser } = require('usfm-grammar');

const usfmParser = new USFMParser();
const output = usfmParser.usfmToUsj('\\id GEN\n\\c 1\n\\p\n\\v 1 In the begining..\\v 2 some more text')
console.log({ output });
const usfm = usfmParser.usjToUsfm(output);
console.log({ usfm });

```

If you are using node the import part can be change as below:
When using in an ES module, if `import {USFMParser} from 'usfm-grammar'` doesn't work for you, you could try:
```javascript
const { USFMParser} = require('usfm-grammar');
```
import pkg from 'usfm-grammar';
const {USFMParser} = pkg;

...
```

## API Documentation

### `USFMParser.init()`
Initializes the USFMParser. This function must be called before creating instances of `USFMParser`. And can take the grammar and the tree-sitter files (in wasm format) as arguments, that is included in the package.

### `USFMParser.usfmToUsj(usfmString: string): Object`
Converts a USFM string to a USJ object.
Expand Down
34 changes: 34 additions & 0 deletions node-usfm-parser/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"name": "usfm-grammar",
"version": "3.0.0-alpha.8",
"description": "Parser using tree-sitter-usfm3, to convert usfm to usj format.",
"main": "./dist/cjs/index.cjs",
"module": "./dist/es/index.mjs",
"scripts": {
"build": "parcel build ./src/index.js"
},
"repository": {
"type": "git",
"url": "https://github.com/Bridgeconn/usfm-grammar/js-usfm-parser"
},
"keywords": [
"USFM",
"tree-sitter",
"USJ",
"Parser"
],
"license": "MIT",
"author": "BCS Team",
"contributors": [
"Kavitha Raju <[email protected]> (https://github.com/kavitharaju)",
"Joel Mathew <[email protected]> (https://github.com/joelthe1)",
"Samuel JD <[email protected]> (https://github.com/samueljd)"
],
"dependencies": {
"tree-sitter": "0.21.1",
"tree-sitter-usfm3": "^3.0.0-beta.10"
},
"devDependencies": {
"parcel": "^2.12.0"
}
}
115 changes: 115 additions & 0 deletions node-usfm-parser/src/filters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Markers whose nested content is thrown away together with the marker itself
// when excludeMarkersInUsj() removes them; other removed markers are replaced
// by their (filtered) children. includeMarkersInUsj() consults this list too.
// Names are listed without trailing level numbers because the filter functions
// strip those (see trailingNumPattern) before looking a marker up here.
const MARKERS_WITH_DISCARDABLE_CONTENTS = [
"ide", "usfm", "h", "toc", "toca", "imt", "is", "ip", "ipi", "im", "imi",
"ipq", "imq", "ipr", "iq", "ib", "ili", "iot", "io", "iex", "imte", "ie",
"mt", "mte", "cl", "cd", "ms", "mr", "s", "sr", "r", "d", "sp", "sd",
"sts", "rem", "lit", "restore", "f", "fe", "ef", "efe", "x", "ex",
"fr", "ft", "fk", "fq", "fqa", "fl", "fw", "fp", "fv", "fdc",
"xo", "xop", "xt", "xta", "xk", "xq", "xot", "xnt", "xdc",
"jmp", "fig", "cat", "esb", "b"
];

// Matches a run of digits at the very end of a string; the filter functions use
// it to strip the level number from marker names before comparing them.
const trailingNumPattern = /\d+$/;
// A text piece that starts with one of these characters is appended to the
// preceding text without an inserted space (see combineConsecutiveTextContents).
const punctPatternNoSpaceBefore = /^[,.\-/;:!?@$%^)}\]>»]/;
// When the text gathered so far ends with one of these characters, the next
// text piece is appended without an inserted space.
const punctPatternNoSpaceAfter = /[\-/`@^&({[<«]$/;

/**
 * Collapses every run of adjacent strings in a content list into one string,
 * leaving non-string items in place and in their original order.
 *
 * While joining, a single space is inserted between two pieces unless the text
 * gathered so far is empty or already ends with a space, the next piece starts
 * with a space, the next piece matches punctPatternNoSpaceBefore, or the
 * gathered text matches punctPatternNoSpaceAfter.
 *
 * @param {Array} contentsList - Mixed list of strings and non-string items.
 * @returns {Array} A new list with consecutive strings merged; empty runs are not emitted.
 */
function combineConsecutiveTextContents(contentsList) {
  const merged = [];
  let pendingText = '';

  // Emits the buffered text run (if there is one) and resets the buffer.
  const flushPending = () => {
    if (pendingText === '') {
      return;
    }
    merged.push(pendingText);
    pendingText = '';
  };

  for (const entry of contentsList) {
    if (typeof entry !== 'string') {
      // A non-text item ends the current run of text.
      flushPending();
      merged.push(entry);
      continue;
    }

    const separatorNeeded =
      pendingText !== '' &&
      !pendingText.endsWith(" ") &&
      !entry.startsWith(" ") &&
      !punctPatternNoSpaceBefore.test(entry) &&
      !punctPatternNoSpaceAfter.test(pendingText);

    pendingText += separatorNeeded ? ` ${entry}` : entry;
  }

  flushPending();
  return merged;
}

/**
 * Builds a filtered copy of a USJ node in which the given markers are removed.
 *
 * Marker names are compared with trailing digits stripped on both sides, so
 * excluding "s" (or "s2") also removes "s1". A removed marker that is listed in
 * MARKERS_WITH_DISCARDABLE_CONTENTS disappears together with everything inside
 * it; any other removed marker is replaced by its own filtered children.
 * The input object is not modified.
 *
 * @param {object|string} inputUsj - A text string, or an object that may carry
 *   `marker` and `content` keys.
 * @param {string[]} excludeMarkers - Markers to remove. The special entry
 *   'text-in-excluded-parent' additionally drops text sitting directly inside
 *   a removed marker.
 * @param {boolean} [combineTexts=true] - Merge adjacent text pieces that remain
 *   after filtering.
 * @param {boolean} [excludedParent=false] - Whether the direct parent marker was
 *   removed; only consulted for text input (the recursion supplies it).
 * @returns {object|Array} For a kept object: a shallow copy whose `content` is
 *   the filtered child list (a `content` array is always present on the copy).
 *   Otherwise: an array of replacement items, possibly empty.
 */
function excludeMarkersInUsj(inputUsj, excludeMarkers, combineTexts = true, excludedParent = false) {
  // Text leaf: dropped only when its parent was removed AND the caller asked for that.
  if (typeof inputUsj === 'string') {
    const dropText = excludedParent && excludeMarkers.includes('text-in-excluded-parent');
    return dropText ? [] : [inputUsj];
  }

  const stripLevel = (marker) => marker.replace(trailingNumPattern, '');
  const markersToDrop = excludeMarkers.map(stripLevel);
  const ownMarker = 'marker' in inputUsj ? stripLevel(inputUsj.marker) : '';

  const dropThisMarker = markersToDrop.includes(ownMarker);
  const keepInnerContent =
    !dropThisMarker || !MARKERS_WITH_DISCARDABLE_CONTENTS.includes(ownMarker);

  let keptChildren = [];
  if (keepInnerContent && "content" in inputUsj) {
    // Each child yields either one object or an array of replacements;
    // flatMap splices arrays in and appends single objects as they are.
    keptChildren = inputUsj.content.flatMap(
      (child) => excludeMarkersInUsj(child, excludeMarkers, combineTexts, dropThisMarker)
    );
    if (combineTexts) {
      keptChildren = combineConsecutiveTextContents(keptChildren);
    }
  }

  if (dropThisMarker) {
    // The marker itself goes away; hand its surviving children to the parent.
    return keepInnerContent ? keptChildren : [];
  }
  return { ...inputUsj, content: keptChildren };
}

/**
 * Builds a filtered copy of a USJ node that retains only the given markers.
 *
 * Marker names are compared with trailing digits stripped on both sides, so
 * including "s1" also keeps "s2". Nodes without a `marker` key are always kept.
 * A node whose marker is not wanted is removed; its children are still visited
 * (so wanted markers nested inside survive and are lifted to the parent) unless
 * the marker is listed in MARKERS_WITH_DISCARDABLE_CONTENTS, in which case the
 * whole subtree is dropped. Text is kept only directly inside a kept node.
 * The input object is not modified.
 *
 * @param {object|string} inputUsj - A text string, or an object that may carry
 *   `marker` and `content` keys.
 * @param {string[]} includeMarkers - Markers to retain.
 * @param {boolean} [combineTexts=true] - Merge adjacent text pieces that remain
 *   after filtering.
 * @param {boolean} [excludedParent=false] - Whether the direct parent marker was
 *   removed; only consulted for text input (the recursion supplies it).
 * @returns {object|Array} For a kept object: a shallow copy whose `content` is
 *   the filtered child list (a `content` array is always present on the copy).
 *   Otherwise: an array of replacement items, possibly empty.
 */
function includeMarkersInUsj(inputUsj, includeMarkers, combineTexts = true, excludedParent = false) {
  // Text leaf: survives only when its direct parent marker was kept.
  if (typeof inputUsj === 'string') {
    return excludedParent ? [] : [inputUsj];
  }

  const stripLevel = (marker) => marker.replace(trailingNumPattern, '');
  const wantedMarkers = includeMarkers.map(stripLevel);
  const thisMarker = 'marker' in inputUsj ? stripLevel(inputUsj.marker) : '';

  const thisMarkerNeeded = thisMarker === '' || wantedMarkers.includes(thisMarker);
  // Fix: the lookup must be negated. An unwanted marker's children are walked
  // unless its contents are discardable; the previous un-negated test dropped
  // everything inside ordinary containers (e.g. verses inside an unwanted
  // paragraph) and walked only the discardable ones.
  const innerContentNeeded =
    thisMarkerNeeded || !MARKERS_WITH_DISCARDABLE_CONTENTS.includes(thisMarker);

  let cleanedKids = [];
  if (innerContentNeeded && "content" in inputUsj) {
    // Each child yields either one object or an array of replacements;
    // flatMap splices arrays in and appends single objects as they are.
    cleanedKids = inputUsj.content.flatMap(
      (item) => includeMarkersInUsj(item, includeMarkers, combineTexts, !thisMarkerNeeded)
    );
    if (combineTexts) {
      cleanedKids = combineConsecutiveTextContents(cleanedKids);
    }
  }

  if (thisMarkerNeeded) {
    return { ...inputUsj, content: cleanedKids };
  }
  // Marker removed: pass surviving descendants (none for discardable markers) upward.
  return cleanedKids;
}

// Public API of this module, exposed as CommonJS named exports.
exports.excludeMarkersInUsj = excludeMarkersInUsj;
exports.includeMarkersInUsj = includeMarkersInUsj;
2 changes: 2 additions & 0 deletions node-usfm-parser/src/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package entry point: re-exports USFMParser from ./usfmParser as a named export.
const parserModule = require("./usfmParser");
exports.USFMParser = parserModule.USFMParser;
Loading

0 comments on commit 90263b6

Please sign in to comment.