From 30576c1bf1cb1611fbb2e5fca69072d999f7c4ea Mon Sep 17 00:00:00 2001 From: Florian Cassayre Date: Sun, 17 Jan 2021 17:38:42 +0100 Subject: [PATCH] Ansel support and various bugfixes --- src/model/Gedcom.js | 11 ++++++++++- src/model/Name.js | 8 ++++---- src/model/Node.js | 8 ++++---- src/model/Value.js | 4 ++-- src/parse/decoding/ansel.js | 34 +++++++++++++++++++++++++++++----- src/parse/tokenizer.js | 2 +- 6 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/model/Gedcom.js b/src/model/Gedcom.js index 4709aa1..d990c17 100644 --- a/src/model/Gedcom.js +++ b/src/model/Gedcom.js @@ -62,6 +62,10 @@ export class Gedcom extends Node { // undefined and null are considered as wildcards const tagArray = tag != null ? (Array.isArray(tag) ? tag : [tag]) : null; const idArray = id != null ? (Array.isArray(id) ? id : [id]) : null; + const withLimit = q != null; + if (withLimit && !Number.isInteger(q)) { + throw 'The quantifier provided is not an integer'; + } const data = this._data; const arrayChildren = [], arrayParents = []; @@ -79,16 +83,21 @@ export class Gedcom extends Node { if (idArray !== null) { // Array of ids idArray.forEach(id => { const element = obj[id]; - if(element !== undefined) { + if(element !== undefined && (!withLimit || q > 0)) { // A bit pointless arrayChildren.push(element); arrayParents.push(i); } }); } else { // All ids + let j = 0; for (const id in obj) { + if (withLimit && j >= q) { + break; + } const element = obj[id]; arrayChildren.push(element); arrayParents.push(i); + j++; } } }); diff --git a/src/model/Name.js b/src/model/Name.js index 76b05a1..fa03478 100644 --- a/src/model/Name.js +++ b/src/model/Name.js @@ -4,7 +4,7 @@ import { NameRomanization } from './NameRomanization'; import { NameType } from './NameType'; import { Tag } from '../tag'; -const rNameParts = /^(?:([^\/]*)|(?:(?:([^\/]*) )?\/([^\/]*)\/(?: ([^\/]*))?))$/; +const rNameParts = /^(?:([^\/]*)|(?:(?:([^\/]*) ?)?\/([^\/]*)\/(?: ?([^\/]*))?))$/; export class Name extends NamePieces { constructor(data, clazz) { @@ -20,10 +20,10 @@ export class Name extends NamePieces { if (!groups) { return null; } - if (groups[4] === undefined) { - return [groups[1], groups[2], groups[3]]; + if (groups[1] === undefined) { + return [groups[2], groups[3], groups[4]]; } else { - return [groups[4], undefined, undefined]; + return [groups[1], undefined, undefined]; } }); } diff --git a/src/model/Node.js b/src/model/Node.js index 82504f8..f8bcc45 100644 --- a/src/model/Node.js +++ b/src/model/Node.js @@ -49,16 +49,16 @@ export class Node { }); }); } else { // All tags - let i = 0; + let j = 0; for (const tag in tr.by_tag) { - if (withLimit && q >= i) { + if (withLimit && j >= q) { break; } const objects = tr.by_tag[tag]; objects.forEach(v => { arrayChildren.push(v); arrayParents.push(i); - i++; + j++; }); } } @@ -131,7 +131,7 @@ export class Node { const newTree = [], newIndices = []; data.tree.filter((t, i) => { const parentIndex = data.parentIndices[i]; - const unitNode = this._newInstance(data.Clazz, t, [parentIndex], data.parent); + const unitNode = this._newInstance(data.Clazz, [t], [parentIndex], data.parent); if(f(unitNode)) { newTree.push(t); newIndices.push(parentIndex); diff --git a/src/model/Value.js b/src/model/Value.js index 63dde72..1df0a32 100644 --- a/src/model/Value.js +++ b/src/model/Value.js @@ -25,7 +25,7 @@ export class Value { option(otherwise) { const value = this.values[0]; if (otherwise !== undefined) { - return value !== null ? otherwise : value; + return value !== null ? value : otherwise; } else { return value; } @@ -45,6 +45,6 @@ export class Value { } map(f) { - return new Value(this.values.map(v => f(v))); + return new Value(this.values.map(f)); } } diff --git a/src/parse/decoding/ansel.js b/src/parse/decoding/ansel.js index 1aec4ff..b20b7b0 100644 --- a/src/parse/decoding/ansel.js +++ b/src/parse/decoding/ansel.js @@ -12,11 +12,35 @@ export function decodeAnsel(buffer) { registerTable(table1, ANSEL_TABLE_1); registerTable(table2, ANSEL_TABLE_2); + const byteBuffer = new Uint8Array(buffer); + const output = []; - let pending = -1; - for(let i = 0; i < buffer.length; i++) { - const current = buffer[i]; - // TODO + let i = 0; + let pending = byteBuffer[i]; + i++; + while(pending !== undefined) { + const b = pending; + pending = byteBuffer[i]; + i++; + if (b < 128) { // Unchanged ASCII + output.push(String.fromCharCode(b)); + } else if (pending !== undefined && ((b >= 0xE0 && b <= 0xFF) || (b >= 0xD7 && b <= 0xD9))) { + // Two bytes + const u = table2.get(b * 256 + pending); + if (u !== undefined) { + pending = byteBuffer[i]; + i++; + output.push(String.fromCharCode(u)); + } else { + throw 'Illegal byte code' + } + } else { + // One byte + const u = table1.get(b); + const c = String.fromCharCode(u !== undefined ? u : 0xFFFD); + output.push(c); + } } - // TODO + + return output.join(''); } diff --git a/src/parse/tokenizer.js b/src/parse/tokenizer.js index 50da236..969460b 100644 --- a/src/parse/tokenizer.js +++ b/src/parse/tokenizer.js @@ -15,7 +15,7 @@ const gLineItem = `${gEscape}|${gLineText}|${gEscape}[${cDelim}]${gLineText}`; const gXRefId = `@${gIdentifierString}@`; const gPointer = `${gXRefId}`; const gLineValue = `${gPointer}|(?:${gLineItem})`; -const gTag = `[${ccAlphanum}]+|_[${ccAlphanum}]+`; // TODO +const gTag = `[${ccAlphanum}]+|_[${ccAlphanum}_]+`; // TODO const gTerminator = `${cCR}?${cLF}`; const gGedcomLine = `(${gLevel})(?:${cDelim}(${gXRefId}))?${cDelim}(${gTag})(?:${cDelim}(${gLineValue}))?(?:${gTerminator})`;