From b7f831c76397dd7336e12e68b8c64c9a3a0f886f Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Fri, 27 Sep 2024 15:39:39 -0500 Subject: [PATCH 1/2] First pass at value classes Definition syntax requires special handling and will be done later. --- tests/event.spec.js | 9 +++++ validator/event/hed3.js | 31 +++++++-------- validator/schema/class_regex.json | 57 +++++++++++++++++++++++++++ validator/schema/hed3.js | 46 +++++++++++++++++++--- validator/schema/types.js | 64 +++++++++++++++++++++++++++++-- 5 files changed, 182 insertions(+), 25 deletions(-) create mode 100644 validator/schema/class_regex.json diff --git a/tests/event.spec.js b/tests/event.spec.js index dfc22f2e..8f99f584 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -1152,6 +1152,9 @@ describe('HED string and event validation', () => { tag: testStrings.incorrectPluralUnit, unitClassUnits: legalFrequencyUnits.sort().join(','), }), + generateIssue('invalidValue', { + tag: testStrings.incorrectPluralUnit, + }), ], incorrectSymbolCapitalizedUnit: [ generateIssue('unitClassInvalidUnit', { @@ -1170,12 +1173,18 @@ describe('HED string and event validation', () => { tag: testStrings.incorrectNonSIUnitModifier, unitClassUnits: legalTimeUnits.sort().join(','), }), + generateIssue('invalidValue', { + tag: testStrings.incorrectNonSIUnitModifier, + }), ], incorrectNonSIUnitSymbolModifier: [ generateIssue('unitClassInvalidUnit', { tag: testStrings.incorrectNonSIUnitSymbolModifier, unitClassUnits: legalSpeedUnits.sort().join(','), }), + generateIssue('invalidValue', { + tag: testStrings.incorrectNonSIUnitSymbolModifier, + }), ], notRequiredNumber: [], notRequiredScientific: [], diff --git a/validator/event/hed3.js b/validator/event/hed3.js index 648ea3f7..630dd41b 100644 --- a/validator/event/hed3.js +++ b/validator/event/hed3.js @@ -148,11 +148,9 @@ export class Hed3Validator extends HedValidator { tag: tag, unitClassUnits: tagUnitClassUnits.sort().join(','), }) - } else { - const validValue = 
this.validateValue(value, true) - if (!validValue) { - this.pushIssue('invalidValue', { tag: tag }) - } + } + if (!this.validateValue(tag, value)) { + this.pushIssue('invalidValue', { tag: tag }) } } @@ -273,10 +271,7 @@ export class Hed3Validator extends HedValidator { */ checkValueTagSyntax(tag) { if (tag.takesValue && !tag.hasUnitClass) { - const isValidValue = this.validateValue( - tag.formattedTagName, - tag.takesValueTag.hasAttributeName('isNumeric'), // Always false - ) + const isValidValue = this.validateValue(tag, tag.formattedTagName) if (!isValidValue) { this.pushIssue('invalidValue', { tag: tag }) } @@ -340,21 +335,23 @@ export class Hed3Validator extends HedValidator { /** * Determine if a stripped value is valid. * + * @param {ParsedHed3Tag} tag The tag being validated. * @param {string} value The stripped value. - * @param {boolean} isNumeric Whether the tag is numeric. * @returns {boolean} Whether the stripped value is valid. - * @todo This function is a placeholder until support for value classes is implemented. */ - validateValue(value, isNumeric) { + validateValue(tag, value) { if (value === '#') { return true } - // TODO: Replace with full value class-based implementation. - if (isNumeric) { - return isNumber(value) + const valueTag = tag.takesValueTag + if (valueTag === undefined) { + return true + } + const valueClasses = valueTag.valueClasses + if (valueClasses.length === 0) { + return true } - // TODO: Placeholder. 
- return true + return valueClasses.some((valueClass) => valueClass.validateValue(value)) } /** diff --git a/validator/schema/class_regex.json b/validator/schema/class_regex.json new file mode 100644 index 00000000..8e827088 --- /dev/null +++ b/validator/schema/class_regex.json @@ -0,0 +1,57 @@ +{ + "char_regex": { + "alphanumeric": "[A-Za-z0-9]", + "ampersand": "&", + "ascii": "[\\x00-\\x7F]", + "asterisk": "\\*", + "at-sign": "@", + "backslash": "\\\\", + "blank": " ", + "caret": "\\^", + "colon": ":", + "comma": ",", + "date-time": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?", + "dollar": "\\$", + "digits": "[0-9]", + "double-quote": "\"", + "equals": "=", + "exclamation": "!", + "greater-than": ">", + "hyphen": "-", + "left-paren": "\\(", + "less-than": "<", + "letters": "[A-Za-z]", + "lowercase": "[a-z]", + "name": "[\\w\\-\\u0080-\\uFFFF]", + "newline": "\\n", + "nonascii": "[\\u0080-\\uFFFF]", + "number-sign": "#", + "numeric": "[0-9.\\-+^Ee]", + "percent-sign": "%", + "period": "\\.", + "plus": "\\+", + "printable": "[\\x20-\\x7E]", + "question-mark": "\\?", + "right-paren": "\\)", + "semicolon": ";", + "single-quote": "'", + "forward-slash": "/", + "tab": "\\t", + "text": "[^\\x00-\\x1F\\x7F,{}]", + "tilde": "~", + "underscore": "_", + "uppercase": "[A-Z]", + "vertical-bar": "\\|" + }, + "class_chars": { + "dateTimeClass": [], + "nameClass": ["alphanumeric", "underscore", "hyphen", "nonascii"], + "numericClass": [], + "textClass": ["text"], + "testClass": ["newline", "tab", "nonascii"] + }, + "class_words": { + "dateTimeClass": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$", + "numericClass": "^[+-]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][+-]?\\d+)?$" + } +} diff --git a/validator/schema/hed3.js b/validator/schema/hed3.js index 12e55026..04e4d5f4 100644 --- a/validator/schema/hed3.js +++ b/validator/schema/hed3.js @@ -24,6 +24,8 @@ import { } from './types' import { generateIssue, IssueError } from 
'../../common/issues/issues' +import classRegex from './class_regex.json' + const lc = (str) => str.toLowerCase() export class Hed3SchemaParser extends SchemaParser { @@ -69,6 +71,7 @@ export class Hed3SchemaParser extends SchemaParser { populateDictionaries() { this.parseProperties() this.parseAttributes() + this.parseValueClasses() this.parseUnitModifiers() this.parseUnitClasses() this.parseTags() @@ -156,7 +159,19 @@ export class Hed3SchemaParser extends SchemaParser { const [booleanAttributeDefinitions, valueAttributeDefinitions] = this._parseDefinitions('valueClass') for (const [name, valueAttributes] of valueAttributeDefinitions) { const booleanAttributes = booleanAttributeDefinitions.get(name) - valueClasses.set(name, new SchemaValueClass(name, booleanAttributes, valueAttributes)) + let classChars + if (Array.isArray(classRegex.class_chars[name]) && classRegex.class_chars[name].length > 0) { + classChars = + '^(?:' + classRegex.class_chars[name].map((charClass) => classRegex.char_regex[charClass]).join('|') + ')+$' + } else { + classChars = '^.+$' + } + const classCharsRegex = new RegExp(classChars) + const classWordRegex = new RegExp(classRegex.class_words[name] ?? 
'^.+$') + valueClasses.set( + name, + new SchemaValueClass(name, booleanAttributes, valueAttributes, classCharsRegex, classWordRegex), + ) } this.valueClasses = new SchemaEntryManager(valueClasses) } @@ -223,9 +238,11 @@ export class Hed3SchemaParser extends SchemaParser { const recursiveAttributes = this._getRecursiveAttributes() const tagUnitClassAttribute = this.attributes.get('unitClass') + const tagValueClassAttribute = this.attributes.get('valueClass') const tagTakesValueAttribute = this.attributes.get('takesValue') const tagUnitClassDefinitions = new Map() + const tagValueClassDefinitions = new Map() const recursiveChildren = new Map() for (const [tagElement, tagName] of shortTags) { const valueAttributes = valueAttributeDefinitions.get(tagName) @@ -238,6 +255,15 @@ export class Hed3SchemaParser extends SchemaParser { ) valueAttributes.delete(tagUnitClassAttribute) } + if (valueAttributes.has(tagValueClassAttribute)) { + tagValueClassDefinitions.set( + tagName, + valueAttributes.get(tagValueClassAttribute).map((valueClassName) => { + return this.valueClasses.getEntry(valueClassName) + }), + ) + valueAttributes.delete(tagValueClassAttribute) + } for (const attribute of recursiveAttributes) { const children = recursiveChildren.get(attribute) ?? 
[] if (booleanAttributeDefinitions.get(tagName).has(attribute)) { @@ -261,10 +287,14 @@ export class Hed3SchemaParser extends SchemaParser { } const booleanAttributes = booleanAttributeDefinitions.get(name) const unitClasses = tagUnitClassDefinitions.get(name) + const valueClasses = tagValueClassDefinitions.get(name) if (booleanAttributes.has(tagTakesValueAttribute)) { - tagEntries.set(lc(name), new SchemaValueTag(name, booleanAttributes, valueAttributes, unitClasses)) + tagEntries.set( + lc(name), + new SchemaValueTag(name, booleanAttributes, valueAttributes, unitClasses, valueClasses), + ) } else { - tagEntries.set(lc(name), new SchemaTag(name, booleanAttributes, valueAttributes, unitClasses)) + tagEntries.set(lc(name), new SchemaTag(name, booleanAttributes, valueAttributes, unitClasses, valueClasses)) } } @@ -520,12 +550,18 @@ export class Hed3PartneredSchemaMerger { const unitClasses = tag.unitClasses.map( (unitClass) => this.destination.entries.unitClasses.getEntry(unitClass.name) ?? unitClass, ) + /** + * @type {SchemaValueClass[]} + */ + const valueClasses = tag.valueClasses.map( + (valueClass) => this.destination.entries.valueClasses.getEntry(valueClass.name) ?? 
valueClass, + ) let newTag if (tag instanceof SchemaValueTag) { - newTag = new SchemaValueTag(tag.name, booleanAttributes, valueAttributes, unitClasses) + newTag = new SchemaValueTag(tag.name, booleanAttributes, valueAttributes, unitClasses, valueClasses) } else { - newTag = new SchemaTag(tag.name, booleanAttributes, valueAttributes, unitClasses) + newTag = new SchemaTag(tag.name, booleanAttributes, valueAttributes, unitClasses, valueClasses) } const destinationParentTag = this.destinationTags.getEntry(tag.parent?.name?.toLowerCase()) if (destinationParentTag) { diff --git a/validator/schema/types.js b/validator/schema/types.js index 62cd713c..25151edc 100644 --- a/validator/schema/types.js +++ b/validator/schema/types.js @@ -666,8 +666,42 @@ export class SchemaUnitModifier extends SchemaEntryWithAttributes { * SchemaValueClass class */ export class SchemaValueClass extends SchemaEntryWithAttributes { - constructor(name, booleanAttributes, valueAttributes) { + /** + * The character class-based regular expression. + * @type {RegExp} + * @private + */ + _charClassRegex + /** + * The "word form"-based regular expression. + * @type {RegExp} + * @private + */ + _wordRegex + + /** + * Constructor. + * + * @param {string} name The name of this value class. + * @param {Set} booleanAttributes The boolean attributes for this value class. + * @param {Map} valueAttributes The value attributes for this value class. + * @param {RegExp} charClassRegex The character class-based regular expression for this value class. + * @param {RegExp} wordRegex The "word form"-based regular expression for this value class. + */ + constructor(name, booleanAttributes, valueAttributes, charClassRegex, wordRegex) { super(name, booleanAttributes, valueAttributes) + this._charClassRegex = charClassRegex + this._wordRegex = wordRegex + } + + /** + * Determine if a value is valid according to this value class. + * + * @param {string} value A HED value. 
+ * @returns {boolean} Whether the value conforms to this value class. + */ + validateValue(value) { + return this._charClassRegex.test(value) && this._wordRegex.test(value) } } @@ -687,6 +721,12 @@ export class SchemaTag extends SchemaEntryWithAttributes { * @private */ _unitClasses + /** + * This tag's value classes. + * @type {SchemaValueClass[]} + * @private + */ + _valueClasses /** * This tag's value-taking child. * @type {SchemaValueTag} @@ -701,11 +741,13 @@ export class SchemaTag extends SchemaEntryWithAttributes { * @param {Set} booleanAttributes The boolean attributes for this tag. * @param {Map} valueAttributes The value attributes for this tag. * @param {SchemaUnitClass[]} unitClasses The unit classes for this tag. + * @param {SchemaValueClass[]} valueClasses The value classes for this tag. * @constructor */ - constructor(name, booleanAttributes, valueAttributes, unitClasses) { + constructor(name, booleanAttributes, valueAttributes, unitClasses, valueClasses) { super(name, booleanAttributes, valueAttributes) this._unitClasses = unitClasses ?? [] + this._valueClasses = valueClasses ?? [] } /** @@ -716,12 +758,28 @@ export class SchemaTag extends SchemaEntryWithAttributes { return this._unitClasses.slice() } + /** + * This tag's value classes. + * @type {SchemaValueClass[]} + */ + get valueClasses() { + return this._valueClasses.slice() + } + /** * Whether this tag has any unit classes. * @returns {boolean} */ get hasUnitClasses() { - return this.unitClasses.length !== 0 + return this._unitClasses.length !== 0 + } + + /** + * Whether this tag has any value classes. 
+ * @returns {boolean} + */ + get hasValueClasses() { + return this._valueClasses.length !== 0 } /** From 3ad617f71d16bc137d3be886053b7e0f46c0f1dc Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Fri, 27 Sep 2024 17:05:48 -0500 Subject: [PATCH 2/2] Flip order of value regex tests --- validator/schema/types.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/schema/types.js b/validator/schema/types.js index 25151edc..9bc97313 100644 --- a/validator/schema/types.js +++ b/validator/schema/types.js @@ -701,7 +701,7 @@ export class SchemaValueClass extends SchemaEntryWithAttributes { * @returns {boolean} Whether the value conforms to this value class. */ validateValue(value) { - return this._charClassRegex.test(value) && this._wordRegex.test(value) + return this._wordRegex.test(value) && this._charClassRegex.test(value) } }