Skip to content

Commit

Permalink
Initial implementation of library schema nickname parsing
Browse files Browse the repository at this point in the history
This commit adds the necessary code to parse library schema nicknames
from HED tags and pass them to ParsedHedTag objects. It also adds the
unmatchedLibrarySchema issue for cases when a schema cannot be found.

The internal converter functions have been modified to accept Schema
objects instead of Schemas objects. The public-facing ones (which were
left unchanged for backward-compatibility reasons) have been deprecated
in the public API and will be made private in version 4.0.0 due to a
lack of independent utility.

A minor issue in the new schema loading code was also fixed, bringing
it in line with the surrounding code, and some documentation was fixed.

None of this code has been tested with tags using library schemas. Tests
will be added in the next commit.
  • Loading branch information
happy5214 committed Jun 11, 2022
1 parent d729871 commit bb906d0
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 50 deletions.
7 changes: 6 additions & 1 deletion common/issues/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,15 @@ const issueData = {
level: 'error',
message: stringTemplate`The fallback schema bundled with this validator failed to load. The error given was "${'error'}". No HED validation was performed.`,
},
unmatchedLibrarySchema: {
hedCode: 'HED_LIBRARY_UNMATCHED',
level: 'error',
message: stringTemplate`Tag "${'tag'}" is declared to use a library schema nicknamed "${'library'}" in the dataset's schema listing, but no such schema was found.`,
},
genericError: {
hedCode: 'HED_GENERIC_ERROR',
level: 'error',
message: stringTemplate`Unknown HED error "${'internalCode'}".`,
message: stringTemplate`Unknown HED error "${'internalCode'}" - parameters: "${'parameters'}".`,
},
}

Expand Down
7 changes: 5 additions & 2 deletions common/issues/issues.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,17 @@ class Issue {
* Generate a new issue object.
*
* @param {string} internalCode The internal error code.
* @param {object<string, (string|number[])>} parameters The error string parameters.
* @param {Object<string, (string|number[])>} parameters The error string parameters.
* @return {Issue} An object representing the issue.
*/
const generateIssue = function (internalCode, parameters) {
const issueCodeData = issueData[internalCode] || issueData.genericError
const { hedCode, level, message } = issueCodeData
const bounds = parameters.bounds || []
parameters.internalCode = internalCode
if (issueCodeData === issueData.genericError) {
parameters.internalCode = internalCode
parameters.parameters = 'Issue parameters: ' + JSON.stringify(parameters)
}
const parsedMessage = message(...bounds, parameters)

return new Issue(internalCode, hedCode, level, parsedMessage)
Expand Down
5 changes: 4 additions & 1 deletion common/schema/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ const loadSchema = function (schemaDef = {}, useFallback = true) {
if (schemaDef.path) {
schemaPromise = loadLocalSchema(schemaDef.path)
} else if (schemaDef.library) {
return loadRemoteLibrarySchema(schemaDef.library, schemaDef.version)
schemaPromise = loadRemoteLibrarySchema(
schemaDef.library,
schemaDef.version,
)
} else if (schemaDef.version) {
schemaPromise = loadRemoteBaseSchema(schemaDef.version)
} else {
Expand Down
8 changes: 8 additions & 0 deletions common/schema/types.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ class Schemas {
}
}

/**
* Whether this schema collection is for syntactic validation only.
*
* This is true exactly when the collection's `generation` property is 0.
* NOTE(review): presumably generation 0 means that no schemas were loaded - confirm against where `generation` is assigned.
*
* @return {boolean}
*/
get isSyntaxOnly() {
return this.generation === 0
}

/**
* Whether this schema collection comprises HED 2 schemas.
* @return {boolean}
Expand Down
8 changes: 4 additions & 4 deletions converter/__tests__/converter.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ describe('HED string conversion', () => {
* @param {Object<string, string>} testStrings The test strings.
* @param {Object<string, string>} expectedResults The expected results.
* @param {Object<string, Issue[]>} expectedIssues The expected issues.
* @param {function (Schemas, string, string, number): [string, Issue[]]} testFunction The test function.
* @return {Promise<void> | PromiseLike<any> | Promise<any>}
* @param {function (Schema, string, string, number): [string, Issue[]]} testFunction The test function.
* @return {Promise<void>}
*/
const validatorBase = function (testStrings, expectedResults, expectedIssues, testFunction) {
return schemaPromise.then((schemas) => {
for (const testStringKey of Object.keys(testStrings)) {
const [testResult, issues] = testFunction(schemas, testStrings[testStringKey], testStrings[testStringKey], 0)
const [testResult, issues] = testFunction(schemas.baseSchema, testStrings[testStringKey], testStrings[testStringKey], 0)
assert.strictEqual(testResult, expectedResults[testStringKey], testStrings[testStringKey])
assert.sameDeepMembers(issues, expectedIssues[testStringKey], testStrings[testStringKey])
}
Expand Down Expand Up @@ -583,7 +583,7 @@ describe('HED string conversion', () => {
* @param {Object<string, string>} expectedResults The expected results.
* @param {Object<string, Issue[]>} expectedIssues The expected issues.
* @param {function (Schemas, string): [string, Issue[]]} testFunction The test function.
* @return {Promise<void> | PromiseLike<any> | Promise<any>}
* @return {Promise<void>}
*/
const validatorBase = function (testStrings, expectedResults, expectedIssues, testFunction) {
return schemaPromise.then((schemas) => {
Expand Down
34 changes: 18 additions & 16 deletions converter/converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ const removeSlashesAndSpaces = function (hedString) {
* on for HED 3 schemas) allow for similar HED 2 validation with minimal code
* duplication.
*
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {Schema} schema The schema object containing a short-to-long mapping.
* @param {string} hedTag The HED tag to convert.
* @param {string} hedString The full HED string (for error messages).
* @param {number} offset The offset of this tag within the HED string.
* @return {[string, Issue[]]} The long-form tag and any issues.
*/
const convertTagToLong = function (schemas, hedTag, hedString, offset) {
const mapping = schemas.baseSchema.mapping
const convertTagToLong = function (schema, hedTag, hedString, offset) {
const mapping = schema.mapping

if (hedTag.startsWith('/')) {
hedTag = hedTag.slice(1)
Expand Down Expand Up @@ -125,14 +125,14 @@ const convertTagToLong = function (schemas, hedTag, hedString, offset) {
/**
* Convert a HED tag to short form.
*
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {Schema} schema The schema object containing a short-to-long mapping.
* @param {string} hedTag The HED tag to convert.
* @param {string} hedString The full HED string (for error messages).
* @param {number} offset The offset of this tag within the HED string.
* @return {[string, Issue[]]} The short-form tag and any issues.
*/
const convertTagToShort = function (schemas, hedTag, hedString, offset) {
const mapping = schemas.baseSchema.mapping
const convertTagToShort = function (schema, hedTag, hedString, offset) {
const mapping = schema.mapping

if (hedTag.startsWith('/')) {
hedTag = hedTag.slice(1)
Expand Down Expand Up @@ -196,13 +196,13 @@ const convertTagToShort = function (schemas, hedTag, hedString, offset) {
*
* This is for the internal string parsing for the validation side.
*
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {Schema} schema The schema object containing a short-to-long mapping.
* @param {string} partialHedString The partial HED string to convert to long form.
* @param {string} fullHedString The full HED string.
* @param {number} offset The offset of the partial HED string within the full string.
* @return {[string, Issue[]]} The converted string and any issues.
*/
const convertPartialHedStringToLong = function (schemas, partialHedString, fullHedString, offset) {
const convertPartialHedStringToLong = function (schema, partialHedString, fullHedString, offset) {
let issues = []

const hedString = removeSlashesAndSpaces(partialHedString)
Expand All @@ -218,7 +218,7 @@ const convertPartialHedStringToLong = function (schemas, partialHedString, fullH
for (const [isHedTag, [startPosition, endPosition]] of hedTags) {
const tag = hedString.slice(startPosition, endPosition)
if (isHedTag) {
const [shortTagString, singleError] = convertTagToLong(schemas, tag, fullHedString, startPosition + offset)
const [shortTagString, singleError] = convertTagToLong(schema, tag, fullHedString, startPosition + offset)
issues = issues.concat(singleError)
finalString += shortTagString
} else {
Expand All @@ -232,15 +232,15 @@ const convertPartialHedStringToLong = function (schemas, partialHedString, fullH
/**
* Convert a HED string.
*
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {Schema} schema The schema object containing a short-to-long mapping.
* @param {string} hedString The HED tag to convert.
* @param {function (Schemas, string, string, number): [string, Issue[]]} conversionFn The conversion function for a tag.
* @param {function (Schema, string, string, number): [string, Issue[]]} conversionFn The conversion function for a tag.
* @return {[string, Issue[]]} The converted string and any issues.
*/
const convertHedString = function (schemas, hedString, conversionFn) {
const convertHedString = function (schema, hedString, conversionFn) {
let issues = []

if (!schemas.baseSchema.mapping.hasNoDuplicates) {
if (!schema.mapping.hasNoDuplicates) {
issues.push(generateIssue('duplicateTagsInSchema', ''))
return [hedString, issues]
}
Expand All @@ -258,7 +258,7 @@ const convertHedString = function (schemas, hedString, conversionFn) {
for (const [isHedTag, [startPosition, endPosition]] of hedTags) {
const tag = hedString.slice(startPosition, endPosition)
if (isHedTag) {
const [shortTagString, singleError] = conversionFn(schemas, tag, hedString, startPosition)
const [shortTagString, singleError] = conversionFn(schema, tag, hedString, startPosition)
issues = issues.concat(singleError)
finalString += shortTagString
} else {
Expand All @@ -275,9 +275,10 @@ const convertHedString = function (schemas, hedString, conversionFn) {
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {string} hedString The HED tag to convert.
* @return {[string, Issue[]]} The long-form string and any issues.
* @deprecated
*/
const convertHedStringToLong = function (schemas, hedString) {
return convertHedString(schemas, hedString, convertTagToLong)
return convertHedString(schemas.baseSchema, hedString, convertTagToLong)
}

/**
Expand All @@ -286,9 +287,10 @@ const convertHedStringToLong = function (schemas, hedString) {
* @param {Schemas} schemas The schema container object containing short-to-long mappings.
* @param {string} hedString The HED tag to convert.
* @return {[string, Issue[]]} The short-form string and any issues.
* @deprecated
*/
const convertHedStringToShort = function (schemas, hedString) {
return convertHedString(schemas, hedString, convertTagToShort)
return convertHedString(schemas.baseSchema, hedString, convertTagToShort)
}

module.exports = {
Expand Down
25 changes: 25 additions & 0 deletions validator/stringParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const delimiters = new Set([','])
*/
const splitHedString = function (hedString, hedSchemas, groupStartingIndex = 0) {
const doubleQuoteCharacter = '"'
const colonCharacter = ':'
const slashCharacter = '/'
const invalidCharacters = ['{', '}', '[', ']', '~']

const hedTags = []
Expand All @@ -27,6 +29,7 @@ const splitHedString = function (hedString, hedSchemas, groupStartingIndex = 0)
let currentTag = ''
let startingIndex = 0
let resetStartingIndex = false
let extraColons = { before: [], after: [] }

let ParsedHedTagClass
if (hedSchemas.isHed2) {
Expand All @@ -39,17 +42,34 @@ const splitHedString = function (hedString, hedSchemas, groupStartingIndex = 0)

/**
* Finish the tag accumulated in `currentTag` and reset the per-tag parsing state.
*
* If `currentTag` is non-empty, a parsed tag object is created from it and appended
* to `hedTags`, and its conversion issues are collected. Any colons still recorded
* in `extraColons.before` afterwards are reported as invalid characters.
*
* @param {number} i The index used as the end bound of the tag (added to `groupStartingIndex`).
*/
const pushTag = function (i) {
if (!utils.string.stringIsEmpty(currentTag)) {
let librarySchemaName = ''
// Exactly one colon was seen ahead of a slash: treat the text before it as the
// library schema nickname and the text after it as the tag proper.
if (extraColons.before.length === 1) {
// pop() also empties the list, so this colon is not reported as invalid below.
const colonIndex = extraColons.before.pop()
// NOTE(review): colon positions are recorded from the character-loop index, but
// colonIndex is used here as an offset into currentTag - confirm that the two
// agree for tags that do not begin at the start of the string being split.
librarySchemaName = currentTag.substring(0, colonIndex)
currentTag = currentTag.substring(colonIndex + 1)
}
const parsedHedTag = new ParsedHedTagClass(
currentTag.trim(),
hedString,
[groupStartingIndex + startingIndex, groupStartingIndex + i],
hedSchemas,
librarySchemaName,
)
hedTags.push(parsedHedTag)
conversionIssues.push(...parsedHedTag.conversionIssues)
}
resetStartingIndex = true
currentTag = ''
// Whatever is left in `before` (more than one colon, or colons belonging to an
// empty tag) cannot be a library schema prefix, so each is flagged as invalid.
for (const extraColonIndex of extraColons.before) {
syntaxIssues.push(
generateIssue('invalidCharacter', {
character: colonCharacter,
index: groupStartingIndex + extraColonIndex,
string: hedString,
}),
)
}
// Start the next tag with fresh colon bookkeeping; colons recorded in `after`
// are discarded here without being reported.
extraColons = { before: [], after: [] }
}

// Loop a character at a time.
Expand All @@ -67,6 +87,11 @@ const splitHedString = function (hedString, hedSchemas, groupStartingIndex = 0)
groupDepth++
} else if (character === closingGroupCharacter) {
groupDepth--
} else if (character === slashCharacter) {
extraColons.before.push(...extraColons.after)
extraColons.after = []
} else if (character === colonCharacter) {
extraColons.after.push(i)
}
if (groupDepth === 0 && delimiters.has(character)) {
// Found the end of a tag, so push the current tag.
Expand Down
89 changes: 63 additions & 26 deletions validator/types/parsedHed.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const { Memoizer } = require('../../utils/types')

const { getTagSlashIndices, replaceTagNameWithPound, getTagName } = require('../../utils/hed')
const { convertPartialHedStringToLong } = require('../../converter/converter')
const { generateIssue } = require('../../common/issues/issues')

/**
* A parsed HED substring.
Expand Down Expand Up @@ -39,44 +40,80 @@ class ParsedHedTag extends ParsedHedSubstring {
* @param {string} hedString The original HED string.
* @param {int[]} originalBounds The bounds of the HED tag in the original HED string.
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} librarySchemaName The label of this tag's library schema in the dataset's schema spec.
*/
constructor(originalTag, hedString, originalBounds, hedSchemas) {
constructor(
originalTag,
hedString,
originalBounds,
hedSchemas,
librarySchemaName,
) {
super(originalTag, originalBounds)
let canonicalTag, conversionIssues
if (hedSchemas.baseSchema) {
;[canonicalTag, conversionIssues] = convertPartialHedStringToLong(
hedSchemas,
originalTag,
hedString,
originalBounds[0],
)
} else {
canonicalTag = originalTag
conversionIssues = []
}

this.convertTag(hedString, hedSchemas, librarySchemaName)
/**
* The canonical form of the HED tag.
* The formatted canonical version of the HED tag.
* @type {string}
*/
this.formattedTag = this.formatTag()
}

/**
* Convert this tag to long form.
*
* @param {string} hedString The original HED string.
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} librarySchemaName The label of this tag's library schema in the dataset's schema spec.
*/
convertTag(hedString, hedSchemas, librarySchemaName) {
if (hedSchemas.isSyntaxOnly) {
/**
* The canonical form of the HED tag.
* @type {string}
*/
this.canonicalTag = this.originalTag
/**
* Any issues encountered during tag conversion.
* @type {Issue[]}
*/
this.conversionIssues = []

return
}
if (librarySchemaName) {
/**
* The HED schema this tag belongs to.
* @type {Schema}
*/
this.schema = hedSchemas.librarySchemas.get(librarySchemaName)
if (this.schema === undefined) {
this.conversionIssues = [
generateIssue('unmatchedLibrarySchema', {
tag: this.originalTag,
library: librarySchemaName,
}),
]
this.canonicalTag = this.originalTag
return
}
} else {
this.schema = hedSchemas.baseSchema
}
const [canonicalTag, conversionIssues] = convertPartialHedStringToLong(
this.schema,
this.originalTag,
hedString,
this.originalBounds[0],
)
this.canonicalTag = canonicalTag
/**
* Any issues encountered during tag conversion.
* @type {Array}
*/
this.conversionIssues = conversionIssues
// TODO: Implement
this.schema = hedSchemas.baseSchema
/**
* The formatted canonical version of the HED tag.
* @type {string}
*/
this.formattedTag = this.format()
}

/**
* Format this HED tag by removing newlines, double quotes, and slashes.
*/
format() {
formatTag() {
this.originalTag = this.originalTag.replace('\n', ' ')
let hedTagString = this.canonicalTag.trim()
if (hedTagString.startsWith('"')) {
Expand Down

0 comments on commit bb906d0

Please sign in to comment.