Skip to content

Commit

Permalink
use rdf validators to guess datatypes
Browse files Browse the repository at this point in the history
  • Loading branch information
giacomociti committed Oct 29, 2024
1 parent ed3202e commit 3830fff
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 38 deletions.
3 changes: 2 additions & 1 deletion packages/model/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
"@rdfine/shacl": "^0.8.5",
"@tpluscode/rdf-ns-builders": "^1.0.0",
"@tpluscode/rdfine": "^0.5.19",
"@types/rdf-validate-datatype": "^0.2.0",
"is-uri": "^1.2.0",
"uri-template": "^1.0.1"
},
"devDependencies": {
"@cube-creator/testing": "^0.1.21",
"@rdfjs/types": "^1.1.0",
"@types/clownface": "^1",
"@types/is-uri": "^1",
"@rdfjs/types": "^1.1.0",
"alcaeus": "^2",
"chai": "^4.3.4",
"mocha": "^10"
Expand Down
76 changes: 48 additions & 28 deletions packages/model/test/CsvColumn.test.ts
Original file line number Diff line number Diff line change
@@ -1,59 +1,79 @@
import { describe, it } from 'mocha'
import { expect } from 'chai'
import { validators } from 'rdf-validate-datatype'
import { xsd } from '@tpluscode/rdf-ns-builders'
import { NamedNode } from '@rdfjs/types'

/** True when `value` is an optional minus sign followed only by digits. */
const isInteger = (value: string) => {
  const integerPattern = /^-?\d+$/
  return integerPattern.test(value)
}

/**
 * True when `value` is an optional minus sign, digits, and optionally a dot
 * followed by at least one more digit.
 */
const isDecimal = (value: string) => {
  const decimalPattern = /^-?\d+(\.\d+)?$/
  return decimalPattern.test(value)
}

/**
 * True when `value` has the shape `dddd-dd-dd`. Only the shape is checked;
 * month and day are not range-validated.
 */
const isDate = (value: string) => {
  const datePattern = /^\d{4}-\d{2}-\d{2}$/
  return datePattern.test(value)
}
/** Predicate deciding whether a lexical value is acceptable for one datatype. */
type Validator = (value: string) => boolean

/**
 * Candidate datatypes, kept in the order in which they were registered.
 *
 * Declared exactly once: a second `const datatypes` in the same scope is a
 * redeclaration error. Entries are identified by RDF terms rather than plain
 * strings, so every element has the `{ check, name: NamedNode }` shape that
 * `add` pushes. The list starts empty and is filled by the `add(...)` calls.
 */
const datatypes: Array<{ check: Validator; name: NamedNode }> = []

/**
 * Registers `name` as a candidate datatype, paired with the validator that
 * `validators.find` returns for it. When the lookup yields nothing the
 * datatype is skipped and `datatypes` is left untouched.
 */
const add = (name: NamedNode) => {
  const validator = validators.find(name)
  if (!validator) {
    return
  }
  datatypes.push({ check: validator, name })
}

// Registration order is significant: `datatype` tries the candidates in the
// order they were added, so the integer check (which rejects '42.0') is
// registered ahead of the decimal check (which accepts both '42' and '42.0').
add(xsd.integer)
add(xsd.decimal)
add(xsd.date)

/**
 * Guesses a datatype for a column from its cell values.
 *
 * Candidates from `datatypes` are tried in registration order and the first
 * one whose validator accepts EVERY value wins. Checking the whole column
 * against each candidate matters because the candidates are not nested:
 * a column such as ['42', '2021-01-01'] must not be reported as a date just
 * because its last value happens to be one.
 *
 * @param values lexical values of the column; an empty column is vacuously
 *   accepted by the first registered candidate
 * @returns the `name` of the first candidate accepting all values, or
 *   `xsd.string` when no candidate does (including when none is registered)
 */
const datatype = (values: string[]) => {
  for (const { check, name } of datatypes) {
    // Wrap the call so the validator only ever receives the value itself,
    // not the extra (index, array) arguments `every` would pass along.
    if (values.every(value => check(value))) {
      return name
    }
  }
  // Always fall back to the RDF term, never the bare 'string' literal, so
  // callers can rely on `.equals(...)` being available on the result.
  return xsd.string
}

// Plain `describe`, not `describe.only`: an exclusive suite left in committed
// code makes mocha skip every other suite in the run.
describe('@cube-creator/model/CsvColumn', () => {
  /**
   * Looks up the validator registered for `name` and fails the calling test
   * loudly when there is none. A `not.null` expectation followed by an
   * `if (validator)` guard would pass without asserting anything if the
   * lookup yielded `undefined`.
   */
  const validatorFor = (name: NamedNode) => {
    const validator = validators.find(name)
    if (!validator) {
      throw new Error(`No validator registered for ${name.value}`)
    }
    return validator
  }

  describe('columnDatatype', () => {
    it('recognize integers', () => {
      const isInteger = validatorFor(xsd.integer)

      expect(isInteger('42')).to.be.true
      expect(isInteger('-42')).to.be.true
      expect(isInteger('foo')).to.be.false
      expect(isInteger('42.0')).to.be.false
      expect(isInteger('2021-01-01')).to.be.false
    })

    it('recognize decimals', () => {
      const isDecimal = validatorFor(xsd.decimal)

      expect(isDecimal('42')).to.be.true
      expect(isDecimal('-42')).to.be.true
      expect(isDecimal('foo')).to.be.false
      expect(isDecimal('42.0')).to.be.true
      expect(isDecimal('2021-01-01')).to.be.false
    })

    it('recognize dates', () => {
      const isDate = validatorFor(xsd.date)

      expect(isDate('2021-01-01')).to.be.true
      expect(isDate('2021-01-01T00:00:00Z')).to.be.false
      expect(isDate('foo')).to.be.false
    })
  })

  describe('datatype', () => {
    it('recognize datatype', () => {
      // Results are RDF terms, so compare with `equals` rather than chai's
      // strict equality on object identity.
      expect(datatype(['42']).equals(xsd.integer)).to.be.true
      expect(datatype(['42', '42']).equals(xsd.integer)).to.be.true
      expect(datatype(['42', '42.1']).equals(xsd.decimal)).to.be.true
      expect(datatype(['42', 'foo']).equals(xsd.string)).to.be.true
      expect(datatype(['2021-01-01', '2021-01-01']).equals(xsd.date)).to.be.true
    })
  })
})
33 changes: 24 additions & 9 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3364,14 +3364,22 @@
"@types/eslint" "*"
"@types/estree" "*"

"@types/eslint@*", "@types/eslint@^7.28.2", "@types/eslint@^8":
"@types/eslint@*":
version "8.4.10"
resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-8.4.10.tgz#19731b9685c19ed1552da7052b6f668ed7eb64bb"
integrity sha512-Sl/HOqN8NKPmhWo2VBEPm0nvHnu2LL3v9vKo8MEq0EtbJ4eVzGPl41VNPvn5E1i5poMk4/XD8UriLHpJvEP/Nw==
dependencies:
"@types/estree" "*"
"@types/json-schema" "*"

"@types/eslint@^7.28.2":
version "7.29.0"
resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.29.0.tgz#e56ddc8e542815272720bb0b4ccc2aff9c3e1c78"
integrity sha512-VNcvioYDH8/FxaeTKkM4/TiTwt6pBV9E3OfGmvaw8tPl0rrHCJ4Ll15HRT+pMiFAf/MLQvAzC+6RzUMEL9Ceng==
dependencies:
"@types/estree" "*"
"@types/json-schema" "*"

"@types/estree@*", "@types/estree@^0.0.51":
version "0.0.51"
resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.51.tgz#cfd70924a25a3fd32b218e5e420e6897e1ac4f40"
Expand Down Expand Up @@ -3698,6 +3706,13 @@
dependencies:
rdf-js "^4.0.2"

"@types/rdf-validate-datatype@^0.2.0":
version "0.2.0"
resolved "https://registry.yarnpkg.com/@types/rdf-validate-datatype/-/rdf-validate-datatype-0.2.0.tgz#2f509dea6e1e276c9ed6490c4f4b17a3f72b2d78"
integrity sha512-JWzGvOX3pw6x1fED6lEgu+IHE4k/he/a4WAPA+ho6etfGHWT1Cja+3vc9HDVWFNsorv8sO+TcSQBapKws49hFw==
dependencies:
rdf-validate-datatype "*"

"@types/rdf-validate-shacl@^0.4.0":
version "0.4.7"
resolved "https://registry.yarnpkg.com/@types/rdf-validate-shacl/-/rdf-validate-shacl-0.4.7.tgz#9f269df160d98506e648447da389118adf99ac55"
Expand Down Expand Up @@ -12616,6 +12631,14 @@ rdf-utils-fs@^2, rdf-utils-fs@^2.1.0:
"@rdfjs/formats-common" "^2.2.0"
readable-stream "^3.6.0"

rdf-validate-datatype@*, rdf-validate-datatype@^0.2.0:
version "0.2.1"
resolved "https://registry.yarnpkg.com/rdf-validate-datatype/-/rdf-validate-datatype-0.2.1.tgz#40da8e6ce6dbcc19e45da12f9446b37aa69ef315"
integrity sha512-DpREnmoWDxC80KyslZeBPLQb3ztyeiOolT4uCl58tCju2KHJu4j5vonmVVdEJh2Mpad5UY57v6sSM/hfSTFGKQ==
dependencies:
"@rdfjs/term-map" "^2.0.0"
"@tpluscode/rdf-ns-builders" "3 - 4"

rdf-validate-datatype@^0.1.3, rdf-validate-datatype@^0.1.5:
version "0.1.5"
resolved "https://registry.yarnpkg.com/rdf-validate-datatype/-/rdf-validate-datatype-0.1.5.tgz#1ebfe4a506aa7ff55e6c20eb4d559e55cf3936d7"
Expand All @@ -12624,14 +12647,6 @@ rdf-validate-datatype@^0.1.3, rdf-validate-datatype@^0.1.5:
"@rdfjs/namespace" "^1.1.0"
"@rdfjs/to-ntriples" "^2.0.0"

rdf-validate-datatype@^0.2.0:
version "0.2.1"
resolved "https://registry.yarnpkg.com/rdf-validate-datatype/-/rdf-validate-datatype-0.2.1.tgz#40da8e6ce6dbcc19e45da12f9446b37aa69ef315"
integrity sha512-DpREnmoWDxC80KyslZeBPLQb3ztyeiOolT4uCl58tCju2KHJu4j5vonmVVdEJh2Mpad5UY57v6sSM/hfSTFGKQ==
dependencies:
"@rdfjs/term-map" "^2.0.0"
"@tpluscode/rdf-ns-builders" "3 - 4"

rdf-validate-shacl@^0.4.0, rdf-validate-shacl@^0.4.3, rdf-validate-shacl@^0.4.4, rdf-validate-shacl@^0.4.5:
version "0.4.5"
resolved "https://registry.yarnpkg.com/rdf-validate-shacl/-/rdf-validate-shacl-0.4.5.tgz#a95e92e22ff45c9ffd5131229c3cb08a4a5c668e"
Expand Down

0 comments on commit 3830fff

Please sign in to comment.