-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/master' into deps
# Conflicts: # yarn.lock
- Loading branch information
Showing
8 changed files
with
799 additions
and
226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
"@cube-creator/model": patch | ||
"@cube-creator/core-api": patch | ||
--- | ||
|
||
Guess default column datatype from CSV sample values |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,30 @@ | ||
// Test bootstrap: registers Babel, loads variables from .local.env and installs chai plugins. | ||
// NOTE(review): rendered diff hunk - the `path:` and `chaiAsPromised` lines each appear twice, presumably the removed and the added version. | ||
/* eslint-disable import/no-unresolved */ | ||
/* eslint-disable import/no-extraneous-dependencies */ | ||
/* eslint-disable @typescript-eslint/no-var-requires */ | ||
// Register Babel for the listed extensions, using the config file given below. | ||
require('@babel/register')({ | ||
configFile: './babel.config.json', | ||
extensions: ['.js', '.jsx', '.ts', '.tsx'], | ||
}) | ||
|
||
// Load environment variables from the .local.env file located next to this script. | ||
require('dotenv').config({ | ||
path: require('path').resolve(__dirname, '.local.env') | ||
path: require('path').resolve(__dirname, '.local.env'), | ||
}) | ||
|
||
// chai-snapshot-matcher is required only for its side effects; its export is not used here. | ||
require('chai-snapshot-matcher') | ||
const chai = require('chai') | ||
const sinonChai = require('sinon-chai') | ||
const quantifiers = require('chai-quantifiers') | ||
|
||
var chaiAsPromised = require('chai-as-promised') | ||
const chaiAsPromised = require('chai-as-promised') | ||
chai.use(chaiAsPromised) | ||
chai.use(quantifiers) | ||
|
||
// Project-local shape matcher, also required only for its side effects. | ||
require('./packages/testing/lib/chaiShapeMatcher') | ||
|
||
chai.use(sinonChai) | ||
|
||
// Dynamically import mocha-chai-rdf | ||
// NOTE(review): the promise returned by this async IIFE is neither awaited nor given a rejection handler, | ||
// so the matchers are registered asynchronously and an import failure would surface as an unhandled rejection - confirm this is acceptable. | ||
;(async () => { | ||
const rdfMatchers = await import('mocha-chai-rdf/matchers.js') | ||
chai.use(rdfMatchers.default) | ||
})() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import { NamedNode } from '@rdfjs/types' | ||
import { xsd } from '@tpluscode/rdf-ns-builders' | ||
import { validators } from 'rdf-validate-datatype' | ||
|
||
// Predicate that receives a string value and returns a boolean. | ||
type Validator = (value: string) => boolean | ||
|
||
// Looks up the validator for the given datatype name via `validators.find`. | ||
// When the lookup yields null or undefined, falls back to a validator that rejects every value. | ||
const getValidator = (name: NamedNode): Validator => | ||
validators.find(name) ?? (() => false) | ||
|
||
// A candidate datatype used by the inference below. | ||
interface Datatype { | ||
// validator applied to individual values | ||
check: Validator | ||
// datatype term returned by inferDatatype when this candidate is selected | ||
name: NamedNode | ||
// candidates tried, in order, when a value fails `check` | ||
broader: Datatype[] | ||
} | ||
|
||
// Builds a Datatype for `name`: its validator comes from getValidator and the | ||
// remaining arguments become its list of broader datatypes. | ||
const getDatatype = (name: NamedNode, ...broader: Datatype[]): Datatype => | ||
({ name, check: getValidator(name), broader }) | ||
|
||
// Creates the candidate datatypes and returns them in the order in which | ||
// inferDatatype tries them against the first non-empty value. | ||
// Only integer is given a broader datatype (decimal); all others have none. | ||
const getDatatypes = () => { | ||
// avoid gDay, gMonth and gYear because they are easily confused with integer | ||
const decimal = getDatatype(xsd.decimal) | ||
const integer = getDatatype(xsd.integer, decimal) | ||
const gYearMonth = getDatatype(xsd.gYearMonth) | ||
const date = getDatatype(xsd.date) | ||
const time = getDatatype(xsd.time) | ||
const dateTime = getDatatype(xsd.dateTime) | ||
const boolean = getDatatype(xsd.boolean) | ||
// integer before decimal because decimal is broader | ||
return [integer, decimal, date, time, dateTime, gYearMonth, boolean] | ||
} | ||
|
||
// Advances `iterator` until it is exhausted or `predicate` accepts a value. | ||
// Returns the iterator result at which it stopped: either the `done` result | ||
// or the result holding the first accepted value. Skipped values are consumed. | ||
const nextUntil = <T>(iterator: Iterator<T>, predicate: (value: T) => boolean) => { | ||
while (true) { | ||
const result = iterator.next() | ||
// the predicate is only evaluated when the iterator is not done | ||
if (result.done || predicate(result.value)) { | ||
return result | ||
} | ||
} | ||
} | ||
|
||
// Infers a datatype for a sequence of string values; empty strings are skipped. | ||
// Returns xsd.string when there is no non-empty value, when no candidate matches | ||
// the first non-empty value, or when a later value fits neither the current | ||
// datatype nor one of its broader datatypes. Otherwise returns the name of the | ||
// datatype that was current when the values ran out. | ||
export function inferDatatype(values: Iterable<string>): NamedNode { | ||
// get the first datatype that matches the first (non-empty) value | ||
const valueIterator = values[Symbol.iterator]() | ||
let currentValue = nextUntil(valueIterator, value => value !== '') | ||
if (currentValue.done) { | ||
return xsd.string // no values to check | ||
} | ||
const datatypeIterator = getDatatypes()[Symbol.iterator]() | ||
let currentDatatype = nextUntil(datatypeIterator, type => type.check(currentValue.value)) | ||
if (currentDatatype.done) { | ||
return xsd.string // no datatype found that matches the first value | ||
} | ||
// iterate over the rest of the values, moving to broader types if needed | ||
while (true) { | ||
// skip ahead to the next non-empty value that the current datatype rejects | ||
currentValue = nextUntil(valueIterator, value => value !== '' && !currentDatatype.value.check(value)) | ||
if (currentValue.done) { | ||
return currentDatatype.value.name // all values successfully checked | ||
} | ||
// look for broader types | ||
// only the rejected value is tested against the broader type; earlier values are not re-checked | ||
currentDatatype = nextUntil(currentDatatype.value.broader[Symbol.iterator](), type => type.check(currentValue.value)) | ||
if (currentDatatype.done) { | ||
return xsd.string // no broader type found that matches the value | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import { describe, it } from 'mocha' | ||
import { expect } from 'chai' | ||
import { xsd } from '@tpluscode/rdf-ns-builders' | ||
import { inferDatatype } from '../../lib/datatypeInference' | ||
|
||
// Mocha suite for inferDatatype: single values of each recognized datatype, | ||
// empty input and empty strings, and sequences that widen to decimal or fall back to xsd:string. | ||
// NOTE(review): two test titles below spell the prefix as 'xd:'; presumably 'xsd:' was intended. | ||
describe('@cube-creator/model/DatatypeChecker', () => { | ||
it('recognize xsd:integer', () => { | ||
expect(inferDatatype(['42'])).to.eq(xsd.integer) | ||
}) | ||
it('recognize xsd:decimal', () => { | ||
expect(inferDatatype(['42.1'])).to.eq(xsd.decimal) | ||
}) | ||
it('recognize xsd:boolean', () => { | ||
// if the first value was 0 or 1, it would be considered as xsd:integer | ||
expect(inferDatatype(['true', 'false', '0', '1'])).to.eq(xsd.boolean) | ||
}) | ||
it('recognize xsd:date', () => { | ||
expect(inferDatatype(['2021-01-01'])).to.eq(xsd.date) | ||
}) | ||
it('recognize xsd:time', () => { | ||
expect(inferDatatype(['23:57:05'])).to.eq(xsd.time) | ||
}) | ||
it('recognize xsd:dateTime', () => { | ||
expect(inferDatatype(['2021-01-01T23:57:05'])).to.eq(xsd.dateTime) | ||
}) | ||
it('recognize xsd:gYearMonth', () => { | ||
expect(inferDatatype(['2021-12'])).to.eq(xsd.gYearMonth) | ||
}) | ||
it('recognize xsd:string', () => { | ||
expect(inferDatatype(['abc'])).to.eq(xsd.string) | ||
}) | ||
it('recognize two xsd:integer values', () => { | ||
expect(inferDatatype(['42', '42'])).to.eq(xsd.integer) | ||
}) | ||
it('recognize xsd:string with empty array', () => { | ||
expect(inferDatatype([])).to.eq(xsd.string) | ||
}) | ||
it('recognize xsd:string with empty string', () => { | ||
expect(inferDatatype([''])).to.eq(xsd.string) | ||
}) | ||
it('recognize xd:integer ignoring empty strings', () => { | ||
expect(inferDatatype(['', '42', ''])).to.eq(xsd.integer) | ||
}) | ||
it('recognize xsd:string after xsd:date', () => { | ||
expect(inferDatatype(['2021-01-01', 'foo'])).to.eq(xsd.string) | ||
}) | ||
it('recognize xsd:decimal after xsd:integer', () => { | ||
expect(inferDatatype(['42', '42.1'])).to.eq(xsd.decimal) | ||
}) | ||
it('recognize xsd:string after xsd:integer', () => { | ||
expect(inferDatatype(['42', 'foo'])).to.eq(xsd.string) | ||
}) | ||
it('recognize xd:string when mixed types', () => { | ||
expect(inferDatatype(['', '42', '2021-01-01'])).to.eq(xsd.string) | ||
}) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,8 @@ | |
"include": [ | ||
"packages", | ||
"apis", | ||
"cli" | ||
"cli", | ||
"mocha-setup.js" | ||
], | ||
"exclude": [ | ||
"node_modules" | ||
|
Oops, something went wrong.