From 36f4f0e92cb0c301910a9d8e3417d42ecb1b22fb Mon Sep 17 00:00:00 2001 From: Baris Sencan Date: Tue, 23 Feb 2021 22:18:10 +0000 Subject: [PATCH] feat(lib): Replace shuffling lib with own implementation (#174) --- README.md | 6 ++++- package-lock.json | 25 ++++----------------- package.json | 3 --- src/loadCsv.ts | 2 +- src/shuffle.ts | 51 +++++++++++++++++++++++++++++++++++++++++++ tests/loadCsv.test.ts | 12 +++++----- tests/shuffle.test.ts | 24 ++++++++++++++++++++ 7 files changed, 91 insertions(+), 32 deletions(-) create mode 100644 src/shuffle.ts create mode 100644 tests/shuffle.test.ts diff --git a/README.md b/README.md index f1d7caa..84ba4b9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A library that aims to remove the overhead of creating tensors from CSV files completely; allowing you to dive right into the fun parts of your ML project. -- Lightweight. +- [Lightweight](https://bundlephobia.com/result?p=tensorflow-load-csv). - Fast. - Flexible. - TypeScript compatible. @@ -21,11 +21,13 @@ You can find the docs [here](https://barissencan.com/tensorflow-load-csv/). 
## Installation NPM: + ```sh npm install tensorflow-load-csv ``` Yarn: + ```sh yarn add tensorflow-load-csv ``` @@ -33,6 +35,7 @@ yarn add tensorflow-load-csv ## Usage Simple usage: + ```js import loadCsv from 'tensorflow-load-csv'; @@ -46,6 +49,7 @@ labels.print(); ``` Advanced usage: + ```js import loadCsv from 'tensorflow-load-csv'; diff --git a/package-lock.json b/package-lock.json index ad4aadd..b555e37 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,9 +7,6 @@ "": { "version": "1.0.0", "license": "MIT", - "dependencies": { - "shuffle-seed": "^1.1.6" - }, "devDependencies": { "@commitlint/config-conventional": "^11.0.0", "@tensorflow/tfjs": "^2.1.0", @@ -17917,7 +17914,8 @@ "node_modules/seedrandom": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-2.4.3.tgz", - "integrity": "sha1-JDhQTa0zkXMUv/GKxNeU8W1qrsw=" + "integrity": "sha1-JDhQTa0zkXMUv/GKxNeU8W1qrsw=", + "dev": true }, "node_modules/semantic-release": { "version": "17.2.3", @@ -18306,14 +18304,6 @@ "vscode-textmate": "^5.2.0" } }, - "node_modules/shuffle-seed": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/shuffle-seed/-/shuffle-seed-1.1.6.tgz", - "integrity": "sha1-UzwSaDurO0+j6HUfxOViFGdEJgs=", - "dependencies": { - "seedrandom": "^2.4.2" - } - }, "node_modules/signal-exit": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.3.tgz", @@ -34769,7 +34759,8 @@ "seedrandom": { "version": "2.4.3", "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-2.4.3.tgz", - "integrity": "sha1-JDhQTa0zkXMUv/GKxNeU8W1qrsw=" + "integrity": "sha1-JDhQTa0zkXMUv/GKxNeU8W1qrsw=", + "dev": true }, "semantic-release": { "version": "17.2.3", @@ -35074,14 +35065,6 @@ "vscode-textmate": "^5.2.0" } }, - "shuffle-seed": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/shuffle-seed/-/shuffle-seed-1.1.6.tgz", - "integrity": "sha1-UzwSaDurO0+j6HUfxOViFGdEJgs=", - "requires": { - "seedrandom": 
/**
 * Builds a deterministic pseudo-random number generator (mulberry32).
 *
 * The returned function advances an internal counter on every call and maps
 * it to a float in the half-open range [0, 1). Two generators created with
 * the same seed produce the same sequence.
 *
 * NOTE: the arithmetic below is intentionally left untouched; changing it
 * would change the order produced for every existing seed.
 *
 * @param a - Numeric seed; also used as the generator's running state.
 * @returns A function yielding the next pseudo-random float in [0, 1).
 */
const mulberry32 = (a: number) => (): number => {
  let t = (a += 0x6d2b79f5);
  t = Math.imul(t ^ (t >>> 15), t | 1);
  t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
  // `>>> 0` reinterprets the result as unsigned before scaling by 2^32.
  return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
};

/**
 * Hashes a string into a non-negative integer of at most 53 bits (cyrb53),
 * so the result is exactly representable as a JavaScript number.
 *
 * @param str - Text to hash.
 * @param seed - Optional value mixed into both 32-bit accumulators.
 * @returns The hash: 21 bits taken from `h2` above 32 bits taken from `h1`.
 */
const cyrb53 = (str: string, seed = 0): number => {
  let h1 = 0xdeadbeef ^ seed;
  let h2 = 0x41c6ce57 ^ seed;

  for (let i = 0; i < str.length; i++) {
    const ch = str.charCodeAt(i);
    h1 = Math.imul(h1 ^ ch, 2654435761);
    h2 = Math.imul(h2 ^ ch, 1597334677);
  }

  // Final avalanche: each accumulator is mixed with the other one.
  h1 =
    Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^
    Math.imul(h2 ^ (h2 >>> 13), 3266489909);
  h2 =
    Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^
    Math.imul(h1 ^ (h1 >>> 13), 3266489909);

  return 4294967296 * (2097151 & h2) + (h1 >>> 0);
};

/**
 * Returns a shuffled copy of `array` using a seeded Fisher-Yates shuffle.
 *
 * The input array is never modified. The same seed always yields the same
 * order for the same input length.
 *
 * @param array - Items to shuffle.
 * @param seed - Number used directly as the generator seed, or a string that
 *   is first hashed to a number. Defaults to `0`.
 * @returns A new array containing the same items in shuffled order.
 */
function shuffle<T>(array: readonly T[], seed: number | string = 0): T[] {
  const numericSeed = typeof seed === 'string' ? cyrb53(seed) : seed;
  const random = mulberry32(numericSeed);

  const output = [...array];

  // Walk from the end: pick a random slot in the unshuffled prefix
  // [0, m) and move it to the end of that prefix.
  for (let m = output.length; m > 0; ) {
    const i = Math.floor(random() * m);
    m -= 1;

    const t = output[m];
    output[m] = output[i];
    output[i] = t;
  }

  return output;
}

export default shuffle;
'hello')]; + expect(results[0]).not.toEqual(results[1]); +});