Skip to content

Commit

Permalink
feat(lib): Replace shuffling lib with own implementation (#174)
Browse files Browse the repository at this point in the history
  • Loading branch information
isair authored Feb 23, 2021
1 parent 0508ccb commit 36f4f0e
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 32 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

A library that aims to completely remove the overhead of creating tensors from CSV files, allowing you to dive right into the fun parts of your ML project.

- Lightweight.
- [Lightweight](https://bundlephobia.com/result?p=tensorflow-load-csv).
- Fast.
- Flexible.
- TypeScript compatible.
Expand All @@ -21,18 +21,21 @@ You can find the docs [here](https://barissencan.com/tensorflow-load-csv/).
## Installation

NPM:

```sh
npm install tensorflow-load-csv
```

Yarn:

```sh
yarn add tensorflow-load-csv
```

## Usage

Simple usage:

```js
import loadCsv from 'tensorflow-load-csv';

Expand All @@ -46,6 +49,7 @@ labels.print();
```

Advanced usage:

```js
import loadCsv from 'tensorflow-load-csv';

Expand Down
25 changes: 4 additions & 21 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,5 @@
},
"peerDependencies": {
"@tensorflow/tfjs": "^2.0.1"
},
"dependencies": {
"shuffle-seed": "^1.1.6"
}
}
2 changes: 1 addition & 1 deletion src/loadCsv.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import fs from 'fs';

import * as tf from '@tensorflow/tfjs';
import { shuffle } from 'shuffle-seed';

import { CsvReadOptions, CsvTable } from './loadCsv.models';
import filterColumns from './filterColumns';
import splitTestData from './splitTestData';
import applyMappings from './applyMappings';
import shuffle from './shuffle';

const defaultShuffleSeed = 'mncv9340ur';

Expand Down
51 changes: 51 additions & 0 deletions src/shuffle.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
 * Creates a seeded pseudo-random number generator.
 *
 * Every call to the returned function advances an internal counter by a
 * fixed increment and scrambles it with 32-bit integer arithmetic, so the
 * same seed always yields the same sequence.
 *
 * @param a Seed the internal counter starts from.
 * @returns A function producing numbers in the range [0, 1).
 */
const mulberry32 = (a: number) => {
  let state = a;

  return () => {
    state += 0x6d2b79f5;

    // Math.imul and the bitwise operators coerce their operands to 32 bits.
    const stirred = Math.imul(state ^ (state >>> 15), state | 1);
    const folded =
      stirred ^ (stirred + Math.imul(stirred ^ (stirred >>> 7), stirred | 61));
    const finished = folded ^ (folded >>> 14);

    // `>>> 0` reinterprets the result as unsigned before scaling by 2^32.
    return (finished >>> 0) / 4294967296;
  };
};

/**
 * Hashes a string into a non-negative integer that fits in 53 bits.
 *
 * Two 32-bit accumulators are updated with every UTF-16 code unit of the
 * input, cross-mixed once at the end, and then combined: the low 21 bits of
 * the second accumulator form the high part of the result and the first
 * accumulator (as unsigned) forms the low 32 bits.
 *
 * @param str Text to hash.
 * @param seed Optional value XOR-ed into both accumulators before hashing.
 * @returns An integer between 0 and 2^53 - 1.
 */
const cyrb53 = (str: string, seed = 0) => {
  let low = 0xdeadbeef ^ seed;
  let high = 0x41c6ce57 ^ seed;

  for (let index = 0; index < str.length; index += 1) {
    const unit = str.charCodeAt(index);
    low = Math.imul(low ^ unit, 2654435761);
    high = Math.imul(high ^ unit, 1597334677);
  }

  const lowOwnTerm = Math.imul(low ^ (low >>> 16), 2246822507);
  const lowCrossTerm = Math.imul(high ^ (high >>> 13), 3266489909);
  low = lowOwnTerm ^ lowCrossTerm;

  // The second accumulator is finalised against the already-updated first one.
  const highOwnTerm = Math.imul(high ^ (high >>> 16), 2246822507);
  const highCrossTerm = Math.imul(low ^ (low >>> 13), 3266489909);
  high = highOwnTerm ^ highCrossTerm;

  return 4294967296 * (2097151 & high) + (low >>> 0);
};

/**
 * Returns a shuffled copy of `array`; the input array is never mutated.
 *
 * The resulting order is fully determined by `seed`, so the same input and
 * seed always produce the same output.
 *
 * @param array Items to shuffle.
 * @param seed Number passed straight to the `mulberry32` generator, or a
 *   string that is first hashed to a number with `cyrb53`. Defaults to `0`.
 * @returns A new array containing the same items in pseudo-random order.
 */
function shuffle<T>(array: readonly T[], seed: number | string = 0): T[] {
  const numericSeed = typeof seed === 'string' ? cyrb53(seed) : seed;
  const random = mulberry32(numericSeed);

  // Work on a copy so callers keep their original ordering.
  const output = array.slice();

  // Fisher-Yates: walk from the end, swapping each slot with a randomly
  // chosen slot from the not-yet-fixed prefix (which includes itself).
  let remaining = output.length;
  while (remaining > 0) {
    const pick = Math.floor(random() * remaining);
    remaining -= 1;

    const held = output[remaining];
    output[remaining] = output[pick];
    output[pick] = held;
  }

  return output;
}

export default shuffle;
12 changes: 6 additions & 6 deletions tests/loadCsv.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@ test('Shuffling should work and preserve feature - label pairs', () => {
// @ts-ignore
expect(features.arraySync()).toBeDeepCloseTo(
[
[102, -164],
[5, 40.34],
[0.234, 1.47],
[-93.2, 103.34],
[102, -164],
],
3
);
expect(labels.arraySync()).toMatchObject([
['Landotzka'],
['Landistan'],
['SomeCountria'],
['SomeOtherCountria'],
['Landotzka'],
]);
});

Expand All @@ -61,18 +61,18 @@ test('Shuffling with a custom seed should work', () => {
// @ts-ignore
expect(features.arraySync()).toBeDeepCloseTo(
[
[5, 40.34],
[-93.2, 103.34],
[102, -164],
[5, 40.34],
[0.234, 1.47],
[-93.2, 103.34],
],
3
);
expect(labels.arraySync()).toMatchObject([
['Landistan'],
['SomeOtherCountria'],
['Landotzka'],
['Landistan'],
['SomeCountria'],
['SomeOtherCountria'],
]);
});

Expand Down
24 changes: 24 additions & 0 deletions tests/shuffle.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import shuffle from '../src/shuffle';

const data = [1, 2, 3, 4];

// Pins the exact order produced by the default seed.
test('Shuffling without a seed should change order', () => {
  const shuffled = shuffle(data);

  expect(shuffled).toEqual([4, 3, 1, 2]);
});

test('Shuffling should not modify the original array', () => {
  // Snapshot the input first so any in-place mutation by shuffle is detected.
  const snapshot = data.slice();

  const shuffled = shuffle(data);

  // The input must be left exactly as it was...
  expect(data).toEqual(snapshot);
  // ...and the result must be a distinct, reordered array.
  expect(shuffled).not.toBe(data);
  expect(shuffled).not.toEqual(data);
});

// Pins the exact order produced by the numeric seed 7.
test('Shuffling with a number seed should change order', () => {
  const shuffled = shuffle(data, 7);

  expect(shuffled).toEqual([3, 2, 4, 1]);
});

// Pins the exact order produced by the string seed 'hello'.
test('Shuffling with a string seed should change order', () => {
  const shuffled = shuffle(data, 'hello');

  expect(shuffled).toEqual([2, 4, 3, 1]);
});

test('Shuffling with different seeds should produce different results', () => {
  // Compare the two actual results; indexing past the end of a results array
  // would yield `undefined` and make the assertion pass unconditionally.
  const withNumberSeed = shuffle(data, 7);
  const withStringSeed = shuffle(data, 'hello');

  expect(withNumberSeed).not.toEqual(withStringSeed);
});

0 comments on commit 36f4f0e

Please sign in to comment.