From 5f66bc203301d5e36da4b6e49b9298e2194b5980 Mon Sep 17 00:00:00 2001 From: Diogo Sobral Date: Fri, 30 Dec 2022 12:55:34 +0000 Subject: [PATCH] Add support for serializers --- README.md | 51 ++++++++++- package.json | 2 + src/index.js | 75 ++++++++++++++++- test/src/benchmark/samples.js | 65 ++++++++++++++ test/src/index.test.js | 154 +++++++++++++++++++++++++++++++++- yarn.lock | 17 ++++ 6 files changed, 359 insertions(+), 5 deletions(-) create mode 100644 test/src/benchmark/samples.js diff --git a/README.md b/README.md index 4f0184f..a53aeb0 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,15 @@ Object redaction with whitelist and blacklist. Blacklist items have higher prior 3. `options` _(Object)_: An object with optional options. `options.replacement` _(Function)_: A function that allows customizing the replacement value (default implementation is `--REDACTED--`). - + + `options.serializers` _(List[Object])_: A list of serializers to apply. Each serializer must contain two properties: `path` (path for the value to be serialized, must be a `string`) and `serializer` (function to be called on the path's value). + `options.trim` _(Boolean)_: A flag that enables trimming all redacted values, saving their keys to a `__redacted__` list (default value is `false`). 
### Example ```js -const anonymizer = require('@uphold/anonymizer'); +const { anonymizer } = require('@uphold/anonymizer'); const whitelist = ['foo.key', 'foo.depth.*', 'bar.*', 'toAnonymize.baz', 'toAnonymizeSuperString']; const blacklist = ['foo.depth.innerBlacklist', 'toAnonymize.*']; const anonymize = anonymizer({ blacklist, whitelist }); @@ -39,6 +41,51 @@ anonymize(data); // } ``` +#### Example using serializers + +```js +const { anonymizer } = require('@uphold/anonymizer'); +const whitelist = ['foo.key', 'foo.depth.*', 'bar.*', 'toAnonymize.baz']; +const blacklist = ['foo.depth.innerBlacklist']; +const serializers = [ + { path: 'foo.key', serializer: () => 'biz' }, + { path: 'toAnonymize', serializer: () => ({ baz: 'baz' }) } +]; +const anonymize = anonymizer({ blacklist, whitelist }, { serializers }); + +const data = { + foo: { key: 'public', another: 'bar', depth: { bar: 10, innerBlacklist: 11 } }, + bar: { foo: 1, bar: 2 }, + toAnonymize: {} +}; + +// { +// foo: { +// key: 'biz', +// another: '--REDACTED--', +// depth: { bar: 10, innerBlacklist: '--REDACTED--' } +// }, +// bar: { foo: 1, bar: 2 }, +// toAnonymize: { baz: 'baz' } +// } +``` + +### Default serializers + +The introduction of serializers also added the possibility of using serializer functions exported by our module. The list of default serializers is presented below: +- error + +#### Example + +```js +const { anonymizer, defaultSerializers } = require('@uphold/anonymizer'); +const serializers = [ + { path: 'foo', serializer: defaultSerializers.error } +]; + +const anonymize = anonymizer({ whitelist }, { serializers }); +``` + ## Releasing a new version - Diff the current code with the latest tag and make sure the output is expected. 
diff --git a/package.json b/package.json index e7821ec..f758242 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,8 @@ "dependencies": { "json-stringify-safe": "^5.0.1", "lodash.get": "^4.4.2", + "lodash.set": "^4.3.2", + "serialize-error": "^5.0.0", "traverse": "^0.6.6" }, "devDependencies": { diff --git a/src/index.js b/src/index.js index 467cde2..511c9a2 100644 --- a/src/index.js +++ b/src/index.js @@ -4,7 +4,9 @@ * Module dependencies. */ +const { serializeError } = require('serialize-error'); const get = require('lodash.get'); +const set = require('lodash.set'); const stringify = require('json-stringify-safe'); const traverse = require('traverse'); @@ -14,27 +16,88 @@ const traverse = require('traverse'); const DEFAULT_REPLACEMENT = '--REDACTED--'; +/** + * Validate serializers: every entry must provide a function as its `serializer`, otherwise a `TypeError` is thrown. + */ + +function validateSerializers(serializers) { + serializers.forEach(({ path, serializer }) => { + if (typeof serializer !== 'function') { + throw new TypeError(`Invalid serializer for \`${path}\` path: must be a function`); + } + }); +} + +/** + * Compute the serialized value for every serializer whose path resolves to a defined value in `values`. A serializer that throws yields an error message string for its path instead of propagating the error. + */ + +function computeSerializedChanges(values, serializers) { + const changes = {}; + + for (const { path, serializer } of serializers) { + const value = get(values, path); + + if (value === undefined) { + continue; + } + + try { + changes[path] = serializer(value); + } catch (error) { + changes[path] = `Anonymize ERROR: Error while applying ${path} serializer`; + } + } + + return changes; +} + /** * Module exports. 
+ * + * Example: + * + * anonymizer({ + * whitelist: ['foo'] + * }, { + * replacement, + * serializers: [ + * { path: 'foo.bar', serializer: () => {} } + * ] + * }) */ -module.exports = ( +module.exports.anonymizer = ( { blacklist = [], whitelist = [] } = {}, - { replacement = () => DEFAULT_REPLACEMENT, trim = false } = {} + { replacement = () => DEFAULT_REPLACEMENT, serializers = [], trim = false } = {} ) => { const whitelistTerms = whitelist.join('|'); const whitelistPaths = new RegExp(`^(${whitelistTerms.replace(/\./g, '\\.').replace(/\*/g, '.*')})$`, 'i'); const blacklistTerms = blacklist.join('|'); const blacklistPaths = new RegExp(`^(${blacklistTerms.replace(/\./g, '\\.').replace(/\*/g, '.*')})$`, 'i'); + validateSerializers(serializers); + return values => { if (!(values instanceof Object)) { return values; } const blacklistedKeys = new Set(); + // JSON.parse(stringify(values)) builds an object copy that isn't an + // exact replication of the initial input. It destroys some relevant + // data that can't be lost. However, it can't be swapped for another + // solution due to its performance and because it can also handle + // classes correctly. Moreover, the `computeSerializedChanges()` + // also requires a copy to avoid updates by reference and in order + // to avoid two copies, we build an object with the result of applying + // the serializers to their respective paths. After we perform the copy, + // the serializers output is merged into the copy. + const changes = computeSerializedChanges(values, serializers); const obj = JSON.parse(stringify(values)); + Object.entries(changes).forEach(([path, change]) => set(obj, path, change)); + traverse(obj).forEach(function() { const path = this.path.join('.'); const isBuffer = Buffer.isBuffer(get(values, path)); @@ -78,3 +141,11 @@ module.exports = ( return obj; }; }; + +/** + * Module exports defaultSerializers. 
+ */ + +module.exports.defaultSerializers = { + error: serializeError +}; diff --git a/test/src/benchmark/samples.js b/test/src/benchmark/samples.js new file mode 100644 index 0000000..159c9c1 --- /dev/null +++ b/test/src/benchmark/samples.js @@ -0,0 +1,65 @@ +'use strict'; + +/** + * `generateObjectSample` generates a sample object with a tree structure, calling `leafValue` to produce each leaf. + */ + +module.exports.generateObjectSample = ({ depth = 6, branches = 2, leafValue = () => 'foobar', leafs = 32 }) => { + const sample = {}; + + if (depth === 0) { + for (let leaf = 0; leaf < leafs; leaf++) { + sample[`leaf-${leaf}`] = leafValue(); + } + + return sample; + } + + for (let branch = 0; branch < branches; branch++) { + sample[`branch-${branch}`] = this.generateObjectSample({ branches, depth: depth - 1, leafValue, leafs }); + } + + return sample; +}; + +/** + * `generateObjectSamplesPaths` generates a list with all paths contained in a sample generated using `generateObjectSample`. + */ + +module.exports.generateObjectSamplesPaths = ({ depth = 6, branches = 2, leafs = 32, path = '' }) => { + let paths = []; + + if (depth === 0) { + for (let leaf = 0; leaf < leafs; leaf++) { + paths.push(`${path}.leaf-${leaf}`); + } + + return paths; + } + + for (let branch = 0; branch < branches; branch++) { + const childPathString = path === '' ? `branch-${branch}` : `${path}.branch-${branch}`; + const childPaths = this.generateObjectSamplesPaths({ branches, depth: depth - 1, leafs, path: childPathString }); + + paths = paths.concat(childPaths); + } + + return paths; +}; + +module.exports.samples = { + // Sample with 0 props. + SAMPLE_0x: {}, + // Sample with 2048 props. + SAMPLE_1x: this.generateObjectSample({ depth: 6 }), + // Sample with 4096 props. + SAMPLE_2x: this.generateObjectSample({ depth: 7 }), + // Sample with 8192 props. + SAMPLE_4x: this.generateObjectSample({ depth: 8 }), + // Sample with 16384 props. + SAMPLE_8x: this.generateObjectSample({ depth: 9 }), + // Sample with 32768 props. 
+ SAMPLE_16x: this.generateObjectSample({ depth: 10 }), + // Sample with 65536 props. + SAMPLE_32x: this.generateObjectSample({ depth: 11 }) +}; diff --git a/test/src/index.test.js b/test/src/index.test.js index 88b0a19..7e30f37 100644 --- a/test/src/index.test.js +++ b/test/src/index.test.js @@ -4,7 +4,9 @@ * Module dependencies. */ -const anonymizer = require('src'); +const { anonymizer } = require('src'); +const { generateObjectSample, generateObjectSamplesPaths, samples } = require('./benchmark/samples'); +const { serializeError } = require('serialize-error'); /** * Test `Anonymizer`. @@ -251,6 +253,100 @@ describe('Anonymizer', () => { }); }); + describe('serializers', () => { + it('should throw an error when serializer is not a function', () => { + const serializers = [{ path: 'foo', serializer: 123 }]; + const whitelist = ['*']; + + try { + anonymizer({ whitelist }, { serializers }); + + fail(); + } catch (error) { + expect(error).toBeInstanceOf(TypeError); + expect(error.message).toEqual('Invalid serializer for `foo` path: must be a function'); + } + }); + + it('should serialize errors when `serializeError` is applied', () => { + const error = new Error('foobar'); + const serializer = jest.fn(serializeError); + const serializers = [{ path: 'e', serializer }, { path: 'err', serializer }, { path: 'error', serializer }]; + const whitelist = ['*']; + const anonymize = anonymizer({ whitelist }, { serializers }); + + const result = anonymize({ + e: error, + err: { + statusCode: 400 + }, + error, + error2: error, + foo: 'bar' + }); + + expect(serializer).toHaveBeenCalledTimes(3); + expect(result.e).toHaveProperty('name', 'Error'); + expect(result.e).toHaveProperty('message', 'foobar'); + expect(result.err).toHaveProperty('statusCode', 400); + expect(result.error).toHaveProperty('name', 'Error'); + expect(result.error).toHaveProperty('message', 'foobar'); + expect(result.error2).toEqual({}); + expect(result.foo).toEqual('bar'); + }); + + it('should apply 
serializers to existing paths', () => { + const foobar = jest.fn(() => 'bii'); + const foobiz = jest.fn(() => 'bzz'); + const foobzz = jest.fn(() => ({ bar: 'biz' })); + const whitelist = ['*']; + const serializers = [ + { path: 'bar', serializer: foobiz }, + { path: 'foo', serializer: foobar }, + { path: 'foobar', serializer: foobzz } + ]; + const anonymize = anonymizer({ whitelist }, { serializers }); + + const result = anonymize({ foo: 'bar' }); + + expect(foobar).toHaveBeenCalledTimes(1); + expect(foobar).toHaveBeenCalledWith('bar'); + expect(foobiz).toHaveBeenCalledTimes(0); + expect(foobzz).toHaveBeenCalledTimes(0); + expect(result.foo).toEqual('bii'); + }); + + it('should apply serializers to nested paths', () => { + const error = new Error('foobar'); + const foobar = jest.fn(() => 'bii'); + const foobiz = jest.fn(() => 'bzz'); + const fooerror = jest.fn(serializeError); + const whitelist = ['*']; + const serializers = [ + { path: 'bar.foo', serializer: foobiz }, + { path: 'bar.error', serializer: fooerror }, + { path: 'foo.bar.biz', serializer: foobar } + ]; + const anonymize = anonymizer({ whitelist }, { serializers }); + + const result = anonymize({ + bar: { error, foo: 'bar' }, + foo: { + bar: { biz: 'foo' } + } + }); + + expect(foobar).toHaveBeenCalledTimes(1); + expect(foobar).toHaveBeenCalledWith('foo'); + expect(foobiz).toHaveBeenCalledTimes(1); + expect(foobiz).toHaveBeenCalledWith('bar'); + expect(result.bar.foo).toEqual('bzz'); + expect(result.bar.error).toHaveProperty('name', 'Error'); + expect(result.bar.error).toHaveProperty('message', 'foobar'); + expect(result.foo).toEqual({ bar: { biz: 'bii' } }); + }); + }); + describe('trim', () => { it('should group array keys', () => { const anonymize = anonymizer({ whitelist: ['foo'] }, { trim: true }); @@ -310,5 +406,61 @@ describe('Anonymizer', () => { }); }); }); + + describe.skip('benchmark', () => { + Object.entries(samples).forEach(([sample, data]) => { + it(`should run '${sample}' in less than 
'350' ms`, () => { + const anonymize = anonymizer({ blacklist: ['*'] }); + + const startTime = process.hrtime(); + + anonymize(data); + + const endTime = process.hrtime(startTime); + const msElapsed = endTime[0] * 1000 + endTime[1] / 1000000; + + expect(msElapsed).toBeLessThan(350); + }); + }); + + [6, 7, 8, 9, 10, 11].forEach(depth => { + it(`should call serializers in all ${Math.pow(2, depth + 5)} properties in less than '450' ms`, () => { + const data = generateObjectSample({ depth }); + const serializer = jest.fn(() => 'bii'); + const serializers = generateObjectSamplesPaths({ depth }).map(path => ({ path, serializer })); + const anonymize = anonymizer({ blacklist: ['*'] }, { serializers }); + + const startTime = process.hrtime(); + + anonymize(data); + + const endTime = process.hrtime(startTime); + const msElapsed = endTime[0] * 1000 + endTime[1] / 1000000; + + expect(msElapsed).toBeLessThan(450); + expect(serializer).toHaveBeenCalledTimes(Math.pow(2, depth) * 32); + }); + }); + + [6, 7, 8, 9, 10, 11].forEach(depth => { + it(`should call 'serializeError' in all ${Math.pow(2, depth + 5)} properties in less than '450' ms`, () => { + const data = generateObjectSample({ depth, leafValue: () => new Error('foobar') }); + const serializer = jest.fn(serializeError); + const serializers = generateObjectSamplesPaths({ depth }).map(path => ({ path, serializer })); + const anonymize = anonymizer({ blacklist: ['*'] }, { serializers }); + + const startTime = process.hrtime(); + + anonymize(data); + + const endTime = process.hrtime(startTime); + const msElapsed = endTime[0] * 1000 + endTime[1] / 1000000; + + console.log('Time elapsed: %dms', msElapsed); + expect(msElapsed).toBeLessThan(450); + expect(serializer).toHaveBeenCalledTimes(Math.pow(2, depth) * 32); + }); + }); + }); }); }); diff --git a/yarn.lock b/yarn.lock index 981f4f4..2807df3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2606,6 +2606,11 @@ lodash.get@^4.4.2: resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99" 
integrity sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk= +lodash.set@^4.3.2: + version "4.3.2" + resolved "https://registry.yarnpkg.com/lodash.set/-/lodash.set-4.3.2.tgz#d8757b1da807dde24816b0d6a84bea1a76230b23" + integrity sha512-4hNPN5jlm/N/HLMCO43v8BXKq9Z7QdAGc/VGrRD61w8gN9g/6jF9A4L1pbUgBLCffi0w9VsXfTOij5x8iTyFvg== + lodash.sortby@^4.7.0: version "4.7.0" resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438" @@ -3552,6 +3557,13 @@ semver@^6.0.0, semver@^6.1.2: resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d" integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw== +serialize-error@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/serialize-error/-/serialize-error-5.0.0.tgz#a7ebbcdb03a5d71a6ed8461ffe0fc1a1afed62ac" + integrity sha512-/VtpuyzYf82mHYTtI4QKtwHa79vAdU5OQpNPAmE/0UDdlGT0ZxHwC+J6gXkw29wwoVI8fMPsfcVHOwXtUQYYQA== + dependencies: + type-fest "^0.8.0" + set-blocking@^2.0.0, set-blocking@~2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" @@ -4018,6 +4030,11 @@ type-fest@^0.5.2: resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.5.2.tgz#d6ef42a0356c6cd45f49485c3b6281fc148e48a2" integrity sha512-DWkS49EQKVX//Tbupb9TFa19c7+MK1XmzkrZUR8TAktmE/DizXoaoJV6TZ/tSIPXipqNiRI6CyAe7x69Jb6RSw== +type-fest@^0.8.0: + version "0.8.1" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d" + integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA== + typedarray@^0.0.6: version "0.0.6" resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"