From 60ecebaf6f75449120e7dbc786281c995692f385 Mon Sep 17 00:00:00 2001 From: Sam Thorogood Date: Tue, 28 Apr 2020 16:24:18 +1000 Subject: [PATCH] fix test code --- bench/README.md | 131 +++++++++++++++++++++++++++++------------------ bench/compare.js | 94 +++++++++++++++------------------- 2 files changed, 123 insertions(+), 102 deletions(-) diff --git a/bench/README.md b/bench/README.md index ba3537d..50f8516 100644 --- a/bench/README.md +++ b/bench/README.md @@ -1,4 +1,4 @@ -Benchmark code for Node. +Benchmark code for Node, which encodes and decodes a string. Usage: ```bash @@ -10,10 +10,9 @@ Usage: If you don't provide a source file, or specify a length instead, this will generate actual random text in JavaScript. For a better test, use suggested UTF-8 encoded source text from [Project Gutenberg](https://www.gutenberg.org/files/23841/23841-0.txt). -The linked file has a ratio of "bytes-to-length" of 0.35. +The linked file has a "bytes-to-length" ratio of 0.35. -This ratio is an odd number. -It compares the on-disk UTF-8 bytes (which optimize for ASCII and other low Unicode values) to the length of JavaScript's UCS-2 / UTF-16 internal representation. +This ratio compares the on-disk UTF-8 bytes (which optimize for ASCII and other low Unicode values) to the length of JavaScript's UCS-2 / UTF-16 internal representation. All Unicode code points can be represented as either one or two "lengths" of a JavaScript string, but each code point can be between 1-4 bytes in UTF-8. The valid ratios therefore range from ⅓ through 1.0 (e.g., ASCII). @@ -22,57 +21,91 @@ The valid ratios therefore range from ⅓ through 1.0 (e.g., ASCII). By default, the benchmark tool disables and removes native-like implementations in Node. It removes `Buffer` plus the native `TextEncoder` and `TextDecoder` from the global scope. -Use `--native` to enable support for them. -This will speed up `fast-text-encoding`, as it uses `Buffer` when available. +Use `--native` to retain `Buffer`, which will speed up `fast-text-encoding`. # Results -For the suggested text on my test rig (macOS 3.6GHz i9), output looks like (snipped): +As you'd expect, the native implementation is the speediest. +There's a bit of noise in the test; it's not perfect. + +Tests on macOS, 3.6ghz i9, "fast-text-encoding" version 1.0.2. + +## Low Ratio + +Using the mentioned [test file](https://www.gutenberg.org/files/23841/23841-0.txt). ``` compare (file): length=971478, bytes=2740678 (ratio=0.35) +native speedups allowed? NO! - 10.2209ms .native 971477 - 10.8853ms .native 971477 - 10.9297ms .native 971477 - 11.1351ms .native 971477 - 11.3154ms .native 971477 - 11.3741ms .native 971477 - 11.4921ms .native 971477 - 12.1611ms .native 971477 - 25.9949ms fast-text-encoding - 26.3912ms fast-text-encoding - 26.7037ms fast-text-encoding - 32.1910ms fast-text-encoding - 36.6454ms fast-text-encoding - 44.6358ms fast-text-encoding - 47.1846ms fast-text-encoding - 51.7178ms fast-text-encoding - 125.2835ms fastestsmallesttextencoderdecoder - 126.0772ms fastestsmallesttextencoderdecoder - 129.5148ms fastestsmallesttextencoderdecoder - 129.9449ms fastestsmallesttextencoderdecoder - 135.1421ms fastestsmallesttextencoderdecoder - 137.6716ms fastestsmallesttextencoderdecoder - 152.4639ms fastestsmallesttextencoderdecoder - 155.1741ms fastestsmallesttextencoderdecoder - 467.4895ms text-encoding-polyfill 971477 - 469.5857ms text-encoding-polyfill 971477 - 470.4829ms text-encoding-polyfill 971477 - 472.6093ms text-encoding-polyfill 971477 - 472.6358ms text-encoding-polyfill 971477 - 474.5790ms text-encoding-polyfill 971477 - 476.7881ms text-encoding-polyfill 971477 - 477.0778ms text-encoding 971477 - 478.0450ms text-encoding-polyfill 971477 - 478.2031ms text-encoding 971477 - 480.0009ms text-encoding 971477 - 480.2125ms text-encoding 971477 - 485.2014ms text-encoding 971477 - 485.9727ms text-encoding 971477 - 486.2783ms text-encoding 971477 - 490.5393ms text-encoding 971477 + 10.2111ms .native 971477 + 10.3203ms .native 971477 + 10.9366ms .native 971477 + 11.0249ms .native 971477 + 11.6899ms .native 971477 + 12.0494ms .native 971477 + 36.8205ms fast-text-encoding + 38.8506ms fast-text-encoding + 42.8944ms fast-text-encoding + 47.1252ms fast-text-encoding + 53.2264ms fast-text-encoding + 54.3824ms fast-text-encoding + 134.3251ms fastestsmallesttextencoderdecoder + 136.6160ms fastestsmallesttextencoderdecoder + 136.6426ms fastestsmallesttextencoderdecoder + 137.1191ms fastestsmallesttextencoderdecoder + 138.0675ms fastestsmallesttextencoderdecoder + 139.7024ms fastestsmallesttextencoderdecoder + 470.6317ms text-encoding 971477 + 473.9435ms text-encoding-polyfill 971477 + 475.3746ms text-encoding-polyfill 971477 + 475.5197ms text-encoding 971477 + 479.5304ms text-encoding-polyfill 971477 + 481.5665ms text-encoding-polyfill 971477 + 482.3216ms text-encoding-polyfill 971477 + 485.8300ms text-encoding 971477 + 488.6046ms text-encoding-polyfill 971477 + 490.6234ms text-encoding 971477 + 493.1231ms text-encoding 971477 + 493.4262ms text-encoding 971477 ``` -As you'd expect, the native implementation is the speediest. -There's a bit of noise in the test; it's not perfect. +## High Ratio + +UTF-8 text which mostly looks like ASCII, [from here](https://www.gutenberg.org/ebooks/44217.txt.utf-8). + +``` +compare (file): length=99190, bytes=101960 (ratio=0.97) +native speedups allowed? NO! + + 0.3634ms .native 99189 + 0.6308ms .native 99189 + 0.6374ms .native 99189 + 0.6768ms .native 99189 + 0.8520ms .native 99189 + 0.8711ms .native 99189 + 2.2705ms fastestsmallesttextencoderdecoder + 2.2917ms fastestsmallesttextencoderdecoder + 2.3838ms fastestsmallesttextencoderdecoder + 2.9010ms fast-text-encoding + 3.3695ms fast-text-encoding + 3.4776ms fast-text-encoding + 7.5336ms fast-text-encoding + 8.3014ms fastestsmallesttextencoderdecoder + 9.4051ms fastestsmallesttextencoderdecoder + 10.0201ms fastestsmallesttextencoderdecoder + 10.7546ms fast-text-encoding + 12.2336ms fast-text-encoding + 16.4143ms text-encoding-polyfill 99189 + 16.6515ms text-encoding-polyfill 99189 + 17.1320ms text-encoding 99189 + 17.8296ms text-encoding 99189 + 23.5324ms text-encoding-polyfill 99189 + 23.5962ms text-encoding 99189 + 25.2543ms text-encoding 99189 + 25.5921ms text-encoding 99189 + 26.2855ms text-encoding-polyfill 99189 + 27.0913ms text-encoding-polyfill 99189 + 30.2643ms text-encoding 99189 + 32.3319ms text-encoding-polyfill 99189 +``` diff --git a/bench/compare.js b/bench/compare.js index 6f90910..5a62525 100755 --- a/bench/compare.js +++ b/bench/compare.js @@ -9,28 +9,21 @@ const options = mri(process.argv.slice(2), { default: { runs: 6, native: false, + local: false, }, }); const packages = ['fast-text-encoding', 'text-encoding', 'text-encoding-polyfill', 'text-encoding-utf-8', 'fastestsmallesttextencoderdecoder']; -if (!options.native) { - global.Buffer.from = () => { - throw new Error('use of Buffer.from'); - }; - delete global.Buffer; - console.warn('NOT including any native code...'); - - if (global.TextEncoder && global.TextDecoder) { - global.TextEncoder.prototype.encode = () => { - throw new Error('use of native encode()'); - }; - global.TextDecoder.prototype.decode = () => { - throw new Error('use of native decode()'); - }; - } +const NativeTextEncoder = global.TextEncoder; +const NativeTextDecoder = global.TextDecoder; + +function deleteGlobals() { delete global.TextEncoder; delete global.TextDecoder; + if (!options.native) { + delete global.Buffer; + } } function buildRandomString(length) { @@ -56,6 +49,9 @@ if (firstArg === undefined || +firstArg) { console.info(`compare (file): length=${chalk.yellow(string.length)}, bytes=${chalk.yellow(stat.size)} (ratio=${chalk.yellow(ratio.toFixed(2))})`); } +console.info('native speedups allowed?', chalk.red(options.native ? 'YES' : 'NO!')); +console.info(''); + // remove 'text-encoding-utf-8' after a certain size as it's just pathologically bad if (string.length >= 32768) { const index = packages.indexOf('text-encoding-utf-8'); @@ -64,48 +60,22 @@ if (string.length >= 32768) { } } -console.info(''); - -function run(use, s) { - const te = new use.TextEncoder('utf-8'); - const data = te.encode(s); - - const td = new use.TextDecoder('utf-8'); - const outs = td.decode(data); - - return outs.length; -} - -function shuffle(arr) { - const out = []; - while (arr.length) { - const choice = Math.floor(Math.random() * arr.length); - out.push(arr.splice(choice, 1)[0]); - } - arr.push(...out); -} - -const results = []; const impl = {}; -const hasNative = (global.TextEncoder && global.TextDecoder) -const nativeImpl = hasNative ? {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder} : null; for (const name of packages) { - delete global.TextDecoder; - delete global.TextEncoder; + deleteGlobals(); const exports = require(name); const use = {TextEncoder: global.TextEncoder, TextDecoder: global.TextDecoder, ...exports}; - if (hasNative && ((use.TextDecoder === nativeImpl.TextDecoder || use.TextEncoder === nativeImpl.TextEncoder))) { + if (use.TextDecoder === NativeTextDecoder || use.TextEncoder === NativeTextEncoder) { throw new Error(`package ${name} used native code`); } impl[name] = use; } -do { - delete global.TextDecoder; - delete global.TextEncoder; +if (options.local) { + deleteGlobals(); try { require('../text.min.js'); @@ -114,27 +84,45 @@ do { } catch (e) { // ignore } -} while (false); - -delete global.TextDecoder; -delete global.TextEncoder; +} +deleteGlobals(); -if (hasNative) { +if (NativeTextDecoder && NativeTextEncoder) { packages.push('.native'); - impl['.native'] = nativeImpl; + impl['.native'] = {TextEncoder: NativeTextEncoder, TextDecoder: NativeTextDecoder}; } (async function() { + const results = []; + + function run(use, s) { + const te = new use.TextEncoder('utf-8'); + const data = te.encode(s); + + const td = new use.TextDecoder('utf-8'); + const outs = td.decode(data); + + return outs.length; + } + + function shuffle(arr) { + const out = []; + while (arr.length) { + const choice = Math.floor(Math.random() * arr.length); + out.push(arr.splice(choice, 1)[0]); + } + arr.push(...out); + } for (let i = 0; i < options.runs; ++i) { shuffle(packages); console.info('run', (i + 1)); - + for (const name of packages) { console.debug(chalk.gray(name)); const use = impl[name]; - + const start = performance.now(); const length = run(use, string); const duration = performance.now() - start;