From e5678496c0637df5591a1b097add9c6770854d3e Mon Sep 17 00:00:00 2001 From: Alexander Shtuchkin Date: Tue, 14 Jul 2020 12:10:17 -0400 Subject: [PATCH] Introduce the concept of backends * Add two backends: node & web * Convert core lib files to use the backends (and not use Buffer) * Convert utf16 codec as an example * Add testing for both node side and webpack * Bump Node.js minimal supported version to 4.5.0 and modernize some existing code. This will allow us to get rid of safer-buffer, our only dependency. --- .travis.yml | 11 +- backends/README.md | 73 ++++++++++ backends/node.js | 45 +++++++ backends/web.js | 46 +++++++ encodings/utf16.js | 220 ++++++++++++++----------------- lib/bom-handling.js | 72 +++++----- lib/index-node.js | 3 + lib/index-web.js | 3 + lib/index.js | 40 +++--- lib/streams.js | 158 ++++++++++------------ package.json | 10 +- test/dbcs-test.js | 3 +- test/main-test.js | 18 ++- test/mocha.opts | 3 +- test/sbcs-test.js | 3 +- test/streams-test.js | 4 +- test/utf16-test.js | 62 ++++++--- test/utils.js | 38 ++++++ test/webpack/basic-test.js | 2 +- test/webpack/iconv-lite-tests.js | 7 + test/webpack/index.js | 3 - test/webpack/karma.conf.js | 4 +- test/webpack/package.json | 2 +- test/webpack/stream-test.js | 2 +- 24 files changed, 525 insertions(+), 307 deletions(-) create mode 100644 backends/README.md create mode 100644 backends/node.js create mode 100644 backends/web.js create mode 100644 lib/index-node.js create mode 100644 lib/index-web.js create mode 100644 test/utils.js create mode 100644 test/webpack/iconv-lite-tests.js delete mode 100644 test/webpack/index.js diff --git a/.travis.yml b/.travis.yml index 670a4b3..1ff1c8f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,7 @@ language: node_js node_js: - - "0.10" - - "0.11" - - "0.12" - - "iojs" + - "4.5.0" # Oldest supported version + - "5.10.0" # Oldest supported version from version 5.x - "4" - "6" - "8" @@ -16,4 +14,7 @@ jobs: - name: webpack node_js: "12" install: cd test/webpack; npm install - script: npm test \ No newline at end of file + script: npm test + - name: node-web-backend + node_js: "12" + script: npm run-script test-node-web \ No newline at end of file diff --git a/backends/README.md b/backends/README.md new file mode 100644 index 0000000..a34224b --- /dev/null +++ b/backends/README.md @@ -0,0 +1,73 @@ +# iconv-lite backends + +To accomodate different environments (most notably Node.js and Browser) in an efficient manner, iconv-lite has a concept of 'backends'. +Backends convert internal data representations to what customers expect. + +Here's the overview of the data types used in iconv-lite codecs: + +   | encoder | decoder +------------------------------ | ----------------- | ------------- +input type | js string | Uint8Array, Buffer or any array with bytes +input internal representation | js string | same as input +input data access | str.charCodeAt(i) | bytes[i] +output type | Backend Bytes | js string +output internal representation | Uint8Array | Uint16Array +output data writing | bytes[i] | rawChars[i] + +The reasoning behind this choice is the following: + * For inputs, we try to use passed-in objects directly and not convert them, + to avoid perf hit. For decoder inputs that means that all codecs need to + be able to work with both Uint8Array-s and Buffer-s at the same time. + * For outputs, we standardize internal representation (what codecs works with) + to Uint8Array and Uint16Array because that seems to be the lowest common denominator between the + backends (Buffer can be interchanged with Uint8Array) that is not sacrificing performance. + +## Backend interface +```typescript + +BackendBytes = .. // Depends on the backend + +interface IconvLiteBackend { + // Encoder output: allocBytes() -> use Uint8Array -> bytesToResult(). + allocBytes(numBytes: int, fill: int): Uint8Array; + bytesToResult(bytes: Uint8Array, finalLen: int): BackendBytes; + concatByteResults(bufs: BackendBytes[]): BackendBytes; + + // Decoder output: allocRawChars -> use Uint16Array -> rawCharsToResult(). + allocRawChars(numChars: int): Uint16Array; + rawCharsToResult(rawChars: Uint16Array, finalLen: int): string; + + // TODO: We'll likely add some more methods here for additional performance +}; +``` + +## Codec pseudocode +```js +class Encoder { + write(str) { + const bytes = this.backend.allocBytes(str.length * max_bytes_per_char); + let bytesPos = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); // todo: handle surrogates. + // convert char to bytes + bytes[bytesPos++] = byte1; + ... + } + return this.backend.bytesToResult(bytes, bytesPos); + } +} + +class Decoder { + write(buf) { // NOTE: buf here can be Uint8Array, Buffer or regular array. + const chars = this.backend.allocRawChars(buf.length * max_chars_per_byte); + let charsPos = 0; + for (let i = 0; i < buf.length; i++) { + let byte1 = buf[i]; + // convert byte(s) to char + chars[charsPos++] = char; // todo: handle surrogates. + ... + } + return this.backend.rawCharsToResult(chars, charsPos); + } +} +``` \ No newline at end of file diff --git a/backends/node.js b/backends/node.js new file mode 100644 index 0000000..831dc8b --- /dev/null +++ b/backends/node.js @@ -0,0 +1,45 @@ +"use strict"; +// NOTE: This backend uses Buffer APIs that are only available in Node v4.5+ and v5.10+. + +module.exports = { + // Encoder string input: use str directly, .length, .charCodeAt(i). + // Encoder bytes output: allocBytes() -> use Uint8Array -> bytesToResult(). + allocBytes(numBytes, fill) { + // NOTE: We could do a 'new ArrayBuffer' here, but Buffer.alloc gives us pooling, which makes small chunks faster. + const buf = Buffer.alloc(numBytes, fill); + return new Uint8Array(buf.buffer, buf.byteOffset, numBytes); + }, + bytesToResult(bytes, finalLen) { + // In Node 5.10.0-6.3.0, Buffer.from() raises error if fed with zero-length buffer, so we check for it explicitly. + if (finalLen === 0) { + return Buffer.alloc(0); + } + + // In Node 4.5.0-5.10.0, Buffer.from() does not support (arrayBuffer, byteOffset, length) signature, only (arrayBuffer), + // so we emulate it with .slice(). + return Buffer.from(bytes.buffer).slice(bytes.byteOffset, bytes.byteOffset+finalLen); + }, + concatByteResults(bufs) { + return Buffer.concat(bufs); + }, + + // Decoder bytes input: use only array access + .length, so both Buffer-s and Uint8Array-s work. + // Decoder string output: allocRawChars -> use Uint16Array -> rawCharsToResult(). + allocRawChars(numChars) { + // NOTE: We could do a 'new ArrayBuffer' here, but Buffer.alloc gives us pooling, which makes small chunks faster. + const buf = Buffer.alloc(numChars * Uint16Array.BYTES_PER_ELEMENT); + return new Uint16Array(buf.buffer, buf.byteOffset, numChars); + }, + rawCharsToResult(rawChars, finalLen) { + // See comments in bytesToResult about old Node versions support. + if (finalLen === 0) { + return ''; + } + return Buffer.from(rawChars.buffer) + .slice(rawChars.byteOffset, rawChars.byteOffset + finalLen * Uint16Array.BYTES_PER_ELEMENT) + .toString('ucs2'); + }, + + // Optimizations + // maybe buf.swap16()? +}; \ No newline at end of file diff --git a/backends/web.js b/backends/web.js new file mode 100644 index 0000000..888c903 --- /dev/null +++ b/backends/web.js @@ -0,0 +1,46 @@ +"use strict"; +// NOTE: This backend uses TextDecoder interface. + +module.exports = { + // Encoder string input: use str directly, .length, .charCodeAt(i). + // Encoder bytes output: allocBytes() -> use Uint8Array -> bytesToResult(). + allocBytes(numBytes, fill) { + const arr = new Uint8Array(new ArrayBuffer(numBytes)); + if (fill != null) { + arr.fill(fill); + } + return arr; + }, + bytesToResult(bytes, finalLen) { + return bytes.subarray(0, finalLen); + }, + concatByteResults(bufs) { + bufs = bufs.filter((b) => b.length > 0); + if (bufs.length === 0) { + return new Uint8Array(); + } else if (bufs.length === 1) { + return bufs[0]; + } + + const totalLen = bufs.reduce((a, b) => a + b.length, 0); + const res = new Uint8Array(new ArrayBuffer(totalLen)); + let curPos = 0; + for (var i = 0; i < bufs.length; i++) { + res.set(bufs[i], curPos); + curPos += bufs[i].length; + } + return res; + }, + + // Decoder bytes input: use only array access + .length, so both Buffer-s and Uint8Array-s work. + // Decoder string output: allocRawChars -> use Uint16Array -> rawCharsToResult(). + allocRawChars(numChars) { + return new Uint16Array(new ArrayBuffer(numChars * Uint16Array.BYTES_PER_ELEMENT)); + }, + rawCharsToResult(rawChars, finalLen) { + return new TextDecoder("utf-16").decode(rawChars.subarray(0, finalLen)); + }, + + // Optimizations + // maybe buf.swap16()? +}; \ No newline at end of file diff --git a/encodings/utf16.js b/encodings/utf16.js index 97d0669..ba23f9a 100644 --- a/encodings/utf16.js +++ b/encodings/utf16.js @@ -1,67 +1,60 @@ "use strict"; -var Buffer = require("safer-buffer").Buffer; // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js // == UTF16-BE codec. ========================================================== -exports.utf16be = Utf16BECodec; -function Utf16BECodec() { +exports.utf16be = class Utf16BECodec { + get encoder() { return Utf16BEEncoder; } + get decoder() { return Utf16BEDecoder; } + get bomAware() { return true; } } -Utf16BECodec.prototype.encoder = Utf16BEEncoder; -Utf16BECodec.prototype.decoder = Utf16BEDecoder; -Utf16BECodec.prototype.bomAware = true; - - -// -- Encoding - -function Utf16BEEncoder() { -} - -Utf16BEEncoder.prototype.write = function(str) { - var buf = Buffer.from(str, 'ucs2'); - for (var i = 0; i < buf.length; i += 2) { - var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp; +class Utf16BEEncoder { + constructor(opts, codec, backend) { + this.backend = backend; } - return buf; -} -Utf16BEEncoder.prototype.end = function() { -} - - -// -- Decoding + write(str) { + const bytes = this.backend.allocBytes(str.length * 2); + let bytesPos = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + bytes[bytesPos++] = char >> 8; + bytes[bytesPos++] = char & 0xff; + } + return this.backend.bytesToResult(bytes, bytesPos); + } -function Utf16BEDecoder() { - this.overflowByte = -1; + end() {} } -Utf16BEDecoder.prototype.write = function(buf) { - if (buf.length == 0) - return ''; - - var buf2 = Buffer.alloc(buf.length + 1), - i = 0, j = 0; - - if (this.overflowByte !== -1) { - buf2[0] = buf[0]; - buf2[1] = this.overflowByte; - i = 1; j = 2; +class Utf16BEDecoder { + constructor(opts, codec, backend) { + this.backend = backend; + this.overflowByte = -1; } - for (; i < buf.length-1; i += 2, j+= 2) { - buf2[j] = buf[i+1]; - buf2[j+1] = buf[i]; + write(buf) { + const chars = this.backend.allocRawChars((buf.length+1) >> 1); + let charsPos = 0, i = 0; + + if (this.overflowByte !== -1 && i < buf.length) { + chars[charsPos++] = (this.overflowByte << 8) + buf[i++]; + } + + for (; i < buf.length-1; i += 2) { + chars[charsPos++] = (buf[i] << 8) + buf[i+1]; + } + + this.overflowByte = (i == buf.length-1) ? buf[i] : -1; + + return this.backend.rawCharsToResult(chars, charsPos); } - this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1; - - return buf2.slice(0, j).toString('ucs2'); -} - -Utf16BEDecoder.prototype.end = function() { - this.overflowByte = -1; + end() { + this.overflowByte = -1; + } } @@ -73,96 +66,85 @@ Utf16BEDecoder.prototype.end = function() { // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false). -exports.utf16 = Utf16Codec; -function Utf16Codec(codecOptions, iconv) { - this.iconv = iconv; -} - -Utf16Codec.prototype.encoder = Utf16Encoder; -Utf16Codec.prototype.decoder = Utf16Decoder; - - -// -- Encoding (pass-through) - -function Utf16Encoder(options, codec) { - options = options || {}; - if (options.addBOM === undefined) - options.addBOM = true; - this.encoder = codec.iconv.getEncoder('utf-16le', options); -} - -Utf16Encoder.prototype.write = function(str) { - return this.encoder.write(str); -} - -Utf16Encoder.prototype.end = function() { - return this.encoder.end(); +exports.utf16 = class Utf16Codec { + constructor(opts, iconv) { + this.iconv = iconv; + } + get encoder() { return Utf16Encoder; } + get decoder() { return Utf16Decoder; } } +class Utf16Encoder { + constructor(options, codec) { + options = options || {}; + if (options.addBOM === undefined) + options.addBOM = true; + this.encoder = codec.iconv.getEncoder(options.use || 'utf-16le', options); + } -// -- Decoding - -function Utf16Decoder(options, codec) { - this.decoder = null; - this.initialBufs = []; - this.initialBufsLen = 0; - - this.options = options || {}; - this.iconv = codec.iconv; + // Pass-through to this.encoder + write(str) { + return this.encoder.write(str); + } + + end() { + return this.encoder.end(); + } } -Utf16Decoder.prototype.write = function(buf) { - if (!this.decoder) { - // Codec is not chosen yet. Accumulate initial bytes. - this.initialBufs.push(buf); - this.initialBufsLen += buf.length; - - if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below) - return ''; - - // We have enough bytes -> detect endianness. - var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding); - this.decoder = this.iconv.getDecoder(encoding, this.options); - - var resStr = ''; - for (var i = 0; i < this.initialBufs.length; i++) - resStr += this.decoder.write(this.initialBufs[i]); - - this.initialBufs.length = this.initialBufsLen = 0; - return resStr; +class Utf16Decoder { + constructor(options, codec) { + this.decoder = null; + this.initialBufs = []; + this.initialBufsLen = 0; + + this.options = options || {}; + this.iconv = codec.iconv; } - return this.decoder.write(buf); -} + write(buf) { + if (!this.decoder) { + // Codec is not chosen yet. Accumulate initial bytes. + this.initialBufs.push(buf); + this.initialBufsLen += buf.length; + + if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below) + return ''; + + // We have enough bytes -> detect endianness. + return this._detectEndiannessAndSetDecoder(); + } + + return this.decoder.write(buf); + } + + end() { + if (!this.decoder) { + return this._detectEndiannessAndSetDecoder() + (this.decoder.end() || ''); + } + return this.decoder.end(); + } -Utf16Decoder.prototype.end = function() { - if (!this.decoder) { - var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding); + _detectEndiannessAndSetDecoder() { + const encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding); this.decoder = this.iconv.getDecoder(encoding, this.options); - var resStr = ''; - for (var i = 0; i < this.initialBufs.length; i++) - resStr += this.decoder.write(this.initialBufs[i]); - - var trail = this.decoder.end(); - if (trail) - resStr += trail; - + let resStr = this.initialBufs.reduce((a, b) => a + this.decoder.write(b), ''); this.initialBufs.length = this.initialBufsLen = 0; return resStr; } - return this.decoder.end(); } + function detectEncoding(bufs, defaultEncoding) { - var b = []; - var charsProcessed = 0; - var asciiCharsLE = 0, asciiCharsBE = 0; // Number of ASCII chars when decoded as LE or BE. + let b = []; + let charsProcessed = 0; + let asciiCharsLE = 0, asciiCharsBE = 0; // Number of ASCII chars when decoded as LE or BE. outer_loop: - for (var i = 0; i < bufs.length; i++) { - var buf = bufs[i]; - for (var j = 0; j < buf.length; j++) { + for (let i = 0; i < bufs.length; i++) { + const buf = bufs[i]; + for (let j = 0; j < buf.length; j++) { b.push(buf[j]); if (b.length === 2) { if (charsProcessed === 0) { diff --git a/lib/bom-handling.js b/lib/bom-handling.js index 1050872..12b944f 100644 --- a/lib/bom-handling.js +++ b/lib/bom-handling.js @@ -2,51 +2,49 @@ var BOMChar = '\uFEFF'; -exports.PrependBOM = PrependBOMWrapper -function PrependBOMWrapper(encoder, options) { - this.encoder = encoder; - this.addBOM = true; -} - -PrependBOMWrapper.prototype.write = function(str) { - if (this.addBOM) { - str = BOMChar + str; - this.addBOM = false; +exports.PrependBOM = class PrependBOMWrapper { + constructor(encoder) { + this.encoder = encoder; + this.addBOM = true; } - return this.encoder.write(str); -} + write(str) { + if (this.addBOM) { + str = BOMChar + str; + this.addBOM = false; + } + return this.encoder.write(str); + } -PrependBOMWrapper.prototype.end = function() { - return this.encoder.end(); + end() { + return this.encoder.end(); + } } -//------------------------------------------------------------------------------ - -exports.StripBOM = StripBOMWrapper; -function StripBOMWrapper(decoder, options) { - this.decoder = decoder; - this.pass = false; - this.options = options || {}; -} +exports.StripBOM = class StripBOMWrapper { + constructor(decoder, options) { + this.decoder = decoder; + this.pass = false; + this.options = options || {}; + } -StripBOMWrapper.prototype.write = function(buf) { - var res = this.decoder.write(buf); - if (this.pass || !res) + write(buf) { + var res = this.decoder.write(buf); + if (this.pass || !res) + return res; + + if (res[0] === BOMChar) { + res = res.slice(1); + if (typeof this.options.stripBOM === 'function') + this.options.stripBOM(); + } + + this.pass = true; return res; - - if (res[0] === BOMChar) { - res = res.slice(1); - if (typeof this.options.stripBOM === 'function') - this.options.stripBOM(); } - this.pass = true; - return res; -} - -StripBOMWrapper.prototype.end = function() { - return this.decoder.end(); + end() { + return this.decoder.end(); + } } - diff --git a/lib/index-node.js b/lib/index-node.js new file mode 100644 index 0000000..123cff6 --- /dev/null +++ b/lib/index-node.js @@ -0,0 +1,3 @@ +var iconv = module.exports = require("./index"); + +iconv.setBackend(require('../backends/node')); diff --git a/lib/index-web.js b/lib/index-web.js new file mode 100644 index 0000000..74c4d5d --- /dev/null +++ b/lib/index-web.js @@ -0,0 +1,3 @@ +var iconv = module.exports = require("./index"); + +iconv.setBackend(require('../backends/web')); diff --git a/lib/index.js b/lib/index.js index 657701c..f85d007 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,7 +1,5 @@ "use strict"; -var Buffer = require("safer-buffer").Buffer; - var bomHandling = require("./bom-handling"), iconv = module.exports; @@ -15,25 +13,20 @@ iconv.defaultCharSingleByte = '?'; // Public API. iconv.encode = function encode(str, encoding, options) { - str = "" + (str || ""); // Ensure string. + if (typeof str !== 'string') + throw new TypeError("iconv-lite can only encode() strings."); var encoder = iconv.getEncoder(encoding, options); var res = encoder.write(str); var trail = encoder.end(); - return (trail && trail.length > 0) ? Buffer.concat([res, trail]) : res; + return (trail && trail.length > 0) ? iconv.backend.concatByteResults([res, trail]) : res; } iconv.decode = function decode(buf, encoding, options) { - if (typeof buf === 'string') { - if (!iconv.skipDecodeWarning) { - console.error('Iconv-lite warning: decode()-ing strings is deprecated. Refer to https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding'); - iconv.skipDecodeWarning = true; - } - - buf = Buffer.from("" + (buf || ""), "binary"); // Ensure buffer. - } + if (typeof buf === 'string') + throw new TypeError("iconv-lite can't decode() strings. Please pass Buffer or Uint8Array instead."); var decoder = iconv.getDecoder(encoding, options); @@ -113,7 +106,7 @@ iconv._canonicalizeEncoding = function(encoding) { iconv.getEncoder = function getEncoder(encoding, options) { var codec = iconv.getCodec(encoding), - encoder = new codec.encoder(options, codec); + encoder = new codec.encoder(options, codec, iconv.backend); if (codec.bomAware && options && options.addBOM) encoder = new bomHandling.PrependBOM(encoder, options); @@ -123,7 +116,7 @@ iconv.getEncoder = function getEncoder(encoding, options) { iconv.getDecoder = function getDecoder(encoding, options) { var codec = iconv.getCodec(encoding), - decoder = new codec.decoder(options, codec); + decoder = new codec.decoder(options, codec, iconv.backend); if (codec.bomAware && !(options && options.stripBOM === false)) decoder = new bomHandling.StripBOM(decoder, options); @@ -149,11 +142,11 @@ iconv.enableStreamingAPI = function enableStreamingAPI(stream_module) { // Streaming API. iconv.encodeStream = function encodeStream(encoding, options) { - return new iconv.IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options); + return new iconv.IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options, iconv); } iconv.decodeStream = function decodeStream(encoding, options) { - return new iconv.IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options); + return new iconv.IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options, iconv); } iconv.supportsStreams = true; @@ -175,6 +168,21 @@ if (stream_module && stream_module.Transform) { }; } +// Add a helpful message if the backend is not set. +Object.defineProperty(iconv, "backend", { + configurable: true, + get() { + throw new Error("iconv-lite backend is not set. Please use iconv.setBackend().") + } +}); + +iconv.setBackend = function(backend) { + delete iconv.backend; + iconv.backend = backend; + iconv._codecDataCache = {}; +} + + if ("Ā" != "\u0100") { console.error("iconv-lite warning: js files use non-utf8 encoding. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info."); } diff --git a/lib/streams.js b/lib/streams.js index a150648..548ecd4 100644 --- a/lib/streams.js +++ b/lib/streams.js @@ -1,109 +1,95 @@ "use strict"; -var Buffer = require("safer-buffer").Buffer; - // NOTE: Due to 'stream' module being pretty large (~100Kb, significant in browser environments), // we opt to dependency-inject it instead of creating a hard dependency. module.exports = function(stream_module) { - var Transform = stream_module.Transform; - - // == Encoder stream ======================================================= - - function IconvLiteEncoderStream(conv, options) { - this.conv = conv; - options = options || {}; - options.decodeStrings = false; // We accept only strings, so we don't need to decode them. - Transform.call(this, options); - } - - IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, { - constructor: { value: IconvLiteEncoderStream } - }); - IconvLiteEncoderStream.prototype._transform = function(chunk, encoding, done) { - if (typeof chunk != 'string') - return done(new Error("Iconv encoding stream needs strings as its input.")); - try { - var res = this.conv.write(chunk); - if (res && res.length) this.push(res); - done(); + class IconvLiteEncoderStream extends stream_module.Transform { + constructor(conv, options, iconv) { + options = options || {}; + options.decodeStrings = false; // We accept only strings, so we don't need to decode them. + super(options); + this.conv = conv; + this.iconv = iconv; } - catch (e) { - done(e); - } - } - IconvLiteEncoderStream.prototype._flush = function(done) { - try { - var res = this.conv.end(); - if (res && res.length) this.push(res); - done(); - } - catch (e) { - done(e); + _transform(chunk, encoding, done) { + if (typeof chunk !== 'string') + return done(new Error("Iconv encoding stream needs strings as its input.")); + try { + const res = this.conv.write(chunk); + if (res && res.length) this.push(res); + done(); + } + catch (e) { + done(e); + } } - } - - IconvLiteEncoderStream.prototype.collect = function(cb) { - var chunks = []; - this.on('error', cb); - this.on('data', function(chunk) { chunks.push(chunk); }); - this.on('end', function() { - cb(null, Buffer.concat(chunks)); - }); - return this; - } - - // == Decoder stream ======================================================= + _flush(done) { + try { + const res = this.conv.end(); + if (res && res.length) this.push(res); + done(); + } + catch (e) { + done(e); + } + } - function IconvLiteDecoderStream(conv, options) { - this.conv = conv; - options = options || {}; - options.encoding = this.encoding = 'utf8'; // We output strings. - Transform.call(this, options); + collect(cb) { + let chunks = []; + this.on('error', cb); + this.on('data', (chunk) => chunks.push(chunk)); + this.on('end', () => cb(null, this.iconv.backend.concatByteResults(chunks))); + return this; + } } - IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, { - constructor: { value: IconvLiteDecoderStream } - }); - - IconvLiteDecoderStream.prototype._transform = function(chunk, encoding, done) { - if (!Buffer.isBuffer(chunk) && !(chunk instanceof Uint8Array)) - return done(new Error("Iconv decoding stream needs buffers as its input.")); - try { - var res = this.conv.write(chunk); - if (res && res.length) this.push(res, this.encoding); - done(); + class IconvLiteDecoderStream extends stream_module.Transform { + constructor(conv, options) { + options = options || {}; + options.encoding = 'utf8'; // We output strings. + super(options); + this.conv = conv; + this.encoding = options.encoding; } - catch (e) { - done(e); - } - } - IconvLiteDecoderStream.prototype._flush = function(done) { - try { - var res = this.conv.end(); - if (res && res.length) this.push(res, this.encoding); - done(); + _transform(chunk, encoding, done) { + if (!(chunk instanceof Uint8Array)) + return done(new Error("Iconv decoding stream needs Uint8Array-s or Buffers as its input.")); + try { + const res = this.conv.write(chunk); + if (res && res.length) this.push(res, this.encoding); + done(); + } + catch (e) { + done(e); + } } - catch (e) { - done(e); + + _flush(done) { + try { + const res = this.conv.end(); + if (res && res.length) this.push(res, this.encoding); + done(); + } + catch (e) { + done(e); + } } - } - IconvLiteDecoderStream.prototype.collect = function(cb) { - var res = ''; - this.on('error', cb); - this.on('data', function(chunk) { res += chunk; }); - this.on('end', function() { - cb(null, res); - }); - return this; + collect(cb) { + let res = ''; + this.on('error', cb); + this.on('data', (chunk) => { res += chunk; }); + this.on('end', () => cb(null, res)); + return this; + } } return { - IconvLiteEncoderStream: IconvLiteEncoderStream, - IconvLiteDecoderStream: IconvLiteDecoderStream, + IconvLiteEncoderStream, + IconvLiteDecoderStream, }; }; diff --git a/package.json b/package.json index 8f86f9c..90bb804 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "iconv-lite", "description": "Convert character encodings in pure javascript.", - "version": "0.6.2", + "version": "0.7.0-pre", "license": "MIT", "keywords": [ "iconv", @@ -10,7 +10,7 @@ "icu" ], "author": "Alexander Shtuchkin ", - "main": "./lib/index.js", + "main": "./lib/index-node.js", "typings": "./lib/index.d.ts", "homepage": "https://github.com/ashtuchkin/iconv-lite", "bugs": "https://github.com/ashtuchkin/iconv-lite/issues", @@ -19,13 +19,15 @@ "url": "git://github.com/ashtuchkin/iconv-lite.git" }, "engines": { - "node": ">=0.10.0" + "node": ">=4.5.0 <5.0.0 || >=5.10.0" }, "scripts": { "coverage": "c8 _mocha --grep .", - "test": "mocha --reporter spec --grep ." + "test": "FULL_TEST_SUITE=1 mocha", + "test-node-web": "ICONV_BACKEND=web mocha --grep '#node-web'" }, "browser": { + "./lib/index-node.js": "./lib/index-web.js", "stream": false }, "devDependencies": { diff --git a/test/dbcs-test.js b/test/dbcs-test.js index 3eadfb5..f6c7a4d 100644 --- a/test/dbcs-test.js +++ b/test/dbcs-test.js @@ -154,7 +154,8 @@ function strToHex(str) { return spacify4(swapBytes(Buffer.from(str, 'ucs2')).toS // Generate tests for all DBCS encodings. iconv.encode('', 'utf8'); // Load all encodings. -describe("Full DBCS encoding tests", function() { +describe("Full DBCS encoding tests #full", function() { + if (!process.env.FULL_TEST_SUITE) return; this.timeout(10000); // These tests are pretty slow. for (var enc in iconv.encodings) { diff --git a/test/main-test.js b/test/main-test.js index 060bd91..a69e719 100644 --- a/test/main-test.js +++ b/test/main-test.js @@ -39,9 +39,9 @@ describe("Generic UTF8-UCS2 tests", function() { }); it("Convert to string, not buffer (utf8 used)", function() { - var res = iconv.encode(Buffer.from(testStringLatin1, "utf8"), "utf8"); - assert.ok(Buffer.isBuffer(res)); - assert.strictEqual(res.toString("utf8"), testStringLatin1); + assert.throws(function() { + iconv.encode(Buffer.from(testStringLatin1, "utf8"), "utf8"); + }) }); it("Throws on unknown encodings", function() { @@ -50,9 +50,15 @@ describe("Generic UTF8-UCS2 tests", function() { }); it("Convert non-strings and non-buffers", function() { - assert.strictEqual(iconv.encode({}, "utf8").toString(), "[object Object]"); - assert.strictEqual(iconv.encode(10, "utf8").toString(), "10"); - assert.strictEqual(iconv.encode(undefined, "utf8").toString(), ""); + assert.throws(function() { + iconv.encode({}, "utf8"); + }); + assert.throws(function() { + iconv.encode(10, "utf8") + }); + assert.throws(function() { + iconv.encode(undefined, "utf8") + }); }); it("Aliases toEncoding and fromEncoding work the same as encode and decode", function() { diff --git a/test/mocha.opts b/test/mocha.opts index 2a3024f..912aa4a 100644 --- a/test/mocha.opts +++ b/test/mocha.opts @@ -1,2 +1,3 @@ --check-leaks ---grep ^(?!Full). \ No newline at end of file +--forbid-only +--no-exit \ No newline at end of file diff --git a/test/sbcs-test.js b/test/sbcs-test.js index 46f419b..4597a17 100644 --- a/test/sbcs-test.js +++ b/test/sbcs-test.js @@ -51,7 +51,8 @@ iconv.encode('', 'utf8'); // Load all encodings. var sbcsEncodingTests = {}; -describe("Full SBCS encoding tests", function() { +describe("Full SBCS encoding tests #full", function() { + if (!process.env.FULL_TEST_SUITE) return; this.timeout(10000); for (var enc in iconv.encodings) diff --git a/test/streams-test.js b/test/streams-test.js index d4054de..202781f 100644 --- a/test/streams-test.js +++ b/test/streams-test.js @@ -74,7 +74,7 @@ function checkStreamOutput(options) { if (options.checkError) { assert(err, "Expected error, but got success"); if (Object.prototype.toString.call(options.checkError) == '[object RegExp]') - assert(options.checkError.test(err.message)); + assert(options.checkError.test(err.message), "Wrong error message: " + err.message); else if (typeof options.checkError == 'function') options.checkError(err); else @@ -181,7 +181,7 @@ describe("Streaming mode", function() { encoding: "us-ascii", encodingOptions: {decodeStrings: false}, input: ["hello ", "world!"], - checkError: /Iconv decoding stream needs buffers as its input/, + checkError: /Iconv decoding stream needs Uint8Array-s or Buffers as its input/, })); it("Round-trip encoding and decoding", checkStreamOutput({ diff --git a/test/utf16-test.js b/test/utf16-test.js index cb74b99..7a18188 100644 --- a/test/utf16-test.js +++ b/test/utf16-test.js @@ -1,48 +1,68 @@ var assert = require('assert'), - Buffer = require('safer-buffer').Buffer, - iconv = require(__dirname+'/../'); + utils = require('./utils'), + iconv = utils.requireIconv(), + hex = utils.hex; var testStr = "1aя中文☃💩"; - utf16beBuf = Buffer.from([0, 0x31, 0, 0x61, 0x04, 0x4f, 0x4e, 0x2d, 0x65, 0x87, 0x26, 0x03, 0xd8, 0x3d, 0xdc, 0xa9]), - utf16leBuf = Buffer.from(testStr, 'ucs2'), - utf16beBOM = Buffer.from([0xFE, 0xFF]), - utf16leBOM = Buffer.from([0xFF, 0xFE]), + utf16beBuf = utils.bytesFrom([0, 0x31, 0, 0x61, 0x04, 0x4f, 0x4e, 0x2d, 0x65, 0x87, 0x26, 0x03, 0xd8, 0x3d, 0xdc, 0xa9]), + utf16leBuf = utils.bytesFrom([0x31, 0, 0x61, 0, 0x4f, 0x04, 0x2d, 0x4e, 0x87, 0x65, 0x03, 0x26, 0x3d, 0xd8, 0xa9, 0xdc]), + utf16beBOM = utils.bytesFrom([0xFE, 0xFF]), + utf16leBOM = utils.bytesFrom([0xFF, 0xFE]), sampleStr = '\n<俄语>данные'; -describe("UTF-16BE codec", function() { +describe("UTF-16LE codec #node-web", function() { + it("decodes very short buffers correctly", function() { + assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16le'), ''); + + // Looks like StringDecoder doesn't do the right thing here, returning '\u0000'. TODO: fix. + //assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16le'), ''); + }); +}); + +describe("UTF-16BE codec #node-web", function() { it("encodes basic strings correctly", function() { - assert.equal(iconv.encode(testStr, 'UTF16-BE').toString('hex'), utf16beBuf.toString('hex')); + assert.equal(hex(iconv.encode(testStr, 'utf16-be')), hex(utf16beBuf)); }); it("decodes basic buffers correctly", function() { - assert.equal(iconv.decode(utf16beBuf, 'UTF16-BE'), testStr); + assert.equal(iconv.decode(utf16beBuf, 'utf16-be'), testStr); }); it("decodes uneven length buffers with no error", function() { - assert.equal(iconv.decode(Buffer.from([0, 0x61, 0]), 'UTF16-BE'), "a"); + assert.equal(iconv.decode(utils.bytesFrom([0, 0x61, 0]), 'utf16-be'), "a"); + }); + + it("decodes very short buffers correctly", function() { + assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16be'), ''); + assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16be'), ''); }); }); -describe("UTF-16 encoder", function() { +describe("UTF-16 encoder #node-web", function() { it("uses UTF-16LE and adds BOM when encoding", function() { - assert.equal(iconv.encode(testStr, "utf-16").toString('hex'), utf16leBOM.toString('hex') + utf16leBuf.toString('hex')); + assert.equal(hex(iconv.encode(testStr, "utf-16")), hex(utf16leBOM) + hex(utf16leBuf)); + }); + + it("can skip BOM", function() { + assert.equal(hex(iconv.encode(testStr, "utf-16", {addBOM: false})), hex(utf16leBuf)); }); - it("can use other encodings, for example UTF-16LE, with BOM", function() { - assert.equal(iconv.encode(testStr, "utf-16", {use: 'UTF-16LE'}).toString('hex'), - utf16leBOM.toString('hex') + Buffer.from(testStr, 'ucs2').toString('hex')); + it("can use other encodings, for example UTF-16BE, with BOM", function() { + assert.equal(hex(iconv.encode(testStr, "utf-16", {use: 'UTF-16BE'})), hex(utf16beBOM) + hex(utf16beBuf)); }); }); -describe("UTF-16 decoder", function() { +describe("UTF-16 decoder #node-web", function() { it("uses BOM to determine encoding", function() { - assert.equal(iconv.decode(Buffer.concat([utf16leBOM, utf16leBuf]), "utf-16"), testStr); - assert.equal(iconv.decode(Buffer.concat([utf16beBOM, utf16beBuf]), "utf-16"), testStr); + assert.equal(iconv.decode(utils.concatBufs([utf16leBOM, utf16leBuf]), "utf-16"), testStr); + assert.equal(iconv.decode(utils.concatBufs([utf16beBOM, utf16beBuf]), "utf-16"), testStr); }); - it("handles very short buffers nice", function() { - assert.equal(iconv.decode(Buffer.from([]), 'utf-16'), ''); - assert.equal(iconv.decode(Buffer.from([0x61]), 'utf-16'), ''); + it("handles very short buffers", function() { + assert.equal(iconv.decode(utils.bytesFrom([]), 'utf-16'), ''); + + // Looks like StringDecoder doesn't do the right thing here. TODO: fix. + //assert.equal(iconv.decode(utils.bytesFrom([0x61]), 'utf-16'), ''); }); it("uses spaces when there is no BOM to determine encoding", function() { diff --git a/test/utils.js b/test/utils.js new file mode 100644 index 0000000..dc9e3c8 --- /dev/null +++ b/test/utils.js @@ -0,0 +1,38 @@ +const assert = require("assert"); + +const utils = module.exports = { + setIconvLite(iconv) { + utils.iconv = iconv; + utils.backend = iconv.backend; + utils.BytesType = utils.backend.bytesToResult(utils.backend.allocBytes(0), 0).constructor; + }, + + requireIconv() { + if (!utils.iconv) { + const iconv_path = '../'; // Don't ship this module in the browser environment. + const iconv = require(iconv_path); + if (process.env.ICONV_BACKEND) { + const backend_path = `../backends/${process.env.ICONV_BACKEND}`; + iconv.setBackend(require(backend_path)); + } + utils.setIconvLite(iconv); + } + return utils.iconv; + }, + + bytesFrom(arr) { + const bytes = utils.backend.allocBytes(arr.length); + bytes.set(arr); + return utils.backend.bytesToResult(bytes, bytes.length); + }, + + concatBufs(bufs) { + return utils.backend.concatByteResults(bufs); + }, + + hex(bytes, nonStrict) { + assert(nonStrict || (bytes instanceof utils.BytesType)); + return bytes.reduce((output, byte) => (output + ('0' + (byte & 0xFF).toString(16)).slice(-2)), ''); + }, +}; + diff --git a/test/webpack/basic-test.js b/test/webpack/basic-test.js index 3649fbc..36ebdbc 100644 --- a/test/webpack/basic-test.js +++ b/test/webpack/basic-test.js @@ -8,7 +8,7 @@ describe("iconv-lite", function() { // See https://github.com/ashtuchkin/iconv-lite/issues/204 for details. process.versions.node = "12.0.0"; - iconv = require(".").iconv; + iconv = require("iconv-lite"); }); it("does not support streams by default", function() { diff --git a/test/webpack/iconv-lite-tests.js b/test/webpack/iconv-lite-tests.js new file mode 100644 index 0000000..f3fb894 --- /dev/null +++ b/test/webpack/iconv-lite-tests.js @@ -0,0 +1,7 @@ + +const iconv = require("iconv-lite"); +iconv.setBackend(require("iconv-lite/backends/web")); +require("../utils").setIconvLite(iconv); + +// List of test files that are ready to be run in web environment. +require("../utf16-test"); diff --git a/test/webpack/index.js b/test/webpack/index.js deleted file mode 100644 index 99e9f15..0000000 --- a/test/webpack/index.js +++ /dev/null @@ -1,3 +0,0 @@ - -// Reexport iconv-lite for tests. -exports.iconv = require('iconv-lite'); \ No newline at end of file diff --git a/test/webpack/karma.conf.js b/test/webpack/karma.conf.js index 7bfab3b..f5e7624 100644 --- a/test/webpack/karma.conf.js +++ b/test/webpack/karma.conf.js @@ -16,13 +16,13 @@ module.exports = function(config) { // list of files / patterns to load in the browser files: [ - { pattern: '*test.js', watched: false }, + { pattern: '*{test,tests}.js', watched: false }, ], // preprocess matching files before serving them to the browser // available preprocessors: https://npmjs.org/browse/keyword/karma-preprocessor preprocessors: { - '*test.js': ['webpack'] + '*{test,tests}.js': ['webpack'] }, webpack: { diff --git a/test/webpack/package.json b/test/webpack/package.json index ef01f4f..4d4952f 100644 --- a/test/webpack/package.json +++ b/test/webpack/package.json @@ -7,7 +7,7 @@ "!note2": "This is needed because webpack4/watchpack1.7 crashes when trying to enumerate circular symlink.", "preinstall": "mv $(npm pack -pq ../../) iconv-lite.tgz", "postinstall": "rm iconv-lite.tgz", - "test": "karma start" + "test": "rm -r node_modules/iconv-lite package-lock.json && npm install --no-audit && karma start" }, "devDependencies": { "karma": "^5.0.9", diff --git a/test/webpack/stream-test.js b/test/webpack/stream-test.js index b6c70a9..befa1dd 100644 --- a/test/webpack/stream-test.js +++ b/test/webpack/stream-test.js @@ -1,7 +1,7 @@ var assert = require('assert').strict; describe("iconv-lite with streams", function() { - var iconv = require(".").iconv; + var iconv = require("iconv-lite"); it("supports streams when explicitly enabled", function() { iconv.enableStreamingAPI(require('stream'));