-
Notifications
You must be signed in to change notification settings - Fork 10
/
index.js
134 lines (108 loc) · 3.96 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"use strict";
const Iconv = require('iconv-lite');
const Jschardet = require('jschardet');
const Stream = require('stream');
const Transform = Stream.Transform;
/**
 * Transform stream that takes Buffers in, guesses their character encoding
 * with `jschardet`, and emits decoded strings produced by `iconv-lite`.
 *
 * Incoming chunks are accumulated until at least `consumeSize` bytes are
 * available (or the stream ends); the accumulated bytes are then used for
 * detection, and everything from that point on is decoded incrementally.
 */
class AutoDetectDecoderStream extends Transform {

    /**
     * @param {Object} [options]
     * @param {string} [options.defaultEncoding='utf8'] - Encoding to fall back to when detection
     *   yields nothing usable (any `iconv-lite` encoding name).
     * @param {number} [options.minConfidence] - Passed to `jschardet` as `minimumThreshold`.
     *   @see {@link https://github.com/aadsm/jschardet|chardet module}
     * @param {number} [options.consumeSize=128] - How many bytes to collect before detecting.
     * @param {boolean} [options.stripBOM=true] - Passed to `iconv-lite` as the `stripBOM` decoder option.
     * @constructor
     */
    constructor(options) {
        super({encoding: 'utf8'});

        options = options || {};

        this._defaultEncoding = options.defaultEncoding || 'utf8';
        this._minConfidence = options.minConfidence;
        this._consumeSize = options.consumeSize || 128;
        // Stays `false` until detection has run; afterwards holds the encoding name in use.
        this._detectedEncoding = false;
        this._iconvOptions = {
            // Only null/undefined select the default; an explicit `false` is honoured.
            stripBOM: options.stripBOM == null ? true : options.stripBOM
        };

        this.encoding = 'utf8'; // We output strings.
    }

    /**
     * Pushes a decoded string downstream, skipping empty/undefined decoder output.
     * @param {string?} text
     */
    _pushDecoded(text) {
        if (text && text.length > 0) {
            this.push(text, this.encoding);
        }
    }

    /**
     * Picks the encoding to decode with, based on the bytes collected so far.
     * Falls back to the default encoding when detection throws, returns nothing,
     * reports plain `ascii`, or names an encoding `iconv-lite` cannot decode.
     * @param {Buffer} buffer
     * @returns {string}
     */
    _detectEncoding(buffer) {
        let detected = null;
        try {
            detected = Jschardet.detect(buffer, {
                minimumThreshold: this._minConfidence
            }).encoding;
        } catch (e) {
            // Detection is best-effort: any failure simply selects the fallback below.
            detected = null;
        }

        if (!detected || detected === 'ascii' || !Iconv.encodingExists(detected)) {
            return this._defaultEncoding;
        }
        return detected;
    }

    /**
     * Accumulates input for detection and, once enough bytes are available
     * (or `chunk` is null, meaning end of input), creates the decoder and
     * decodes everything buffered so far.
     * @param {Buffer?} chunk - Next input chunk, or null to force detection now.
     */
    _consumeBufferForDetection(chunk) {
        const pending = this._detectionBuffer || Buffer.alloc(0);
        this._detectionBuffer = chunk ? Buffer.concat([pending, chunk]) : pending;

        // Keep collecting until we have the requested amount of data.
        if (chunk && this._detectionBuffer.length < this._consumeSize) {
            return;
        }

        const encoding = this._detectEncoding(this._detectionBuffer);
        // Create the decoder first so that a failure here never leaves the stream
        // marked as "detected" without a usable `conv`.
        this.conv = Iconv.getDecoder(encoding, this._iconvOptions);
        this._detectedEncoding = encoding;

        const buffered = this._detectionBuffer;
        delete this._detectionBuffer;
        this._pushDecoded(this.conv.write(buffered));
    }

    /**
     * Decodes a chunk, or buffers it while the encoding is still undetected.
     * @param {Buffer} chunk
     * @param {string} encoding - Unused; input must be Buffers.
     * @param {function(Error=)} done
     */
    // noinspection JSUnusedGlobalSymbols
    _transform(chunk, encoding, done) {
        if (!Buffer.isBuffer(chunk)) {
            return done(new Error("Iconv decoding stream needs buffers as its input."));
        }

        try {
            if (this._detectedEncoding) {
                this._pushDecoded(this.conv.write(chunk));
            } else {
                this._consumeBufferForDetection(chunk);
            }
        } catch (e) {
            return done(e);
        }

        // Called outside the try so an exception thrown by `done` cannot trigger a second call.
        done();
    }

    /**
     * Finishes decoding. If the input ended before detection ran, detection is
     * forced on whatever was buffered; in every case the decoder is then ended
     * so that any trailing output it still holds is emitted.
     * @param {function(Error=)} done
     */
    // noinspection JSUnusedGlobalSymbols
    _flush(done) {
        try {
            if (!this._detectedEncoding) {
                this._consumeBufferForDetection(null);
            }
            this._pushDecoded(this.conv.end());
        } catch (e) {
            return done(e);
        }

        done();
    }

    /**
     * Collects the whole decoded output into a single string.
     * @param {function(Error?, string=)} cb - Called with an error, or with `null` and the full text on 'end'.
     * @returns {AutoDetectDecoderStream} this stream, for chaining
     */
    collect(cb) {
        let res = '';
        this.on('error', cb)
            .on('data', function(chunk) { res += chunk; })
            .on('end', function() {
                cb(null, res);
            });
        return this;
    }
}
// Expose the decoder stream class as this module's only export.
module.exports = AutoDetectDecoderStream;