svaarala · SheetJSDev · Sep 7, 2021 · Sep 8, 2021
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -69,6 +69,7 @@ and agreed to irrevocably license their contributions under the Duktape
 * Nancy Li (https://github.com/NancyLi1013)
 * William Parks (https://github.com/WilliamParks)
 * Sam Hellawell (https://github.com/samhellawell)
+* SheetJS (https://github.com/SheetJS)
 
 Other contributions
 ===================

diff --git a/src-input/duk_bi_buffer.c b/src-input/duk_bi_buffer.c
@@ -114,6 +114,42 @@ static duk_uint16_t duk__buffer_elemtype_copy_compatible[9] = {
 };
 #endif  /* !DUK_USE_PREFER_SIZE */
 
+#if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
+/* Buffer supported encodings */
+
+#define DUK_BUF_ENC_UNKNOWN              0
+#define DUK_BUF_ENC_UTF8                 1
+
+/* longest encoding string + 1 -- should be updated when longer strings are added */
+#define DUK_BUFFER_ENCODING_MAX_LEN      7
+
+#define DUK_BUFFER_ENCODING_COUNT        2
+DUK_LOCAL const char * const duk__buffer_encoding_names[DUK_BUFFER_ENCODING_COUNT] = {
+	"utf8",
+	"utf-8"
+};
+
+DUK_LOCAL const duk_int_t duk__buffer_encoding_type_from_name[DUK_BUFFER_ENCODING_COUNT] = {
+	DUK_BUF_ENC_UTF8,
+	DUK_BUF_ENC_UTF8
+};
+
+DUK_LOCAL duk_int_t duk__parse_string_encoding(const char *encoding) {
+	duk_uint8_t i;
+	char buf[DUK_BUFFER_ENCODING_MAX_LEN];
+	/* the valid nodejs buffer encodings only contain letters numbers and hyphens */
+	for (i = 0; i < DUK_BUFFER_ENCODING_MAX_LEN; ++i) {
+		if (encoding[i] == 0) { buf[i] = 0; break; }
+		buf[i] = (char) (encoding[i] | 0x20);
+	}
+	for (i = 0; i < DUK_BUFFER_ENCODING_COUNT; ++i) {
+		if(DUK_STRCMP((const char *)buf, duk__buffer_encoding_names[i]) == 0) return duk__buffer_encoding_type_from_name[i];
+	}
+	return DUK_BUF_ENC_UNKNOWN;
+}
+#undef DUK_BUFFER_ENCODING_COUNT
+#endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */
+
 DUK_LOCAL duk_hbufobj *duk__hbufobj_promote_this(duk_hthread *thr) {
 	duk_tval *tv_dst;
 	duk_hbufobj *res;
@@ -1183,11 +1219,14 @@ DUK_INTERNAL duk_ret_t duk_bi_uint8array_plainof(duk_hthread *thr) {
 
 #if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
 DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
+	const char *encoding;
+	duk_int_t encoding_type;
 	duk_hbufobj *h_this;
 	duk_int_t start_offset, end_offset;
 	duk_uint8_t *buf_slice;
 	duk_size_t slice_length;
 
+
 	h_this = duk__get_bufobj_this(thr);
 	if (h_this == NULL) {
 		/* XXX: happens e.g. when evaluating: String(Buffer.prototype). */
@@ -1196,7 +1235,11 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
 	}
 	DUK_HBUFOBJ_ASSERT_VALID(h_this);
 
-	/* Ignore encoding for now. */
+	encoding = duk_opt_string(thr, 0, "utf8");
+	encoding_type = duk__parse_string_encoding(encoding);
+	if(encoding_type == DUK_BUF_ENC_UNKNOWN) {
+			DUK_DCERROR_TYPE_INVALID_ARGS(thr);
+	}
 
 	duk__clamp_startend_nonegidx_noshift(thr,
 	                                     (duk_int_t) h_this->length,
@@ -1232,6 +1275,7 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
 	 */
 	duk_replace(thr, 0);
 	duk_set_top(thr, 1);
+	/* TODO: support other encodings.  currently only 'utf8' is supported. */
 	return duk_textdecoder_decode_utf8_nodejs(thr);
 }
 #endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */
@@ -2060,11 +2104,9 @@ DUK_INTERNAL duk_ret_t duk_bi_buffer_slice_shared(duk_hthread *thr) {
 DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_is_encoding(duk_hthread *thr) {
 	const char *encoding;
 
-	/* only accept lowercase 'utf8' now. */
-
 	encoding = duk_to_string(thr, 0);
 	DUK_ASSERT(duk_is_string(thr, 0));  /* guaranteed by duk_to_string() */
-	duk_push_boolean(thr, DUK_STRCMP(encoding, "utf8") == 0);
+	duk_push_boolean(thr, duk__parse_string_encoding(encoding) != DUK_BUF_ENC_UNKNOWN);
 	return 1;
 }
 #endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */

diff --git a/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js b/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js
@@ -15,16 +15,17 @@ isEncoding test
 empty: false
 undefined: false
 utf8: true
-utf-8: false
-UTF8: false
-UTF-8: false
-Utf8: false
-Utf-8: false
-uTf8: false
-uTf-8: false
+utf-8: true
+UTF8: true
+UTF-8: true
+Utf8: true
+Utf-8: true
+uTf8: true
+uTf-8: true
 ascii: false
 ASCII: false
 AsCiI: false
+binary: false
 dummy: false
 undefined: false
 null: false
@@ -41,14 +42,14 @@ function isEncodingTest() {
 
     [
         // Any capitalization (and dash / no dash) is accepted by Node.js.
-        // Duktape accepts 'utf8' only for now.
         'utf8', 'utf-8', 'UTF8', 'UTF-8',
         'Utf8', 'Utf-8', 'uTf8', 'uTf-8',
 
         // Duktape doesn't support 'ascii' etc.
         'ascii',
         'ASCII',
         'AsCiI',
+        'binary',
         'dummy',
 
         // Non-string values

diff --git a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
@@ -24,6 +24,11 @@ true
 "ABC"
 "ABC"
 "ABC"
+"ABC"
+"ABC"
+"ABC"
+"TypeError"
+"TypeError"
 "DEFG"
 "EFG"
 "E"
@@ -241,20 +246,31 @@ function nodejsBufferToStringTest() {
     // buf.toString([encoding], [start], [end])
 
     // Without arguments encoding defaults to UTF-8 and the entire
-    // buffer is converted to string.  At least undefined and null
+    // buffer is converted to string.  At least undefined
     // are accepted as "not defined" for encoding.
     b = new Buffer('ABC');
     safePrintString(b.toString());
     safePrintString(b.toString(undefined));
-    safePrintString(b.toString(null));
+
+    // supported encodings
+    safePrintString(b.toString("utf8"));
+    safePrintString(b.toString("utf-8"));
+
+    // encodings are case insensitive
+    safePrintString(b.toString("UtF8"));
+    safePrintString(b.toString("uTf-8"));
+
+    // invalid encodings should throw a TypeError
+    try { safePrintString(b.toString(null)); } catch(e) { safePrintString(e.name); }
+    try { safePrintString(b.toString("wtf")); } catch(e) { safePrintString(e.name); }
 
     // If the buffer is a slice of an underlying buffer, only that slice
     // is string converted.  Offsets are relative to the slice.
     b = new Buffer('ABCDEFGH');
     b = b.slice(3, 7);  // DEFG
     safePrintString(b.toString());
-    safePrintString(b.toString(null, 1));
-    safePrintString(b.toString(null, 1, 2));
+    safePrintString(b.toString(undefined, 1));
+    safePrintString(b.toString(undefined, 1, 2));
 
     // When the buffer data is legal UTF-8 and the chosen encoding
     // is UTF-8 (default), Duktape internal representation is correct