nodejs
diff --git a/‎lib/internal/encoding.js‎
Lines changed: 45 additions & 3 deletions b/‎lib/internal/encoding.js‎
Lines changed: 45 additions & 3 deletions
diff --git a/‎lib/internal/encoding/single-byte.js‎
Lines changed: 0 additions & 155 deletions b/‎lib/internal/encoding/single-byte.js‎
Lines changed: 0 additions & 155 deletions
diff --git a/‎lib/internal/encoding/util.js‎
Lines changed: 55 additions & 0 deletions b/‎lib/internal/encoding/util.js‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎node.gyp‎
Lines changed: 2 additions & 0 deletions b/‎node.gyp‎
Lines changed: 2 additions & 0 deletions
@@ -4,11 +4,13 @@
 // https://encoding.spec.whatwg.org
 
 const {
+  ArrayPrototypeMap,
   Boolean,
   ObjectDefineProperties,
   ObjectGetOwnPropertyDescriptors,
   ObjectSetPrototypeOf,
   ObjectValues,
+  SafeArrayIterator,
   SafeMap,
   StringPrototypeSlice,
   Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
 const kUTF8FastPath = Symbol('kUTF8FastPath');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
-const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
-
 const {
   getConstructorOf,
   customInspectSymbol: inspect,
@@ -60,6 +60,7 @@ const {
   encodeIntoResults,
   encodeUtf8String,
   decodeUTF8,
+  decodeSingleByte,
 } = binding;
 
 function validateDecoder(obj) {
@@ -73,6 +74,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
 
 const empty = new FastBuffer();
 
+// Single-byte encoding label -> index in this list. Has to be synced with src/ (presumably encoding_singlebyte.cc — confirm)
+const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(ArrayPrototypeMap([
+  'ibm866',
+  'koi8-r',
+  'koi8-u',
+  'macintosh',
+  'x-mac-cyrillic',
+  'iso-8859-2',
+  'iso-8859-3',
+  'iso-8859-4',
+  'iso-8859-5',
+  'iso-8859-6',
+  'iso-8859-7',
+  'iso-8859-8',
+  'iso-8859-8-i',
+  'iso-8859-10',
+  'iso-8859-13',
+  'iso-8859-14',
+  'iso-8859-15',
+  'iso-8859-16',
+  'windows-874',
+  'windows-1250',
+  'windows-1251',
+  'windows-1252',
+  'windows-1253',
+  'windows-1254',
+  'windows-1255',
+  'windows-1256',
+  'windows-1257',
+  'windows-1258',
+  'x-user-defined', // Has to be last, special case
+], (e, i) => [e, i]))); // [label, index] pairs; the index is the key createSinglebyteDecoder hands to decodeSingleByte
+
+const isSinglebyteEncoding = (enc) => encodingsSinglebyte.has(enc); // exact-match lookup; no trimming or case folding here
+
+function createSinglebyteDecoder(encoding, fatal) { // returns a (buf) => result closure bound to `encoding` and `fatal`
+  const key = encodingsSinglebyte.get(encoding); // index of the label in encodingsSinglebyte, or undefined
+  if (key === undefined) throw new ERR_ENCODING_NOT_SUPPORTED(encoding); // label is not in the single-byte list
+  return (buf) => decodeSingleByte(buf, key, fatal); // the decoding itself is delegated to decodeSingleByte from `binding`
+}
+
 const encodings = new SafeMap([
   ['unicode-1-1-utf-8', 'utf-8'],
   ['unicode11utf8', 'utf-8'],
@@ -479,7 +521,7 @@ class TextDecoder {
     validateDecoder(this);
     validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
 
-    if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
+    if (this[kSingleByte]) return this[kSingleByte](input);
 
     const stream = options?.stream;
     if (this[kUTF8FastPath]) {
 
@@ -0,0 +1,55 @@
+// From https://npmjs.com/package/@exodus/bytes
+// Copyright Exodus Movement. Licensed under MIT License.
+
+'use strict';
+
+// Get the number of last bytes in a Uint8Array `u` ending at `len` (i.e. in `u[0, len)`) that don't
+// form a codepoint yet, but can be a part of a single codepoint on more data. Returns 0 when nothing is pending.
+function unfinishedBytesUtf8(u, len) {
+  // Result is 0-3
+  let p = 0 // trailing continuation bytes (0b10xxxxxx) counted so far
+  while (p < 2 && p < len && (u[len - p - 1] & 0xc0) === 0x80) p++ // go back 0-2 trailing bytes
+  if (p === len) return 0 // no space for lead
+  const l = u[len - p - 1] // candidate lead byte, right before the trailing bytes
+  if (l < 0xc2 || l > 0xf4) return 0 // not a lead
+  if (p === 0) return 1 // nothing to recheck, we have only lead, return it. 2-byte must return here
+  if (l < 0xe0 || (l < 0xf0 && p >= 2)) return 0 // 2-byte, or 3-byte or less and we already have 2 trailing
+  const lower = l === 0xf0 ? 0x90 : l === 0xe0 ? 0xa0 : 0x80 // min allowed first trailing byte (rejects overlong forms)
+  const upper = l === 0xf4 ? 0x8f : l === 0xed ? 0x9f : 0xbf // max allowed first trailing byte (rejects surrogates, > U+10FFFF)
+  const n = u[len - p] // first byte after the lead
+  return n >= lower && n <= upper ? p + 1 : 0 // lead plus p trailing bytes are pending, or the sequence can't become valid
+}
+
+// Merge the pending prefix `chunk` with the start of the new data `u` and return the new combined prefix.
+// For u.length < 3, fully consumes u and can return unfinished data (the sequence may still be incomplete),
+// otherwise returns a prefix with no unfinished bytes, built from `chunk` plus at most the first 3 bytes of u
+function mergePrefixUtf8(u, chunk) {
+  if (u.length === 0) return chunk // nothing to merge; the same `chunk` object is returned
+  if (u.length < 3) {
+    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
+    const a = new Uint8Array(u.length + chunk.length) // fresh buffer: chunk followed by all of u
+    a.set(chunk)
+    a.set(u, chunk.length)
+    return a
+  }
+
+  // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
+  const t = new Uint8Array(chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
+  t.set(chunk)
+  t.set(u.subarray(0, 3), chunk.length) // only the first 3 bytes of u are ever considered
+
+  // Stop at the first offset where unfinished bytes reaches 0 or fits into u
+  // If that doesn't happen (u too short), just concat chunk and u completely (above)
+  for (let i = 1; i <= 3; i++) {
+    const unfinished = unfinishedBytesUtf8(t, chunk.length + i) // 0-3
+    if (unfinished <= i) {
+      // Always reachable at 3, but we still need 'unfinished' value for it
+      const add = i - unfinished // 0-3, number of leading bytes of u to append to chunk
+      return add > 0 ? t.subarray(0, chunk.length + add) : chunk // a view into t, or chunk itself when no byte of u is taken
+    }
+  }
+
+  // Unreachable: at i === 3 the check `unfinished <= i` always holds because unfinished is 0-3
+}
+
+module.exports = { unfinishedBytesUtf8, mergePrefixUtf8 }
@@ -89,6 +89,7 @@
       'src/debug_utils.cc',
       'src/embedded_data.cc',
       'src/encoding_binding.cc',
+      'src/encoding_singlebyte.cc',
       'src/env.cc',
       'src/fs_event_wrap.cc',
       'src/handle_wrap.cc',
@@ -221,6 +222,7 @@
       'src/debug_utils-inl.h',
       'src/embedded_data.h',
       'src/encoding_binding.h',
+      'src/encoding_singlebyte.h',
       'src/env_properties.h',
       'src/env.h',
       'src/env-inl.h',