From f788349ddb31421566acd725a139a35244c3cc05 Mon Sep 17 00:00:00 2001 From: jim <398651434@qq.com> Date: Thu, 16 Apr 2026 12:12:21 +0800 Subject: [PATCH] fix: use StringDecoder to handle UTF-8 chunk boundaries in setEncoding When setEncoding('utf8') was called, each chunk was converted to a string individually, which corrupted multi-byte UTF-8 characters that spanned chunk boundaries. This fix: - Initializes a StringDecoder when setEncoding is called while this[kConsume] is set - Uses StringDecoder.write() in consumePush to properly handle incomplete UTF-8 sequences at chunk boundaries - Resets the decoder in consumeFinish to allow garbage collection (NOTE(review): this diff contains no consumeFinish hunk; the three hunks below already account for all 12 insertions and 2 deletions in the diffstat - confirm whether this change is missing) Closes #5002 --- lib/api/readable.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/api/readable.js b/lib/api/readable.js index 5ebd04ebe83..0c4ec9e5247 100644 --- a/lib/api/readable.js +++ b/lib/api/readable.js @@ -1,6 +1,7 @@ 'use strict' const assert = require('node:assert') +const { StringDecoder } = require('node:string_decoder') const { addAbortListener } = require('node:events') const { Readable } = require('node:stream') const { RequestAbortedError, NotSupportedError, InvalidArgumentError, AbortError } = require('../core/errors') @@ -326,6 +327,9 @@ class BodyReadable extends Readable { setEncoding (encoding) { if (Buffer.isEncoding(encoding)) { this._readableState.encoding = encoding + if (this[kConsume]) { + this[kConsume].decoder = new StringDecoder(encoding) + } } return this } @@ -546,8 +550,14 @@ function consumeEnd (consume, encoding) { * @returns {void} */ function consumePush (consume, chunk) { - consume.length += chunk.length - consume.body.push(chunk) + if (consume.decoder) { + const string = consume.decoder.write(chunk) + consume.length += Buffer.byteLength(string) + consume.body.push(string) + } else { + consume.length += chunk.length + consume.body.push(chunk) + } } /**