tuist · Ryu0118 · Dec 25, 2025 · Dec 25, 2025 · Dec 25, 2025 · Dec 25, 2025
@@ -143,10 +143,11 @@ public struct Terminal: Terminaling {
     }
 
+    /// Reads the next character by decoding bytes obtained from `readRawCharacter()` as UTF-8.
+    ///
+    /// - Returns: The decoded character, or `nil` when `readRawCharacter()` yields no value or the bytes
+    ///   read do not form a valid UTF-8 sequence.
     public func readCharacter() -> Character? {
-        if let char = readRawCharacter() {
-            return Character(UnicodeScalar(UInt8(char)))
+        let reader = UTF8Reader {
+            guard let rawChar = readRawCharacter() else { return nil }
+            // Keep only the low 8 bits of the raw value so the decoder receives it as a single byte.
+            return UInt8(truncatingIfNeeded: rawChar)
         }
-        return nil
+        return reader.readCharacter()
     }
 
     /// Returns the size of the terminal if available.
@@ -269,3 +270,49 @@ public struct Terminal: Terminaling {
         }
     }
 }
+
+/// Decodes UTF-8 encoded bytes into characters, one encoded sequence per call.
+/// The bytes are pulled on demand from a closure supplied at creation.
+struct UTF8Reader {
+    private let nextByte: () -> UInt8?
+
+    /// Creates a reader that pulls its input from `readByte`.
+    /// - Parameter readByte: Invoked once for every byte needed; returns `nil` when no byte is available.
+    init(readByte: @escaping () -> UInt8?) {
+        nextByte = readByte
+    }
+
+    /// Reads and decodes the next UTF-8 sequence from the byte source.
+    ///
+    /// A byte that fails the continuation check has already been taken from the source when `nil` is returned.
+    /// - Returns: The decoded character, or `nil` if no byte is available or the bytes are not valid UTF-8.
+    func readCharacter() -> Character? {
+        guard let leadByte = nextByte(), let total = Self.sequenceLength(leadByte: leadByte) else {
+            return nil
+        }
+        // Gather the rest of the sequence; every byte after the first must be a continuation byte.
+        var sequence = [leadByte]
+        while sequence.count < total {
+            guard let trailing = nextByte(), Self.isContinuation(trailing) else { return nil }
+            sequence.append(trailing)
+        }
+        // String decoding is the final validity check: it yields `nil` for bytes it cannot decode.
+        return String(bytes: sequence, encoding: .utf8)?.first
+    }
+
+    /// Returns the total byte count of the sequence introduced by `leadByte`,
+    /// or `nil` when that byte cannot begin a sequence.
+    private static func sequenceLength(leadByte: UInt8) -> Int? {
+        if leadByte <= 0x7F { return 1 } // ASCII
+        if leadByte <= 0xC1 { return nil } // continuation bytes, plus 0xC0-0xC1 (overlong encodings)
+        if leadByte <= 0xDF { return 2 }
+        if leadByte <= 0xEF { return 3 }
+        if leadByte <= 0xF4 { return 4 }
+        return nil // 0xF5 and above exceed the Unicode range
+    }
+
+    /// Whether `byte` matches the continuation pattern 10xxxxxx (0x80-0xBF).
+    private static func isContinuation(_ byte: UInt8) -> Bool {
+        byte >> 6 == 0b10
+    }
+}
@@ -0,0 +1,70 @@
+import Testing
+
+@testable import Noora
+
+/// Tests for `UTF8Reader`, feeding it bytes from in-memory arrays.
+struct UTF8ReaderTests {
+    /// Decodes each table entry with a fresh reader and checks the first character read.
+    @Test(arguments: TestCase.allCases)
+    func decodesSingleCharacter(testCase: TestCase) {
+        var iter = testCase.bytes.makeIterator()
+        let reader = UTF8Reader { iter.next() }
+        #expect(reader.readCharacter() == testCase.expected)
+    }
+
+    /// Reads sequences of every length back to back from one reader, then expects `nil` at the end.
+    @Test
+    func readsConsecutiveCharactersWithoutByteLeakage() {
+        let bytes: [UInt8] = [
+            0x41, // A (1-byte)
+            0xC3, 0xA9, // é (2-byte)
+            0xE4, 0xB8, 0xAD, // 中 (3-byte)
+            0xF0, 0x9F, 0x98, 0x80, // 😀 (4-byte)
+        ]
+        var iter = bytes.makeIterator()
+        let reader = UTF8Reader { iter.next() }
+
+        #expect(reader.readCharacter() == "A")
+        #expect(reader.readCharacter() == "é")
+        #expect(reader.readCharacter() == "中")
+        #expect(reader.readCharacter() == "😀")
+        #expect(reader.readCharacter() == nil)
+    }
+
+    /// Pins the current behavior after a malformed sequence: the byte that fails the continuation
+    /// check is consumed together with the lead byte, so decoding resumes at the byte after it.
+    @Test
+    func resumesAfterRejectedContinuationByte() {
+        let bytes: [UInt8] = [
+            0xC3, 0x41, // 2-byte lead followed by a non-continuation byte
+            0x42, // B
+        ]
+        var iter = bytes.makeIterator()
+        let reader = UTF8Reader { iter.next() }
+
+        #expect(reader.readCharacter() == nil)
+        #expect(reader.readCharacter() == "B")
+        #expect(reader.readCharacter() == nil)
+    }
+
+    /// One row of the decoding table: the input bytes and the character expected from the first read.
+    struct TestCase: CustomTestStringConvertible, Sendable {
+        let bytes: [UInt8]
+        let expected: Character?
+        let testDescription: String
+
+        static let allCases: [TestCase] = [
+            // 1-byte sequences (ASCII)
+            TestCase(bytes: [0x41], expected: "A", testDescription: "ASCII letter"),
+            TestCase(bytes: [0x00], expected: "\0", testDescription: "null character"),
+            TestCase(bytes: [0x7F], expected: "\u{7F}", testDescription: "ASCII max (DEL)"),
+
+            // 2-byte sequences
+            TestCase(bytes: [0xC2, 0x80], expected: "\u{80}", testDescription: "Smallest 2-byte scalar (U+0080)"),
+            TestCase(bytes: [0xC3, 0xA9], expected: "é", testDescription: "Latin: French e-acute"),
+            TestCase(bytes: [0xD0, 0x90], expected: "А", testDescription: "Cyrillic: Russian A"),
+
+            // 3-byte sequences
+            TestCase(bytes: [0xE3, 0x81, 0x82], expected: "あ", testDescription: "Japanese hiragana"),
+            TestCase(bytes: [0xE4, 0xB8, 0xAD], expected: "中", testDescription: "Chinese hanzi"),
+            TestCase(bytes: [0xEA, 0xB0, 0x80], expected: "가", testDescription: "Korean hangul"),
+            TestCase(bytes: [0xE2, 0x82, 0xAC], expected: "€", testDescription: "Euro sign"),
+
+            // 4-byte sequences
+            TestCase(bytes: [0xF0, 0x9F, 0x98, 0x80], expected: "😀", testDescription: "Emoji"),
+            TestCase(bytes: [0xF0, 0x9F, 0x87, 0xAF], expected: "🇯", testDescription: "Regional indicator J"),
+            TestCase(bytes: [0xF4, 0x8F, 0xBF, 0xBF], expected: "\u{10FFFF}", testDescription: "Largest scalar (U+10FFFF)"),
+
+            // Invalid sequences
+            TestCase(bytes: [0x80], expected: nil, testDescription: "Invalid: lone continuation byte"),
+            TestCase(bytes: [0xFF], expected: nil, testDescription: "Invalid: 0xFF is never valid"),
+            TestCase(bytes: [0xC3], expected: nil, testDescription: "Invalid: incomplete 2-byte sequence"),
+            TestCase(bytes: [0xE3, 0x81], expected: nil, testDescription: "Invalid: incomplete 3-byte sequence"),
+            TestCase(bytes: [0xF0, 0x9F, 0x98], expected: nil, testDescription: "Invalid: incomplete 4-byte sequence"),
+            TestCase(bytes: [0xC0, 0x80], expected: nil, testDescription: "Invalid: overlong encoding"),
+            TestCase(bytes: [0xF5, 0x80, 0x80, 0x80], expected: nil, testDescription: "Invalid: exceeds Unicode range"),
+            TestCase(bytes: [0xC3, 0x00], expected: nil, testDescription: "Invalid: bad continuation byte"),
+
+            // Invalid sequences whose lead and continuation bytes look plausible on their own;
+            // these are only rejected when the collected bytes are decoded as a whole.
+            TestCase(bytes: [0xE0, 0x80, 0x80], expected: nil, testDescription: "Invalid: overlong 3-byte encoding"),
+            TestCase(bytes: [0xF0, 0x80, 0x80, 0x80], expected: nil, testDescription: "Invalid: overlong 4-byte encoding"),
+            TestCase(bytes: [0xED, 0xA0, 0x80], expected: nil, testDescription: "Invalid: encoded UTF-16 surrogate"),
+            TestCase(bytes: [0xF4, 0x90, 0x80, 0x80], expected: nil, testDescription: "Invalid: 0xF4 lead beyond U+10FFFF"),
+
+            // Empty input
+            TestCase(bytes: [], expected: nil, testDescription: "Empty input"),
+        ]
+    }
+}