# Patch summary
#
# 1. src/core/operations/UnescapeUnicodeCharacters.mjs
#    run() now builds its regex with a prefix-dependent quantifier: when the
#    selected prefix argument (args[0]) is "U+", the hex group accepts 4 to 6
#    hex digits; for every other prefix it still accepts exactly 4. The regex
#    keeps the "ig" flags, so matching stays case-insensitive and global.
#
# 2. tests/operations/index.mjs
#    Registers the new test module by importing it next to the other
#    operation tests.
#
# 3. tests/operations/tests/UnescapeUnicodeCharacters.mjs (new file)
#    Adds seven test cases: 4-digit input for the "\u", "%u" and "U+"
#    prefixes, 5-digit and zero-padded 6-digit "U+" input, a mixed-length
#    "U+" input, and a passthrough case with nothing to unescape.
#
# NOTE(review): "{4,6}" is greedy, so a 4-digit "U+" escape that is directly
# followed by hex characters (e.g. "U+0041ab") is now read as one longer code
# point rather than "U+0041" followed by literal text. None of the added tests
# cover this; confirm the behavior change is intended.
#
# NOTE(review): six hex digits can express values above 10FFFF (e.g.
# "U+FFFFFF"). How such a value is converted is decided by code outside this
# hunk; verify it is handled sensibly.
#
diff --git a/src/core/operations/UnescapeUnicodeCharacters.mjs b/src/core/operations/UnescapeUnicodeCharacters.mjs
index 02d16662d3..f7759c7849 100644
--- a/src/core/operations/UnescapeUnicodeCharacters.mjs
+++ b/src/core/operations/UnescapeUnicodeCharacters.mjs
@@ -56,7 +56,8 @@ class UnescapeUnicodeCharacters extends Operation {
      */
     run(input, args) {
         const prefix = prefixToRegex[args[0]],
-            regex = new RegExp(prefix+"([a-f\\d]{4})", "ig");
+            quantifier = args[0] === "U+" ? "{4,6}" : "{4}",
+            regex = new RegExp(prefix+"([a-f\\d]"+quantifier+")", "ig");
         let output = "",
             m,
             i = 0;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index f030349d2a..66f8f26999 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -174,6 +174,7 @@ import "./tests/ToFromInsensitiveRegex.mjs";
 import "./tests/TranslateDateTimeFormat.mjs";
 import "./tests/Typex.mjs";
 import "./tests/UnescapeString.mjs";
+import "./tests/UnescapeUnicodeCharacters.mjs";
 import "./tests/Unicode.mjs";
 import "./tests/URLEncodeDecode.mjs";
 import "./tests/RSA.mjs";
diff --git a/tests/operations/tests/UnescapeUnicodeCharacters.mjs b/tests/operations/tests/UnescapeUnicodeCharacters.mjs
new file mode 100644
index 0000000000..99955e04b4
--- /dev/null
+++ b/tests/operations/tests/UnescapeUnicodeCharacters.mjs
@@ -0,0 +1,88 @@
+/**
+ * Unescape Unicode Characters operation tests.
+ *
+ * @author williballenthin
+ * @copyright Crown Copyright 2024
+ * @license Apache-2.0
+ */
+import TestRegister from "../../lib/TestRegister.mjs";
+
+TestRegister.addTests([
+    {
+        name: "Unescape Unicode Characters: \\u 4-digit BMP",
+        input: "\\u03c3\\u03bf\\u03c5",
+        expectedOutput: "σου",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["\\u"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: %u 4-digit BMP",
+        input: "%u03c3%u03bf%u03c5",
+        expectedOutput: "σου",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["%u"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: U+ 4-digit BMP",
+        input: "U+0041",
+        expectedOutput: "A",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["U+"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: U+ 5-digit astral plane emoji",
+        input: "U+1F600",
+        expectedOutput: "\u{1F600}",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["U+"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: U+ 6-digit zero-padded",
+        input: "U+000041",
+        expectedOutput: "A",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["U+"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: U+ mixed lengths",
+        input: "U+0041 U+1F600 U+000042",
+        expectedOutput: "A \u{1F600} B",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["U+"],
+            },
+        ],
+    },
+    {
+        name: "Unescape Unicode Characters: passthrough with no matches",
+        input: "hello world",
+        expectedOutput: "hello world",
+        recipeConfig: [
+            {
+                op: "Unescape Unicode Characters",
+                args: ["\\u"],
+            },
+        ],
+    },
+]);