Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/core/config/Categories.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@
"Rison Decode",
"To Modhex",
"From Modhex",
"MIME Decoding"
"MIME Decoding",
"Escape Smart Characters"
]
},
{
Expand Down
152 changes: 152 additions & 0 deletions src/core/operations/EscapeSmartCharacters.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/**
* @author min23asdw
* @copyright Crown Copyright 2026
* @license Apache-2.0
*/

import Operation from "../Operation.mjs";

/**
 * Lookup table from "smart" (typographic) Unicode characters to their plain
 * ASCII replacements. Keys are single characters; values are the ASCII text
 * substituted for them (possibly more than one character long).
 *
 * The table is built on a null prototype so that membership checks with the
 * `in` operator can only ever match the entries listed here, never keys
 * inherited from Object.prototype. It is frozen because it is shared,
 * read-only module state.
 */
const ESCAPE_MAP = Object.freeze(Object.assign(Object.create(null), {
    // Quotation marks
    "\u2018": "'",      // ‘ LEFT SINGLE QUOTATION MARK
    "\u2019": "'",      // ’ RIGHT SINGLE QUOTATION MARK
    "\u201A": "'",      // ‚ SINGLE LOW-9 QUOTATION MARK
    "\u201B": "'",      // ‛ SINGLE HIGH-REVERSED-9 QUOTATION MARK
    "\u201C": "\"",     // “ LEFT DOUBLE QUOTATION MARK
    "\u201D": "\"",     // ” RIGHT DOUBLE QUOTATION MARK
    "\u201E": "\"",     // „ DOUBLE LOW-9 QUOTATION MARK
    "\u201F": "\"",     // ‟ DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    "\u2039": "<",      // ‹ SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    "\u203A": ">",      // › SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    "\u00AB": "<<",     // « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    "\u00BB": ">>",     // » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK

    // Primes
    "\u2032": "'",      // ′ PRIME
    "\u2033": "''",     // ″ DOUBLE PRIME
    "\u2034": "'''",    // ‴ TRIPLE PRIME
    "\u2035": "'",      // ‵ REVERSED PRIME
    "\u2036": "''",     // ‶ REVERSED DOUBLE PRIME
    "\u2037": "'''",    // ‷ REVERSED TRIPLE PRIME
    "\u2057": "''''",   // ⁗ QUADRUPLE PRIME

    // Dashes and hyphens
    "\u2010": "-",      // ‐ HYPHEN
    "\u2011": "-",      // ‑ NON-BREAKING HYPHEN
    "\u2012": "-",      // ‒ FIGURE DASH
    "\u2013": "-",      // – EN DASH
    "\u2014": "--",     // — EM DASH
    "\u2015": "--",     // ― HORIZONTAL BAR

    // Symbols
    "\u00A9": "(C)",    // © COPYRIGHT SIGN
    "\u00AE": "(R)",    // ® REGISTERED SIGN
    "\u2122": "(TM)",   // ™ TRADE MARK SIGN

    // Arrows
    "\u2190": "<--",    // ← LEFTWARDS ARROW
    "\u2192": "-->",    // → RIGHTWARDS ARROW
    "\u2194": "<->",    // ↔ LEFT RIGHT ARROW
    "\u21D0": "<==",    // ⇐ LEFTWARDS DOUBLE ARROW
    "\u21D2": "==>",    // ⇒ RIGHTWARDS DOUBLE ARROW
    "\u21D4": "<=>",    // ⇔ LEFT RIGHT DOUBLE ARROW

    // Dots, bullets, and ellipsis
    "\u2022": ".",      // • BULLET
    "\u2023": ">",      // ‣ TRIANGULAR BULLET
    "\u2024": ".",      // ․ ONE DOT LEADER
    "\u2025": "..",     // ‥ TWO DOT LEADER
    "\u2026": "...",    // … HORIZONTAL ELLIPSIS
    "\u2027": ".",      // ‧ HYPHENATION POINT

    // Misc punctuation
    "\u2016": "||",     // ‖ DOUBLE VERTICAL LINE
    "\u2017": "==",     // ‗ DOUBLE LOW LINE
    "\u2030": "%0",     // ‰ PER MILLE SIGN
    "\u2031": "%00",    // ‱ PER TEN THOUSAND SIGN
    "\u2038": "^",      // ‸ CARET
    "\u203C": "!!",     // ‼ DOUBLE EXCLAMATION MARK
    "\u203D": "?!",     // ‽ INTERROBANG
    "\u2043": "-",      // ⁃ HYPHEN BULLET
    "\u2044": "/",      // ⁄ FRACTION SLASH
    "\u2045": "[-",     // ⁅ LEFT SQUARE BRACKET WITH QUILL
    "\u2046": "-]",     // ⁆ RIGHT SQUARE BRACKET WITH QUILL
    "\u2047": "??",     // ⁇ DOUBLE QUESTION MARK
    "\u2048": "?!",     // ⁈ QUESTION EXCLAMATION MARK
    "\u2049": "!?",     // ⁉ EXCLAMATION QUESTION MARK
    "\u204E": "*",      // ⁎ LOW ASTERISK
    "\u204F": ";",      // ⁏ REVERSED SEMICOLON
    "\u2052": "%",      // ⁒ COMMERCIAL MINUS SIGN
    "\u2053": "~",      // ⁓ SWUNG DASH
    "\u2055": "*",      // ⁕ FLOWER PUNCTUATION MARK

    // Invisible operators (no visible glyph to show)
    "\u2062": "*",      // INVISIBLE TIMES
    "\u2064": "+",      // INVISIBLE PLUS

    // Spaces
    "\u00A0": " ",      // NO-BREAK SPACE
}));

/**
 * Operation that rewrites typographic ("smart") Unicode characters as plain
 * ASCII text, using ESCAPE_MAP as the substitution table.
 */
class EscapeSmartCharacters extends Operation {

    /**
     * Sets the operation's metadata: name, module, description, info URL,
     * input/output types and its single option argument.
     */
    constructor() {
        super();

        this.name = "Escape Smart Characters";
        this.module = "Default";
        this.description = "Converts smart characters (quotes, dashes, apostrophes, arrows, copyright signs, ellipses etc.) to their plain ASCII equivalents.<br><br>For characters with no obvious ASCII equivalent, the specified action will be applied.";
        this.infoURL = "https://wikipedia.org/wiki/Smart_quotes";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Unrecognised characters",
                type: "option",
                value: ["Include", "Remove", "Replace with '.'"]
            }
        ];
    }

    /**
     * Translates the input one code point at a time.
     *
     * Characters listed in ESCAPE_MAP are replaced by their mapped text.
     * Any other character above U+007F is handled according to the first
     * argument: dropped for "Remove", turned into "." for
     * "Replace with '.'", and kept as-is for any other value. Characters
     * at or below U+007F that are not in the map pass through unchanged.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {string}
     */
    run(input, args) {
        const [fallbackMode] = args;

        // Maps a single character (one code point) to its output text.
        const translate = symbol => {
            if (symbol in ESCAPE_MAP) {
                return ESCAPE_MAP[symbol];
            }
            if (!(symbol.codePointAt(0) > 0x7F)) {
                return symbol;
            }
            if (fallbackMode === "Remove") {
                return "";
            }
            if (fallbackMode === "Replace with '.'") {
                return ".";
            }
            return symbol;
        };

        // Spreading a string yields whole code points, so surrogate pairs
        // reach translate() as a single character.
        return [...input].map(translate).join("");
    }

}

export default EscapeSmartCharacters;
1 change: 1 addition & 0 deletions tests/operations/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ import "./tests/JSONtoYAML.mjs";
import "./tests/YARA.mjs";
import "./tests/ParseCSR.mjs";
import "./tests/XXTEA.mjs";
import "./tests/EscapeSmartCharacters.mjs";

const testStatus = {
allTestsPassing: true,
Expand Down
111 changes: 111 additions & 0 deletions tests/operations/tests/EscapeSmartCharacters.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/**
* Escape Smart Characters tests.
*
* @author min23asdw
* @copyright Crown Copyright 2026
* @license Apache-2.0
*/

import TestRegister from "../../lib/TestRegister.mjs";

// Test cases for the "Escape Smart Characters" operation. Each case runs a
// one-step recipe whose only argument is the "Unrecognised characters" option.
TestRegister.addTests([
    {
        name: "Escape Smart Characters: empty input",
        input: "",
        expectedOutput: "",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: ASCII passthrough",
        input: "Hello, World! 123",
        expectedOutput: "Hello, World! 123",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: smart quotes and dashes",
        input: "\u201C\u201D\u2014\u2018\u2019 \u2192\u00A9\u2026",
        expectedOutput: "\"\"--'' -->(C)...",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: guillemets and arrows",
        input: "\u00AB\u00BB \u2190\u2194\u21D2",
        expectedOutput: "<<>> <--<->==>",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: Remove unrecognised",
        input: "\u201CHello\u201D \u2603",
        expectedOutput: "\"Hello\" ",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Remove"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: Replace unrecognised with '.'",
        input: "\u201CHello\u201D \u2603",
        expectedOutput: "\"Hello\" .",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Replace with '.'"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: Include unrecognised",
        input: "\u201CHello\u201D \u2603",
        expectedOutput: "\"Hello\" \u2603",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: copyright, registered, trademark",
        input: "\u00A9 \u00AE \u2122",
        expectedOutput: "(C) (R) (TM)",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        name: "Escape Smart Characters: non-breaking space",
        input: "hello\u00A0world",
        expectedOutput: "hello world",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Include"],
            },
        ],
    },
    {
        // U+1F600 occupies two UTF-16 code units; it must count as ONE
        // unrecognised character, so exactly one "." is emitted.
        name: "Escape Smart Characters: Replace astral character with a single '.'",
        input: "a\u{1F600}b",
        expectedOutput: "a.b",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Replace with '.'"],
            },
        ],
    },
    {
        // Both halves of the surrogate pair must be dropped together.
        name: "Escape Smart Characters: Remove astral character",
        input: "a\u{1F600}b",
        expectedOutput: "ab",
        recipeConfig: [
            {
                op: "Escape Smart Characters",
                args: ["Remove"],
            },
        ],
    },
]);