diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean index f811d99cbaace4..331010eb8cb11b 100644 --- a/Mathlib/Tactic/Linter/TextBased.lean +++ b/Mathlib/Tactic/Linter/TextBased.lean @@ -32,8 +32,8 @@ Currently, this file contains linters checking - for module names to be in upper camel case, - for module names to be valid Windows filenames, and containing no forbidden characters such as `!`, `.` or spaces. -- for any code containing blocklisted unicode characters -- bad unicode characters +- for any code containing unicode characters not on the allowlist +- for incorrect usage of unicode variant selectors For historic reasons, some further such checks are written in a Python script `lint-style.py`: these are gradually being rewritten in Lean. @@ -394,7 +394,10 @@ def findBadUnicode (s : String) : Array StyleError := end UnicodeLinter -/-- Lint a collection of input strings for disallowed unicode characters. -/ +/-- Lint a collection of input strings for disallowed unicode characters. + +This is implemented using an allowlist, see +`Mathlib.Linter.TextBased.UnicodeLinter.isAllowedCharacter`. -/ public register_option linter.unicodeLinter : Bool := { defValue := true } @[inherit_doc linter.unicodeLinter] diff --git a/Mathlib/Tactic/Linter/TextBased/UnicodeLinter.lean b/Mathlib/Tactic/Linter/TextBased/UnicodeLinter.lean index 58e517e6354e43..b4b7a8b9e8c9e3 100644 --- a/Mathlib/Tactic/Linter/TextBased/UnicodeLinter.lean +++ b/Mathlib/Tactic/Linter/TextBased/UnicodeLinter.lean @@ -14,7 +14,9 @@ import Mathlib.Init The actual linter is defined in `TextBased.lean`. -This file defines the blocklist and other tools used by the linter. +This file defines the allowlist and other tools used by the linter. + +**When changing, make sure to stay in sync with [style guide](https://github.com/leanprover-community/leanprover-community.github.io/blob/lean4/templates/contribute/style.md#unicode-usage)** -/ @@ -184,20 +186,26 @@ public def emojis : Array Char := #[ /-- Unicode symbols in mathlib that should always be followed by the text variant selector. -/ public def nonEmojis : Array Char := #[] -/-- Blocklist: If `false`, the character is not allowed in Mathlib. +/-- If `false`, the character is not allowed in Mathlib. + +Implemented using an allowlist consisting of: +- certain ASCII characters +- characters with abbreviations in the VSCode extension (`withVSCodeAbbrev`) +- "the rest" (`othersInMathlib`) + Note: if `true`, a character might still not be allowed depending on context (e.g. misplaced variant selectors). -/ public def isAllowedCharacter (c : Char) : Bool := ASCII.allowed c || withVSCodeAbbrev.contains c + || othersInMathlib.contains c || emojis.contains c || nonEmojis.contains c || c == UnicodeVariant.emoji || c == UnicodeVariant.text - || othersInMathlib.contains c -/-- Provide default replacement (`String`) for a blocklisted character, or `none` if none defined -/ +/-- Provide default replacement (`String`) for a disallowed character, or `none` if none defined -/ public def replaceDisallowed : Char → Option String | '\u0009' => " " -- "TAB" => "2 spaces" | '\u000B' => "\n" -- "LINE TABULATION" => "Line Feed"