mathlib4/Mathlib/Tactic/Linter/TextBased.lean at d588fe6bbeb6fb1ffc5f3cafb8ee060187c8e200 · adomasbaliuka/mathlib4 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
/-
Copyright (c) 2024 Michael Rothgang. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Michael Rothgang, Jon Eugster, Adomas Baliuka
-/
module

public meta import Batteries.Data.String.Matcher
public meta import Lake.Util.Casing
public import Batteries.Data.String.Basic
public import Mathlib.Data.Nat.Notation
public meta import Mathlib.Tactic.Linter.TextBased.UnicodeLinter
public import Mathlib.Tactic.Linter.TextBased.UnicodeLinter

-- Don't warn about the lake import: the above file has almost no imports, and this PR has been
-- benchmarked.
set_option linter.style.header false

meta section


/-!
## Text-based linters

This file defines various mathlib linters which are based on reading the source code only.
In practice, all such linters check for code style issues.

Currently, this file contains linters checking
- if the string "adaptation note" is used instead of the command `#adaptation_note`,
- for lines with windows line endings,
- for lines containing trailing whitespace,
- for module names to be in upper camel case,
- for module names to be valid Windows filenames, and containing no forbidden characters such as
  `!`, `.` or spaces.
- for any code containing unicode characters not on the allowlist
- for incorrect usage of unicode variant selectors

For historic reasons, some further such checks are written in a Python script `lint-style.py`:
these are gradually being rewritten in Lean.

This linter has a file for style exceptions (to avoid false positives in the implementation),
or for downstream projects to allow a gradual adoption of this linter.

An executable running all these linters is defined in `scripts/lint-style.lean`.
-/

open Lean.Linter System

namespace Mathlib.Linter.TextBased

/-- Possible errors that text-based linters can report. -/
-- We collect these in one inductive type to centralise error reporting.
inductive StyleError where
  /-- The bare string "Adaptation note" (or variants thereof):
  instead, the `#adaptation_note` command should be used. -/
  | adaptationNote
  /-- A line ends with windows line endings (\r\n) instead of unix ones (\n). -/
  | windowsLineEnding
  /-- A line contains trailing whitespace. -/
  | trailingWhitespace
  /-- A line contains a space before a semicolon -/
  | semicolon
  /-- A unicode character was used that isn't allowed -/
  | unwantedUnicode (c : Char)
  /-- Unicode variant selectors are used in a bad way.
  * `s` is the string containing the unicode character and any unicode variant selector following it
  * `selector` is the desired selector or `none`
  -/
  | unicodeVariant (s : String) (selector: Option Char)
deriving BEq, Inhabited

/-- How to format style errors -/
public inductive ErrorFormat
  /-- Produce style error output aimed at humans: no error code, clickable file name -/
  | humanReadable : ErrorFormat
  /-- Produce an entry in the style-exceptions file: mention the error code, slightly uglier
  than human-readable output -/
  | exceptionsFile : ErrorFormat
  /-- Produce output suitable for Github error annotations: in particular,
  duplicate the file path, line number and error code -/
  | github : ErrorFormat
  deriving BEq

open UnicodeLinter in
/--
Create the underlying error message for a given `StyleError`.

Note: changes to the texts here must be accounted for in `parse?_errorContext`!
-/
def StyleError.errorMessage (err : StyleError) : String := match err with
  | StyleError.adaptationNote =>
    "Found the string \"Adaptation note:\", please use the #adaptation_note command instead"
  | windowsLineEnding => "This file contains windows line endings (\\r\\n): please use Unix line\
    endings (\\n) instead"
  | trailingWhitespace => "This line ends with some whitespace: please remove this"
  | semicolon => "This line contains a space before a semicolon"
  | StyleError.unwantedUnicode c => s!"This line contains a unicode character that is not on the \
    allowlist '{c}' ({c.printCodepointHex}). \
    For adding new symbols see `Mathlib.Linter.TextBased.UnicodeLinter.othersInMathlib`."
  | StyleError.unicodeVariant s selector =>
    let variantText := if selector == UnicodeVariant.emoji then
      "emoji"
    else if selector == UnicodeVariant.text then
      "text"
    else
      "default"
    let oldHex := s.printCodepointHex
    match s.toList, selector with
    | c₀ :: [], some sel =>
      let newC : String := String.ofList [c₀, sel]
      let newHex := s.printCodepointHex
      s!"Missing unicode variant selector: \"{s}\" ({oldHex}). \
        Please use the {variantText} variant: \"{newC}\" ({newHex})!"
    | c₀ :: _ :: [], some sel =>
      -- by assumption, the second character is a variant selector
      let newC : String := String.ofList [c₀, sel]
      let newHex := s.printCodepointHex
      s!"Wrong unicode variant selector: \"{s}\" ({oldHex}). \
        Please use the {variantText} variant: \"{newC}\" ({newHex})!"
    | _, _ =>
      s!"Unexpected unicode variant selector: \"{s}\" ({oldHex}). \
        Consider deleting it."

/-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
-- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
-- in principle, we could also print something more readable.
def StyleError.errorCode (err : StyleError) : String := match err with
  | StyleError.adaptationNote => "ERR_ADN"
  | StyleError.windowsLineEnding => "ERR_WIN"
  | StyleError.trailingWhitespace => "ERR_TWS"
  | StyleError.semicolon => "ERR_SEM"
  | StyleError.unwantedUnicode _ => "ERR_UNICODE"
  | StyleError.unicodeVariant _ _ => "ERR_UNICODE_VARIANT"


/-- Context for a style error: the actual error, the line number in the file we're reading
and the path to the file. -/
structure ErrorContext where
  /-- The underlying `StyleError` -/
  error : StyleError
  /-- The line number of the error (1-based) -/
  lineNumber : ℕ
  /-- The path to the file which was linted -/
  path : FilePath
deriving BEq

/-- Possible results of comparing an `ErrorContext` to an `existing` entry:
most often, they are different --- if the existing entry covers the new exception,
depending on the error, we prefer the new or the existing entry. -/
inductive ComparisonResult
  /-- The contexts describe different errors: two separate style exceptions are required
  to cover both. -/
  | Different
  /-- The existing exception also covers the new error:
  we keep the existing exception. -/
  | Comparable
  deriving BEq

/-- Determine whether a `new` `ErrorContext` is covered by an `existing` exception,
and, if it is, if we prefer replacing the new exception or keeping the previous one. -/
def compare (existing new : ErrorContext) : ComparisonResult :=
  -- Two comparable error contexts must have the same path.
  -- To avoid issues with different path separators across different operating systems,
  -- we compare the set of path components instead.
  if existing.path.components != new.path.components then ComparisonResult.Different
  -- We entirely ignore their line numbers: not sure if this is best.

  -- NB: keep the following in sync with `parse?_errorContext` below.
  -- Generally, comparable errors must have equal `StyleError`s.
  else
    if existing.error == new.error then ComparisonResult.Comparable else ComparisonResult.Different

/-- Find the first style exception in `exceptions` (if any) which covers a style exception `e`. -/
def ErrorContext.find?_comparable (e : ErrorContext) (exceptions : Array ErrorContext) :
    Option ErrorContext :=
  exceptions.find? (fun new ↦ compare e new == ComparisonResult.Comparable)

/-- Output the formatted error message, containing its context.
`style` specifies if the error should be formatted for humans to read, github problem matchers
to consume, or for the style exceptions file. -/
def outputMessage (errctx : ErrorContext) (style : ErrorFormat) : String :=
  let errorMessage := errctx.error.errorMessage
  match style with
  | ErrorFormat.github =>
    -- We are outputting for github: duplicate file path, line number and error code,
    -- so that they are also visible in the plain text output.
    let path := errctx.path
    let nr := errctx.lineNumber
    let code := errctx.error.errorCode
    s!"::ERR file={path},line={nr},code={code}::{path}:{nr} {code}: {errorMessage}"
  | ErrorFormat.exceptionsFile =>
    -- Produce an entry in the exceptions file: with error code and "line" in front of the number.
    s!"{errctx.path} : line {errctx.lineNumber} : {errctx.error.errorCode} : {errorMessage}"
  | ErrorFormat.humanReadable =>
    -- Print for humans: clickable file name and omit the error code
    s!"error: {errctx.path}:{errctx.lineNumber}: {errorMessage}"

/-- Removes quotation marks '"' at front and back of string. -/
def removeQuotations (s : String) : String :=
  ((s.dropPrefix "\"").toString.dropSuffix "\"").toString

/-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise.
This should be the inverse of `fun ctx ↦ outputMessage ctx .exceptionsFile`
Used for, e.g., parsing the "exceptions" file.

Need to ensure (see unit tests in `MathlibTest/LintStyle.lean`) that
  `∀ (ec : ErrorContext), (parse?_errorContext <| outputMessage ec .exceptionsFile) = some ec`
-/
def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
  let parts := line.splitToList (· == ' ')
  match parts with
    | filename :: ":" :: "line" :: lineNumber :: ":" :: errorCode :: ":" :: errorMessage =>
      -- Turn the filename into a path. In general, this is ambiguous if we don't know if we're
      -- dealing with e.g. Windows or POSIX paths. In our setting, this is fine, since no path
      -- component contains any path separator.
      let path := mkFilePath (filename.splitToList (FilePath.pathSeparators.contains ·))
      -- Parse the error kind from the error code, ugh.
      -- NB: keep this in sync with `StyleError.errorCode` above!
      let err : Option StyleError := match errorCode with
        -- Use default values for parameters which are ignored for comparing style exceptions.
        -- NB: keep this in sync with `compare` above!
        | "ERR_ADN" => some (StyleError.adaptationNote)
        | "ERR_SEM" => some (StyleError.semicolon)
        | "ERR_TWS" => some (StyleError.trailingWhitespace)
        | "ERR_WIN" => some (StyleError.windowsLineEnding)
        | "ERR_UNICODE" => do
          -- extract the offending unicode character from `errorMessage`
          -- (if the offending character is 'C', `errorMessage[7] == "'C'"` )
          -- and wrap it in the appropriate `StyleError`, which will print it as '+NNNN'
          let str ← errorMessage[12]?
          let c ← String.Pos.Raw.get? str ⟨1⟩ -- take middle character of expected three
          StyleError.unwantedUnicode c
        | "ERR_UNICODE_VARIANT" => do
          match (← errorMessage[0]?).toLower with
          | "wrong" | "missing" =>
            let offending := removeQuotations (← errorMessage[4]?)
            let selector := match ← errorMessage[9]? with
            | "emoji" => UnicodeLinter.UnicodeVariant.emoji
            | "text" => UnicodeLinter.UnicodeVariant.text
            | _ => none
            StyleError.unicodeVariant offending selector
          | "unexpected" =>
            let offending := removeQuotations (← errorMessage[4]?)
            StyleError.unicodeVariant offending none
          | _ => none
        | _ => none
      match String.toNat? lineNumber with
      | some n => err.map fun e ↦ (ErrorContext.mk e n path)
      | _ => none
    -- It would be nice to print an error on any line which doesn't match the above format,
    -- but is awkward to do so (this `def` is not in any IO monad). Hopefully, this is not necessary
    -- anyway as the style exceptions file is mostly automatically generated.
    | _ => none

/-- Parse all style exceptions for a line of input.
Return an array of all exceptions which could be parsed: invalid input is ignored. -/
def parseStyleExceptions (lines : Array String) : Array ErrorContext := Id.run do
  -- We treat all lines starting with "--" as a comment and ignore them.
  Array.filterMap (parse?_errorContext ·) (lines.filter (fun line ↦ !line.startsWith "--"))

/-- Print information about all errors encountered to standard output.
`style` specifies if the error should be formatted for humans to read, github problem matchers
to consume, or for the style exceptions file. -/
def formatErrors (errors : Array ErrorContext) (style : ErrorFormat) : IO Unit := do
  for e in errors do
    IO.println (outputMessage e style)

/-- Core logic of a text based linter: given a collection of lines,
return an array of all style errors with (1-based!) line numbers. If possible,
also return the collection of all lines, changed as needed to fix the linter errors.
(Such automatic fixes are only possible for some kinds of `StyleError`s.)
-/
abbrev TextbasedLinter := LinterOptions → Array String →
  Array (StyleError × ℕ) × (Option (Array String))

/-! Definitions of the actual text-based linters. -/
section

/-- Lint on any occurrences of the string "Adaptation note:" or variants thereof. -/
public register_option linter.adaptationNote : Bool := { defValue := true }

@[inherit_doc linter.adaptationNote]
def adaptationNoteLinter : TextbasedLinter := fun opts lines ↦ Id.run do
  unless getLinterValue linter.adaptationNote opts do return (#[], none)

  let mut errors := Array.mkEmpty 0
  for h : idx in [:lines.size] do
    let line := lines[idx]
    -- Flag lines that look like a hand-written adaptation note comment
    -- (e.g. "-- Adaptation note:" or "-- adaptation note:"), but not lines that
    -- merely reference the concept (e.g. "-- see adaptation note") or that
    -- use the correct #adaptation_note command.
    if line.containsSubstr "daptation note" &&
        !line.containsSubstr "#adaptation_note" &&
        !line.containsSubstr "see adaptation note" then
      errors := errors.push (StyleError.adaptationNote, idx + 1)
  return (errors, none)

/-- Lint a collection of input strings if one of them contains trailing whitespace. -/
public register_option linter.trailingWhitespace : Bool := { defValue := true }

@[inherit_doc linter.trailingWhitespace]
def trailingWhitespaceLinter : TextbasedLinter := fun opts lines ↦ Id.run do
  unless getLinterValue linter.trailingWhitespace opts do return (#[], none)

  let mut errors := Array.mkEmpty 0
  let mut fixedLines : Vector String lines.size := lines.toVector
  for h : idx in [:lines.size] do
    let line := lines[idx]
    if line.back == ' ' then
      errors := errors.push (StyleError.trailingWhitespace, idx + 1)
      fixedLines := fixedLines.set idx line.trimAsciiEnd.copy
  return (errors, if errors.size > 0 then some fixedLines.toArray else none)

/-- Lint a collection of input strings for a semicolon preceded by a space. -/
public register_option linter.whitespaceBeforeSemicolon : Bool := { defValue := true }

@[inherit_doc linter.whitespaceBeforeSemicolon]
def semicolonLinter : TextbasedLinter := fun opts lines ↦ Id.run do
  unless getLinterValue linter.whitespaceBeforeSemicolon opts do return (#[], none)

  let mut errors := Array.mkEmpty 0
  let mut fixedLines := lines
  for h : idx in [:lines.size] do
    let line := lines[idx]
    let pos := line.find (· == ';')
    -- Future: also lint for a semicolon *not* followed by a space or ⟩.
    if pos != line.endPos && pos.prev!.get! == ' ' then
      errors := errors.push (StyleError.semicolon, idx + 1)
      -- We spell the bad string pattern this way to avoid the linter firing on itself.
      fixedLines := fixedLines.set! idx (line.replace (String.ofList [' ', ';']) ";")
  return (errors, if errors.size > 0 then some fixedLines else none)

/-- Whether a collection of lines consists *only* of imports, blank lines and single-line comments.
In practice, this means it's an imports-only file and exempt from almost all linting. -/
def isImportsOnlyFile (lines : Array String) : Bool :=
  -- The Python version also excluded multi-line comments: for all files generated by `mk_all`,
  -- this is in fact not necessary. (It is needed for `Mathlib/Tactic/Linter.lean`, though.)
  lines.all (fun line ↦ line.startsWith "import " || line == "" || line.startsWith "-- ")

end

namespace UnicodeLinter

/-- Creates `StyleError`s for bad usage of unicode characters. -/
def findBadUnicodeAux (s : String) (pos : s.Pos) (c : Char)
    (err : Array StyleError := #[]) : Array StyleError :=
  if h : pos < s.endPos then
    let posₙ := pos.next (show pos ≠ s.endPos from String.Pos.ne_of_lt h)
    match posₙ.get? with
    | none =>
      -- `c` is the last character of the string
      if ! isAllowedCharacter c then
        -- bad: character not allowed. Add StyleError.
        (err.push (.unwantedUnicode c))
      else
        err
    | some cₙ =>
      have : posₙ.remainingBytes < pos.remainingBytes :=
          (pos.lt_iff_remainingBytes_lt posₙ).mp pos.lt_next
      if ! isAllowedCharacter c then
        -- bad: character not allowed.
        findBadUnicodeAux s posₙ cₙ (err.push (.unwantedUnicode c))
      else if cₙ == UnicodeVariant.emoji && !(emojis.contains c) then
        -- bad: unwanted emoji variant selector.
        let errₙ := err.push (.unicodeVariant (String.ofList [c, cₙ]) none)
        findBadUnicodeAux s posₙ cₙ errₙ
      else if cₙ == UnicodeVariant.text && !(nonEmojis.contains c) then
        -- bad: unwanted text variant selector.
        let errₙ := err.push (.unicodeVariant (String.ofList [c, cₙ]) none)
        findBadUnicodeAux s posₙ cₙ errₙ
      else if cₙ != UnicodeVariant.emoji && emojis.contains c then
        -- bad: missing emoji variant selector.
        let errₙ := err.push (.unicodeVariant c.toString UnicodeVariant.emoji)
        findBadUnicodeAux s posₙ cₙ errₙ
      else if cₙ != UnicodeVariant.text && nonEmojis.contains c then
        -- bad: missing text variant selector.
        let errₙ := err.push (.unicodeVariant c.toString UnicodeVariant.text)
        findBadUnicodeAux s posₙ cₙ errₙ
      else
        -- okay. Continue recursion.
        findBadUnicodeAux s posₙ cₙ err
  else
    err
termination_by pos.remainingBytes

/-- Creates `StyleError`s for bad usage of unicode characters. -/
@[inline]
def findBadUnicode (s : String) : Array StyleError :=
  match s.startPos.get? with
  | none => #[]
  | some c =>
    findBadUnicodeAux s s.startPos c

end UnicodeLinter

/-- Lint a collection of input strings for disallowed unicode characters.

This is implemented using an allowlist, see
`Mathlib.Linter.TextBased.UnicodeLinter.isAllowedCharacter`. -/
public register_option linter.unicodeLinter : Bool := { defValue := true }

@[inherit_doc linter.unicodeLinter]
def unicodeLinter : TextbasedLinter := fun opts lines ↦ Id.run do
  unless getLinterValue linter.unicodeLinter opts do return (#[], none)

  let mut changed : Array String := #[]
  let mut errors : Array (StyleError × ℕ) := Array.mkEmpty 0
  let mut lineNumber := 1 -- one-based line numbers!
  for line in lines do
    let err := UnicodeLinter.findBadUnicode line

    -- try to auto-fix the style error
    let mut newLine := line
    for e in err.reverse do -- reversing is a cheap fix to prevent shifting indices
      match e with
      | .unwantedUnicode c =>
        if let some replacement := UnicodeLinter.replaceDisallowed c then
            newLine := newLine.replace c replacement
        else
            pure ()
      | .unicodeVariant s sel =>
        let replacement := match sel, s.startPos.get? with
        | none, some c => c.toString
        | some v, some c => String.ofList [c, v]
        | _, none => unreachable!
        newLine := newLine.replace s replacement
      | _ => unreachable!

    changed := changed.push newLine
    errors := errors.append (err.map (fun e => (e, lineNumber)))
    lineNumber := lineNumber + 1
  return (errors, if (changed == lines) then none else some changed)

/-- All text-based linters registered in this file. -/
def allLinters : Array TextbasedLinter := #[
    adaptationNoteLinter,
    semicolonLinter,
    trailingWhitespaceLinter,
    unicodeLinter,
  ]

/-- Read a file and apply all text-based linters.
Return a list of all unexpected errors, and, if some errors could be fixed automatically,
the collection of all lines with every automatic fix applied.
`exceptions` are any pre-existing style exceptions for this file. -/
def lintFile (opts : LinterOptions) (path : FilePath) (exceptions : Array ErrorContext) :
    IO (Array ErrorContext × Option (Array String)) := do
  let mut errors := #[]
  -- Whether any changes were made by auto-fixes.
  let mut changes_made := false
  -- Check for windows line endings first: as `FS.lines` treats Unix and Windows lines the same,
  -- we need to analyse the actual file contents.
  let contents ← IO.FS.readFile path
  let replaced := contents.crlfToLf
  if replaced != contents then
    changes_made := true
    errors := errors.push (ErrorContext.mk StyleError.windowsLineEnding 1 path)
  let lines := (replaced.splitOn "\n").toArray

  -- We don't need to run any further checks on imports-only files.
  if isImportsOnlyFile lines then
    return (errors, if changes_made then some lines else none)

  -- All further style errors raised in this file.
  let mut allOutput := #[]
  -- A working copy of the lines in this file, modified by applying the auto-fixes.
  let mut changed := lines

  for lint in allLinters do
    let (new_errors, changes) := lint opts changed
    if let some c := changes then
      -- apply linter's suggested changes only where no exceptions apply.
      -- Each changed line must correspond to line number of at least one error.
      if changed.size != c.size then
        throw <| IO.userError "linter's suggested changes must have same number of lines as input"
      -- For each line in `changed`,
      changed := Array.ofFn fun (lineIdx : Fin changed.size) ↦
        -- check if any exception applies:
        if new_errors.any fun (e, idx) ↦
          (idx - 1 == lineIdx) -- Subtract 1 since linter's line numbers are one-based
          ∧ (ErrorContext.find?_comparable ⟨e, lineIdx, path⟩ exceptions).isNone
        then
          c[lineIdx]! -- no exception applies. Assign linter's suggestion.
        else
          changed[lineIdx]! -- An least one exception applies. Ignore linter's suggested line.
      -- Note: to keep logic simple, changed lines where an exception applies are left alone,
      --   even if there are other suggested changes where no exception applies.

    -- append ALL errors to the output. For this, exception filtering happens later below.
    allOutput := allOutput.append
      (Array.map (fun (e, n) ↦ #[(ErrorContext.mk e n path)]) new_errors)
    if changed != lines then
      changes_made := true
    -- Note: we ASSUME that the linters' auto-fixes do not introduce new issues!

  -- Filter exceptions. Note: This list is not sorted. For github, this is fine.
  errors := errors.append
    (allOutput.flatten.filter (fun e ↦ (e.find?_comparable exceptions).isNone))
  return (errors, if changes_made then some changed else none)

/-- Enables the old Python-based style linters. -/
-- TODO: these linters assume they are being run in `./scripts` and do not work on
-- downstream projects. Fix this before re-enabling them by default.
-- Or better yet: port them to Lean 4.
public register_option linter.pythonStyle : Bool := { defValue := false }

/-- Lint a collection of modules for style violations.
Print formatted errors for all unexpected style violations to standard output;
correct automatically fixable style errors if configured so.
Return the number of files which had new style errors.
`opts` contains the options defined in the Lakefile, determining which linters to enable.
`nolints` is a list of style exceptions to take into account.
`moduleNames` are the names of all the modules to lint,
`mode` specifies what kind of output this script should produce,
`fix` configures whether fixable errors should be corrected in-place. -/
public
def lintModules (opts : LinterOptions) (nolints : Array String) (moduleNames : Array Lean.Name)
    (style : ErrorFormat) (fix : Bool) : IO UInt32 := do
  let styleExceptions := parseStyleExceptions nolints
  let mut numberErrorFiles : UInt32 := 0
  let mut allUnexpectedErrors := #[]
  for module in moduleNames do
    -- Convert the module name to a file name, then lint that file.
    let path := mkFilePath (module.components.map toString)|>.addExtension "lean"

    let (errors, changed) := ← lintFile opts path styleExceptions
    if let some c := changed then
      if fix then
        let _ := ← IO.FS.writeFile path ("\n".intercalate c.toList)
    if errors.size > 0 then
      allUnexpectedErrors := allUnexpectedErrors.append errors
      numberErrorFiles := numberErrorFiles + 1

  -- Passing Lean options to Python files seems like a lot of work for something we want to
  -- run entirely inside of Lean in the end anyway.
  -- So for now, we enable/disable all of them with a single switch.
  if getLinterValue linter.pythonStyle opts then
    -- Run the remaining python linters. It is easier to just run on all files.
    -- If this poses an issue, I can either filter the output
    -- or wait until lint-style.py is fully rewritten in Lean.
    let args := if fix then #["--fix"] else #[]
    let output ← IO.Process.output { cmd := "./scripts/print-style-errors.sh", args := args }
    if output.exitCode != 0 then
      numberErrorFiles := numberErrorFiles + 1
      IO.eprintln s!"error: `print-style-error.sh` exited with code {output.exitCode}"
      IO.eprint output.stderr
    else if output.stdout != "" then
      numberErrorFiles := numberErrorFiles + 1
      IO.eprint output.stdout
  formatErrors allUnexpectedErrors style
  if allUnexpectedErrors.size > 0 then
    IO.eprintln s!"error: found {allUnexpectedErrors.size} new style error(s)! \
      Try `lake exe lint-style --fix` to apply automatic fixes."
  return numberErrorFiles

/-- Verify that all modules are named in `UpperCamelCase` -/
public register_option linter.modulesUpperCamelCase : Bool := { defValue := true }

/-- Verifies that all modules in `modules` are named in `UpperCamelCase`
(except for explicitly discussed exceptions, which are hard-coded here).
Return the number of modules violating this. -/
public
def modulesNotUpperCamelCase (opts : LinterOptions) (modules : Array Lean.Name) : IO Nat := do
  unless getLinterValue linter.modulesUpperCamelCase opts do return 0

  -- Exceptions to this list should be discussed on zulip!
  let exceptions := [
    `Mathlib.Analysis.CStarAlgebra.lpSpace,
    `Mathlib.Analysis.InnerProductSpace.l2Space,
    `Mathlib.Analysis.Normed.Lp.lpSpace
  ]
  -- We allow only names in UpperCamelCase, possibly with a trailing underscore.
  let badNames := modules.filter fun name ↦
    let upperCamelName := Lake.toUpperCamelCase name
    !exceptions.contains name &&
      upperCamelName != name && s!"{upperCamelName}_" != name.toString
  for bad in badNames do
    let upperCamelName := Lake.toUpperCamelCase bad
    let good := if bad.toString.endsWith "_" then s!"{upperCamelName}_" else upperCamelName.toString
    IO.eprintln
      s!"error: module name '{bad}' is not in 'UpperCamelCase': it should be '{good}' instead"
  return badNames.size

/-- Verify that no module name is forbidden according to Windows' filename rules. -/
public register_option linter.modulesForbiddenWindows : Bool := { defValue := true }

/-- Verifies that no module in `modules` contains CON, PRN, AUX, NUL, COM1, COM2, COM3, COM4, COM5,
COM6, COM7, COM8, COM9, COM¹, COM², COM³, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, LPT9,
LPT¹, LPT² or LPT³ in its filename, as these are forbidden on Windows.

Also verify that module names contain no forbidden characters such as `*`, `?` (Windows),
`!` (forbidden on Nix OS) or `.` (might result from confusion with a module name).

Source: https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file.
Return the number of module names violating this rule. -/
public def modulesOSForbidden (opts : LinterOptions) (modules : Array Lean.Name) : IO Nat := do
  unless getLinterValue linter.modulesUpperCamelCase opts do return 0
  let forbiddenNames := [
    "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
    "COM9", "COM¹", "COM²", "COM³", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
    "LPT9", "LPT¹", "LPT²", "LPT³"
  ]
  -- We also check for the exclamation mark (which is not forbidden on Windows, but e.g. on Nix OS),
  -- but do so below (with a custom error message).
  let forbiddenCharacters := [
    '<', '>', '"', '/', '\\', '|', '?', '*',
  ]
  let mut badNamesNum := 0
  for name in modules do
    let mut isBad := false
    let mut badComps : List String := []
    let mut badChars : List String := []
    for comp in name.componentsRev do
      let .str .anonymous s := comp | continue
      if forbiddenNames.contains s.toUpper then
        badComps := s :: badComps
      else
        if s.contains '!' then
          isBad := true
          IO.eprintln s!"error: module name '{name}' contains forbidden character '!'"
        else if s.contains '.' then
          isBad := true
          IO.eprintln s!"error: module name '{name}' contains forbidden character '.'"
        else if s.contains ' ' || s.contains '\t' || s.contains '\n' then
          isBad := true
          IO.eprintln s!"error: module name '{name}' contains a whitespace character"
        for c in forbiddenCharacters do
          if s.contains c then
            badChars := c.toString :: badChars
    if !badComps.isEmpty || !badChars.isEmpty then
      isBad := true
    if isBad then badNamesNum := badNamesNum + 1

    if !badComps.isEmpty then
      IO.eprintln s!"error: module name '{name}' contains \
        component{if badComps.length > 1 then "s" else ""} '{badComps}', \
        which {if badComps.length > 1 then "are" else "is"} forbidden in Windows filenames."
    if !badChars.isEmpty then
      IO.eprintln s!"error: module name '{name}' contains \
        character{if badChars.length > 1 then "s" else ""} '{badChars}', \
        which {if badChars.length > 1 then "are" else "is"} forbidden in Windows filenames."
  return badNamesNum

end Mathlib.Linter.TextBased