mandiant · devs6186 · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -68,6 +68,7 @@
 - doc: fix typo in usage.md, add documentation links to README @devs6186 #2274
 - doc: add table comparing ways to consume capa output (CLI, IDA, Ghidra, dynamic sandbox, web) @devs6186 #2273
 - binja: add mypy config for top-level binaryninja module to fix mypy issues @devs6186 #2399
+- rules: add fast path for pure-literal case-insensitive regex patterns via O(1) lookup in a set of lowercased strings, falling back to the regex engine otherwise @devs6186 #2129
 - ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
 - ci: pin pip-audit action SHAs and update to v1.1.0 @kami922 #1131
 

diff --git a/capa/features/common.py b/capa/features/common.py
@@ -328,6 +328,13 @@ def __init__(self, value: str, description=None):
                 f"invalid regular expression: {value} it should use Python syntax, try it at https://pythex.org"
             ) from exc
 
+        # Detect pure-literal case-insensitive patterns: the value ends with the /i
+        # flag and re.escape() leaves the pattern unchanged (no metacharacters).
+        # For these, an exact lowercased hit in the feature set needs no regex engine.
+        # See: https://github.com/mandiant/capa/issues/2129
+        self._is_pure_literal_ci: bool = value.endswith("/i") and re.escape(pat) == pat
+        self._normalized_lower: str = pat.lower() if self._is_pure_literal_ci else ""
+
     def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.regex"] += 1

diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py
@@ -1988,11 +1988,6 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea
         # We may want to try to pre-evaluate these strings, based on their presence in the file,
         # to reduce the number of evaluations we do here.
         # See: https://github.com/mandiant/capa/issues/2126
-        #
-        # We may also want to specialize case-insensitive strings, which would enable them to
-        # be indexed, and therefore skip the scanning here, improving performance.
-        # This strategy is described here:
-        # https://github.com/mandiant/capa/issues/2129
         if feature_index.string_rules:
             # This is a FeatureSet that contains only String features.
             # Since we'll only be evaluating String/Regex features below, we don't care about
@@ -2009,10 +2004,30 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea
                     string_features[feature] = locations
 
             if string_features:
+                # Set of lowercased string values, built lazily the first time we
+                # encounter a pure-literal `/i` regex. This keeps the fast path while
+                # skipping the lowercasing cost in workloads where such regexes are uncommon.
+                lowercased_strings: frozenset[str] | None = None
                 for rule_name, wanted_strings in feature_index.string_rules.items():
                     for wanted_string in wanted_strings:
+                        # Fast path: an exact case-insensitive hit for a pure-literal /i pattern is an O(1) set lookup.
+                        if isinstance(wanted_string, capa.features.common.Regex) and wanted_string._is_pure_literal_ci:
+                            if lowercased_strings is None:
+                                lowercased_strings = frozenset(
+                                    feature.value.lower()
+                                    for feature in string_features
+                                    if isinstance(feature.value, str)
+                                )
+
+                            if wanted_string._normalized_lower in lowercased_strings:
+                                candidate_rule_names.add(rule_name)
+                                break
+
+                        # The fast path only detects whole-string matches, so on a miss
+                        # fall back to the regex engine to preserve substring semantics.
                         if wanted_string.evaluate(string_features):
                             candidate_rule_names.add(rule_name)
+                            break
 
         # Like with String/Regex features above, we have to scan for Bytes to find candidate rules.
         #

diff --git a/tests/test_match.py b/tests/test_match.py
@@ -816,3 +816,56 @@ def test_index_features_nested_unstable():
 
     assert not index.string_rules
     assert not index.bytes_rules
+
+
+def test_regex_pure_literal_ci_fast_path_detection():
+    """Verify that pure-literal case-insensitive Regex patterns are detected correctly."""
+    # Pure literal patterns: no metacharacters, /i flag
+    r1 = capa.features.common.Regex("/createfile/i")
+    assert r1._is_pure_literal_ci is True
+    assert r1._normalized_lower == "createfile"
+
+    r2 = capa.features.common.Regex("/useragent/i")
+    assert r2._is_pure_literal_ci is True
+    assert r2._normalized_lower == "useragent"
+
+    # Pattern with metacharacters: should NOT be flagged as a pure literal
+    r3 = capa.features.common.Regex("/create.*file/i")
+    assert r3._is_pure_literal_ci is False
+
+    # Case-sensitive pattern: no /i flag
+    r4 = capa.features.common.Regex("/createfile/")
+    assert r4._is_pure_literal_ci is False
+
+
+def test_regex_ci_fast_path_correctness():
+    """Verify the fast path produces the same results as the full regex engine."""
+    rule_text = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test ci fast path
+                scopes:
+                    static: function
+                    dynamic: process
+            features:
+                - string: /createfile/i
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule_text)
+    rr = capa.rules.RuleSet([r])
+
+    # Should match: exact case-insensitive match (fast path)
+    _, matches = rr.match(capa.rules.Scope.FUNCTION, {String("CreateFile"): {0x0}}, 0x0)
+    assert "test ci fast path" in matches
+
+    _, matches = rr.match(capa.rules.Scope.FUNCTION, {String("CREATEFILE"): {0x0}}, 0x0)
+    assert "test ci fast path" in matches
+
+    # Should match: substring match (regex fallback path)
+    _, matches = rr.match(capa.rules.Scope.FUNCTION, {String("CreateFileA"): {0x0}}, 0x0)
+    assert "test ci fast path" in matches
+
+    # Should not match
+    _, matches = rr.match(capa.rules.Scope.FUNCTION, {String("WriteFile"): {0x0}}, 0x0)
+    assert "test ci fast path" not in matches