From d29ddac75c7345dfa8483bcf6883b5e66058fa9e Mon Sep 17 00:00:00 2001 From: devs6186 Date: Tue, 7 Apr 2026 18:11:10 +0530 Subject: [PATCH] features: emit number(0) for xor-zeroing idioms across all backends #2622 Detect register-to-register XOR instructions where both operands are the same (`xor eax, eax`; `xorpd xmm0, xmm0`; `eor rd, rn, rn`; etc.) and emit Number(0) at the instruction address, since these idioms zero the destination register. Previously the nzxor extractor silently returned early for these instructions, so no feature was recorded at all. Rules that need to detect zero-valued arguments set up before API calls (a common MSVC pattern, such as `xor r9d, r9d` before NtFsControlFile) had no way to match them. The change is applied to all six backends: - viv/insn.py - binexport2/arch/intel/insn.py - binexport2/arch/arm/insn.py (`eor rd, rn, rn`) - ida/insn.py - ghidra/insn.py - binja/insn.py (handles BN LLIL canonicalization of `xor reg, reg` to LLIL_SET_REG(reg, 0) rather than LLIL_XOR) In all cases the zeroing idiom does not produce Characteristic("nzxor"). 
Test coverage via FEATURE_PRESENCE_TESTS at instruction scope: ("mimikatz", "function=0x40105D,bb=0x40105D,insn=0x401066", Number(0x0), True) -- xor ebx, ebx emits Number(0) same scope, Characteristic("nzxor"), False -- must not emit nzxor Closes #2622 --- CHANGELOG.md | 2 ++ .../extractors/binexport2/arch/arm/insn.py | 9 +++++++-- .../extractors/binexport2/arch/intel/insn.py | 3 +++ capa/features/extractors/binja/insn.py | 16 ++++++++++++++-- capa/features/extractors/ghidra/insn.py | 2 ++ capa/features/extractors/ida/insn.py | 2 ++ capa/features/extractors/viv/insn.py | 3 +++ tests/fixtures.py | 16 ++++++++++++++++ 8 files changed, 49 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1cc6d2365..366ee17ff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### New Features +- features: emit `number(0)` for xor-zeroing idioms like `xor eax, eax` across all backends #2622 + ### Breaking Changes ### New Rules (0) diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 2cce683129..3938f316a4 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -136,8 +136,13 @@ def extract_insn_nzxor_characteristic_features( # so we don't have to realize the tree/list. operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - if operands[1] != operands[2]: - yield Characteristic("nzxor"), ih.address + if operands[1] == operands[2]: + # eor rd, rn, rn zeros the destination register. + # emit Number(0) to let rules match on the produced value. 
+ yield Number(0), ih.address + return + + yield Characteristic("nzxor"), ih.address INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(""" diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index ed0f186343..2f254aa406 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -209,6 +209,9 @@ def extract_insn_nzxor_characteristic_features( operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[0] == operands[1]: + # xor eax, eax and similar instructions zero a register. + # emit Number(0) to let rules match on the produced value. + yield Number(0), ih.address return instruction_address: int = idx.insn_address_by_index[ii.instruction_index] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index da5ba70436..c18aaf6684 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -362,8 +362,6 @@ def extract_insn_nzxor_characteristic_features( results = [] def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: - # If the two operands of the xor instruction are the same, the LLIL will be translated to other instructions, - # e.g., , (LLIL_SET_REG). So we do not need to check whether the two operands are the same. if il.operation == LowLevelILOperation.LLIL_XOR: # Exclude cases related to the stack cookie if is_nzxor_stack_cookie(fh.inner, bbh.inner, il): @@ -373,6 +371,20 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index else: return True + # Binary Ninja canonicalizes `xor reg, reg` to LLIL_SET_REG(reg, 0) rather than LLIL_XOR, + # so the llil_checker above never fires for zeroing XOR idioms. + # Detect them here by checking the mnemonic and the lifted result. 
+ insn: DisassemblyInstruction = ih.inner + if insn.text and insn.text[0].text.lower() in ("xor", "xorpd", "xorps", "pxor"): + for llil in func.get_llils_at(ih.address): + if ( + llil.operation == LowLevelILOperation.LLIL_SET_REG + and llil.src.operation == LowLevelILOperation.LLIL_CONST + and llil.src.constant == 0 + ): + yield Number(0), ih.address + return + for llil in func.get_llils_at(ih.address): visit_llil_exprs(llil, llil_checker) diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 82b989fac9..959aa0ed1d 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -457,6 +457,8 @@ def extract_insn_nzxor_characteristic_features( if capa.features.extractors.ghidra.helpers.is_stack_referenced(insn): return if capa.features.extractors.ghidra.helpers.is_zxor(insn): + # xor eax, eax and similar zero a register; emit Number(0) instead of nzxor. + yield Number(0), ih.address return if check_nzxor_security_cookie_delta(f, insn): return diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 86fd14b8e6..b9f4125c2e 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -391,6 +391,8 @@ def extract_insn_nzxor_characteristic_features( if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor): return if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2): + # xor eax, eax and similar zero a register; emit Number(0) instead of nzxor. 
+ yield Number(0), ih.address return if is_nzxor_stack_cookie(fh.inner, bbh.inner, insn): return diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 552edfe490..c7c305ade2 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -391,6 +391,9 @@ def extract_insn_nzxor_characteristic_features( return if insn.opers[0] == insn.opers[1]: + # xor eax, eax and similar instructions zero a register. + # emit Number(0) to let rules match on the produced value. + yield Number(0), ih.address return if is_security_cookie(f, bb, insn): diff --git a/tests/fixtures.py b/tests/fixtures.py index 6f15d03655..ad1d826c51 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -466,6 +466,8 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "773290480d5445f11d3dc1b800728966.exe_" elif name.startswith("3b13b"): return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" + elif name == "microsocks": + return CD / "data" / "microsocks.elf_" elif name == "7351f.elf": return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_" elif name.startswith("79abd"): @@ -919,6 +921,11 @@ def parametrize(params, values, **kwargs): ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True), ("mimikatz", "function=0x401000", capa.features.insn.Number(0x0), True), + # insn/number: xor-zeroing idiom, small ELF (microsocks.elf_, xor ebp,ebp at 0x2002564) + ("microsocks", "function=0x2002560,bb=0x2002560,insn=0x2002564", capa.features.insn.Number(0x0), True), + # insn/number: xor-zeroing idiom (xor eax, eax -> Number(0)) + # function 0x40105D contains `xor ebx, ebx` at 0x401066 + ("mimikatz", "function=0x40105D,bb=0x40105D,insn=0x401066", capa.features.insn.Number(0x0), True), # insn/number: stack adjustments ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), 
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), @@ -1033,6 +1040,15 @@ def parametrize(params, values, **kwargs): # insn/characteristic(nzxor) ("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True), ("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False), + # insn/characteristic(nzxor): xor-zeroing idiom must not be tagged as nzxor + ( + "mimikatz", + "function=0x40105D,bb=0x40105D,insn=0x401066", + capa.features.common.Characteristic("nzxor"), + False, + ), + # insn/characteristic(nzxor): xor-zeroing idiom, small ELF (microsocks.elf_, xor ebp,ebp at 0x2002564) + ("microsocks", "function=0x2002560,bb=0x2002560,insn=0x2002564", capa.features.common.Characteristic("nzxor"), False), # insn/characteristic(nzxor): no security cookies ("mimikatz", "function=0x46D534", capa.features.common.Characteristic("nzxor"), False), # insn/characteristic(nzxor): xorps