diff --git a/CHANGELOG.md b/CHANGELOG.md index e1cc6d2365..366ee17ff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### New Features +- features: emit `number(0)` for xor-zeroing idioms like `xor eax, eax` across all backends #2622 + ### Breaking Changes ### New Rules (0) diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 2cce683129..3938f316a4 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -136,8 +136,13 @@ def extract_insn_nzxor_characteristic_features( # so we don't have to realize the tree/list. operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - if operands[1] != operands[2]: - yield Characteristic("nzxor"), ih.address + if operands[1] == operands[2]: + # eor rd, rn, rn zeros the destination register. + # emit Number(0) to let rules match on the produced value. + yield Number(0), ih.address + return + + yield Characteristic("nzxor"), ih.address INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(""" diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index ed0f186343..2f254aa406 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -209,6 +209,9 @@ def extract_insn_nzxor_characteristic_features( operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[0] == operands[1]: + # xor eax, eax and similar instructions zero a register. + # emit Number(0) to let rules match on the produced value. + yield Number(0), ih.address return instruction_address: int = idx.insn_address_by_index[ii.instruction_index] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index da5ba70436..c18aaf6684 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -362,8 +362,6 @@ def extract_insn_nzxor_characteristic_features( results = [] def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: - # If the two operands of the xor instruction are the same, the LLIL will be translated to other instructions, - # e.g., , (LLIL_SET_REG). So we do not need to check whether the two operands are the same. if il.operation == LowLevelILOperation.LLIL_XOR: # Exclude cases related to the stack cookie if is_nzxor_stack_cookie(fh.inner, bbh.inner, il): @@ -373,6 +371,20 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index else: return True + # Binary Ninja canonicalizes `xor reg, reg` to LLIL_SET_REG(reg, 0) rather than LLIL_XOR, + # so the llil_checker above never fires for zeroing XOR idioms. + # Detect them here by checking the mnemonic and the lifted result. + insn: DisassemblyInstruction = ih.inner + if insn.text and insn.text[0].text.lower() in ("xor", "xorpd", "xorps", "pxor"): + for llil in func.get_llils_at(ih.address): + if ( + llil.operation == LowLevelILOperation.LLIL_SET_REG + and llil.src.operation == LowLevelILOperation.LLIL_CONST + and llil.src.constant == 0 + ): + yield Number(0), ih.address + return + for llil in func.get_llils_at(ih.address): visit_llil_exprs(llil, llil_checker) diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 82b989fac9..959aa0ed1d 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -457,6 +457,8 @@ def extract_insn_nzxor_characteristic_features( if capa.features.extractors.ghidra.helpers.is_stack_referenced(insn): return if capa.features.extractors.ghidra.helpers.is_zxor(insn): + # xor eax, eax and similar zero a register; emit Number(0) instead of nzxor. + yield Number(0), ih.address return if check_nzxor_security_cookie_delta(f, insn): return diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 86fd14b8e6..b9f4125c2e 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -391,6 +391,8 @@ def extract_insn_nzxor_characteristic_features( if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor): return if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2): + # xor eax, eax and similar zero a register; emit Number(0) instead of nzxor. + yield Number(0), ih.address return if is_nzxor_stack_cookie(fh.inner, bbh.inner, insn): return diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 552edfe490..c7c305ade2 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -391,6 +391,9 @@ def extract_insn_nzxor_characteristic_features( return if insn.opers[0] == insn.opers[1]: + # xor eax, eax and similar instructions zero a register. + # emit Number(0) to let rules match on the produced value. + yield Number(0), ih.address return if is_security_cookie(f, bb, insn): diff --git a/tests/fixtures.py b/tests/fixtures.py index 6f15d03655..ad1d826c51 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -466,6 +466,8 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "773290480d5445f11d3dc1b800728966.exe_" elif name.startswith("3b13b"): return CD / "data" / "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_" + elif name == "microsocks": + return CD / "data" / "microsocks.elf_" elif name == "7351f.elf": return CD / "data" / "7351f8a40c5450557b24622417fc478d.elf_" elif name.startswith("79abd"): @@ -919,6 +921,11 @@ def parametrize(params, values, **kwargs): ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True), ("mimikatz", "function=0x401000", capa.features.insn.Number(0x0), True), + # insn/number: xor-zeroing idiom, small ELF (microsocks.elf_, xor ebp,ebp at 0x2002564) + ("microsocks", "function=0x2002560,bb=0x2002560,insn=0x2002564", capa.features.insn.Number(0x0), True), + # insn/number: xor-zeroing idiom (xor eax, eax -> Number(0)) + # function 0x40105D contains `xor ebx, ebx` at 0x401066 + ("mimikatz", "function=0x40105D,bb=0x40105D,insn=0x401066", capa.features.insn.Number(0x0), True), # insn/number: stack adjustments ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), @@ -1033,6 +1040,15 @@ def parametrize(params, values, **kwargs): # insn/characteristic(nzxor) ("mimikatz", "function=0x410DFC", capa.features.common.Characteristic("nzxor"), True), ("mimikatz", "function=0x40105D", capa.features.common.Characteristic("nzxor"), False), + # insn/characteristic(nzxor): xor-zeroing idiom must not be tagged as nzxor + ( + "mimikatz", + "function=0x40105D,bb=0x40105D,insn=0x401066", + capa.features.common.Characteristic("nzxor"), + False, + ), + # insn/characteristic(nzxor): xor-zeroing idiom, small ELF (microsocks.elf_, xor ebp,ebp at 0x2002564) + ("microsocks", "function=0x2002560,bb=0x2002560,insn=0x2002564", capa.features.common.Characteristic("nzxor"), False), # insn/characteristic(nzxor): no security cookies ("mimikatz", "function=0x46D534", capa.features.common.Characteristic("nzxor"), False), # insn/characteristic(nzxor): xorps