diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38da4512b..4773a567a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### New Features
 
+- binexport2: add "loop" characteristic at instruction scope for instructions inside a CFG cycle
+
 ### Breaking Changes
 
 ### New Rules (1)
diff --git a/capa/features/extractors/binexport2/__init__.py b/capa/features/extractors/binexport2/__init__.py
index 1ebb84edc..bfab5fc50 100644
--- a/capa/features/extractors/binexport2/__init__.py
+++ b/capa/features/extractors/binexport2/__init__.py
@@ -33,9 +33,10 @@
 from pefile import PE
 from elftools.elf.elffile import ELFFile
 
 import capa.features.common
 import capa.features.extractors.common
 import capa.features.extractors.binexport2.helpers
+from capa.features.extractors import loops
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 
 logger = logging.getLogger(__name__)
@@ -415,6 +416,16 @@ class FunctionContext:
     os: set[str]
     arch: set[str]
 
+    def __post_init__(self):
+        # cache the indices of the basic blocks that lie on a cycle of this function's flow graph.
+        # this is kept on the function context (not the shared analysis context)
+        # so that the result for one function is never overwritten by another function.
+        flow_graph = self.ctx.be2.flow_graph[self.flow_graph_index]
+        edges: list[tuple[int, int]] = [
+            (edge.source_basic_block_index, edge.target_basic_block_index) for edge in flow_graph.edge
+        ]
+        self.looping_vertices: set[int] = loops.get_loop_vertices(edges)
+
 
 @dataclass
 class BasicBlockContext:
diff --git a/capa/features/extractors/binexport2/basicblock.py b/capa/features/extractors/binexport2/basicblock.py
index d7a2b6c8a..da43db6a6 100644
--- a/capa/features/extractors/binexport2/basicblock.py
+++ b/capa/features/extractors/binexport2/basicblock.py
@@ -36,6 +36,16 @@ def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[F
         yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)
 
 
+def extract_bb_inside_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
+    """extract the inside loop characteristic for a basic block that lies on a cycle of its function"""
+    fhi: FunctionContext = fh.inner
+    bbi: BasicBlockContext = bbh.inner
+
+    if bbi.basic_block_index in fhi.looping_vertices:
+        basic_block_address: int = fhi.ctx.idx.get_basic_block_address(bbi.basic_block_index)
+        yield Characteristic("inside loop"), AbsoluteVirtualAddress(basic_block_address)
+
+
 def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract basic block features"""
     for bb_handler in BASIC_BLOCK_HANDLERS:
@@ -44,4 +54,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
     yield BasicBlock(), bbh.address
 
 
-BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
+BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop, extract_bb_inside_loop)
diff --git a/capa/features/extractors/binexport2/function.py b/capa/features/extractors/binexport2/function.py
index 11747fab8..2ac5e2658 100644
--- a/capa/features/extractors/binexport2/function.py
+++ b/capa/features/extractors/binexport2/function.py
@@ -39,22 +39,6 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Add
         yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)
 
 
-def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
-    fhi: FunctionContext = fh.inner
-
-    be2: BinExport2 = fhi.ctx.be2
-
-    flow_graph_index: int = fhi.flow_graph_index
-    flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]
-
-    edges: list[tuple[int, int]] = []
-    for edge in flow_graph.edge:
-        edges.append((edge.source_basic_block_index, edge.target_basic_block_index))
-
-    if loops.has_loop(edges):
-        yield Characteristic("loop"), fh.address
-
-
 def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
 
@@ -76,4 +60,4 @@ def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
             yield feature, addr
 
 
-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_function_name)
+FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_name)
diff --git a/capa/features/extractors/binexport2/insn.py b/capa/features/extractors/binexport2/insn.py
index 86b9632ad..688246b75 100644
--- a/capa/features/extractors/binexport2/insn.py
+++ b/capa/features/extractors/binexport2/insn.py
@@ -241,6 +241,14 @@ def extract_function_indirect_call_characteristic_features(
         )
 
 
+def extract_insn_loop(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
+    """extract loop characteristic feature at the instruction scope if inside a cycle"""
+    fhi: FunctionContext = fh.inner
+    # the per-function loop set holds basic block indices, so compare by index rather than by address
+    if bbh.inner.basic_block_index in fhi.looping_vertices:
+        yield Characteristic("loop"), ih.address
+
+
 def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
@@ -258,4 +266,5 @@ def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iter
     extract_insn_mnemonic_features,
     extract_function_calls_from,
     extract_function_indirect_call_characteristic_features,
+    extract_insn_loop,
 )
diff --git a/capa/features/extractors/loops.py b/capa/features/extractors/loops.py
index 9821006dd..57bc4a733 100644
--- a/capa/features/extractors/loops.py
+++ b/capa/features/extractors/loops.py
@@ -30,3 +30,26 @@ def has_loop(edges, threshold=2):
     g = networkx.DiGraph()
     g.add_edges_from(edges)
     return any(len(comp) >= threshold for comp in strongly_connected_components(g))
+
+
+def get_loop_vertices(edges, threshold=2):
+    """find vertices that are part of a cycle in a directed graph
+
+    args:
+        edges: iterable of (source, target) pairs representing a directed graph i.e. [(1, 2), (2, 1)]
+        threshold: min number of nodes in a strongly connected component for it to count as a loop
+
+    returns:
+        set of vertex IDs that lie on a cycle.
+        vertices with an edge to themselves are always included, regardless of threshold.
+    """
+    g = networkx.DiGraph()
+    g.add_edges_from(edges)
+    loop_vertices = set()
+    for comp in strongly_connected_components(g):
+        if len(comp) >= threshold:
+            loop_vertices.update(comp)
+    # a self-loop is a single-vertex component, so the threshold check above skips it;
+    # query the graph (not `edges`) so that one-shot iterables are handled correctly too.
+    loop_vertices.update(networkx.nodes_with_selfloops(g))
+    return loop_vertices
diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py
index 2e1c8fa6e..8a59a4076 100644
--- a/capa/rules/__init__.py
+++ b/capa/rules/__init__.py
@@ -214,7 +214,6 @@ def from_dict(cls, scopes: dict[str, str]) -> "Scopes":
         capa.features.basicblock.BasicBlock,
         capa.features.common.Characteristic("calls from"),
         capa.features.common.Characteristic("calls to"),
-        capa.features.common.Characteristic("loop"),
         capa.features.common.Characteristic("recursive call"),
         # plus basic block scope features, see below
     },
@@ -235,6 +234,7 @@ def from_dict(cls, scopes: dict[str, str]) -> "Scopes":
         capa.features.insn.Mnemonic,
         capa.features.insn.OperandNumber,
         capa.features.insn.OperandOffset,
+        capa.features.common.Characteristic("loop"),
         capa.features.common.Characteristic("nzxor"),
         capa.features.common.Characteristic("peb access"),
         capa.features.common.Characteristic("fs access"),