Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions tests/unit/compiler/venom/test_invoke_arg_copy_forwarding.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,77 @@ def test_readonly_forwarding_allows_retained_copy_comparison_from_invoke_arg():
assert all(inst.opcode != "mcopy" for inst in insts)


def test_readonly_forwarding_rejects_param_source_with_huge_callee_frame():
    # The copy source is a function param, i.e. an alloca that cannot be
    # resolved at this point. The callee declares two ABI-encode-scale
    # buffers (more than 8MB combined). In the scenario under test, the
    # param could resolve after inlining to a small caller alloca that the
    # conflict graph pushes to a high address, turning the forwarding into a
    # quadratic memory-expansion gas bomb. The pass is therefore expected to
    # leave the copy and the invoke argument untouched.
    src = """
function caller {
caller:
%arg = param
%retpc = param
%tmp = alloca 4128
mcopy %tmp, %arg, 4128
invoke @callee, %tmp
ret %retpc
}

function callee {
callee:
%a = param
%retpc = param
%buf1 = alloca 4195552
%buf2 = alloca 4195424
mload %a
ret %retpc
}
"""

    ctx = _run_copy_forwarding(src)
    caller_fn = ctx.get_function(IRLabel("caller"))

    # Flatten the caller's instructions across all of its basic blocks.
    caller_insts = []
    for block in caller_fn.get_basic_blocks():
        caller_insts.extend(block.instructions)

    def first_with_opcode(opcode):
        # Raises StopIteration if the pass removed the instruction.
        return next(inst for inst in caller_insts if inst.opcode == opcode)

    copy_inst = first_with_opcode("mcopy")
    call_inst = first_with_opcode("invoke")

    # The invoke argument must still match the mcopy's operands[2]
    # (presumably the copy destination %tmp - TODO confirm operand order),
    # i.e. the invoke was not rewritten to use the param directly.
    assert call_inst.operands[1] == copy_inst.operands[2]


def test_readonly_forwarding_allows_param_source_with_small_callee_frame():
    # Same shape as the huge-frame case - the copy source is a function
    # param - but here the callee only declares a 64-byte alloca. Any
    # placement after inlining is cheap, so forwarding is expected to happen.
    src = """
function caller {
caller:
%arg = param
%retpc = param
%tmp = alloca 1056
mcopy %tmp, %arg, 1056
invoke @callee, %tmp
ret %retpc
}

function callee {
callee:
%a = param
%retpc = param
%frame = alloca 64
mload %a
ret %retpc
}
"""

    ctx = _run_copy_forwarding(src)
    caller_fn = ctx.get_function(IRLabel("caller"))

    # Flatten the caller's instructions across all of its basic blocks.
    caller_insts = []
    for block in caller_fn.get_basic_blocks():
        caller_insts.extend(block.instructions)

    call_inst = next(inst for inst in caller_insts if inst.opcode == "invoke")

    # The invoke now receives the param itself ...
    assert call_inst.operands[1] == IRVariable("%arg")
    # ... and the intermediate copy is gone from the caller.
    assert not any(inst.opcode == "mcopy" for inst in caller_insts)


def test_readonly_forwarding_rejects_larger_source_liveness_extension():
src = """
function caller {
Expand Down
74 changes: 62 additions & 12 deletions vyper/venom/passes/copy_forwarding.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class CopyForwardingPolicy:
# the one-byte MCOPY opcode from deployed bytecode.
CODE_DEPOSIT_GAS_PER_BYTE: int = 200
MIN_ELIDED_MCOPY_BYTES: int = 1
# When the copy source can't be resolved to an alloca, only block
# forwarding for catastrophic memory-expansion penalties (millions of
# gas, e.g. ABI-encode buffers of dynamic-array args). Moderate
# penalties (a few KB of frame growth) can still be outweighed by the
# forwarding win across many invocations.
UNRESOLVED_SOURCE_PENALTY_THRESHOLD: int = 1_000_000

function: IRFunction
dfg: DFGAnalysis
Expand Down Expand Up @@ -78,7 +84,28 @@ def should_block_forwarding(
dst_alloca: Allocation,
) -> bool:
src_alloca = self._copy_source_alloca(copy_inst)

copy_size = self.copy_size(copy_inst)
if copy_size is None:
copy_size = dst_alloca.alloca_size

if src_alloca is None:
# Source can't be resolved to an alloca — typically a function
# `param`. After inlining the param will resolve to a caller
# alloca whose write history is invisible here. If forwarding
# would extend that future liveness across a callee whose
# transitive frame would force the source to a catastrophic
# high address, the quadratic memory-expansion gas can dwarf
# any forwarding win. Use a high absolute penalty threshold
# so small-frame forwardings still win (~hundreds of gas) but
# ABI-encode-buffer-scale frames (megabytes → millions of gas)
# are caught.
for invoke_inst, _ in rewrite_sites:
penalty = self._memory_expansion_penalty_across_callee(invoke_inst, copy_size)
Comment thread
harkal marked this conversation as resolved.
if penalty is None:
continue
if penalty > self.UNRESOLVED_SOURCE_PENALTY_THRESHOLD:
return True
return False

has_read_access, has_write_access = self._alloca_has_accesses_that_can_skip_copy(
Expand All @@ -87,10 +114,6 @@ def should_block_forwarding(
if not has_read_access and not has_write_access:
return False

copy_size = self.copy_size(copy_inst)
if copy_size is None:
copy_size = dst_alloca.alloca_size

for invoke_inst, _ in rewrite_sites:
if self._alloca_has_read_after(src_alloca, invoke_inst):
continue
Expand Down Expand Up @@ -285,19 +308,46 @@ def _callee_reserved_intervals(self, callee: IRFunction) -> list[tuple[int, int]
if alloca in allocator.allocated
]

# Allocator hasn't run yet (first, pre-inline forwarding pass).
# Walk transitive callees to estimate the frame footprint. Counting only
# this callee's own allocas would miss ABI-encode buffers declared deeper
# in the call tree, and the cost check would then let through forwardings
# that become quadratic memory-expansion bombs after inlining.
intervals: list[tuple[int, int]] = []
ptr = 0
for bb in callee.get_basic_blocks():
for inst in bb.instructions:
if inst.opcode != "alloca":
continue
size = inst.operands[0]
assert isinstance(size, IRLiteral)
intervals.append((ptr, size.value))
ptr += size.value
for alloca_size in self._collect_transitive_alloca_sizes(callee):
intervals.append((ptr, alloca_size))
ptr += alloca_size

return intervals

def _collect_transitive_alloca_sizes(self, callee: IRFunction) -> list[int]:
    """Collect the sizes of all allocas reachable from ``callee``.

    Scans ``callee`` and every function reachable from it through
    ``invoke`` instructions, using an explicit work stack (depth-first).
    Each function is scanned at most once, so recursive or mutually
    recursive call chains terminate.

    Args:
        callee: Function at which the traversal starts.

    Returns:
        The literal size of every ``alloca`` encountered, in discovery
        order. Sizes are not deduplicated.

    Raises:
        AssertionError: If an ``alloca`` size operand is not an
            ``IRLiteral``.
    """
    alloca_sizes: list[int] = []
    seen: set[IRFunction] = set()
    worklist: list[IRFunction] = [callee]

    while worklist:
        current = worklist.pop()
        if current in seen:
            continue
        seen.add(current)

        for block in current.get_basic_blocks():
            for inst in block.instructions:
                opcode = inst.opcode
                if opcode == "alloca":
                    literal = inst.operands[0]
                    assert isinstance(literal, IRLiteral)
                    alloca_sizes.append(literal.value)
                elif opcode == "invoke":
                    callee_label = inst.operands[0]
                    # Only targets that are labels can be looked up.
                    if isinstance(callee_label, IRLabel):
                        resolved = self.function.ctx.functions.get(callee_label)
                        # Targets missing from the context are skipped.
                        if resolved is not None:
                            worklist.append(resolved)

    return alloca_sizes

def _get_invoke_callee(self, invoke_inst: IRInstruction) -> IRFunction:
target = invoke_inst.operands[0]
assert isinstance(target, IRLabel)
Expand Down
Loading