Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions dace/codegen/control_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from dace.sdfg.sdfg import SDFG, InterstateEdge
from dace.sdfg.graph import Edge
from dace.codegen.common import unparse_interstate_edge
from dace.codegen.targets.cpp import sym2cpp

if TYPE_CHECKING:
from dace.codegen.targets.framecode import DaCeCodeGenerator
Expand All @@ -34,6 +35,52 @@ def _child_of(node: SDFGState, parent: SDFGState, ptree: Dict[SDFGState, SDFGSta
return False


def _generate_explicit_alloc_free(edge: Edge[InterstateEdge], sdfg: SDFG) -> str:
"""
Emit ``new[]`` / ``delete[]`` statements for any arrays listed in the
``alloc`` / ``free`` properties of *edge*.

These arrays must have ``AllocationLifetime.Explicit``; their pointers live
in the SDFG state struct (``__state->__<cfg_id>_<name>``). Size
expressions are converted from symbolic form via :func:`sym2cpp`.

:param edge: The interstate edge being processed.
:param sdfg: The enclosing SDFG (provides ``cfg_id`` and ``arrays``).
:returns: C++ source fragment (may be empty string).
"""
code = ''

for arr_name in edge.data.alloc:
arr = sdfg.arrays[arr_name]
size_expr = ' * '.join(sym2cpp(s) for s in arr.shape)
code += (
f'__state->__{sdfg.cfg_id}_{arr_name} = '
f'new {arr.dtype.ctype}[{size_expr}];\n'
)

for entry in edge.data.reuse:
if len(entry) == 2:
new_arr, donor_arr = entry
code += (
f'__state->__{sdfg.cfg_id}_{new_arr} = '
f'__state->__{sdfg.cfg_id}_{donor_arr};\n'
f'__state->__{sdfg.cfg_id}_{donor_arr} = nullptr;\n'
)
else:
new_arr, donor_arr, offset_bytes = entry
dtype = sdfg.arrays[new_arr].dtype.ctype
code += (
f'__state->__{sdfg.cfg_id}_{new_arr} = '
f'({dtype}*)((char*)__state->__{sdfg.cfg_id}_{donor_arr} '
f'+ {offset_bytes});\n'
)

for arr_name in edge.data.free:
code += f'delete[] __state->__{sdfg.cfg_id}_{arr_name};\n'

return code


def _generate_interstate_edge_code(edge: Edge[InterstateEdge],
sdfg: SDFG,
cfg: ControlFlowRegion,
Expand Down Expand Up @@ -62,6 +109,8 @@ def _generate_interstate_edge_code(edge: Edge[InterstateEdge],
for variable, value in edge.data.assignments.items()
] + [''])

expr += _generate_explicit_alloc_free(edge, sdfg)

if not assignments_only:
dst: ControlFlowBlock = edge.dst
expr += 'goto __state_{}_{};\n'.format(cfg.cfg_id, re.sub(r'\s+', '_', dst.label))
Expand Down
7 changes: 5 additions & 2 deletions dace/codegen/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,10 @@ def dispatch_allocate(self,
if datadesc.lifetime == dtypes.AllocationLifetime.Persistent:
declaration_stream = CodeIOStream()
callsite_stream = self.frame._initcode
elif datadesc.lifetime == dtypes.AllocationLifetime.External:
elif datadesc.lifetime in (dtypes.AllocationLifetime.External, dtypes.AllocationLifetime.Explicit):
# External: managed outside generated code.
# Explicit: managed by alloc/free on interstate edges; discard streams so no
# auto new[]/delete[] is emitted but defined_vars tracking still runs.
declaration_stream = CodeIOStream()
callsite_stream = CodeIOStream()
else:
Expand All @@ -504,7 +507,7 @@ def dispatch_deallocate(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: ScopeSubg

if datadesc.lifetime == dtypes.AllocationLifetime.Persistent:
callsite_stream = self.frame._exitcode
elif datadesc.lifetime == dtypes.AllocationLifetime.External:
elif datadesc.lifetime in (dtypes.AllocationLifetime.External, dtypes.AllocationLifetime.Explicit):
return

self._array_dispatchers[datadesc.storage].deallocate_array(sdfg, cfg, dfg, state_id, node, datadesc,
Expand Down
81 changes: 81 additions & 0 deletions dace/codegen/instrumentation/allocation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import json
import os

import dace
from dace.codegen.instrumentation.papi import MapEntry
from dace.codegen.instrumentation.provider import SDFG
from dace.sdfg.nodes import AccessNode, EntryNode
from dace.transformation.passes.symbol_propagation import SDFGState
from dace.sdfg.nodes import Node


# Aliases for mappings from an access node to the states / nodes associated
# with it. They are written as plain assignments rather than ``type``
# statements so the module still parses on interpreters older than Python 3.12.
StateAlloc = dict[AccessNode, list[SDFGState]]
NodeAlloc = dict[AccessNode, list[Node]]


def inScope(scopedict: dict[Node, SDFGState | Node | None], node: Node, scope: Node) -> bool:
    """
    Report whether *node* is nested, directly or transitively, inside *scope*.

    Starting at the scope recorded for *node*, the chain of enclosing scopes
    is followed through *scopedict* until *scope* is found or the chain ends.

    :param scopedict: Maps a node to its enclosing scope, or to ``None``.
    :param node: The node whose enclosing scopes are inspected.
    :param scope: The scope node to look for.
    :returns: ``True`` if *scope* appears on the chain of enclosing scopes of
              *node*; ``False`` if the chain ends in ``None`` or in an object
              that is not a ``Node``.
    :raises KeyError: If a node on the chain has no entry in *scopedict*.
    """
    enclosing = scopedict[node]
    # Iterative walk instead of recursion: no recursion-depth limit applies.
    while enclosing is not None:
        if not isinstance(enclosing, Node):
            # Only node scopes can be followed further up.
            return False
        if enclosing == scope:
            return True
        enclosing = scopedict[enclosing]
    return False



def create_allocation_report(
        to: dict[SDFG | SDFGState | EntryNode,
                 list[tuple[SDFG, SDFGState | None, AccessNode | None, bool, bool, bool]]]) -> None:
    """
    Write one JSON allocation report per SDFG into ``<build_folder>/perf``.

    For every allocation entry that carries an access node, the report maps
    the GUID of that access node to a list of GUIDs: first the states and then
    the nodes covered by the scope the entry is registered under.

    * A state scope covers the state itself and all of its nodes.
    * A ``MapEntry`` scope covers the entry's state plus the map entry and
      every node of that state nested inside it (requires a state).
    * SDFG scopes and other entry nodes currently contribute empty lists.

    Each SDFG gets its own mapping, so a report only contains access nodes
    from entries that name that SDFG. Entries without an access node are
    skipped. The file name contains a digest of the report content, so it is
    stable across runs and an identical report is overwritten rather than
    raising ``FileExistsError``.

    :param to: Maps an allocation scope to its allocation entries. Only the
               first three fields of each entry (SDFG, state, access node) are
               read here.
    """
    import hashlib  # Local import: only needed to name the report files.

    report: dict[SDFG, dict[str, list[str]]] = {}

    for scope, alloc_infos in to.items():
        for alloc_info in alloc_infos:
            sdfg: SDFG = alloc_info[0]
            state: SDFGState | None = alloc_info[1]
            access_node: AccessNode | None = alloc_info[2]

            if access_node is None:
                # Nothing to key the report entry on.
                continue

            nodes_allocated: list[Node] = []
            states_allocated: list[SDFGState] = []

            if isinstance(scope, SDFG):
                # TODO: find example where SDFG is the scope and implement
                pass
            elif isinstance(scope, SDFGState):
                # Highlight the state itself and all of its nodes.
                states_allocated = [scope]
                nodes_allocated = list(scope.nodes())
            elif isinstance(scope, EntryNode):
                if isinstance(scope, MapEntry) and state is not None:
                    scope_dict = state.scope_dict()
                    nodes_allocated = [
                        node for node in state.nodes() if node == scope or inScope(scope_dict, node, scope)
                    ]
                    states_allocated = [state]

            # A separate mapping per SDFG keeps reports from leaking into each other.
            report.setdefault(sdfg, {})[access_node.guid] = (
                [s.guid for s in states_allocated] + [n.guid for n in nodes_allocated])

    for sdfg, entries in report.items():
        out_dir = os.path.join(sdfg.build_folder, "perf")
        os.makedirs(out_dir, exist_ok=True)
        payload = json.dumps(entries, sort_keys=True)
        # hash() of a str is salted per process; a content digest is reproducible.
        digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()[:16]
        report_path = os.path.join(out_dir, f"allocation-report-{digest}.json")
        with open(report_path, "w", encoding="utf-8") as f:
            f.write(payload)
9 changes: 5 additions & 4 deletions dace/codegen/targets/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def copy_expr(
dt = ""

is_global = data_desc.lifetime in (dtypes.AllocationLifetime.Global, dtypes.AllocationLifetime.Persistent,
dtypes.AllocationLifetime.External)
dtypes.AllocationLifetime.External, dtypes.AllocationLifetime.Explicit)
defined_types = None
# Non-free symbol dependent Arrays due to their shape
dependent_shape = (isinstance(data_desc, data.Array) and not isinstance(data_desc, data.View) and any(
Expand Down Expand Up @@ -247,9 +247,10 @@ def ptr(name: str, desc: data.Data, sdfg: SDFG = None, framecode: 'DaCeCodeGener
if root in sdfg.arrays and isinstance(sdfg.arrays[root], data.Structure):
name = name.replace('.', '->')

# Special case: If memory is persistent and defined in this SDFG, add state
# struct to name
if (desc.transient and desc.lifetime in (dtypes.AllocationLifetime.Persistent, dtypes.AllocationLifetime.External)):
# Special case: If memory is persistent/external/explicit and defined in this
# SDFG, add state struct to name (the pointer lives in the state struct).
if (desc.transient and desc.lifetime in (dtypes.AllocationLifetime.Persistent, dtypes.AllocationLifetime.External,
dtypes.AllocationLifetime.Explicit)):

if desc.storage == dtypes.StorageType.CPU_ThreadLocal: # Use unambiguous name for thread-local arrays
return f'__{sdfg.cfg_id}_{name}'
Expand Down
23 changes: 23 additions & 0 deletions dace/codegen/targets/framecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from dace.sdfg.state import ControlFlowBlock, ControlFlowRegion, LoopRegion
from dace.transformation.passes.analysis import StateReachability, loop_analysis

from dace.codegen.instrumentation.allocation import create_allocation_report


def _get_or_eval_sdfg_first_arg(func, sdfg):
if callable(func):
Expand Down Expand Up @@ -636,6 +638,26 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG):
self.to_allocate[top_sdfg].append((sdfg, first_state_instance, first_node_instance, True, True, True))
self.where_allocated[(sdfg, name)] = top_sdfg
continue
elif top_lifetime is dtypes.AllocationLifetime.Explicit:
# Explicit lifetime: the pointer is declared in the state struct,
# but allocation/deallocation is handled via alloc/free on interstate edges.
# We register the variable in defined_vars (so references compile) but
# emit no auto new[]/delete[].

definition = desc.as_arg(name=f'__{sdfg.cfg_id}_{name}') + ';'

if top_storage != dtypes.StorageType.CPU_ThreadLocal:
self.statestruct.append(definition)

alloc_node = first_node_instance if first_node_instance is not None else nodes.AccessNode(name)
alloc_state = first_state_instance

# allocate=True so defined_vars is populated; dispatcher discards streams
# (same as External). deallocate=False so no auto-deallocation is attempted.
self.to_allocate[top_sdfg].append(
(sdfg, alloc_state, alloc_node, True, True, False))
self.where_allocated[(sdfg, name)] = top_sdfg
continue
elif top_lifetime is dtypes.AllocationLifetime.Global:
# Global memory is allocated in the beginning of the program
# exists in the library state structure (to be passed along
Expand Down Expand Up @@ -820,6 +842,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG):
self.where_allocated[(sdfg, name)] = curscope
else:
self.where_allocated[(sdfg, name)] = cursdfg
create_allocation_report(self.to_allocate)

def allocate_arrays_in_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, scope: Union[nodes.EntryNode, SDFGState,
SDFG],
Expand Down
1 change: 1 addition & 0 deletions dace/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class AllocationLifetime(Enum):
Global = auto() #: Allocated throughout the entire program (outer SDFG)
Persistent = auto() #: Allocated throughout multiple invocations (init/exit)
External = auto() #: Allocated and managed outside the generated code
Explicit = auto() #: Allocated/Deallocated via explicit alloc/free on interstate edges


@undefined_safe_enum
Expand Down
4 changes: 4 additions & 0 deletions dace/libraries/allocation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
from .make_explicit import make_explicit
from .hoist import hoist_alloc_out_of_loop
from .reuse import _apply_reuse, buffer_reuse_same_pass, buffer_reuse_same_pass_ua, buffer_reuse_cross_pass
Loading