From ef4a1f9ca6435bd822775e7759292485747b3a76 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 9 Mar 2026 15:51:14 +0100 Subject: [PATCH 01/13] Enforce deterministic code generation through alphabetical ordering of the underlying graph --- dace/sdfg/sdfg.py | 37 ++++++++++++++++++++++++++++++ dace/sdfg/utils.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 3ea7d63c29..0b2e7427e6 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2508,6 +2508,10 @@ def compile(self, output_file=None, validate=True, return_program_handle=True) - # Fill in scope entry/exit connectors sdfg.fill_scope_connectors() + # Canonicalize the SDFG to ensure that code generation is deterministic + # and does not depend on the order of states or edges in the SDFG. + sdfg.canonicalize() + # Generate code for the program by traversing the SDFG state by state program_objects = codegen.generate_code(sdfg, validate=validate) except Exception: @@ -3006,3 +3010,36 @@ def recheck_using_explicit_control_flow(self) -> bool: break self.root_sdfg.using_explicit_control_flow = found_explicit_cf_block return found_explicit_cf_block + + def canonicalize(self, visited=None): + """ + Forces all internal dictionaries and graph structures into a deterministic, + lexicographical order to guarantee stable code generation. + """ + if visited is None: + visited = set() + + if id(self) in visited: + return + visited.add(id(self)) + + from dace.sdfg.utils import canonicalize_graph_dicts + + # 1. Sort Arrays, Symbols, and Constants in-place + for attr in ['_arrays', 'symbols', 'constants_prop']: + if hasattr(self, attr): + val = getattr(self, attr) + if val and hasattr(val, 'keys') and hasattr(val, 'pop'): + for k in sorted(list(val.keys())): + val[k] = val.pop(k) + + # 2. Canonicalize the top-level graph + canonicalize_graph_dicts(self) + + # 3. Recursively canonicalize all states and nested SDFGs + for state in self.nodes(): + canonicalize_graph_dicts(state) + + for node in state.nodes(): + if hasattr(node, 'sdfg') and node.sdfg is not None: + node.sdfg.canonicalize(visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 035be3fdc1..8bbaf9d865 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -2754,3 +2754,60 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): if expanded_something: states.append(state) + + +def get_deterministic_node_key(node): + """Generates a stable string key for Graph nodes to break topological tie-breaks.""" + node_type = type(node).__name__ + identifier = getattr(node, 'label', getattr(node, 'name', getattr(node, 'data', str(node)))) + return f"{node_type}_{identifier}" + + +def get_deterministic_edge_key(edge): + """Generates a stable string key for Graph edges, including Memlet data for Multi-Edges.""" + src_conn = getattr(edge, 'src_conn', '') + dst_conn = getattr(edge, 'dst_conn', '') + # Include stringified Memlet data to differentiate parallel edges + data_str = str(getattr(edge, 'data', '')) + return f"{get_deterministic_node_key(edge.src)}:{src_conn}->{get_deterministic_node_key(edge.dst)}:{dst_conn}_{data_str}" + + +def canonicalize_graph_dicts(graph): + """Sorts internal nodes, edge dictionaries, and NetworkX graphs in-place.""" + + # 1. Sort Node dictionary in-place + if hasattr(graph, '_nodes'): + for k in sorted(list(graph._nodes.keys()), key=get_deterministic_node_key): + graph._nodes[k] = graph._nodes.pop(k) + + # Sort the nested edge dictionaries inside _nodes in-place + for node, (in_edges, out_edges) in graph._nodes.items(): + for e_key in sorted(list(in_edges.keys()), key=lambda k: get_deterministic_edge_key(in_edges[k])): + in_edges[e_key] = in_edges.pop(e_key) + for e_key in sorted(list(out_edges.keys()), key=lambda k: get_deterministic_edge_key(out_edges[k])): + out_edges[e_key] = out_edges.pop(e_key) + + # 2. Sort master Edge dictionary in-place + if hasattr(graph, '_edges'): + for e_key in sorted(list(graph._edges.keys()), key=lambda k: get_deterministic_edge_key(graph._edges[k])): + graph._edges[e_key] = graph._edges.pop(e_key) + + # 3. Rebuild the NetworkX graph to ensure downstream utilities are also deterministic + if hasattr(graph, '_nx'): + old_nx = graph._nx + graph._nx = type(old_nx)() + + for n in graph._nodes.keys(): + graph._nx.add_node(n, **old_nx.nodes.get(n, {})) + + for e_obj in graph._edges.values(): + edge_attrs = {'data': e_obj.data} + + if hasattr(e_obj, 'src_conn'): + edge_attrs['src_conn'] = e_obj.src_conn + edge_attrs['dst_conn'] = e_obj.dst_conn + + if hasattr(e_obj, 'key'): + graph._nx.add_edge(e_obj.src, e_obj.dst, key=e_obj.key, **edge_attrs) + else: + graph._nx.add_edge(e_obj.src, e_obj.dst, **edge_attrs) From dd86a26184ba743ca0fd68b8cd1cf539681193e7 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 10 Mar 2026 14:56:57 +0100 Subject: [PATCH 02/13] Sort SDFG Alphabetically: Renaming, docstrings --- dace/codegen/codegen.py | 1 + dace/sdfg/sdfg.py | 27 ++++++++++++------------ dace/sdfg/utils.py | 46 +++++++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 29 deletions(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index fc6791599f..acc7e36f14 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -246,6 +246,7 @@ def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]: } # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS + sdfg.sort_sdfg_alphabetically() # Generate frame code (and the rest of the code) (global_code, frame_code, used_targets, used_environments) = frame.generate_code(sdfg, None) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 0b2e7427e6..ee4677554f 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2508,10 +2508,6 @@ def compile(self, output_file=None, validate=True, return_program_handle=True) - # Fill in scope entry/exit connectors sdfg.fill_scope_connectors() - # Canonicalize the SDFG to ensure that code generation is deterministic - # and does not depend on the order of states or edges in the SDFG. - sdfg.canonicalize() - # Generate code for the program by traversing the SDFG state by state program_objects = codegen.generate_code(sdfg, validate=validate) except Exception: @@ -3011,10 +3007,17 @@ def recheck_using_explicit_control_flow(self) -> bool: self.root_sdfg.using_explicit_control_flow = found_explicit_cf_block return found_explicit_cf_block - def canonicalize(self, visited=None): + def sort_sdfg_alphabetically(self, visited=None): """ Forces all internal dictionaries and graph structures into a deterministic, lexicographical order to guarantee stable code generation. + + This method operates in-place and recursively processes all internal + dataflow states and nested SDFGs. + + :param visited: A set of memory addresses (IDs) of already processed SDFGs. + Used internally to prevent infinite recursion in the event + of cyclic nested SDFG references. """ if visited is None: visited = set() @@ -3023,9 +3026,9 @@ def canonicalize(self, visited=None): return visited.add(id(self)) - from dace.sdfg.utils import canonicalize_graph_dicts - - # 1. Sort Arrays, Symbols, and Constants in-place + # Avoid import loops + from dace.sdfg.utils import sort_graph_dicts_alphabetically + for attr in ['_arrays', 'symbols', 'constants_prop']: if hasattr(self, attr): val = getattr(self, attr) @@ -3033,13 +3036,11 @@ def canonicalize(self, visited=None): for k in sorted(list(val.keys())): val[k] = val.pop(k) - # 2. Canonicalize the top-level graph - canonicalize_graph_dicts(self) + sort_graph_dicts_alphabetically(self) - # 3. Recursively canonicalize all states and nested SDFGs for state in self.nodes(): - canonicalize_graph_dicts(state) + sort_graph_dicts_alphabetically(state) for node in state.nodes(): if hasattr(node, 'sdfg') and node.sdfg is not None: - node.sdfg.canonicalize(visited) + node.sdfg.sort_sdfg_alphabetically(visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 8bbaf9d865..9566bfa45d 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -2757,25 +2757,41 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): def get_deterministic_node_key(node): - """Generates a stable string key for Graph nodes to break topological tie-breaks.""" + """ + Generates a stable string key for Graph nodes to ensure deterministic sorting. + + :param node: The DaCe graph node object to be evaluated. + :return: A stable string representation of the node. + """ node_type = type(node).__name__ identifier = getattr(node, 'label', getattr(node, 'name', getattr(node, 'data', str(node)))) return f"{node_type}_{identifier}" def get_deterministic_edge_key(edge): - """Generates a stable string key for Graph edges, including Memlet data for Multi-Edges.""" - src_conn = getattr(edge, 'src_conn', '') - dst_conn = getattr(edge, 'dst_conn', '') - # Include stringified Memlet data to differentiate parallel edges - data_str = str(getattr(edge, 'data', '')) - return f"{get_deterministic_node_key(edge.src)}:{src_conn}->{get_deterministic_node_key(edge.dst)}:{dst_conn}_{data_str}" - - -def canonicalize_graph_dicts(graph): - """Sorts internal nodes, edge dictionaries, and NetworkX graphs in-place.""" - - # 1. Sort Node dictionary in-place + """ + Generates a stable string key for Graph edges to ensure deterministic sorting. + + :param edge: The DaCe graph edge object (or InterstateEdge) to be evaluated. + :return: A stable string representation of the edge. + """ + return str(edge) + + +def sort_graph_dicts_alphabetically(graph): + """ + Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place. + + This function performs three critical phases: + 1. Alphabetizes the master `_nodes` dictionary and its nested adjacency lists. + 2. Alphabetizes the master `_edges` dictionary. + 3. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) + using the newly sorted nodes and edges. + + :param graph: The DaCe graph structure (SDFG, SDFGState, or generic Graph) + whose internal structures need to be stabilized. + """ + if hasattr(graph, '_nodes'): for k in sorted(list(graph._nodes.keys()), key=get_deterministic_node_key): graph._nodes[k] = graph._nodes.pop(k) @@ -2786,13 +2802,11 @@ def canonicalize_graph_dicts(graph): in_edges[e_key] = in_edges.pop(e_key) for e_key in sorted(list(out_edges.keys()), key=lambda k: get_deterministic_edge_key(out_edges[k])): out_edges[e_key] = out_edges.pop(e_key) - - # 2. Sort master Edge dictionary in-place + if hasattr(graph, '_edges'): for e_key in sorted(list(graph._edges.keys()), key=lambda k: get_deterministic_edge_key(graph._edges[k])): graph._edges[e_key] = graph._edges.pop(e_key) - # 3. Rebuild the NetworkX graph to ensure downstream utilities are also deterministic if hasattr(graph, '_nx'): old_nx = graph._nx graph._nx = type(old_nx)() From cd408634a98d1d29b518d9d2353d46be1cf4e0b2 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 11 Mar 2026 09:01:32 +0100 Subject: [PATCH 03/13] Sort SDFG Alphabetically for deterministic code generation --- dace/sdfg/sdfg.py | 15 ++++---- dace/sdfg/utils.py | 39 +++++++++++++++----- tests/sdfg/deterministic_sort_test.py | 53 +++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 tests/sdfg/deterministic_sort_test.py diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index ee4677554f..4014cb3ef7 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3007,21 +3007,22 @@ def recheck_using_explicit_control_flow(self) -> bool: self.root_sdfg.using_explicit_control_flow = found_explicit_cf_block return found_explicit_cf_block - def sort_sdfg_alphabetically(self, visited=None): + def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: """ - Forces all internal dictionaries and graph structures into a deterministic, + Forces all internal dictionaries and graph structures into a deterministic, lexicographical order to guarantee stable code generation. - This method operates in-place and recursively processes all internal + This method operates in-place and recursively processes all internal dataflow states and nested SDFGs. - :param visited: A set of memory addresses (IDs) of already processed SDFGs. - Used internally to prevent infinite recursion in the event + + :param visited: A set of memory addresses (IDs) of already processed SDFGs. + Used internally to prevent infinite recursion in the event of cyclic nested SDFG references. """ if visited is None: visited = set() - + if id(self) in visited: return visited.add(id(self)) @@ -3040,7 +3041,7 @@ def sort_sdfg_alphabetically(self, visited=None): for state in self.nodes(): sort_graph_dicts_alphabetically(state) - + for node in state.nodes(): if hasattr(node, 'sdfg') and node.sdfg is not None: node.sdfg.sort_sdfg_alphabetically(visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 9566bfa45d..952dd7588a 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -7,6 +7,7 @@ import warnings import networkx as nx import time +import re import dace.sdfg.nodes from dace.codegen import compiled_sdfg as csdfg @@ -2756,46 +2757,64 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): states.append(state) -def get_deterministic_node_key(node): +def get_deterministic_node_key(node: Any) -> str: """ Generates a stable string key for Graph nodes to ensure deterministic sorting. + Strips memory addresses, sequential IDs, partial hashes, and UUIDs. + :param node: The DaCe graph node object to be evaluated. :return: A stable string representation of the node. """ node_type = type(node).__name__ - identifier = getattr(node, 'label', getattr(node, 'name', getattr(node, 'data', str(node)))) + raw_identifier = getattr(node, 'label', getattr(node, 'name', getattr(node, 'data', str(node)))) + + # 1. Strip memory addresses (e.g., <... object at 0x...>) + identifier = re.sub(r' at 0x[0-9a-fA-F]+', '', raw_identifier) + + # 2. Strip full UUIDs (supports hyphens, underscores, or flat 32-char hex) + # Catches: 550e8400-e29b-41d4-a716-446655440000, 550e8400_e29b... or 550e8400e29b... + identifier = re.sub( + r'_?[0-9a-fA-F]{8}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{12}', '', + identifier) + return f"{node_type}_{identifier}" -def get_deterministic_edge_key(edge): +def get_deterministic_edge_key(edge: Any) -> str: """ Generates a stable string key for Graph edges to ensure deterministic sorting. + :param edge: The DaCe graph edge object (or InterstateEdge) to be evaluated. :return: A stable string representation of the edge. """ - return str(edge) + src_conn = getattr(edge, 'src_conn', '') + dst_conn = getattr(edge, 'dst_conn', '') + data_str = str(getattr(edge, 'data', '')) + return f"{get_deterministic_node_key(edge.src)}:{src_conn}->{get_deterministic_node_key(edge.dst)}:{dst_conn}_{data_str}" -def sort_graph_dicts_alphabetically(graph): + +def sort_graph_dicts_alphabetically(graph: Any) -> None: """ Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place. This function performs three critical phases: 1. Alphabetizes the master `_nodes` dictionary and its nested adjacency lists. 2. Alphabetizes the master `_edges` dictionary. - 3. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) + 3. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) using the newly sorted nodes and edges. - :param graph: The DaCe graph structure (SDFG, SDFGState, or generic Graph) + + :param graph: The DaCe graph structure (SDFG, SDFGState, or generic Graph) whose internal structures need to be stabilized. """ if hasattr(graph, '_nodes'): for k in sorted(list(graph._nodes.keys()), key=get_deterministic_node_key): graph._nodes[k] = graph._nodes.pop(k) - + # Sort the nested edge dictionaries inside _nodes in-place for node, (in_edges, out_edges) in graph._nodes.items(): for e_key in sorted(list(in_edges.keys()), key=lambda k: get_deterministic_edge_key(in_edges[k])): @@ -2810,10 +2829,10 @@ def sort_graph_dicts_alphabetically(graph): if hasattr(graph, '_nx'): old_nx = graph._nx graph._nx = type(old_nx)() - + for n in graph._nodes.keys(): graph._nx.add_node(n, **old_nx.nodes.get(n, {})) - + for e_obj in graph._edges.values(): edge_attrs = {'data': e_obj.data} diff --git a/tests/sdfg/deterministic_sort_test.py b/tests/sdfg/deterministic_sort_test.py new file mode 100644 index 0000000000..4e7e398427 --- /dev/null +++ b/tests/sdfg/deterministic_sort_test.py @@ -0,0 +1,53 @@ +import dace +import random +from dace.sdfg.utils import get_deterministic_node_key, get_deterministic_edge_key + + +def test_sdfg_alphabetical_sorting(): + """ + Tests that the SDFG and its internal states can be forced into a strictly + deterministic topological order, regardless of dictionary insertion history. + """ + # 1. Create a simple SDFG + sdfg = dace.SDFG('deterministic_test') + sdfg.add_array('A', [10], dace.float64) + sdfg.add_array('B', [10], dace.float64) + + state = sdfg.add_state('state0') + a = state.add_read('A') + b = state.add_write('B') + tasklet = state.add_tasklet('compute', {'a'}, {'b'}, 'b = a + 1') + + state.add_edge(a, None, tasklet, 'a', dace.Memlet.from_array('A', sdfg.arrays['A'])) + state.add_edge(tasklet, 'b', b, None, dace.Memlet.from_array('B', sdfg.arrays['B'])) + + # 2. Intentionally scramble the internal dictionaries to simulate non-determinism + # Scramble top-level arrays + array_items = list(sdfg._arrays.items()) + random.shuffle(array_items) + + # Scramble state nodes + node_items = list(state._nodes.items()) + random.shuffle(node_items) + state._nodes = dict(node_items) + + # 3. Apply the canonicalizer + sdfg.sort_sdfg_alphabetically() + + # 4. Assert that the underlying dictionaries are now strictly ordered + node_keys = list(state._nodes.keys()) + expected_node_keys = sorted(node_keys, key=get_deterministic_node_key) + + edge_keys = list(state._edges.keys()) + expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state._edges[k])) + + assert node_keys == expected_node_keys, "Graph nodes were not deterministically sorted!" + assert edge_keys == expected_edge_keys, "Graph edges were not deterministically sorted!" + + # Ensure networkx backend was also rebuilt deterministically + nx_nodes = list(state._nx.nodes()) + assert nx_nodes == expected_node_keys, "NetworkX nodes do not match DaCe dict order!" + + +if __name__ == "__main__": + test_sdfg_alphabetical_sorting() From c5434653bc3d204cea3ad6ef33295b2e5186da7f Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 12 Mar 2026 09:15:17 +0100 Subject: [PATCH 04/13] Sort SDFG Alphabetically for deterministic code generation: more stable node/edge key generation for deterministic sorting --- dace/sdfg/sdfg.py | 23 +++- dace/sdfg/utils.py | 155 ++++++++++++++++++++------ tests/sdfg/deterministic_sort_test.py | 27 ++++- 3 files changed, 161 insertions(+), 44 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 4014cb3ef7..96e396153a 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3009,12 +3009,20 @@ def recheck_using_explicit_control_flow(self) -> bool: def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: """ - Forces all internal dictionaries and graph structures into a deterministic, - lexicographical order to guarantee stable code generation. + Forces all internal dictionaries, graph structures, and metadata registries + into a deterministic, semantically-aware order to guarantee stable code generation. - This method operates in-place and recursively processes all internal - dataflow states and nested SDFGs. + In DaCe, the code generators rely heavily on the iteration order + of internal dictionaries. This method performs a deep, in-place stabilization + of the entire SDFG hierarchy to eliminate stochastic compilation jitter + caused by memory addresses or volatile UUIDs. + The stabilization process executes in four phases: + 1. Global Metadata: Alphabetizes arrays, symbols, and constants. + 2. State Machine: Sorts the top-level SDFG (States and Interstate Edges) + using semantic topological keys. + 3. Dataflow: Sorts the internal nodes and memlet edges within every State. + 4. Recursion: Recursively applies this stabilization to all Nested SDFGs. :param visited: A set of memory addresses (IDs) of already processed SDFGs. Used internally to prevent infinite recursion in the event @@ -3023,6 +3031,7 @@ def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: if visited is None: visited = set() + # Cycle prevention for recursive nested SDFGs if id(self) in visited: return visited.add(id(self)) @@ -3030,18 +3039,24 @@ def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: # Avoid import loops from dace.sdfg.utils import sort_graph_dicts_alphabetically + # 1. Stabilize Global Metadata (Arrays, Symbols, Constants) for attr in ['_arrays', 'symbols', 'constants_prop']: if hasattr(self, attr): val = getattr(self, attr) + # Ensure the attribute exists, is dict-like, and supports pop if val and hasattr(val, 'keys') and hasattr(val, 'pop'): + # Cast keys to a list to safely iterate while mutating the dict for k in sorted(list(val.keys())): val[k] = val.pop(k) + # 2. Stabilize the top-level State Machine (States and Interstate edges) sort_graph_dicts_alphabetically(self) + # 3. Stabilize the Dataflow inside each State for state in self.nodes(): sort_graph_dicts_alphabetically(state) + # 4. Recurse into Nested SDFGs for node in state.nodes(): if hasattr(node, 'sdfg') and node.sdfg is not None: node.sdfg.sort_sdfg_alphabetically(visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 952dd7588a..e0a50801fd 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -8,6 +8,7 @@ import networkx as nx import time import re +import hashlib import dace.sdfg.nodes from dace.codegen import compiled_sdfg as csdfg @@ -2757,82 +2758,168 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): states.append(state) +# Pre-compile the regex for performance since it will be called thousands of times +# Strips: +# 1. Memory addresses (0x...) +# 2. Full UUIDs (with hyphens, underscores, or flat hex) +# 3. Partial UUIDs / Short hashes appended at the end (e.g., _a1b2c3d4) +VOLATILE_STR_REGEX = re.compile( + r'(0x[0-9a-fA-F]+|_?[0-9a-fA-F]{8}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{12}|_[0-9a-fA-F]{8}$)' +) + + def get_deterministic_node_key(node: Any) -> str: """ - Generates a stable string key for Graph nodes to ensure deterministic sorting. - Strips memory addresses, sequential IDs, partial hashes, and UUIDs. + Generates a highly stable, deterministic string key for DaCe graph nodes + based on their semantic properties rather than memory locations. + During SDFG compilation, relying on memory addresses or volatile UUIDs + for sorting leads to non-deterministic code generation. This function + extracts the intrinsic semantic identity of a node to ensure structural + collisions are resolved deterministically. - :param node: The DaCe graph node object to be evaluated. - :return: A stable string representation of the node. + The generated key incorporates: + - Node type and cleaned label (stripping UUIDs and hex addresses) + - Graph topology (in-degree and out-degree) + - Interface semantics (in/out connectors) + - Loop and memory semantics (map parameters, schedules, access types) + - Internal code logic (via a stable MD5 hash of Tasklet code) + - Nested SDFG structural size + + :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) + to be evaluated. + :return: A stable string representing the node's semantic identity. """ node_type = type(node).__name__ - raw_identifier = getattr(node, 'label', getattr(node, 'name', getattr(node, 'data', str(node)))) - # 1. Strip memory addresses (e.g., <... object at 0x...>) - identifier = re.sub(r' at 0x[0-9a-fA-F]+', '', raw_identifier) + # Extract core identifier + raw_label = getattr(node, 'data', getattr(node, 'label', str(node))) + + # Strip volatile metadata + clean_label = VOLATILE_STR_REGEX.sub('', str(raw_label)) + + parts = [node_type, clean_label] + + # 1. Topological Context + in_degree = getattr(node, 'in_degree', 0) + out_degree = getattr(node, 'out_degree', 0) + parts.append(f"i{in_degree}o{out_degree}") + + # 2. Interface Semantics (Connectors for Tasklets / NestedSDFGs) + if hasattr(node, 'in_connectors') and node.in_connectors: + parts.append("inC:" + "-".join(sorted(node.in_connectors.keys()))) + if hasattr(node, 'out_connectors') and node.out_connectors: + parts.append("outC:" + "-".join(sorted(node.out_connectors.keys()))) - # 2. Strip full UUIDs (supports hyphens, underscores, or flat 32-char hex) - # Catches: 550e8400-e29b-41d4-a716-446655440000, 550e8400_e29b... or 550e8400e29b... - identifier = re.sub( - r'_?[0-9a-fA-F]{8}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{12}', '', - identifier) + # 3. Loop Semantics (Map Parameters & Schedules) + if hasattr(node, 'map') and hasattr(node.map, 'params'): + parts.append("map:" + "-".join(node.map.params)) + if hasattr(node.map, 'schedule'): + parts.append(f"sch:{str(node.map.schedule)}") - return f"{node_type}_{identifier}" + # 4. Memory Semantics (Access Types for AccessNodes) + if hasattr(node, 'access'): + parts.append(f"acc:{str(node.access)}") + + # 5. Internal Code Semantics (Tasklets) + if hasattr(node, 'code') and hasattr(node.code, 'as_string'): + # Hash the code to prevent massive strings while guaranteeing uniqueness. + # MD5 is used because Python's built-in hash() is non-deterministic across runs. + code_str = str(node.code.as_string).strip() + code_hash = hashlib.md5(code_str.encode('utf-8')).hexdigest()[:8] + parts.append(f"code:{code_hash}") + + # 6. Nested SDFG Differentiation + if hasattr(node, 'sdfg') and node.sdfg: + parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") + + return "_".join(parts) def get_deterministic_edge_key(edge: Any) -> str: """ - Generates a stable string key for Graph edges to ensure deterministic sorting. + Generates a highly stable string key for graph edges to ensure + deterministic sorting. + This function extracts the semantic connection points (connectors) + and the data payload (Memlets or Interstate conditions), explicitly + stripping volatile memory addresses and generated UUIDs to prevent + non-deterministic compiler graph traversals. - :param edge: The DaCe graph edge object (or InterstateEdge) to be evaluated. - :return: A stable string representation of the edge. + :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. + :return: A stable string representation of the edge's routing and payload. """ - src_conn = getattr(edge, 'src_conn', '') - dst_conn = getattr(edge, 'dst_conn', '') - data_str = str(getattr(edge, 'data', '')) + # 1. Extract raw strings + raw_src_conn = str(getattr(edge, 'src_conn', '')) + raw_dst_conn = str(getattr(edge, 'dst_conn', '')) + raw_data_str = str(getattr(edge, 'data', '')) + + # 2. Strip volatile hashes/addresses from connectors and payload + clean_src_conn = VOLATILE_STR_REGEX.sub('', raw_src_conn) + clean_dst_conn = VOLATILE_STR_REGEX.sub('', raw_dst_conn) + clean_data_str = VOLATILE_STR_REGEX.sub('', raw_data_str) - return f"{get_deterministic_node_key(edge.src)}:{src_conn}->{get_deterministic_node_key(edge.dst)}:{dst_conn}_{data_str}" + # 3. Retrieve the stabilized keys for the source and destination nodes + src_key = get_deterministic_node_key(edge.src) + dst_key = get_deterministic_node_key(edge.dst) + + return f"{src_key}[{clean_src_conn}]->{dst_key}[{clean_dst_conn}]({clean_data_str})" def sort_graph_dicts_alphabetically(graph: Any) -> None: """ - Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place. + Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place + using semantically-aware deterministic keys. - This function performs three critical phases: - 1. Alphabetizes the master `_nodes` dictionary and its nested adjacency lists. - 2. Alphabetizes the master `_edges` dictionary. - 3. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) - using the newly sorted nodes and edges. + In DaCe, the order in which code is generated heavily depends on + the iteration order of the graph's internal dictionaries. This function performs + a deep, in-place sort of these structures to guarantee that graph traversal + and code generation are perfectly deterministic across different executions. + The stabilization process occurs in four phases: + 1. Alphabetizes the master `_nodes` dictionary based on semantic node keys. + 2. Alphabetizes the nested adjacency lists (`in_edges`, `out_edges`) for + every node based on semantic edge keys. + 3. Alphabetizes the master `_edges` dictionary. + 4. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) + so its internal node/edge registries match the newly stabilized order. - :param graph: The DaCe graph structure (SDFG, SDFGState, or generic Graph) - whose internal structures need to be stabilized. + :param graph: The DaCe graph structure (e.g., SDFGState or generic Graph) + whose internal dictionaries need to be stabilized. """ - + # 1. Sort the master Nodes dictionary if hasattr(graph, '_nodes'): - for k in sorted(list(graph._nodes.keys()), key=get_deterministic_node_key): + sorted_nodes = sorted(graph._nodes.keys(), key=get_deterministic_node_key) + for k in sorted_nodes: + # Pop and re-insert to enforce deterministic insertion order graph._nodes[k] = graph._nodes.pop(k) - # Sort the nested edge dictionaries inside _nodes in-place + # 2. Sort the nested adjacency lists (In/Out Edges) within each node for node, (in_edges, out_edges) in graph._nodes.items(): - for e_key in sorted(list(in_edges.keys()), key=lambda k: get_deterministic_edge_key(in_edges[k])): + sorted_in = sorted(in_edges.keys(), key=lambda k: get_deterministic_edge_key(in_edges[k])) + for e_key in sorted_in: in_edges[e_key] = in_edges.pop(e_key) - for e_key in sorted(list(out_edges.keys()), key=lambda k: get_deterministic_edge_key(out_edges[k])): + + sorted_out = sorted(out_edges.keys(), key=lambda k: get_deterministic_edge_key(out_edges[k])) + for e_key in sorted_out: out_edges[e_key] = out_edges.pop(e_key) + # 3. Sort the master Edges dictionary if hasattr(graph, '_edges'): - for e_key in sorted(list(graph._edges.keys()), key=lambda k: get_deterministic_edge_key(graph._edges[k])): + sorted_edge_keys = sorted(graph._edges.keys(), key=lambda k: get_deterministic_edge_key(graph._edges[k])) + for e_key in sorted_edge_keys: graph._edges[e_key] = graph._edges.pop(e_key) + # 4. Rebuild the NetworkX backend to match the new deterministic order if hasattr(graph, '_nx'): old_nx = graph._nx graph._nx = type(old_nx)() + # Add nodes sequentially for n in graph._nodes.keys(): graph._nx.add_node(n, **old_nx.nodes.get(n, {})) + # Add edges sequentially using the newly sorted master edge list for e_obj in graph._edges.values(): edge_attrs = {'data': e_obj.data} diff --git a/tests/sdfg/deterministic_sort_test.py b/tests/sdfg/deterministic_sort_test.py index 4e7e398427..5a40019914 100644 --- a/tests/sdfg/deterministic_sort_test.py +++ b/tests/sdfg/deterministic_sort_test.py @@ -22,14 +22,29 @@ def test_sdfg_alphabetical_sorting(): state.add_edge(tasklet, 'b', b, None, dace.Memlet.from_array('B', sdfg.arrays['B'])) # 2. Intentionally scramble the internal dictionaries to simulate non-determinism - # Scramble top-level arrays - array_items = list(sdfg._arrays.items()) - random.shuffle(array_items) + + def scramble_dict_in_place(d): + """Helper to randomize dictionary insertion order without changing its type.""" + if not d: return + keys = list(d.keys()) + random.shuffle(keys) + for k in keys: + # Pop and re-insert to shuffle the underlying Python 3.7+ insertion order + d[k] = d.pop(k) + + # Scramble top-level arrays (Safely mutating the NestedDict in-place) + scramble_dict_in_place(sdfg._arrays) # Scramble state nodes - node_items = list(state._nodes.items()) - random.shuffle(node_items) - state._nodes = dict(node_items) + scramble_dict_in_place(state._nodes) + + # Scramble master edges + scramble_dict_in_place(state._edges) + + # Scramble nested adjacency lists (in_edges / out_edges) + for node, (in_edges, out_edges) in state._nodes.items(): + scramble_dict_in_place(in_edges) + scramble_dict_in_place(out_edges) # 3. Apply the canonicalizer sdfg.sort_sdfg_alphabetically() From 161a0d332f514e5603a59151f78b0ab4194cf907 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 12 Mar 2026 17:54:41 +0100 Subject: [PATCH 05/13] Sort SDFG Alphabetically for deterministic code generation --- dace/codegen/codegen.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index acc7e36f14..cc2dddfc71 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -245,9 +245,10 @@ def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]: for k, v in frame._dispatcher.instrumentation.items() } - # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS sdfg.sort_sdfg_alphabetically() + # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS + # Generate frame code (and the rest of the code) (global_code, frame_code, used_targets, used_environments) = frame.generate_code(sdfg, None) target_objects = [ From 73737cf86fb2558b65877a11956e3336b71a6408 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Fri, 13 Mar 2026 15:14:03 +0100 Subject: [PATCH 06/13] Sort SDFG Alphabetically for deterministic code generation: Optimizations --- dace/codegen/codegen.py | 2 -- dace/sdfg/sdfg.py | 20 +++++++++--- dace/sdfg/utils.py | 32 +++++++++++++++---- .../transformation/passes/pattern_matching.py | 3 ++ 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index cc2dddfc71..fc6791599f 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -245,8 +245,6 @@ def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]: for k, v in frame._dispatcher.instrumentation.items() } - sdfg.sort_sdfg_alphabetically() - # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS # Generate frame code (and the rest of the code) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 96e396153a..fd2f7c300d 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3007,7 +3007,7 @@ def recheck_using_explicit_control_flow(self) -> bool: self.root_sdfg.using_explicit_control_flow = found_explicit_cf_block return found_explicit_cf_block - def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: + def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[Set[int]] = None) -> None: """ Forces all internal dictionaries, graph structures, and metadata registries into a deterministic, semantically-aware order to guarantee stable code generation. @@ -3024,6 +3024,10 @@ def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: 3. Dataflow: Sorts the internal nodes and memlet edges within every State. 4. Recursion: Recursively applies this stabilization to all Nested SDFGs. + :param rebuild_nx: If True, rebuilds the internal NetworkX graph in each + sorted graph. Default is False for performance, since + DaCe's codegen and pattern matching do not rely on the + internal _nx iteration order. :param visited: A set of memory addresses (IDs) of already processed SDFGs. Used internally to prevent infinite recursion in the event of cyclic nested SDFG references. @@ -3037,7 +3041,13 @@ def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: visited.add(id(self)) # Avoid import loops - from dace.sdfg.utils import sort_graph_dicts_alphabetically + from dace.sdfg.utils import sort_graph_dicts_alphabetically, get_deterministic_node_key + + # Invalidate the node key cache before each full sort pass to prevent + # stale keys from a previous SDFG state being reused after transformations + # have modified the graph structure. + if hasattr(get_deterministic_node_key, '__defaults__'): + get_deterministic_node_key.__defaults__[0].clear() # 1. Stabilize Global Metadata (Arrays, Symbols, Constants) for attr in ['_arrays', 'symbols', 'constants_prop']: @@ -3050,13 +3060,13 @@ def sort_sdfg_alphabetically(self, visited: Optional[Set[int]] = None) -> None: val[k] = val.pop(k) # 2. Stabilize the top-level State Machine (States and Interstate edges) - sort_graph_dicts_alphabetically(self) + sort_graph_dicts_alphabetically(self, rebuild_nx=rebuild_nx) # 3. Stabilize the Dataflow inside each State for state in self.nodes(): - sort_graph_dicts_alphabetically(state) + sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx) # 4. Recurse into Nested SDFGs for node in state.nodes(): if hasattr(node, 'sdfg') and node.sdfg is not None: - node.sdfg.sort_sdfg_alphabetically(visited) + node.sdfg.sort_sdfg_alphabetically(rebuild_nx=rebuild_nx, visited=visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index e0a50801fd..b828793f4b 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -2768,7 +2768,7 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): ) -def get_deterministic_node_key(node: Any) -> str: +def get_deterministic_node_key(node: Any, _cache: Dict[int, str] = {}) -> str: """ Generates a highly stable, deterministic string key for DaCe graph nodes based on their semantic properties rather than memory locations. @@ -2786,10 +2786,20 @@ def get_deterministic_node_key(node: Any) -> str: - Internal code logic (via a stable MD5 hash of Tasklet code) - Nested SDFG structural size + Results are cached per node identity to avoid redundant recomputation + during sort comparisons. The cache is invalidated at the start of each + sort_sdfg_alphabetically() call. + :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) to be evaluated. + :param _cache: Internal mutable default dict used as an identity-based + cache. Cleared between sort passes to avoid stale keys. :return: A stable string representing the node's semantic identity. """ + node_id = id(node) + if node_id in _cache: + return _cache[node_id] + node_type = type(node).__name__ # Extract core identifier @@ -2833,7 +2843,9 @@ def get_deterministic_node_key(node: Any) -> str: if hasattr(node, 'sdfg') and node.sdfg: parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") - return "_".join(parts) + result = "_".join(parts) + _cache[node_id] = result + return result def get_deterministic_edge_key(edge: Any) -> str: @@ -2866,7 +2878,7 @@ def get_deterministic_edge_key(edge: Any) -> str: return f"{src_key}[{clean_src_conn}]->{dst_key}[{clean_dst_conn}]({clean_data_str})" -def sort_graph_dicts_alphabetically(graph: Any) -> None: +def sort_graph_dicts_alphabetically(graph: Any, rebuild_nx: bool = False) -> None: """ Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place using semantically-aware deterministic keys. @@ -2881,11 +2893,17 @@ def sort_graph_dicts_alphabetically(graph: Any) -> None: 2. Alphabetizes the nested adjacency lists (`in_edges`, `out_edges`) for every node based on semantic edge keys. 3. Alphabetizes the master `_edges` dictionary. - 4. Tears down and sequentially rebuilds the underlying NetworkX graph (`_nx`) - so its internal node/edge registries match the newly stabilized order. + 4. (Optional) Tears down and sequentially rebuilds the underlying NetworkX + graph (`_nx`) so its internal node/edge registries match the newly + stabilized order. Skipped by default since pattern matching builds + its own NetworkX digraph via collapse_multigraph_to_nx. :param graph: The DaCe graph structure (e.g., SDFGState or generic Graph) whose internal dictionaries need to be stabilized. + :param rebuild_nx: If True, rebuilds the internal NetworkX graph to match + the new order. Default is False for performance, since + DaCe's codegen and pattern matching do not rely on the + internal _nx iteration order. """ # 1. Sort the master Nodes dictionary if hasattr(graph, '_nodes'): @@ -2910,8 +2928,8 @@ def sort_graph_dicts_alphabetically(graph: Any) -> None: for e_key in sorted_edge_keys: graph._edges[e_key] = graph._edges.pop(e_key) - # 4. Rebuild the NetworkX backend to match the new deterministic order - if hasattr(graph, '_nx'): + # 4. Optionally rebuild the NetworkX backend to match the new deterministic order + if rebuild_nx and hasattr(graph, '_nx'): old_nx = graph._nx graph._nx = type(old_nx)() diff --git a/dace/transformation/passes/pattern_matching.py b/dace/transformation/passes/pattern_matching.py index 9f557527f0..48856df03d 100644 --- a/dace/transformation/passes/pattern_matching.py +++ b/dace/transformation/passes/pattern_matching.py @@ -213,6 +213,9 @@ def _apply_pass(self, sdfg: SDFG, pipeline_results: Dict[str, Any], apply_once: if len(xforms) != len(set(xforms)): raise ValueError('Transformation set must be unique') + # Sort SDFG dictionaries for deterministic pattern matching. + sdfg.sort_sdfg_alphabetically() + if self.order_by_transformation: applied_anything = True while applied_anything: From 7d18a30f789689c325c936f5f402d5ab093296e6 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 16 Mar 2026 09:37:58 +0100 Subject: [PATCH 07/13] Sort SDFG for deterministic code generation: fixes --- dace/codegen/codegen.py | 3 + dace/sdfg/sdfg.py | 34 +-- dace/sdfg/utils.py | 93 ++++---- .../transformation/passes/pattern_matching.py | 3 - tests/sdfg/deterministic_sort_test.py | 211 +++++++++++++++--- 5 files changed, 245 insertions(+), 99 deletions(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index fc6791599f..a74f6459d2 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -245,6 +245,9 @@ def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]: for k, v in frame._dispatcher.instrumentation.items() } + # Sort SDFG dictionaries for deterministic pattern matching. + sdfg.sort_sdfg_alphabetically() + # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS # Generate frame code (and the rest of the code) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index fd2f7c300d..157c7a53ca 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3030,39 +3030,41 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S internal _nx iteration order. :param visited: A set of memory addresses (IDs) of already processed SDFGs. Used internally to prevent infinite recursion in the event - of cyclic nested SDFG references. + of cyclic nested SDFG references. Also serves as the signal + for the top-level entry point: the node key cache is only + cleared when ``visited`` is ``None`` (i.e., on the first call), + not on recursive calls into nested SDFGs where parent keys + are still valid. """ + # Avoid import loops + from dace.sdfg.utils import sort_graph_dicts_alphabetically, _node_key_cache + if visited is None: visited = set() + # Only clear the cache at the top-level entry point, not on + # recursive calls into nested SDFGs where parent keys are + # still valid. + _node_key_cache.clear() # Cycle prevention for recursive nested SDFGs if id(self) in visited: return visited.add(id(self)) - # Avoid import loops - from dace.sdfg.utils import sort_graph_dicts_alphabetically, get_deterministic_node_key - - # Invalidate the node key cache before each full sort pass to prevent - # stale keys from a previous SDFG state being reused after transformations - # have modified the graph structure. - if hasattr(get_deterministic_node_key, '__defaults__'): - get_deterministic_node_key.__defaults__[0].clear() - # 1. Stabilize Global Metadata (Arrays, Symbols, Constants) for attr in ['_arrays', 'symbols', 'constants_prop']: if hasattr(self, attr): val = getattr(self, attr) - # Ensure the attribute exists, is dict-like, and supports pop - if val and hasattr(val, 'keys') and hasattr(val, 'pop'): - # Cast keys to a list to safely iterate while mutating the dict - for k in sorted(list(val.keys())): - val[k] = val.pop(k) + # Ensure the attribute exists, is dict-like, and supports clear/update + if val and hasattr(val, 'keys') and hasattr(val, 'clear'): + sorted_items = sorted(val.items(), key=lambda item: item[0]) + val.clear() + val.update(sorted_items) # 2. Stabilize the top-level State Machine (States and Interstate edges) sort_graph_dicts_alphabetically(self, rebuild_nx=rebuild_nx) - # 3. Stabilize the Dataflow inside each State + # 3. Stabilize the Dataflow inside each State and recurse into Nested SDFGs for state in self.nodes(): sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 0c20e3802b..bb5c81cf80 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -2766,17 +2766,34 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): states.append(state) -# Pre-compile the regex for performance since it will be called thousands of times -# Strips: -# 1. Memory addresses (0x...) -# 2. Full UUIDs (with hyphens, underscores, or flat hex) -# 3. Partial UUIDs / Short hashes appended at the end (e.g., _a1b2c3d4) -VOLATILE_STR_REGEX = re.compile( - r'(0x[0-9a-fA-F]+|_?[0-9a-fA-F]{8}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{4}[-_]?[0-9a-fA-F]{12}|_[0-9a-fA-F]{8}$)' -) +# Module-level cache for node keys, cleared at the start of each +# sort_sdfg_alphabetically() call to prevent stale entries after +# transformations modify the graph structure. Keyed by id(node). +_node_key_cache: Dict[int, str] = {} -def get_deterministic_node_key(node: Any, _cache: Dict[int, str] = {}) -> str: +def _resolve_degree(node: Any, attr: str) -> int: + """ + Safely resolves in_degree or out_degree from a node, handling both + the case where it is a plain attribute (int) and where it is a bound + method (as on NetworkX nodes or DaCe Graph objects). + + :param node: The graph node to inspect. + :param attr: The attribute name ('in_degree' or 'out_degree'). + :return: The integer degree value, or 0 if the attribute is absent. + """ + val = getattr(node, attr, None) + if val is None: + return 0 + if callable(val): + try: + return val() + except TypeError: + return 0 + return val + + +def get_deterministic_node_key(node: Any) -> str: """ Generates a highly stable, deterministic string key for DaCe graph nodes based on their semantic properties rather than memory locations. @@ -2787,7 +2804,7 @@ def get_deterministic_node_key(node: Any, _cache: Dict[int, str] = {}) -> str: collisions are resolved deterministically. The generated key incorporates: - - Node type and cleaned label (stripping UUIDs and hex addresses) + - Node type and label - Graph topology (in-degree and out-degree) - Interface semantics (in/out connectors) - Loop and memory semantics (map parameters, schedules, access types) @@ -2800,28 +2817,23 @@ def get_deterministic_node_key(node: Any, _cache: Dict[int, str] = {}) -> str: :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) to be evaluated. - :param _cache: Internal mutable default dict used as an identity-based - cache. Cleared between sort passes to avoid stale keys. :return: A stable string representing the node's semantic identity. """ node_id = id(node) - if node_id in _cache: - return _cache[node_id] + if node_id in _node_key_cache: + return _node_key_cache[node_id] node_type = type(node).__name__ # Extract core identifier raw_label = getattr(node, 'data', getattr(node, 'label', str(node))) - # Strip volatile metadata - clean_label = VOLATILE_STR_REGEX.sub('', str(raw_label)) - - parts = [node_type, clean_label] + parts = [node_type, str(raw_label)] # 1. Topological Context - in_degree = getattr(node, 'in_degree', 0) - out_degree = getattr(node, 'out_degree', 0) - parts.append(f"i{in_degree}o{out_degree}") + in_deg = _resolve_degree(node, 'in_degree') + out_deg = _resolve_degree(node, 'out_degree') + parts.append(f"i{in_deg}o{out_deg}") # 2. Interface Semantics (Connectors for Tasklets / NestedSDFGs) if hasattr(node, 'in_connectors') and node.in_connectors: @@ -2852,7 +2864,7 @@ def get_deterministic_node_key(node: Any, _cache: Dict[int, str] = {}) -> str: parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") result = "_".join(parts) - _cache[node_id] = result + _node_key_cache[node_id] = result return result @@ -2862,8 +2874,7 @@ def get_deterministic_edge_key(edge: Any) -> str: deterministic sorting. This function extracts the semantic connection points (connectors) - and the data payload (Memlets or Interstate conditions), explicitly - stripping volatile memory addresses and generated UUIDs to prevent + and the data payload (Memlets or Interstate conditions) to prevent non-deterministic compiler graph traversals. :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. @@ -2874,16 +2885,11 @@ def get_deterministic_edge_key(edge: Any) -> str: raw_dst_conn = str(getattr(edge, 'dst_conn', '')) raw_data_str = str(getattr(edge, 'data', '')) - # 2. Strip volatile hashes/addresses from connectors and payload - clean_src_conn = VOLATILE_STR_REGEX.sub('', raw_src_conn) - clean_dst_conn = VOLATILE_STR_REGEX.sub('', raw_dst_conn) - clean_data_str = VOLATILE_STR_REGEX.sub('', raw_data_str) - - # 3. Retrieve the stabilized keys for the source and destination nodes + # 2. Retrieve the stabilized keys for the source and destination nodes src_key = get_deterministic_node_key(edge.src) dst_key = get_deterministic_node_key(edge.dst) - return f"{src_key}[{clean_src_conn}]->{dst_key}[{clean_dst_conn}]({clean_data_str})" + return f"{src_key}[{raw_src_conn}]->{dst_key}[{raw_dst_conn}]({raw_data_str})" def sort_graph_dicts_alphabetically(graph: Any, rebuild_nx: bool = False) -> None: @@ -2915,26 +2921,25 @@ def sort_graph_dicts_alphabetically(graph: Any, rebuild_nx: bool = False) -> Non """ # 1. Sort the master Nodes dictionary if hasattr(graph, '_nodes'): - sorted_nodes = sorted(graph._nodes.keys(), key=get_deterministic_node_key) - for k in sorted_nodes: - # Pop and re-insert to enforce deterministic insertion order - graph._nodes[k] = graph._nodes.pop(k) + sorted_node_items = sorted(graph._nodes.items(), key=lambda item: get_deterministic_node_key(item[0])) + graph._nodes.clear() + graph._nodes.update(sorted_node_items) # 2. Sort the nested adjacency lists (In/Out Edges) within each node for node, (in_edges, out_edges) in graph._nodes.items(): - sorted_in = sorted(in_edges.keys(), key=lambda k: get_deterministic_edge_key(in_edges[k])) - for e_key in sorted_in: - in_edges[e_key] = in_edges.pop(e_key) + sorted_in_items = sorted(in_edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) + in_edges.clear() + in_edges.update(sorted_in_items) - sorted_out = sorted(out_edges.keys(), key=lambda k: get_deterministic_edge_key(out_edges[k])) - for e_key in sorted_out: - out_edges[e_key] = out_edges.pop(e_key) + sorted_out_items = sorted(out_edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) + out_edges.clear() + out_edges.update(sorted_out_items) # 3. Sort the master Edges dictionary if hasattr(graph, '_edges'): - sorted_edge_keys = sorted(graph._edges.keys(), key=lambda k: get_deterministic_edge_key(graph._edges[k])) - for e_key in sorted_edge_keys: - graph._edges[e_key] = graph._edges.pop(e_key) + sorted_edge_items = sorted(graph._edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) + graph._edges.clear() + graph._edges.update(sorted_edge_items) # 4. Optionally rebuild the NetworkX backend to match the new deterministic order if rebuild_nx and hasattr(graph, '_nx'): diff --git a/dace/transformation/passes/pattern_matching.py b/dace/transformation/passes/pattern_matching.py index 48856df03d..9f557527f0 100644 --- a/dace/transformation/passes/pattern_matching.py +++ b/dace/transformation/passes/pattern_matching.py @@ -213,9 +213,6 @@ def _apply_pass(self, sdfg: SDFG, pipeline_results: Dict[str, Any], apply_once: if len(xforms) != len(set(xforms)): raise ValueError('Transformation set must be unique') - # Sort SDFG dictionaries for deterministic pattern matching. - sdfg.sort_sdfg_alphabetically() - if self.order_by_transformation: applied_anything = True while applied_anything: diff --git a/tests/sdfg/deterministic_sort_test.py b/tests/sdfg/deterministic_sort_test.py index 5a40019914..28d4bbda1b 100644 --- a/tests/sdfg/deterministic_sort_test.py +++ b/tests/sdfg/deterministic_sort_test.py @@ -1,68 +1,207 @@ -import dace +import copy import random + +import dace from dace.sdfg.utils import get_deterministic_node_key, get_deterministic_edge_key -def test_sdfg_alphabetical_sorting(): - """ - Tests that the SDFG and its internal states can be forced into a strictly - deterministic topological order, regardless of dictionary insertion history. - """ - # 1. Create a simple SDFG +def _scramble_dict_in_place(d): + """Helper to randomize dictionary insertion order without changing its type.""" + if not d: + return + keys = list(d.keys()) + random.shuffle(keys) + items = [(k, d[k]) for k in keys] + d.clear() + d.update(items) + + +def _scramble_sdfg(sdfg): + """Deeply scramble all internal dictionaries of an SDFG to simulate non-determinism.""" + # Scramble top-level metadata + _scramble_dict_in_place(sdfg._arrays) + if hasattr(sdfg, 'symbols') and sdfg.symbols: + _scramble_dict_in_place(sdfg.symbols) + if hasattr(sdfg, 'constants_prop') and sdfg.constants_prop: + _scramble_dict_in_place(sdfg.constants_prop) + + # Scramble state machine level + _scramble_dict_in_place(sdfg._nodes) + _scramble_dict_in_place(sdfg._edges) + + # Scramble dataflow inside each state + for state in sdfg.nodes(): + _scramble_dict_in_place(state._nodes) + _scramble_dict_in_place(state._edges) + + for node, (in_edges, out_edges) in state._nodes.items(): + _scramble_dict_in_place(in_edges) + _scramble_dict_in_place(out_edges) + + # Recurse into nested SDFGs + for node in state.nodes(): + if hasattr(node, 'sdfg') and node.sdfg is not None: + _scramble_sdfg(node.sdfg) + + +def _snapshot_order(sdfg): + """Capture the current iteration order of all internal dictionaries as a hashable snapshot.""" + result = [] + + # Metadata + result.append(('arrays', tuple(sdfg._arrays.keys()))) + + # State machine + state_node_keys = tuple(get_deterministic_node_key(n) for n in sdfg._nodes.keys()) + result.append(('sdfg_nodes', state_node_keys)) + + # Dataflow per state + for i, state in enumerate(sdfg.nodes()): + node_keys = tuple(get_deterministic_node_key(n) for n in state._nodes.keys()) + edge_keys = tuple(get_deterministic_edge_key(state._edges[k]) for k in state._edges.keys()) + result.append((f'state_{i}_nodes', node_keys)) + result.append((f'state_{i}_edges', edge_keys)) + + for j, (node, (in_edges, out_edges)) in enumerate(state._nodes.items()): + in_keys = tuple(get_deterministic_edge_key(in_edges[k]) for k in in_edges.keys()) + out_keys = tuple(get_deterministic_edge_key(out_edges[k]) for k in out_edges.keys()) + result.append((f'state_{i}_node_{j}_in', in_keys)) + result.append((f'state_{i}_node_{j}_out', out_keys)) + + return tuple(result) + + +def _build_test_sdfg(): + """Build a simple SDFG with enough structure to exercise all sorting paths.""" sdfg = dace.SDFG('deterministic_test') sdfg.add_array('A', [10], dace.float64) sdfg.add_array('B', [10], dace.float64) + sdfg.add_scalar('s', dace.float64, transient=True) state = sdfg.add_state('state0') a = state.add_read('A') b = state.add_write('B') tasklet = state.add_tasklet('compute', {'a'}, {'b'}, 'b = a + 1') - state.add_edge(a, None, tasklet, 'a', dace.Memlet.from_array('A', sdfg.arrays['A'])) state.add_edge(tasklet, 'b', b, None, dace.Memlet.from_array('B', sdfg.arrays['B'])) - # 2. Intentionally scramble the internal dictionaries to simulate non-determinism - - def scramble_dict_in_place(d): - """Helper to randomize dictionary insertion order without changing its type.""" - if not d: return - keys = list(d.keys()) - random.shuffle(keys) - for k in keys: - # Pop and re-insert to shuffle the underlying Python 3.7+ insertion order - d[k] = d.pop(k) - - # Scramble top-level arrays (Safely mutating the NestedDict in-place) - scramble_dict_in_place(sdfg._arrays) + return sdfg - # Scramble state nodes - scramble_dict_in_place(state._nodes) - # Scramble master edges - scramble_dict_in_place(state._edges) +def test_sdfg_alphabetical_sorting_basic(): + """ + Tests that the SDFG and its internal states can be forced into a strictly + deterministic topological order, regardless of dictionary insertion history. + """ + sdfg = _build_test_sdfg() + state = sdfg.nodes()[0] - # Scramble nested adjacency lists (in_edges / out_edges) - for node, (in_edges, out_edges) in state._nodes.items(): - scramble_dict_in_place(in_edges) - scramble_dict_in_place(out_edges) + # Scramble everything + _scramble_sdfg(sdfg) - # 3. Apply the canonicalizer + # Apply the canonicalizer sdfg.sort_sdfg_alphabetically() - # 4. Assert that the underlying dictionaries are now strictly ordered + # Assert that graph nodes are sorted node_keys = list(state._nodes.keys()) expected_node_keys = sorted(node_keys, key=get_deterministic_node_key) + assert node_keys == expected_node_keys, "Graph nodes were not deterministically sorted!" + # Assert that graph edges are sorted edge_keys = list(state._edges.keys()) expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state._edges[k])) - - assert node_keys == expected_node_keys, "Graph nodes were not deterministically sorted!" assert edge_keys == expected_edge_keys, "Graph edges were not deterministically sorted!" - # Ensure networkx backend was also rebuilt deterministically + # Assert that metadata dicts are sorted + array_keys = list(sdfg._arrays.keys()) + assert array_keys == sorted(array_keys), "SDFG arrays were not alphabetically sorted!" + + # Assert that per-node adjacency lists are sorted + for node, (in_edges, out_edges) in state._nodes.items(): + in_keys = list(in_edges.keys()) + expected_in = sorted(in_keys, key=lambda k: get_deterministic_edge_key(in_edges[k])) + assert in_keys == expected_in, f"In-edges for {node} were not deterministically sorted!" + + out_keys = list(out_edges.keys()) + expected_out = sorted(out_keys, key=lambda k: get_deterministic_edge_key(out_edges[k])) + assert out_keys == expected_out, f"Out-edges for {node} were not deterministically sorted!" + + +def test_sdfg_alphabetical_sorting_rebuild_nx(): + """ + Tests that when rebuild_nx=True, the NetworkX backend matches + the sorted DaCe dictionary order. + """ + sdfg = _build_test_sdfg() + state = sdfg.nodes()[0] + + _scramble_sdfg(sdfg) + + # Sort with NX rebuild enabled + sdfg.sort_sdfg_alphabetically(rebuild_nx=True) + + # The NX node order must match the _nodes dict order nx_nodes = list(state._nx.nodes()) - assert nx_nodes == expected_node_keys, "NetworkX nodes do not match DaCe dict order!" + dace_nodes = list(state._nodes.keys()) + assert nx_nodes == dace_nodes, ("NetworkX node order does not match sorted DaCe _nodes dict order!") + + +def test_sdfg_alphabetical_sorting_stability(): + """ + Tests that regardless of the initial scrambling, the sort always + produces the same canonical order. Runs multiple scramble+sort + cycles with different random seeds. + """ + reference_snapshot = None + + for seed in range(10): + sdfg = _build_test_sdfg() + + random.seed(seed) + _scramble_sdfg(sdfg) + + sdfg.sort_sdfg_alphabetically() + + snapshot = _snapshot_order(sdfg) + + if reference_snapshot is None: + reference_snapshot = snapshot + else: + assert snapshot == reference_snapshot, (f"Sort produced different order with seed={seed}! " + f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}") + + +def test_sdfg_alphabetical_sorting_idempotency(): + """ + Tests that sorting an already-sorted SDFG produces the same result, + i.e., the operation is idempotent. + """ + sdfg = _build_test_sdfg() + + _scramble_sdfg(sdfg) + + # Sort once + sdfg.sort_sdfg_alphabetically() + snapshot_first = _snapshot_order(sdfg) + + # Sort again + sdfg.sort_sdfg_alphabetically() + snapshot_second = _snapshot_order(sdfg) + + assert snapshot_first == snapshot_second, ("Sorting is not idempotent! Second sort produced a different order.") if __name__ == "__main__": - test_sdfg_alphabetical_sorting() + test_sdfg_alphabetical_sorting_basic() + print("PASSED: test_sdfg_alphabetical_sorting_basic") + + test_sdfg_alphabetical_sorting_rebuild_nx() + print("PASSED: test_sdfg_alphabetical_sorting_rebuild_nx") + + test_sdfg_alphabetical_sorting_stability() + print("PASSED: test_sdfg_alphabetical_sorting_stability") + + test_sdfg_alphabetical_sorting_idempotency() + print("PASSED: test_sdfg_alphabetical_sorting_idempotency") + + print("\nAll tests passed!") From a7c1aa6a8c04e6b3a3b45c035e79b08e81b19348 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 16 Mar 2026 10:09:32 +0100 Subject: [PATCH 08/13] Sort SDFG for deterministic code generation: Configurable --- dace/config_schema.yml | 11 +++++++++++ dace/sdfg/sdfg.py | 7 +++++++ ...est.py => sdfg_alphabetical_sorting_test.py} | 17 +++++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) rename tests/sdfg/{deterministic_sort_test.py => sdfg_alphabetical_sorting_test.py} (92%) diff --git a/dace/config_schema.yml b/dace/config_schema.yml index 2b05d45232..28e1efefb9 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -251,6 +251,17 @@ required: If set, specifies additional arguments to the initial invocation of ``cmake``. + sdfg_alphabetical_sorting: + type: bool + default: false + title: SDFG alphabetical sorting + description: > + When enabled, sorts all internal SDFG dictionaries + (nodes, edges, arrays, symbols) into a canonical + alphabetical order before code generation. This + guarantees reproducible output across consecutive + compilations at the cost of additional sorting overhead. + ############################################# # CPU compiler cpu: diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 157c7a53ca..ff2256bd9c 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3024,6 +3024,9 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S 3. Dataflow: Sorts the internal nodes and memlet edges within every State. 4. Recursion: Recursively applies this stabilization to all Nested SDFGs. + This method is a no-op unless the ``compiler.sdfg_alphabetical_sorting`` configuration + option is set to ``True``. + :param rebuild_nx: If True, rebuilds the internal NetworkX graph in each sorted graph. Default is False for performance, since DaCe's codegen and pattern matching do not rely on the @@ -3036,6 +3039,10 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S not on recursive calls into nested SDFGs where parent keys are still valid. """ + # Only perform sorting when deterministic code generation is enabled. + if not Config.get_bool('compiler', 'sdfg_alphabetical_sorting'): + return + # Avoid import loops from dace.sdfg.utils import sort_graph_dicts_alphabetically, _node_key_cache diff --git a/tests/sdfg/deterministic_sort_test.py b/tests/sdfg/sdfg_alphabetical_sorting_test.py similarity index 92% rename from tests/sdfg/deterministic_sort_test.py rename to tests/sdfg/sdfg_alphabetical_sorting_test.py index 28d4bbda1b..09ff343ee6 100644 --- a/tests/sdfg/deterministic_sort_test.py +++ b/tests/sdfg/sdfg_alphabetical_sorting_test.py @@ -4,6 +4,9 @@ import dace from dace.sdfg.utils import get_deterministic_node_key, get_deterministic_edge_key +# Enable alphabetical sorting for all tests in this module. +dace.Config.set("compiler", "sdfg_alphabetical_sorting", value=True) + def _scramble_dict_in_place(d): """Helper to randomize dictionary insertion order without changing its type.""" @@ -143,7 +146,9 @@ def test_sdfg_alphabetical_sorting_rebuild_nx(): # The NX node order must match the _nodes dict order nx_nodes = list(state._nx.nodes()) dace_nodes = list(state._nodes.keys()) - assert nx_nodes == dace_nodes, ("NetworkX node order does not match sorted DaCe _nodes dict order!") + assert nx_nodes == dace_nodes, ( + "NetworkX node order does not match sorted DaCe _nodes dict order!" + ) def test_sdfg_alphabetical_sorting_stability(): @@ -167,8 +172,10 @@ def test_sdfg_alphabetical_sorting_stability(): if reference_snapshot is None: reference_snapshot = snapshot else: - assert snapshot == reference_snapshot, (f"Sort produced different order with seed={seed}! " - f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}") + assert snapshot == reference_snapshot, ( + f"Sort produced different order with seed={seed}! " + f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}" + ) def test_sdfg_alphabetical_sorting_idempotency(): @@ -188,7 +195,9 @@ def test_sdfg_alphabetical_sorting_idempotency(): sdfg.sort_sdfg_alphabetically() snapshot_second = _snapshot_order(sdfg) - assert snapshot_first == snapshot_second, ("Sorting is not idempotent! Second sort produced a different order.") + assert snapshot_first == snapshot_second, ( + "Sorting is not idempotent! Second sort produced a different order." + ) if __name__ == "__main__": From c2c7db3652d327539e6a126e88252675e65a6dd0 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 16 Mar 2026 10:24:55 +0100 Subject: [PATCH 09/13] Sort SDFG for deterministic code generation: formating --- tests/sdfg/sdfg_alphabetical_sorting_test.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/sdfg/sdfg_alphabetical_sorting_test.py b/tests/sdfg/sdfg_alphabetical_sorting_test.py index 09ff343ee6..6e073e75c8 100644 --- a/tests/sdfg/sdfg_alphabetical_sorting_test.py +++ b/tests/sdfg/sdfg_alphabetical_sorting_test.py @@ -146,9 +146,7 @@ def test_sdfg_alphabetical_sorting_rebuild_nx(): # The NX node order must match the _nodes dict order nx_nodes = list(state._nx.nodes()) dace_nodes = list(state._nodes.keys()) - assert nx_nodes == dace_nodes, ( - "NetworkX node order does not match sorted DaCe _nodes dict order!" - ) + assert nx_nodes == dace_nodes, ("NetworkX node order does not match sorted DaCe _nodes dict order!") def test_sdfg_alphabetical_sorting_stability(): @@ -172,10 +170,8 @@ def test_sdfg_alphabetical_sorting_stability(): if reference_snapshot is None: reference_snapshot = snapshot else: - assert snapshot == reference_snapshot, ( - f"Sort produced different order with seed={seed}! " - f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}" - ) + assert snapshot == reference_snapshot, (f"Sort produced different order with seed={seed}! " + f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}") def test_sdfg_alphabetical_sorting_idempotency(): @@ -195,9 +191,7 @@ def test_sdfg_alphabetical_sorting_idempotency(): sdfg.sort_sdfg_alphabetically() snapshot_second = _snapshot_order(sdfg) - assert snapshot_first == snapshot_second, ( - "Sorting is not idempotent! Second sort produced a different order." - ) + assert snapshot_first == snapshot_second, ("Sorting is not idempotent! Second sort produced a different order.") if __name__ == "__main__": From d47c6394508854dbf9cdf5d7a23e2e11cc3160d0 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 16 Mar 2026 14:03:22 +0100 Subject: [PATCH 10/13] Sort SDFG for deterministic code generation: addressing comments --- dace/codegen/codegen.py | 5 +- dace/sdfg/sdfg.py | 53 +++--- dace/sdfg/utils.py | 177 +++++++++++-------- tests/sdfg/sdfg_alphabetical_sorting_test.py | 80 +++++++-- 4 files changed, 189 insertions(+), 126 deletions(-) diff --git a/dace/codegen/codegen.py b/dace/codegen/codegen.py index a74f6459d2..1a6e5a6636 100644 --- a/dace/codegen/codegen.py +++ b/dace/codegen/codegen.py @@ -245,8 +245,9 @@ def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]: for k, v in frame._dispatcher.instrumentation.items() } - # Sort SDFG dictionaries for deterministic pattern matching. - sdfg.sort_sdfg_alphabetically() + # Sort SDFG for deterministic code generation, if enabled. + if Config.get_bool('compiler', 'sdfg_alphabetical_sorting'): + sdfg.sort_sdfg_alphabetically() # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (not change) FROM THIS POINT ONWARDS diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index ff2256bd9c..b987e7b6af 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3019,13 +3019,13 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S The stabilization process executes in four phases: 1. Global Metadata: Alphabetizes arrays, symbols, and constants. - 2. State Machine: Sorts the top-level SDFG (States and Interstate Edges) - using semantic topological keys. - 3. Dataflow: Sorts the internal nodes and memlet edges within every State. - 4. Recursion: Recursively applies this stabilization to all Nested SDFGs. - - This method is a no-op unless the ``compiler.sdfg_alphabetical_sorting`` configuration - option is set to ``True``. + 2. Control Flow Regions: Sorts every ``ControlFlowRegion`` in this + SDFG (including the SDFG itself, ``LoopRegion``s, + ``ConditionalBlock``s, etc.) at the state machine level. + 3. Dataflow: Sorts the internal nodes and memlet edges within every + ``SDFGState``. + 4. Nested SDFGs: Recursively applies this stabilization to all + ``NestedSDFG`` nodes found in the states. :param rebuild_nx: If True, rebuilds the internal NetworkX graph in each sorted graph. Default is False for performance, since @@ -3033,31 +3033,23 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S internal _nx iteration order. :param visited: A set of memory addresses (IDs) of already processed SDFGs. Used internally to prevent infinite recursion in the event - of cyclic nested SDFG references. Also serves as the signal - for the top-level entry point: the node key cache is only - cleared when ``visited`` is ``None`` (i.e., on the first call), - not on recursive calls into nested SDFGs where parent keys - are still valid. - """ - # Only perform sorting when deterministic code generation is enabled. - if not Config.get_bool('compiler', 'sdfg_alphabetical_sorting'): - return - + of cyclic nested SDFG references. + """ # Avoid import loops - from dace.sdfg.utils import sort_graph_dicts_alphabetically, _node_key_cache + from dace.sdfg.utils import sort_graph_dicts_alphabetically if visited is None: visited = set() - # Only clear the cache at the top-level entry point, not on - # recursive calls into nested SDFGs where parent keys are - # still valid. - _node_key_cache.clear() # Cycle prevention for recursive nested SDFGs if id(self) in visited: return visited.add(id(self)) + # Stack-local cache, private to this sort invocation. + # Safe for concurrent SDFG processing — no global state. + _cache: Dict[int, tuple] = {} + # 1. Stabilize Global Metadata (Arrays, Symbols, Constants) for attr in ['_arrays', 'symbols', 'constants_prop']: if hasattr(self, attr): @@ -3068,14 +3060,19 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S val.clear() val.update(sorted_items) - # 2. Stabilize the top-level State Machine (States and Interstate edges) - sort_graph_dicts_alphabetically(self, rebuild_nx=rebuild_nx) + # 2. Stabilize all ControlFlowRegions (state machine level). + # This includes the SDFG itself, plus any nested LoopRegions, + # ConditionalBlocks, etc. — but does NOT recurse into nested SDFGs. + for cfr in self.all_control_flow_regions(recursive=False): + sort_graph_dicts_alphabetically(cfr, rebuild_nx=rebuild_nx, _cache=_cache) - # 3. Stabilize the Dataflow inside each State and recurse into Nested SDFGs - for state in self.nodes(): - sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx) + # 3. Stabilize the Dataflow inside each SDFGState. + # all_states() traverses into nested ControlFlowRegions but NOT + # into nested SDFGs. + for state in self.all_states(): + sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx, _cache=_cache) # 4. Recurse into Nested SDFGs for node in state.nodes(): - if hasattr(node, 'sdfg') and node.sdfg is not None: + if isinstance(node, nd.NestedSDFG): node.sdfg.sort_sdfg_alphabetically(rebuild_nx=rebuild_nx, visited=visited) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index bb5c81cf80..2f58acd540 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -7,7 +7,6 @@ import warnings import networkx as nx import time -import re import hashlib import dace.sdfg.nodes @@ -2766,36 +2765,9 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): states.append(state) -# Module-level cache for node keys, cleared at the start of each -# sort_sdfg_alphabetically() call to prevent stale entries after -# transformations modify the graph structure. Keyed by id(node). -_node_key_cache: Dict[int, str] = {} - - -def _resolve_degree(node: Any, attr: str) -> int: - """ - Safely resolves in_degree or out_degree from a node, handling both - the case where it is a plain attribute (int) and where it is a bound - method (as on NetworkX nodes or DaCe Graph objects). - - :param node: The graph node to inspect. - :param attr: The attribute name ('in_degree' or 'out_degree'). - :return: The integer degree value, or 0 if the attribute is absent. - """ - val = getattr(node, attr, None) - if val is None: - return 0 - if callable(val): - try: - return val() - except TypeError: - return 0 - return val - - -def get_deterministic_node_key(node: Any) -> str: +def get_deterministic_node_key(node: Any, _cache: Optional[Dict[int, tuple]] = None, graph: Any = None) -> tuple: """ - Generates a highly stable, deterministic string key for DaCe graph nodes + Generates a highly stable, deterministic key for DaCe graph nodes based on their semantic properties rather than memory locations. During SDFG compilation, relying on memory addresses or volatile UUIDs @@ -2803,37 +2775,61 @@ def get_deterministic_node_key(node: Any) -> str: extracts the intrinsic semantic identity of a node to ensure structural collisions are resolved deterministically. - The generated key incorporates: - - Node type and label - - Graph topology (in-degree and out-degree) + The generated key is a tuple of ``(node_type, label, dynamic_parts)`` + which enforces lexicographic ordering: nodes are first grouped by type, + then by label, with the remaining semantic properties acting as a + tiebreaker. + + The dynamic parts incorporate: + - Graph topology (in-degree and out-degree, when the containing graph + is provided) - Interface semantics (in/out connectors) - Loop and memory semantics (map parameters, schedules, access types) - Internal code logic (via a stable MD5 hash of Tasklet code) - Nested SDFG structural size - Results are cached per node identity to avoid redundant recomputation - during sort comparisons. The cache is invalidated at the start of each - sort_sdfg_alphabetically() call. + When called from the sorting path, an optional ``_cache`` dict can be + provided to avoid redundant recomputation during sort comparisons. + The cache is private to each ``sort_sdfg_alphabetically()`` invocation + and lives on the stack, so concurrent SDFG processing is safe. :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) to be evaluated. - :return: A stable string representing the node's semantic identity. - """ - node_id = id(node) - if node_id in _node_key_cache: - return _node_key_cache[node_id] + :param _cache: Optional node-key cache dict, private to the current + ``sort_sdfg_alphabetically()`` call. When ``None``, + no caching is performed. + :param graph: The containing graph (e.g., SDFGState or ControlFlowRegion) + used to compute in-degree and out-degree. When ``None``, + degree information is omitted from the key. + :return: A tuple ``(node_type, label, dynamic_parts)`` representing + the node's semantic identity. + """ + if _cache is not None: + node_id = id(node) + if node_id in _cache: + return _cache[node_id] node_type = type(node).__name__ # Extract core identifier - raw_label = getattr(node, 'data', getattr(node, 'label', str(node))) + if hasattr(node, 'data'): + raw_label = node.data + elif hasattr(node, 'label'): + raw_label = node.label + else: + raw_label = str(node) - parts = [node_type, str(raw_label)] + parts = [] - # 1. Topological Context - in_deg = _resolve_degree(node, 'in_degree') - out_deg = _resolve_degree(node, 'out_degree') - parts.append(f"i{in_deg}o{out_deg}") + # 1. Topological Context (requires the containing graph) + if graph is not None: + try: + in_deg = graph.in_degree(node) + out_deg = graph.out_degree(node) + parts.append(f"i{in_deg}o{out_deg}") + except (ValueError, KeyError): + # Node might not belong to this graph (e.g., during NX rebuild) + pass # 2. Interface Semantics (Connectors for Tasklets / NestedSDFGs) if hasattr(node, 'in_connectors') and node.in_connectors: @@ -2863,36 +2859,59 @@ def get_deterministic_node_key(node: Any) -> str: if hasattr(node, 'sdfg') and node.sdfg: parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") - result = "_".join(parts) - _node_key_cache[node_id] = result + # Tuple enforces lexicographic order: group by type, then label, + # then use dynamic parts as a tiebreaker. + result = (node_type, str(raw_label), "_".join(parts)) + if _cache is not None: + _cache[id(node)] = result return result -def get_deterministic_edge_key(edge: Any) -> str: +def get_deterministic_edge_key(edge: Any, _cache: Optional[Dict[int, tuple]] = None, graph: Any = None) -> tuple: """ - Generates a highly stable string key for graph edges to ensure + Generates a highly stable key for graph edges to ensure deterministic sorting. This function extracts the semantic connection points (connectors) and the data payload (Memlets or Interstate conditions) to prevent non-deterministic compiler graph traversals. + For interstate edges, whose data object lacks a stable ``__str__()`` + and would produce a memory-address-based representation, the function + extracts the ``condition`` and ``assignments`` attributes instead. + :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. - :return: A stable string representation of the edge's routing and payload. + :param _cache: Optional node-key cache dict, passed through to + :func:`get_deterministic_node_key`. + :param graph: The containing graph, passed through to + :func:`get_deterministic_node_key` for degree computation. + :return: A tuple representing the edge's routing and payload. """ - # 1. Extract raw strings + # 1. Extract connector strings raw_src_conn = str(getattr(edge, 'src_conn', '')) raw_dst_conn = str(getattr(edge, 'dst_conn', '')) - raw_data_str = str(getattr(edge, 'data', '')) - # 2. Retrieve the stabilized keys for the source and destination nodes - src_key = get_deterministic_node_key(edge.src) - dst_key = get_deterministic_node_key(edge.dst) + # 2. Extract data payload, handling InterstateEdge specially since it + # lacks __str__() and would produce an unstable memory-address-based + # representation. + edge_data = getattr(edge, 'data', '') + if hasattr(edge_data, 'condition') and hasattr(edge_data, 'assignments'): + cond_str = str(edge_data.condition) + assign_str = str(edge_data.assignments) + raw_data_str = f"cond:{cond_str}_assign:{assign_str}" + else: + raw_data_str = str(edge_data) + + # 3. Retrieve the stabilized keys for the source and destination nodes + src_key = get_deterministic_node_key(edge.src, _cache, graph) + dst_key = get_deterministic_node_key(edge.dst, _cache, graph) - return f"{src_key}[{raw_src_conn}]->{dst_key}[{raw_dst_conn}]({raw_data_str})" + return (src_key, raw_src_conn, dst_key, raw_dst_conn, raw_data_str) -def sort_graph_dicts_alphabetically(graph: Any, rebuild_nx: bool = False) -> None: +def sort_graph_dicts_alphabetically(graph: Any, + rebuild_nx: bool = False, + _cache: Optional[Dict[int, tuple]] = None) -> None: """ Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place using semantically-aware deterministic keys. @@ -2918,28 +2937,32 @@ def sort_graph_dicts_alphabetically(graph: Any, rebuild_nx: bool = False) -> Non the new order. Default is False for performance, since DaCe's codegen and pattern matching do not rely on the internal _nx iteration order. + :param _cache: Optional node-key cache dict, private to the current + ``sort_sdfg_alphabetically()`` call. Passed through to + key functions to avoid redundant computation. """ # 1. Sort the master Nodes dictionary - if hasattr(graph, '_nodes'): - sorted_node_items = sorted(graph._nodes.items(), key=lambda item: get_deterministic_node_key(item[0])) - graph._nodes.clear() - graph._nodes.update(sorted_node_items) - - # 2. Sort the nested adjacency lists (In/Out Edges) within each node - for node, (in_edges, out_edges) in graph._nodes.items(): - sorted_in_items = sorted(in_edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) - in_edges.clear() - in_edges.update(sorted_in_items) - - sorted_out_items = sorted(out_edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) - out_edges.clear() - out_edges.update(sorted_out_items) + sorted_node_items = sorted(graph._nodes.items(), + key=lambda item: get_deterministic_node_key(item[0], _cache, graph)) + graph._nodes.clear() + graph._nodes.update(sorted_node_items) + + # 2. Sort the nested adjacency lists (In/Out Edges) within each node + for node, (in_edges, out_edges) in graph._nodes.items(): + sorted_in_items = sorted(in_edges.items(), key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) + in_edges.clear() + in_edges.update(sorted_in_items) + + sorted_out_items = sorted(out_edges.items(), + key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) + out_edges.clear() + out_edges.update(sorted_out_items) # 3. Sort the master Edges dictionary - if hasattr(graph, '_edges'): - sorted_edge_items = sorted(graph._edges.items(), key=lambda item: get_deterministic_edge_key(item[1])) - graph._edges.clear() - graph._edges.update(sorted_edge_items) + sorted_edge_items = sorted(graph._edges.items(), + key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) + graph._edges.clear() + graph._edges.update(sorted_edge_items) # 4. Optionally rebuild the NetworkX backend to match the new deterministic order if rebuild_nx and hasattr(graph, '_nx'): diff --git a/tests/sdfg/sdfg_alphabetical_sorting_test.py b/tests/sdfg/sdfg_alphabetical_sorting_test.py index 6e073e75c8..be2488329f 100644 --- a/tests/sdfg/sdfg_alphabetical_sorting_test.py +++ b/tests/sdfg/sdfg_alphabetical_sorting_test.py @@ -1,4 +1,3 @@ -import copy import random import dace @@ -55,19 +54,19 @@ def _snapshot_order(sdfg): result.append(('arrays', tuple(sdfg._arrays.keys()))) # State machine - state_node_keys = tuple(get_deterministic_node_key(n) for n in sdfg._nodes.keys()) + state_node_keys = tuple(get_deterministic_node_key(n, graph=sdfg) for n in sdfg._nodes.keys()) result.append(('sdfg_nodes', state_node_keys)) # Dataflow per state for i, state in enumerate(sdfg.nodes()): - node_keys = tuple(get_deterministic_node_key(n) for n in state._nodes.keys()) - edge_keys = tuple(get_deterministic_edge_key(state._edges[k]) for k in state._edges.keys()) + node_keys = tuple(get_deterministic_node_key(n, graph=state) for n in state._nodes.keys()) + edge_keys = tuple(get_deterministic_edge_key(state._edges[k], graph=state) for k in state._edges.keys()) result.append((f'state_{i}_nodes', node_keys)) result.append((f'state_{i}_edges', edge_keys)) for j, (node, (in_edges, out_edges)) in enumerate(state._nodes.items()): - in_keys = tuple(get_deterministic_edge_key(in_edges[k]) for k in in_edges.keys()) - out_keys = tuple(get_deterministic_edge_key(out_edges[k]) for k in out_edges.keys()) + in_keys = tuple(get_deterministic_edge_key(in_edges[k], graph=state) for k in in_edges.keys()) + out_keys = tuple(get_deterministic_edge_key(out_edges[k], graph=state) for k in out_edges.keys()) result.append((f'state_{i}_node_{j}_in', in_keys)) result.append((f'state_{i}_node_{j}_out', out_keys)) @@ -107,12 +106,12 @@ def test_sdfg_alphabetical_sorting_basic(): # Assert that graph nodes are sorted node_keys = list(state._nodes.keys()) - expected_node_keys = sorted(node_keys, key=get_deterministic_node_key) + expected_node_keys = sorted(node_keys, key=lambda n: get_deterministic_node_key(n, graph=state)) assert node_keys == expected_node_keys, "Graph nodes were not deterministically sorted!" # Assert that graph edges are sorted edge_keys = list(state._edges.keys()) - expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state._edges[k])) + expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state._edges[k], graph=state)) assert edge_keys == expected_edge_keys, "Graph edges were not deterministically sorted!" # Assert that metadata dicts are sorted @@ -122,11 +121,11 @@ def test_sdfg_alphabetical_sorting_basic(): # Assert that per-node adjacency lists are sorted for node, (in_edges, out_edges) in state._nodes.items(): in_keys = list(in_edges.keys()) - expected_in = sorted(in_keys, key=lambda k: get_deterministic_edge_key(in_edges[k])) + expected_in = sorted(in_keys, key=lambda k: get_deterministic_edge_key(in_edges[k], graph=state)) assert in_keys == expected_in, f"In-edges for {node} were not deterministically sorted!" out_keys = list(out_edges.keys()) - expected_out = sorted(out_keys, key=lambda k: get_deterministic_edge_key(out_edges[k])) + expected_out = sorted(out_keys, key=lambda k: get_deterministic_edge_key(out_edges[k], graph=state)) assert out_keys == expected_out, f"Out-edges for {node} were not deterministically sorted!" @@ -194,17 +193,60 @@ def test_sdfg_alphabetical_sorting_idempotency(): assert snapshot_first == snapshot_second, ("Sorting is not idempotent! Second sort produced a different order.") +def _build_multistate_test_sdfg(): + """Build an SDFG with multiple states and interstate edges to exercise + ControlFlowRegion sorting paths.""" + sdfg = dace.SDFG('multistate_test') + sdfg.add_array('A', [10], dace.float64) + sdfg.add_array('B', [10], dace.float64) + sdfg.add_scalar('s', dace.float64, transient=True) + + state0 = sdfg.add_state('init') + state1 = sdfg.add_state('compute') + state2 = sdfg.add_state('finalize') + + # Add interstate edges + sdfg.add_edge(state0, state1, dace.InterstateEdge()) + sdfg.add_edge(state1, state2, dace.InterstateEdge()) + + # Add dataflow in the compute state + a = state1.add_read('A') + b = state1.add_write('B') + tasklet = state1.add_tasklet('work', {'a'}, {'b'}, 'b = a * 2') + state1.add_edge(a, None, tasklet, 'a', dace.Memlet.from_array('A', sdfg.arrays['A'])) + state1.add_edge(tasklet, 'b', b, None, dace.Memlet.from_array('B', sdfg.arrays['B'])) + + return sdfg + + +def test_sdfg_alphabetical_sorting_multistate(): + """ + Tests that an SDFG with multiple states and interstate edges is + sorted correctly at both the state machine and dataflow levels. + Exercises the all_control_flow_regions() / all_states() paths. + """ + reference_snapshot = None + + for seed in range(10): + sdfg = _build_multistate_test_sdfg() + + random.seed(seed) + _scramble_sdfg(sdfg) + + sdfg.sort_sdfg_alphabetically() + + snapshot = _snapshot_order(sdfg) + + if reference_snapshot is None: + reference_snapshot = snapshot + else: + assert snapshot == reference_snapshot, (f"Multi-state sort produced different order with seed={seed}! " + f"Expected:\n{reference_snapshot}\nGot:\n{snapshot}") + + if __name__ == "__main__": test_sdfg_alphabetical_sorting_basic() - print("PASSED: test_sdfg_alphabetical_sorting_basic") - test_sdfg_alphabetical_sorting_rebuild_nx() - print("PASSED: test_sdfg_alphabetical_sorting_rebuild_nx") - test_sdfg_alphabetical_sorting_stability() - print("PASSED: test_sdfg_alphabetical_sorting_stability") - test_sdfg_alphabetical_sorting_idempotency() - print("PASSED: test_sdfg_alphabetical_sorting_idempotency") - - print("\nAll tests passed!") + test_sdfg_alphabetical_sorting_multistate() From 06b41b4b5595eb963ad87279dd6652b1f44c0db1 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 17 Mar 2026 11:39:15 +0100 Subject: [PATCH 11/13] Sort SDFG for deterministic code generation: addressing comments --- dace/sdfg/sdfg.py | 35 ++++++-------------- tests/sdfg/sdfg_alphabetical_sorting_test.py | 21 +++++++----- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index b987e7b6af..12d5d3e82e 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -3007,7 +3007,7 @@ def recheck_using_explicit_control_flow(self) -> bool: self.root_sdfg.using_explicit_control_flow = found_explicit_cf_block return found_explicit_cf_block - def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[Set[int]] = None) -> None: + def sort_sdfg_alphabetically(self, rebuild_nx: bool = False) -> None: """ Forces all internal dictionaries, graph structures, and metadata registries into a deterministic, semantically-aware order to guarantee stable code generation. @@ -3031,48 +3031,35 @@ def sort_sdfg_alphabetically(self, rebuild_nx: bool = False, visited: Optional[S sorted graph. Default is False for performance, since DaCe's codegen and pattern matching do not rely on the internal _nx iteration order. - :param visited: A set of memory addresses (IDs) of already processed SDFGs. - Used internally to prevent infinite recursion in the event - of cyclic nested SDFG references. """ # Avoid import loops from dace.sdfg.utils import sort_graph_dicts_alphabetically - if visited is None: - visited = set() - - # Cycle prevention for recursive nested SDFGs - if id(self) in visited: - return - visited.add(id(self)) - - # Stack-local cache, private to this sort invocation. + # Stack-local cache for node keys, private to this sort invocation. # Safe for concurrent SDFG processing — no global state. - _cache: Dict[int, tuple] = {} + node_key_cache: Dict[int, tuple] = {} # 1. Stabilize Global Metadata (Arrays, Symbols, Constants) for attr in ['_arrays', 'symbols', 'constants_prop']: - if hasattr(self, attr): - val = getattr(self, attr) - # Ensure the attribute exists, is dict-like, and supports clear/update - if val and hasattr(val, 'keys') and hasattr(val, 'clear'): - sorted_items = sorted(val.items(), key=lambda item: item[0]) - val.clear() - val.update(sorted_items) + val = getattr(self, attr) + if val: + sorted_items = sorted(val.items(), key=lambda item: item[0]) + val.clear() + val.update(sorted_items) # 2. Stabilize all ControlFlowRegions (state machine level). # This includes the SDFG itself, plus any nested LoopRegions, # ConditionalBlocks, etc. — but does NOT recurse into nested SDFGs. for cfr in self.all_control_flow_regions(recursive=False): - sort_graph_dicts_alphabetically(cfr, rebuild_nx=rebuild_nx, _cache=_cache) + sort_graph_dicts_alphabetically(cfr, rebuild_nx=rebuild_nx, node_key_cache=node_key_cache) # 3. Stabilize the Dataflow inside each SDFGState. # all_states() traverses into nested ControlFlowRegions but NOT # into nested SDFGs. for state in self.all_states(): - sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx, _cache=_cache) + sort_graph_dicts_alphabetically(state, rebuild_nx=rebuild_nx, node_key_cache=node_key_cache) # 4. Recurse into Nested SDFGs for node in state.nodes(): if isinstance(node, nd.NestedSDFG): - node.sdfg.sort_sdfg_alphabetically(rebuild_nx=rebuild_nx, visited=visited) + node.sdfg.sort_sdfg_alphabetically(rebuild_nx=rebuild_nx) diff --git a/tests/sdfg/sdfg_alphabetical_sorting_test.py b/tests/sdfg/sdfg_alphabetical_sorting_test.py index be2488329f..e80ef644ae 100644 --- a/tests/sdfg/sdfg_alphabetical_sorting_test.py +++ b/tests/sdfg/sdfg_alphabetical_sorting_test.py @@ -54,19 +54,19 @@ def _snapshot_order(sdfg): result.append(('arrays', tuple(sdfg._arrays.keys()))) # State machine - state_node_keys = tuple(get_deterministic_node_key(n, graph=sdfg) for n in sdfg._nodes.keys()) + state_node_keys = tuple(get_deterministic_node_key(sdfg, n) for n in sdfg._nodes.keys()) result.append(('sdfg_nodes', state_node_keys)) # Dataflow per state for i, state in enumerate(sdfg.nodes()): - node_keys = tuple(get_deterministic_node_key(n, graph=state) for n in state._nodes.keys()) - edge_keys = tuple(get_deterministic_edge_key(state._edges[k], graph=state) for k in state._edges.keys()) + node_keys = tuple(get_deterministic_node_key(state, n) for n in state._nodes.keys()) + edge_keys = tuple(get_deterministic_edge_key(state, state._edges[k]) for k in state._edges.keys()) result.append((f'state_{i}_nodes', node_keys)) result.append((f'state_{i}_edges', edge_keys)) for j, (node, (in_edges, out_edges)) in enumerate(state._nodes.items()): - in_keys = tuple(get_deterministic_edge_key(in_edges[k], graph=state) for k in in_edges.keys()) - out_keys = tuple(get_deterministic_edge_key(out_edges[k], graph=state) for k in out_edges.keys()) + in_keys = tuple(get_deterministic_edge_key(state, in_edges[k]) for k in in_edges.keys()) + out_keys = tuple(get_deterministic_edge_key(state, out_edges[k]) for k in out_edges.keys()) result.append((f'state_{i}_node_{j}_in', in_keys)) result.append((f'state_{i}_node_{j}_out', out_keys)) @@ -99,6 +99,7 @@ def test_sdfg_alphabetical_sorting_basic(): state = sdfg.nodes()[0] # Scramble everything + random.seed(42) _scramble_sdfg(sdfg) # Apply the canonicalizer @@ -106,12 +107,12 @@ def test_sdfg_alphabetical_sorting_basic(): # Assert that graph nodes are sorted node_keys = list(state._nodes.keys()) - expected_node_keys = sorted(node_keys, key=lambda n: get_deterministic_node_key(n, graph=state)) + expected_node_keys = sorted(node_keys, key=lambda n: get_deterministic_node_key(state, n)) assert node_keys == expected_node_keys, "Graph nodes were not deterministically sorted!" # Assert that graph edges are sorted edge_keys = list(state._edges.keys()) - expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state._edges[k], graph=state)) + expected_edge_keys = sorted(edge_keys, key=lambda k: get_deterministic_edge_key(state, state._edges[k])) assert edge_keys == expected_edge_keys, "Graph edges were not deterministically sorted!" # Assert that metadata dicts are sorted @@ -121,11 +122,11 @@ def test_sdfg_alphabetical_sorting_basic(): # Assert that per-node adjacency lists are sorted for node, (in_edges, out_edges) in state._nodes.items(): in_keys = list(in_edges.keys()) - expected_in = sorted(in_keys, key=lambda k: get_deterministic_edge_key(in_edges[k], graph=state)) + expected_in = sorted(in_keys, key=lambda k: get_deterministic_edge_key(state, in_edges[k])) assert in_keys == expected_in, f"In-edges for {node} were not deterministically sorted!" out_keys = list(out_edges.keys()) - expected_out = sorted(out_keys, key=lambda k: get_deterministic_edge_key(out_edges[k], graph=state)) + expected_out = sorted(out_keys, key=lambda k: get_deterministic_edge_key(state, out_edges[k])) assert out_keys == expected_out, f"Out-edges for {node} were not deterministically sorted!" @@ -137,6 +138,7 @@ def test_sdfg_alphabetical_sorting_rebuild_nx(): sdfg = _build_test_sdfg() state = sdfg.nodes()[0] + random.seed(42) _scramble_sdfg(sdfg) # Sort with NX rebuild enabled @@ -180,6 +182,7 @@ def test_sdfg_alphabetical_sorting_idempotency(): """ sdfg = _build_test_sdfg() + random.seed(42) _scramble_sdfg(sdfg) # Sort once From a45635edca4266d81f4159fda9c13801e3b441de Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 17 Mar 2026 11:39:32 +0100 Subject: [PATCH 12/13] Sort SDFG for deterministic code generation: addressing comments --- dace/sdfg/utils.py | 125 +++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 67 deletions(-) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 2f58acd540..7cda7011ca 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -7,7 +7,6 @@ import warnings import networkx as nx import time -import hashlib import dace.sdfg.nodes from dace.codegen import compiled_sdfg as csdfg @@ -2765,7 +2764,7 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): states.append(state) -def get_deterministic_node_key(node: Any, _cache: Optional[Dict[int, tuple]] = None, graph: Any = None) -> tuple: +def get_deterministic_node_key(graph: Any, node: Any, node_key_cache: Optional[Dict[int, tuple]] = None) -> tuple: """ Generates a highly stable, deterministic key for DaCe graph nodes based on their semantic properties rather than memory locations. @@ -2781,33 +2780,33 @@ def get_deterministic_node_key(node: Any, _cache: Optional[Dict[int, tuple]] = N tiebreaker. The dynamic parts incorporate: - - Graph topology (in-degree and out-degree, when the containing graph - is provided) - - Interface semantics (in/out connectors) - - Loop and memory semantics (map parameters, schedules, access types) - - Internal code logic (via a stable MD5 hash of Tasklet code) - - Nested SDFG structural size - - When called from the sorting path, an optional ``_cache`` dict can be - provided to avoid redundant recomputation during sort comparisons. + - Graph topology (in-degree and out-degree) + - Interface semantics (in/out connectors, always included for + ``nd.Node`` subclasses even when empty) + - Loop and memory semantics (map parameters, ranges, schedules) + - Internal code logic (Tasklet code string) + - Nested SDFG structural size and symbol mapping + + When called from the sorting path, an optional ``node_key_cache`` dict + can be provided to avoid redundant recomputation during sort comparisons. The cache is private to each ``sort_sdfg_alphabetically()`` invocation and lives on the stack, so concurrent SDFG processing is safe. + :param graph: The containing graph (e.g., SDFGState or ControlFlowRegion) + used to compute in-degree and out-degree. May be ``None``, + in which case degree information is omitted from the key. :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) to be evaluated. - :param _cache: Optional node-key cache dict, private to the current - ``sort_sdfg_alphabetically()`` call. When ``None``, - no caching is performed. - :param graph: The containing graph (e.g., SDFGState or ControlFlowRegion) - used to compute in-degree and out-degree. When ``None``, - degree information is omitted from the key. + :param node_key_cache: Optional node-key cache dict, private to the current + ``sort_sdfg_alphabetically()`` call. When ``None``, + no caching is performed. :return: A tuple ``(node_type, label, dynamic_parts)`` representing the node's semantic identity. """ - if _cache is not None: + if node_key_cache is not None: node_id = id(node) - if node_id in _cache: - return _cache[node_id] + if node_id in node_key_cache: + return node_key_cache[node_id] node_type = type(node).__name__ @@ -2831,43 +2830,38 @@ def get_deterministic_node_key(node: Any, _cache: Optional[Dict[int, tuple]] = N # Node might not belong to this graph (e.g., during NX rebuild) pass - # 2. Interface Semantics (Connectors for Tasklets / NestedSDFGs) - if hasattr(node, 'in_connectors') and node.in_connectors: - parts.append("inC:" + "-".join(sorted(node.in_connectors.keys()))) - if hasattr(node, 'out_connectors') and node.out_connectors: - parts.append("outC:" + "-".join(sorted(node.out_connectors.keys()))) + # 2. Interface Semantics (Connectors) + if isinstance(node, nd.Node): + parts.append("inC:{" + "-".join(sorted(node.in_connectors.keys())) + "}") + parts.append("outC:{" + "-".join(sorted(node.out_connectors.keys())) + "}") - # 3. Loop Semantics (Map Parameters & Schedules) - if hasattr(node, 'map') and hasattr(node.map, 'params'): + # 3. Map Semantics (Parameters, Ranges & Schedules) + if isinstance(node, (nd.MapEntry, nd.MapExit)): parts.append("map:" + "-".join(node.map.params)) - if hasattr(node.map, 'schedule'): - parts.append(f"sch:{str(node.map.schedule)}") - - # 4. Memory Semantics (Access Types for AccessNodes) - if hasattr(node, 'access'): - parts.append(f"acc:{str(node.access)}") + parts.append("range:" + str(node.map.range)) + parts.append(f"sch:{str(node.map.schedule)}") - # 5. Internal Code Semantics (Tasklets) - if hasattr(node, 'code') and hasattr(node.code, 'as_string'): - # Hash the code to prevent massive strings while guaranteeing uniqueness. - # MD5 is used because Python's built-in hash() is non-deterministic across runs. + # 4. Internal Code Semantics (Tasklets) + if isinstance(node, nd.Tasklet): code_str = str(node.code.as_string).strip() - code_hash = hashlib.md5(code_str.encode('utf-8')).hexdigest()[:8] - parts.append(f"code:{code_hash}") + parts.append(f"code:{code_str}") - # 6. Nested SDFG Differentiation - if hasattr(node, 'sdfg') and node.sdfg: + # 5. Nested SDFG Differentiation + if isinstance(node, nd.NestedSDFG): parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") + # Include symbol mapping for further differentiation + sorted_syms = dict(sorted({str(k): str(v) for k, v in node.symbol_mapping.items()}.items())) + parts.append(f"symmap:{sorted_syms}") # Tuple enforces lexicographic order: group by type, then label, # then use dynamic parts as a tiebreaker. result = (node_type, str(raw_label), "_".join(parts)) - if _cache is not None: - _cache[id(node)] = result + if node_key_cache is not None: + node_key_cache[id(node)] = result return result -def get_deterministic_edge_key(edge: Any, _cache: Optional[Dict[int, tuple]] = None, graph: Any = None) -> tuple: +def get_deterministic_edge_key(graph: Any, edge: Any, node_key_cache: Optional[Dict[int, tuple]] = None) -> tuple: """ Generates a highly stable key for graph edges to ensure deterministic sorting. @@ -2880,11 +2874,11 @@ def get_deterministic_edge_key(edge: Any, _cache: Optional[Dict[int, tuple]] = N and would produce a memory-address-based representation, the function extracts the ``condition`` and ``assignments`` attributes instead. - :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. - :param _cache: Optional node-key cache dict, passed through to - :func:`get_deterministic_node_key`. :param graph: The containing graph, passed through to :func:`get_deterministic_node_key` for degree computation. + :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. + :param node_key_cache: Optional node-key cache dict, passed through to + :func:`get_deterministic_node_key`. :return: A tuple representing the edge's routing and payload. """ # 1. Extract connector strings @@ -2897,21 +2891,21 @@ def get_deterministic_edge_key(edge: Any, _cache: Optional[Dict[int, tuple]] = N edge_data = getattr(edge, 'data', '') if hasattr(edge_data, 'condition') and hasattr(edge_data, 'assignments'): cond_str = str(edge_data.condition) - assign_str = str(edge_data.assignments) - raw_data_str = f"cond:{cond_str}_assign:{assign_str}" + sorted_assigns = dict(sorted({str(k): str(v) for k, v in edge_data.assignments.items()}.items())) + raw_data_str = f"cond:{cond_str}_assign:{sorted_assigns}" else: raw_data_str = str(edge_data) # 3. Retrieve the stabilized keys for the source and destination nodes - src_key = get_deterministic_node_key(edge.src, _cache, graph) - dst_key = get_deterministic_node_key(edge.dst, _cache, graph) + src_key = get_deterministic_node_key(graph, edge.src, node_key_cache) + dst_key = get_deterministic_node_key(graph, edge.dst, node_key_cache) return (src_key, raw_src_conn, dst_key, raw_dst_conn, raw_data_str) -def sort_graph_dicts_alphabetically(graph: Any, +def sort_graph_dicts_alphabetically(graph: Union['ControlFlowRegion', 'SDFGState'], rebuild_nx: bool = False, - _cache: Optional[Dict[int, tuple]] = None) -> None: + node_key_cache: Optional[Dict[int, tuple]] = None) -> None: """ Sorts internal graph nodes, edge dictionaries, and NetworkX backends in-place using semantically-aware deterministic keys. @@ -2931,36 +2925,33 @@ def sort_graph_dicts_alphabetically(graph: Any, stabilized order. Skipped by default since pattern matching builds its own NetworkX digraph via collapse_multigraph_to_nx. - :param graph: The DaCe graph structure (e.g., SDFGState or generic Graph) + :param graph: The DaCe graph structure (e.g., SDFGState or ControlFlowRegion) whose internal dictionaries need to be stabilized. :param rebuild_nx: If True, rebuilds the internal NetworkX graph to match the new order. Default is False for performance, since DaCe's codegen and pattern matching do not rely on the internal _nx iteration order. - :param _cache: Optional node-key cache dict, private to the current - ``sort_sdfg_alphabetically()`` call. Passed through to - key functions to avoid redundant computation. + :param node_key_cache: Optional node-key cache dict, private to the current + ``sort_sdfg_alphabetically()`` call. Passed through to + key functions to avoid redundant computation. """ # 1. Sort the master Nodes dictionary sorted_node_items = sorted(graph._nodes.items(), - key=lambda item: get_deterministic_node_key(item[0], _cache, graph)) + key=lambda item: get_deterministic_node_key(graph, item[0], node_key_cache)) graph._nodes.clear() graph._nodes.update(sorted_node_items) # 2. Sort the nested adjacency lists (In/Out Edges) within each node for node, (in_edges, out_edges) in graph._nodes.items(): - sorted_in_items = sorted(in_edges.items(), key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) - in_edges.clear() - in_edges.update(sorted_in_items) - - sorted_out_items = sorted(out_edges.items(), - key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) - out_edges.clear() - out_edges.update(sorted_out_items) + for edges in [in_edges, out_edges]: + sorted_edges = sorted(edges.items(), + key=lambda item: get_deterministic_edge_key(graph, item[1], node_key_cache)) + edges.clear() + edges.update(sorted_edges) # 3. Sort the master Edges dictionary sorted_edge_items = sorted(graph._edges.items(), - key=lambda item: get_deterministic_edge_key(item[1], _cache, graph)) + key=lambda item: get_deterministic_edge_key(graph, item[1], node_key_cache)) graph._edges.clear() graph._edges.update(sorted_edge_items) From 02c57ca0ec176d82d644e505ae6f380c25ad3cf0 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 2 Apr 2026 12:04:01 +0200 Subject: [PATCH 13/13] Sort SDFG for deterministic code generation: further fixes --- dace/sdfg/utils.py | 187 ++++++++++++++++++++++----------------------- 1 file changed, 92 insertions(+), 95 deletions(-) diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 7cda7011ca..41e9f3e014 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -2766,42 +2766,39 @@ def expand_nodes(sdfg: SDFG, predicate: Callable[[nd.Node], bool]): def get_deterministic_node_key(graph: Any, node: Any, node_key_cache: Optional[Dict[int, tuple]] = None) -> tuple: """ - Generates a highly stable, deterministic key for DaCe graph nodes - based on their semantic properties rather than memory locations. - - During SDFG compilation, relying on memory addresses or volatile UUIDs - for sorting leads to non-deterministic code generation. This function - extracts the intrinsic semantic identity of a node to ensure structural - collisions are resolved deterministically. - - The generated key is a tuple of ``(node_type, label, dynamic_parts)`` - which enforces lexicographic ordering: nodes are first grouped by type, - then by label, with the remaining semantic properties acting as a - tiebreaker. - - The dynamic parts incorporate: - - Graph topology (in-degree and out-degree) - - Interface semantics (in/out connectors, always included for - ``nd.Node`` subclasses even when empty) - - Loop and memory semantics (map parameters, ranges, schedules) - - Internal code logic (Tasklet code string) - - Nested SDFG structural size and symbol mapping - - When called from the sorting path, an optional ``node_key_cache`` dict - can be provided to avoid redundant recomputation during sort comparisons. - The cache is private to each ``sort_sdfg_alphabetically()`` invocation - and lives on the stack, so concurrent SDFG processing is safe. - - :param graph: The containing graph (e.g., SDFGState or ControlFlowRegion) - used to compute in-degree and out-degree. May be ``None``, - in which case degree information is omitted from the key. - :param node: The DaCe graph node (e.g., Tasklet, AccessNode, MapEntry) - to be evaluated. - :param node_key_cache: Optional node-key cache dict, private to the current - ``sort_sdfg_alphabetically()`` call. When ``None``, - no caching is performed. - :return: A tuple ``(node_type, label, dynamic_parts)`` representing - the node's semantic identity. + Generates a deterministic key for DaCe graph nodes based on their + semantic properties rather than memory locations. + + The generated key is a tuple of typed components that ensures correct + lexicographic ordering — integers compare as integers, not as digit + strings (e.g., ``10 > 3`` but ``"10" < "3"``). + + The key components are: + + 0. **Node type** (``str``): Class name (e.g., ``"AccessNode"``). + 1. **Label** (``str``): Data descriptor name or node label. + 2. **In-degree** (``int``): Number of incoming edges. + 3. **Out-degree** (``int``): Number of outgoing edges. + 4. **In-connectors** (``tuple[str, ...]``): Sorted input connector names. + 5. **Out-connectors** (``tuple[str, ...]``): Sorted output connector names. + 6. **Extra** (``str``): Type-specific tiebreaker — map parameters and + range for ``MapEntry``/``MapExit``, code string for ``Tasklet``, + state count and symbol mapping for ``NestedSDFG``. + + An optional ``node_key_cache`` dict (keyed by ``id(node)``) can be + provided to avoid redundant recomputation when the same node is + referenced by multiple edges during sorting. The cache is private to + each ``sort_sdfg_alphabetically()`` invocation and lives on the stack, + so concurrent SDFG processing is safe. + + :param graph: The containing graph (e.g., ``SDFGState`` or + ``ControlFlowRegion``) used to compute in-degree and + out-degree. May be ``None``, in which case degree + defaults to zero. + :param node: The DaCe graph node to be evaluated. + :param node_key_cache: Optional cache dict to avoid redundant + recomputation during sort comparisons. + :return: A 7-tuple representing the node's semantic identity. """ if node_key_cache is not None: node_id = id(node) @@ -2812,50 +2809,37 @@ def get_deterministic_node_key(graph: Any, node: Any, node_key_cache: Optional[D # Extract core identifier if hasattr(node, 'data'): - raw_label = node.data + raw_label = str(node.data) elif hasattr(node, 'label'): - raw_label = node.label + raw_label = str(node.label) else: raw_label = str(node) - parts = [] - - # 1. Topological Context (requires the containing graph) + # Topological context + in_deg = 0 + out_deg = 0 if graph is not None: try: in_deg = graph.in_degree(node) out_deg = graph.out_degree(node) - parts.append(f"i{in_deg}o{out_deg}") except (ValueError, KeyError): - # Node might not belong to this graph (e.g., during NX rebuild) pass - # 2. Interface Semantics (Connectors) - if isinstance(node, nd.Node): - parts.append("inC:{" + "-".join(sorted(node.in_connectors.keys())) + "}") - parts.append("outC:{" + "-".join(sorted(node.out_connectors.keys())) + "}") + # Interface semantics (connectors) + in_conn = tuple(sorted(node.in_connectors.keys())) if isinstance(node, nd.Node) else () + out_conn = tuple(sorted(node.out_connectors.keys())) if isinstance(node, nd.Node) else () - # 3. Map Semantics (Parameters, Ranges & Schedules) + # Type-specific tiebreakers + extra = '' if isinstance(node, (nd.MapEntry, nd.MapExit)): - parts.append("map:" + "-".join(node.map.params)) - parts.append("range:" + str(node.map.range)) - parts.append(f"sch:{str(node.map.schedule)}") - - # 4. Internal Code Semantics (Tasklets) - if isinstance(node, nd.Tasklet): - code_str = str(node.code.as_string).strip() - parts.append(f"code:{code_str}") - - # 5. Nested SDFG Differentiation - if isinstance(node, nd.NestedSDFG): - parts.append(f"nsdfg_states:{len(node.sdfg.nodes())}") - # Include symbol mapping for further differentiation + extra = '-'.join(node.map.params) + ':' + str(node.map.range) + ':' + str(node.map.schedule) + elif isinstance(node, nd.Tasklet): + extra = str(node.code.as_string).strip() + elif isinstance(node, nd.NestedSDFG): sorted_syms = dict(sorted({str(k): str(v) for k, v in node.symbol_mapping.items()}.items())) - parts.append(f"symmap:{sorted_syms}") + extra = f"states:{len(node.sdfg.nodes())}_syms:{sorted_syms}" - # Tuple enforces lexicographic order: group by type, then label, - # then use dynamic parts as a tiebreaker. - result = (node_type, str(raw_label), "_".join(parts)) + result = (node_type, raw_label, in_deg, out_deg, in_conn, out_conn, extra) if node_key_cache is not None: node_key_cache[id(node)] = result return result @@ -2863,44 +2847,57 @@ def get_deterministic_node_key(graph: Any, node: Any, node_key_cache: Optional[D def get_deterministic_edge_key(graph: Any, edge: Any, node_key_cache: Optional[Dict[int, tuple]] = None) -> tuple: """ - Generates a highly stable key for graph edges to ensure - deterministic sorting. + Generates a deterministic key for graph edges to ensure stable sorting. + + The key groups edges first by their endpoint node pair, then by + connectors, then by data payload:: - This function extracts the semantic connection points (connectors) - and the data payload (Memlets or Interstate conditions) to prevent - non-deterministic compiler graph traversals. + (src_node_key, dst_node_key, src_connector, dst_connector, data_str) - For interstate edges, whose data object lacks a stable ``__str__()`` - and would produce a memory-address-based representation, the function - extracts the ``condition`` and ``assignments`` attributes instead. + For Memlet edges, only the data descriptor name (plus ``wcr`` and + ``dynamic`` flags) is used as the payload — the full Memlet string + representation is avoided because it includes subset detail that + does not improve disambiguation but produces unnecessarily different + sort orders. + + For interstate edges, the condition and sorted assignments are + extracted, since ``InterstateEdge.__str__()`` produces an unstable + memory-address-based representation. :param graph: The containing graph, passed through to :func:`get_deterministic_node_key` for degree computation. - :param edge: The DaCe graph edge (or InterstateEdge) to be evaluated. - :param node_key_cache: Optional node-key cache dict, passed through to - :func:`get_deterministic_node_key`. - :return: A tuple representing the edge's routing and payload. - """ - # 1. Extract connector strings - raw_src_conn = str(getattr(edge, 'src_conn', '')) - raw_dst_conn = str(getattr(edge, 'dst_conn', '')) - - # 2. Extract data payload, handling InterstateEdge specially since it - # lacks __str__() and would produce an unstable memory-address-based - # representation. - edge_data = getattr(edge, 'data', '') - if hasattr(edge_data, 'condition') and hasattr(edge_data, 'assignments'): - cond_str = str(edge_data.condition) - sorted_assigns = dict(sorted({str(k): str(v) for k, v in edge_data.assignments.items()}.items())) - raw_data_str = f"cond:{cond_str}_assign:{sorted_assigns}" - else: - raw_data_str = str(edge_data) - - # 3. Retrieve the stabilized keys for the source and destination nodes + :param edge: The DaCe graph edge to be evaluated. + :param node_key_cache: Optional cache dict for node keys. + :return: A 5-tuple representing the edge's routing and payload. + """ + # 1. Retrieve the stabilized keys for source and destination nodes src_key = get_deterministic_node_key(graph, edge.src, node_key_cache) dst_key = get_deterministic_node_key(graph, edge.dst, node_key_cache) - return (src_key, raw_src_conn, dst_key, raw_dst_conn, raw_data_str) + # 2. Extract connector strings + src_conn = str(getattr(edge, 'src_conn', '') or '') + dst_conn = str(getattr(edge, 'dst_conn', '') or '') + + # 3. Extract deterministic data payload + edge_data = getattr(edge, 'data', None) + if edge_data is None: + data_str = '' + elif hasattr(edge_data, 'condition') and hasattr(edge_data, 'assignments'): + # InterstateEdge: extract condition and sorted assignments + cond_str = str(edge_data.condition) + sorted_assigns = tuple(sorted((str(k), str(v)) for k, v in edge_data.assignments.items())) + data_str = f"cond:{cond_str}_assign:{sorted_assigns}" + elif hasattr(edge_data, 'data'): + # Memlet: use data name + wcr + dynamic flag + data_str = str(edge_data.data or '') + if edge_data.wcr: + data_str += f"_wcr:{edge_data.wcr}" + if edge_data.dynamic: + data_str += "_dyn" + else: + data_str = str(edge_data) + + return (src_key, dst_key, src_conn, dst_conn, data_str) def sort_graph_dicts_alphabetically(graph: Union['ControlFlowRegion', 'SDFGState'],