From b7348fde73904a36db90d2a6ff0f58ee8b74212a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Mar 2026 13:38:53 +0100 Subject: [PATCH 1/6] Add synchronization statements to ensure timer output correctness on the GPU --- src/solvers/dgsem_tree/dg_2d.jl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 55002f3da25..7afa07b3b73 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -113,18 +113,23 @@ function rhs!(du, u, t, backend = trixi_backend(u) # Reset du - @trixi_timeit timer() "reset ∂u/∂t" set_zero!(du, dg, cache) + @trixi_timeit timer() "reset ∂u/∂t" begin + set_zero!(du, dg, cache) + KernelAbstractions.synchronize(backend) + end # Calculate volume integral @trixi_timeit timer() "volume integral" begin calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) + KernelAbstractions.synchronize(backend) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin prolong2interfaces!(backend, cache, u, mesh, equations, dg) + KernelAbstractions.synchronize(backend) end # Calculate interface fluxes @@ -132,23 +137,27 @@ function rhs!(du, u, t, calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) + KernelAbstractions.synchronize(backend) end # Prolong solution to boundaries @trixi_timeit timer() "prolong2boundaries" begin prolong2boundaries!(cache, u, mesh, equations, dg) + KernelAbstractions.synchronize(backend) end # Calculate boundary fluxes @trixi_timeit timer() "boundary flux" begin calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) + KernelAbstractions.synchronize(backend) end # Prolong solution to mortars @trixi_timeit timer() "prolong2mortars" begin prolong2mortars!(cache, u, mesh, equations, dg.mortar, dg) + KernelAbstractions.synchronize(backend) end # Calculate mortar fluxes @@ -156,21 +165,26 @@ function rhs!(du, u, t, calc_mortar_flux!(cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.mortar, dg.surface_integral, dg, cache) + KernelAbstractions.synchronize(backend) end # Calculate surface integrals @trixi_timeit timer() "surface integral" begin calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) + KernelAbstractions.synchronize(backend) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, - cache) + @trixi_timeit timer() "Jacobian" begin + apply_jacobian!(backend, du, mesh, equations, dg, cache) + KernelAbstractions.synchronize(backend) + end # Calculate source terms @trixi_timeit timer() "source terms" begin calc_sources!(du, u, t, source_terms, equations, dg, cache) + KernelAbstractions.synchronize(backend) end return nothing From d91cd5b5976a80b9e92ecaba07d9aba5e2a5e715 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Mar 2026 15:27:54 +0100 Subject: [PATCH 2/6] use trixi_timit macro with a backend --- src/Trixi.jl | 14 +++++++++++++- src/solvers/dgsem_tree/dg_2d.jl | 33 +++++++++++---------------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 42340659520..091bba02b0e 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -90,11 +90,23 @@ export TriangulateIO # for type parameter in DGMultiMesh using TriplotBase: TriplotBase using TriplotRecipes: DGTriPseudocolor @reexport using TrixiBase: trixi_include -using TrixiBase: TrixiBase, @trixi_timeit, timer +using TrixiBase: TrixiBase, timer @reexport using SimpleUnPack: @unpack using SimpleUnPack: @pack! using DataStructures: BinaryHeap, FasterForward, extract_all! +import TrixiBase: @trixi_timeit +macro trixi_timeit(backend, timer_output, label, expr) + expr = quote + local val = $(esc(expr)) + if $(esc(backend)) !== nothing + (KernelAbstractions.synchronize)($(esc(backend))) + end + val + end + return :(@trixi_timeit($(esc(timer_output)), $(esc(label)), $(expr))) +end + using UUIDs: UUID # finite difference SBP operators diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 7afa07b3b73..d85bd760316 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -113,78 +113,67 @@ function rhs!(du, u, t, backend = trixi_backend(u) # Reset du - @trixi_timeit timer() "reset ∂u/∂t" begin + @trixi_timeit backend timer() "reset ∂u/∂t" begin set_zero!(du, dg, cache) - KernelAbstractions.synchronize(backend) end # Calculate volume integral - @trixi_timeit timer() "volume integral" begin + @trixi_timeit backend timer() "volume integral" begin calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) - KernelAbstractions.synchronize(backend) end # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" begin + @trixi_timeit backend timer() "prolong2interfaces" begin prolong2interfaces!(backend, cache, u, mesh, equations, dg) - KernelAbstractions.synchronize(backend) end # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin + @trixi_timeit backend timer() "interface flux" begin calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) - KernelAbstractions.synchronize(backend) end # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" begin + @trixi_timeit backend timer() "prolong2boundaries" begin prolong2boundaries!(cache, u, mesh, equations, dg) - KernelAbstractions.synchronize(backend) end # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" begin + @trixi_timeit backend timer() "boundary flux" begin calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - KernelAbstractions.synchronize(backend) end # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" begin + @trixi_timeit backend timer() "prolong2mortars" begin prolong2mortars!(cache, u, mesh, equations, dg.mortar, dg) - KernelAbstractions.synchronize(backend) end # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" begin + @trixi_timeit backend timer() "mortar flux" begin calc_mortar_flux!(cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.mortar, dg.surface_integral, dg, cache) - KernelAbstractions.synchronize(backend) end # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin + @trixi_timeit backend timer() "surface integral" begin calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) - KernelAbstractions.synchronize(backend) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin + @trixi_timeit backend timer() "Jacobian" begin apply_jacobian!(backend, du, mesh, equations, dg, cache) - KernelAbstractions.synchronize(backend) end # Calculate source terms - @trixi_timeit timer() "source terms" begin + @trixi_timeit backend timer() "source terms" begin calc_sources!(du, u, t, source_terms, equations, dg, cache) - KernelAbstractions.synchronize(backend) end return nothing From 8dcbdac4470c691bdbac93433650d58522ffa872 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Mar 2026 16:08:02 +0100 Subject: [PATCH 3/6] don't commit piracy --- src/Trixi.jl | 13 +------------ src/auxiliary/auxiliary.jl | 20 ++++++++++++++++++++ src/solvers/dgsem_tree/dg_2d.jl | 22 +++++++++++----------- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 091bba02b0e..0dda0b0a710 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -90,22 +90,11 @@ export TriangulateIO # for type parameter in DGMultiMesh using TriplotBase: TriplotBase using TriplotRecipes: DGTriPseudocolor @reexport using TrixiBase: trixi_include -using TrixiBase: TrixiBase, timer +using TrixiBase: TrixiBase, @trixi_timeit, timer @reexport using SimpleUnPack: @unpack using SimpleUnPack: @pack! using DataStructures: BinaryHeap, FasterForward, extract_all! -import TrixiBase: @trixi_timeit -macro trixi_timeit(backend, timer_output, label, expr) - expr = quote - local val = $(esc(expr)) - if $(esc(backend)) !== nothing - (KernelAbstractions.synchronize)($(esc(backend))) - end - val - end - return :(@trixi_timeit($(esc(timer_output)), $(esc(label)), $(expr))) -end using UUIDs: UUID diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index e56cd399f7a..1cbcc2b9958 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -82,6 +82,26 @@ end return ncalls_first end +""" + @trixi_timeit_ext backend timer() "some label" expression + +This macro is an extension of [`@trixi_timeit`](@ref) that also synchronizes the given `backend` after executing the given `expression`. +This is useful to get accurate timing measurements for GPU backends, where the execution of kernels is asynchronous. +The synchronization ensures that all GPU operations are completed before the timer is stopped. + +See also [`@trixi_timeit`](@ref). +""" +macro trixi_timeit_ext(backend, timer_output, label, expr) + expr = quote + local val = $(esc(expr)) + if $(esc(backend)) !== nothing + (KernelAbstractions.synchronize)($(esc(backend))) + end + val + end + return :(@trixi_timeit($(esc(timer_output)), $(esc(label)), $(expr))) +end + """ examples_dir() diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index d85bd760316..739706d209b 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -113,66 +113,66 @@ function rhs!(du, u, t, backend = trixi_backend(u) # Reset du - @trixi_timeit backend timer() "reset ∂u/∂t" begin + @trixi_timeit_ext backend timer() "reset ∂u/∂t" begin set_zero!(du, dg, cache) end # Calculate volume integral - @trixi_timeit backend timer() "volume integral" begin + @trixi_timeit_ext backend timer() "volume integral" begin calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces - @trixi_timeit backend timer() "prolong2interfaces" begin + @trixi_timeit_ext backend timer() "prolong2interfaces" begin prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes - @trixi_timeit backend timer() "interface flux" begin + @trixi_timeit_ext backend timer() "interface flux" begin calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end # Prolong solution to boundaries - @trixi_timeit backend timer() "prolong2boundaries" begin + @trixi_timeit_ext backend timer() "prolong2boundaries" begin prolong2boundaries!(cache, u, mesh, equations, dg) end # Calculate boundary fluxes - @trixi_timeit backend timer() "boundary flux" begin + @trixi_timeit_ext backend timer() "boundary flux" begin calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) end # Prolong solution to mortars - @trixi_timeit backend timer() "prolong2mortars" begin + @trixi_timeit_ext backend timer() "prolong2mortars" begin prolong2mortars!(cache, u, mesh, equations, dg.mortar, dg) end # Calculate mortar fluxes - @trixi_timeit backend timer() "mortar flux" begin + @trixi_timeit_ext backend timer() "mortar flux" begin calc_mortar_flux!(cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.mortar, dg.surface_integral, dg, cache) end # Calculate surface integrals - @trixi_timeit backend timer() "surface integral" begin + @trixi_timeit_ext backend timer() "surface integral" begin calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit backend timer() "Jacobian" begin + @trixi_timeit_ext backend timer() "Jacobian" begin apply_jacobian!(backend, du, mesh, equations, dg, cache) end # Calculate source terms - @trixi_timeit backend timer() "source terms" begin + @trixi_timeit_ext backend timer() "source terms" begin calc_sources!(du, u, t, source_terms, equations, dg, cache) end From c75e3857b19081a91333a5aaa9abf8ecdc2aa08a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 27 Mar 2026 18:03:56 +0100 Subject: [PATCH 4/6] Update src/Trixi.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/Trixi.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 0dda0b0a710..42340659520 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -95,7 +95,6 @@ using TrixiBase: TrixiBase, @trixi_timeit, timer using SimpleUnPack: @pack! using DataStructures: BinaryHeap, FasterForward, extract_all! - using UUIDs: UUID # finite difference SBP operators From b690653b0ba22491f5a60509dd7aae27e166b55d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 29 Mar 2026 14:59:27 +0200 Subject: [PATCH 5/6] fixup! Add synchronization statements to ensure timer output correctness on the GPU Co-authored-by: Benedict <135045760+benegee@users.noreply.github.com> From 4f4e48e6a6711a441e66ac3df48b625146004acb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 29 Mar 2026 15:02:15 +0200 Subject: [PATCH 6/6] only synchronize when timers are enabled --- src/auxiliary/auxiliary.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 1cbcc2b9958..28e4c47d339 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -94,8 +94,8 @@ See also [`@trixi_timeit`](@ref). macro trixi_timeit_ext(backend, timer_output, label, expr) expr = quote local val = $(esc(expr)) - if $(esc(backend)) !== nothing - (KernelAbstractions.synchronize)($(esc(backend))) + if $(esc(backend)) !== nothing && $(TrixiBase).timeit_debug_enabled() + $(KernelAbstractions.synchronize)($(esc(backend))) end val end