Skip to content
Open
43 changes: 43 additions & 0 deletions lib/EnzymeCore/src/EnzymeCore.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export DefaultABI, FFIABI, InlineABI, NonGenABI
export BatchDuplicatedFunc
export within_autodiff, ignore_derivatives
export needs_primal
export ChunkStrategy, SingleChunk, AutoChunk, pick_chunksize

function batch_size end

Expand Down Expand Up @@ -797,4 +798,46 @@ end

Combined(mode::ReverseMode) = mode

"""
    ChunkStrategy

Abstract supertype for strategies that decide how a chunk size is selected.

# See also

- [`SingleChunk`](@ref)
- [`AutoChunk`](@ref)
"""
abstract type ChunkStrategy end

"""
    SingleChunk()

Strategy that picks a chunk size equal to the number of elements, so that the whole array is processed in one chunk.
"""
struct SingleChunk <: ChunkStrategy end

"""
    AutoChunk()

Strategy that picks the chunk size automatically from internal Enzyme-specific heuristics, which are subject to change.
"""
struct AutoChunk <: ChunkStrategy end

# Upper bound used by AutoChunk when capping the automatically selected chunk size.
const DEFAULT_CHUNK_SIZE = 16

"""
    pick_chunksize(s::ChunkStrategy, a::AbstractArray)

Return the chunk size chosen by strategy `s` based on the dimension of array `a`, as a `Val{C}` object.

- In forward-mode gradients and Jacobians, `a` would be the input array.
- In reverse-mode Jacobians, `a` would be the output array.

!!! warning
    For `SingleChunk` and `AutoChunk` strategies, this function is type-unstable.
"""
function pick_chunksize(::SingleChunk, a::AbstractArray)
    # One chunk covering every element.
    return Val(length(a))
end

function pick_chunksize(::AutoChunk, a::AbstractArray)
    # Heuristic: never exceed DEFAULT_CHUNK_SIZE, never exceed the array length.
    return Val(min(DEFAULT_CHUNK_SIZE, length(a)))
end

end # module EnzymeCore
12 changes: 12 additions & 0 deletions lib/EnzymeCore/test/chunk.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using Test
using EnzymeCore

@testset "SingleChunk" begin
    # A single chunk always spans the full array length.
    for n in (10, 100)
        @test pick_chunksize(SingleChunk(), ones(n)) == Val(n)
    end
end

@testset "AutoChunk" begin
    # Automatic selection caps the chunk size at the internal default of 16.
    @test pick_chunksize(AutoChunk(), ones(10)) == Val(10)
    @test pick_chunksize(AutoChunk(), ones(100)) == Val(16)
end
17 changes: 9 additions & 8 deletions lib/EnzymeCore/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,13 @@ using EnzymeCore
@testset "Mode modification" begin
include("mode_modification.jl")
end
end

@testset "within_autodiff" begin
@test !EnzymeCore.within_autodiff()
end

@testset "ignore_derivatives" begin
@test EnzymeCore.ignore_derivatives(3) == 3
@testset "Chunk strategy" begin
include("chunk.jl")
end
@testset "within_autodiff" begin
@test !EnzymeCore.within_autodiff()
end
@testset "ignore_derivatives" begin
@test EnzymeCore.ignore_derivatives(3) == 3
end
end
3 changes: 3 additions & 0 deletions src/Enzyme.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,11 @@ export autodiff,
make_zero!,
remake_zero!

import EnzymeCore: ChunkStrategy, SingleChunk, AutoChunk, pick_chunksize

export jacobian, gradient, gradient!, hvp, hvp!, hvp_and_gradient!
export batch_size, onehot, chunkedonehot
export SingleChunk, AutoChunk

using LinearAlgebra
import SparseArrays
Expand Down
17 changes: 12 additions & 5 deletions src/sugar.jl
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,10 @@ end
return ((one(x),),)
end

# Resolve the strategy to a concrete `Val` chunk size, then forward to the
# `Val`-based `chunkedonehot` method.
@inline chunkedonehot(x, strategy::ChunkStrategy) = chunkedonehot(x, pick_chunksize(strategy, x))

@inline tupleconcat(x) = x
@inline tupleconcat(x, y) = (x..., y...)
@inline tupleconcat(x, y, z...) = (x..., tupleconcat(y, z...)...)
Expand Down Expand Up @@ -502,10 +506,11 @@ end
@inline specialize_output(output, input) = output

"""
gradient(::ForwardMode, f, x; shadows=onehot(x), chunk=nothing)
gradient(::ForwardMode, f, x, args...; chunk=nothing, shadows=create_shadows(chunk, x, args...))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change chunk=nothing, to the relevant correct explicit default.

we should also not support val/nothing inside of here and isntead add a deprecated method (or perhaps first check in the expr) if its one of the legacy methods and mark as deprecated

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in the latest commit.

An issue with the current code is that the deprecation warning will only be visible at the first function call, since that is the only time where the generating function is actually generated:

julia> jacobian(Forward, copy, ones(2); chunk=nothing)
┌ Warning: The `chunk=nothing` configuration will be deprecated in a future release. Please use `chunk=SmallestChunk()` instead.
│   caller = #s719#135 at sugar.jl:461 [inlined]
└ @ Core ~/Documents/GitHub/Julia/Enzyme.jl/src/sugar.jl:461
┌ Warning: The `chunk=nothing` configuration will be deprecated in a future release. Please use `chunk=SmallestChunk()` instead.
│   caller = #s717#137 at sugar.jl:621 [inlined]
└ @ Core ~/Documents/GitHub/Julia/Enzyme.jl/src/sugar.jl:621
([1.0 0.0; 0.0 1.0],)

julia> jacobian(Forward, copy, ones(2); chunk=nothing)
([1.0 0.0; 0.0 1.0],)

Not sure whether that's an issue or not

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we move it to a permanent warning at every call, we should probably add tests for it too


Compute the gradient of an array-input function `f` using forward mode. The
optional keyword argument `shadow` is a vector of one-hot vectors of type `x`
Compute the gradient of an array-input function `f` using forward mode.
The optional keyword argument `chunk` denotes the chunk size to use: it can be `nothing`, `Val(C)` for some `C`, `SingleChunk()`, or `AutoChunk()`.
Comment thread
gdalle marked this conversation as resolved.
Outdated
The optional keyword argument `shadow` is a vector of one-hot vectors of type `x`
which are used to forward-propagate into the return. For performance reasons,
this should be computed once, outside the call to `gradient`, rather than
within this call.
Expand Down Expand Up @@ -788,7 +793,7 @@ end
"""
jacobian(::ForwardMode, args...; kwargs...)

Equivalent to gradient(::ForwardMode, args...; kwargs...)
Equivalent to `gradient(::ForwardMode, args...; kwargs...)`.
"""
@inline function jacobian(fm::ForwardMode, args...; kwargs...)
gradient(fm, args...; kwargs...)
Expand Down Expand Up @@ -915,6 +920,8 @@ end
chunksize = if chunk <: Val
chunk.parameters[1]
else
# TODO: handle SingleChunk and AutoChunk
# this will change the generated function because the chunksize might be determined at runtime
1
end
num = ((n_out_val + chunksize - 1) ÷ chunksize)
Expand Down Expand Up @@ -1173,7 +1180,7 @@ end
jacobian(::ReverseMode, f, x)

Compute the jacobian of a array-output function `f` using (potentially vector)
reverse mode. The `chunk` argument optionally denotes the chunk size to use and
reverse mode. The `chunk` argument optionally denotes the chunk size to use (it can be either `nothing` or `Val(C)` for some `C`) and
Comment thread
gdalle marked this conversation as resolved.
Outdated
`n_outs` optionally denotes the shape of the array returned by `f` (e.g `size(f(x))`).

Example:
Expand Down
40 changes: 40 additions & 0 deletions test/sugar.jl
Original file line number Diff line number Diff line change
Expand Up @@ -666,3 +666,43 @@ end
# @show J_r_3(u, A, x)
# @show J_f_3(u, A, x)
end

# Scalar-valued test function: sum of elementwise sines.
fchunk1(x) = sum(sin.(x))
# Vector-valued test function: elementwise sine plus elementwise cosine of the reversed input.
fchunk2(x) = sin.(x) .+ cos.(reverse(x))

@testset "Chunking strategies" begin
    @testset "ChunkedOneHot" begin
        # SingleChunk yields one chunk spanning the whole array;
        # AutoChunk caps each chunk at the default size of 16.
        @test chunkedonehot(ones(10), SingleChunk()) isa Tuple{NTuple{10}}
        @test chunkedonehot(ones(30), SingleChunk()) isa Tuple{NTuple{30}}
        @test chunkedonehot(ones(10), AutoChunk()) isa Tuple{NTuple{10}}
        @test chunkedonehot(ones(30), AutoChunk()) isa Tuple{NTuple{16}, NTuple{14}}
        @test chunkedonehot(ones(40), AutoChunk()) isa Tuple{NTuple{16}, NTuple{16}, NTuple{8}}
    end

    @testset "Forward gradient" begin
        # Gradient must not depend on the chunking strategy.
        for n in (10, 30)
            x = ones(n)
            g = gradient(Forward, fchunk1, x)
            @test g == gradient(Forward, fchunk1, x; chunk = SingleChunk())
            @test g == gradient(Forward, fchunk1, x; chunk = AutoChunk())
        end
    end
    @testset "Forward Jacobian" begin
        # Jacobian must not depend on the chunking strategy.
        for n in (10, 30)
            x = ones(n)
            J = jacobian(Forward, fchunk2, x)
            @test J == jacobian(Forward, fchunk2, x; chunk = SingleChunk())
            @test J == jacobian(Forward, fchunk2, x; chunk = AutoChunk())
        end
    end
    @testset "Reverse Jacobian" begin
        for n in (10, 30)
            x = ones(n)
            # Forward-mode result serves as the reference value.
            J = jacobian(Forward, fchunk2, x)
            # TODO: fix this — reverse mode does not yet honor chunk strategies.
            @test_broken J == jacobian(Reverse, fchunk2, x; chunk = SingleChunk())
            @test_broken J == jacobian(Reverse, fchunk2, x; chunk = AutoChunk())
        end
    end
end