6 changes: 5 additions & 1 deletion docs/Project.toml
@@ -7,6 +7,7 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
@@ -15,6 +16,7 @@ IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
@@ -35,13 +37,14 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
Calculus = "0.5"
ComponentArrays = "0.15"
ComponentArrays = "0.15.34"
DataInterpolations = "3.10, 4, 5, 6, 7, 8"
DelayDiffEq = "5"
DelimitedFiles = "1"
DiffEqCallbacks = "2.24, 3, 4"
DiffEqNoiseProcess = "5.14"
DifferentialEquations = "7"
DifferentiationInterface = "0.6, 0.7"
Documenter = "1"
Enzyme = "0.12, 0.13"
Flux = "0.14, 0.15, 0.16"
@@ -50,6 +53,7 @@ IterTools = "1"
Lux = "1"
LuxCUDA = "0.3"
MLUtils = "0.4"
Mooncake = "0.5"
Optimization = "3.9, 4, 5"
OptimizationOptimJL = "0.2, 0.3, 0.4"
OptimizationOptimisers = "0.2, 0.3"
7 changes: 5 additions & 2 deletions docs/src/Benchmark.md
@@ -40,7 +40,8 @@ Quick summary:
import OrdinaryDiffEq as ODE
import Lux
import SciMLSensitivity as SMS
import Zygote
import Mooncake
import DifferentiationInterface as DI
import BenchmarkTools
import Random
import ComponentArrays as CA
@@ -80,7 +81,9 @@ for sensealg in (SMS.InterpolatingAdjoint(autojacvec = SMS.ZygoteVJP()),
return loss
end

t = BenchmarkTools.@belapsed Zygote.gradient($loss_neuralode, $u0, $ps, $st)
backend = DI.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
loss_ps = p -> loss_neuralode(u0, p, st)
t = BenchmarkTools.@belapsed DI.gradient($loss_ps, $backend, $ps)
println("$(sensealg) took $(t)s")
end
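# A sketch of amortizing the AD setup cost across calls: DI's preparation API
# builds the Mooncake rule cache once, and `DI.gradient` then reuses it.
# Assumes the prepared-operator form of the DifferentiationInterface API:
#
#   prep = DI.prepare_gradient(loss_ps, backend, ps)
#   t = BenchmarkTools.@belapsed DI.gradient($loss_ps, $prep, $backend, $ps)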

15 changes: 10 additions & 5 deletions docs/src/examples/dde/delay_diffeq.md
@@ -10,6 +10,7 @@ import Optimization as OPT
import SciMLSensitivity as SMS
import OptimizationPolyalgorithms as OPA
import DelayDiffEq as DDE
import Mooncake

# Define the same LV equation, but including a delay parameter
function delay_lotka_volterra!(du, u, h, p, t)
@@ -35,7 +36,7 @@ prob_dde = DDE.DDEProblem(delay_lotka_volterra!, u0, h, (0.0, 10.0),

function predict_dde(p)
return Array(ODE.solve(prob_dde, DDE.MethodOfSteps(ODE.Tsit5());
u0, p, saveat = 0.1, sensealg = SMS.ReverseDiffAdjoint()))
u0, p, saveat = 0.1))
end

loss_dde(p) = sum(abs2, x - 1 for x in predict_dde(p))
@@ -50,14 +51,18 @@ callback = function (state, l; doplot = false)
return false
end

adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss_dde(x), adtype)
optprob = OPT.OptimizationProblem(optf, p)
result_dde = OPT.solve(optprob, OPA.PolyOpt(); maxiters = 300, callback)
```

Notice that we chose `sensealg = ReverseDiffAdjoint()` to utilize the ReverseDiff.jl
reverse-mode to handle the delay differential equation.
The `sensealg` is left at its default. For DDEs the automatic choice is
[`ForwardDiffSensitivity`](@ref) (which differentiates through
`MethodOfSteps` via dual numbers) for problems with fewer than 100
parameters, and [`ReverseDiffAdjoint`](@ref) for larger ones —
[continuous adjoints](@ref sensitivity_diffeq) are not yet defined for
DDEs, so the discretize-then-optimize methods are the only option.
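To pin the choice instead of relying on the heuristic, pass `sensealg` explicitly. A minimal sketch (the helper name is illustrative) forcing the discrete reverse-mode adjoint that this example previously hard-coded:

```julia
function predict_dde_reversediff(p)
    return Array(ODE.solve(prob_dde, DDE.MethodOfSteps(ODE.Tsit5());
        u0, p, saveat = 0.1, sensealg = SMS.ReverseDiffAdjoint()))
end
```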

We define a callback to display the solution at the current parameters for each step of the training:

Expand All @@ -76,7 +81,7 @@ end
We use `Optimization.solve` to optimize the parameters for our loss function:

```@example dde
adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss_dde(x), adtype)
optprob = OPT.OptimizationProblem(optf, p)
result_dde = OPT.solve(optprob, OPA.PolyOpt(); callback)
9 changes: 7 additions & 2 deletions docs/src/examples/hybrid_jump/bouncing_ball.md
@@ -13,6 +13,7 @@ import OptimizationPolyalgorithms as OPA
import SciMLSensitivity as SMS
import OrdinaryDiffEq as ODE
import DiffEqCallbacks as DEC
import Mooncake

function f(du, u, p, t)
du[1] = u[2]
@@ -44,11 +45,15 @@ the value 20:
function loss(θ)
sol = ODE.solve(prob, ODE.Tsit5(), p = [9.8, θ[1]]; callback)
target = 20.0
abs2(sol[end][1] - target)
# Use `last(sol.u)[1]` instead of `sol[end][1]` — Mooncake's pullback for
# `getindex(::ODESolution, end)` currently has a `BoundsError` bug
# (`SciMLBaseMooncakeExt._scatter_pullback`). Indexing the underlying
# `sol.u::Vector{Vector{Float64}}` directly avoids the bad path.
abs2(last(sol.u)[1] - target)
end

loss([0.8])
adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)
optprob = OPT.OptimizationProblem(optf, [0.8])
@time res = OPT.solve(optprob, OPA.PolyOpt(), maxiters = 300)
10 changes: 6 additions & 4 deletions docs/src/examples/hybrid_jump/hybrid_diffeq.md
@@ -12,6 +12,7 @@ import ComponentArrays as CA
import Random
import SciMLSensitivity as SMS
import Lux
import Mooncake
import OrdinaryDiffEq as ODE
import Plots
import Optimization as OPT
@@ -50,9 +51,7 @@ cb = DEC.PresetTimeCallback(dosetimes, affect!, save_positions = (false, false))

function predict_n_ode(p)
_prob = ODE.remake(prob; p)
Array(ODE.solve(_prob, ODE.Tsit5(); u0 = z0, p, callback = cb, saveat = t,
sensealg = SMS.ReverseDiffAdjoint()))[1:2, :]
#Array(solve(prob,Tsit5();u0=z0,p,saveat=t))[1:2,:]
Array(ODE.solve(_prob, ODE.Tsit5(); u0 = z0, p, callback = cb, saveat = t))[1:2, :]
end

function loss_n_ode(p, _)
Expand All @@ -73,7 +72,10 @@ cba = function (state, l; doplot = false) #callback function to observe training
end

res = OPT.solve(
OPT.OptimizationProblem(OPT.OptimizationFunction(loss_n_ode, OPT.AutoZygote()),
OPT.OptimizationProblem(
OPT.OptimizationFunction(
loss_n_ode, OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
),
CA.ComponentArray(ps)),
OPO.Adam(0.05); callback = cba, maxiters = 1000)
```
1 change: 1 addition & 0 deletions docs/src/examples/neural_ode/simplechains.md
@@ -1,5 +1,6 @@
# Faster Neural Ordinary Differential Equations with SimpleChains


[SimpleChains](https://github.com/PumasAI/SimpleChains.jl) has demonstrated performance boosts of ~5x and ~30x over mainstream deep learning frameworks like PyTorch for the training and evaluation of small neural networks. For the nitty-gritty details, as well as some SciML-related videos on the need for and applications of such a library, see this [blogpost](https://julialang.org/blog/2022/04/simple-chains/). As for Scientific Machine Learning: how do we even begin to train neural ODEs with a generic deep learning library like this?

## Training Data
3 changes: 2 additions & 1 deletion docs/src/examples/ode/exogenous_input.md
@@ -49,6 +49,7 @@ import OptimizationPolyalgorithms as OPA
import OptimizationOptimisers as OPO
import Plots
import Random
import Mooncake

rng = Random.default_rng()
tspan = (0.1, 10.0)
@@ -93,7 +94,7 @@ function loss(p)
return sum(abs2.(y[1:N] .- sol')) / N
end

adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)
optprob = OPT.OptimizationProblem(optf, CA.ComponentArray{Float64}(p_model))

13 changes: 8 additions & 5 deletions docs/src/examples/ode/second_order_adjoints.md
@@ -13,6 +13,7 @@ optimization, while `KrylovTrustRegion` will utilize a Krylov-based method
with Hessian-vector products (never forming the Hessian) for large parameter
optimizations.


```@example secondorderadjoints
import SciMLSensitivity as SMS
import Lux
Expand All @@ -23,6 +24,7 @@ import OrdinaryDiffEq as ODE
import Plots
import Random
import OptimizationOptimJL as OOJ
import Mooncake

u0 = Float32[2.0; 0.0]
datasize = 30
@@ -83,13 +85,14 @@ callback = function (state, l; doplot = false)
return l < 0.01
end

adtype = OPT.AutoZygote()
optf = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype)

optprob1 = OPT.OptimizationProblem(optf, ps)
adtype1 = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf1 = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype1)
optprob1 = OPT.OptimizationProblem(optf1, ps)
pstart = OPT.solve(optprob1, OPO.Adam(0.01); callback, maxiters = 100).u

optprob2 = OPT.OptimizationProblem(optf, pstart)
adtype2 = OPT.AutoZygote()
optf2 = OPT.OptimizationFunction((x, p) -> loss_neuralode(x), adtype2)
optprob2 = OPT.OptimizationProblem(optf2, pstart)
pmin = OPT.solve(optprob2, OOJ.NewtonTrustRegion(); callback, maxiters = 200)
```
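As noted above, `KrylovTrustRegion` never forms the Hessian and only evaluates Hessian-vector products, which suits larger parameter counts. A sketch of finishing with it instead, assuming `KrylovTrustRegion` is reachable through OptimizationOptimJL's re-export of Optim:

```julia
# Hessian-free second-order refinement: only Hessian-vector products are formed.
pmin_krylov = OPT.solve(optprob2, OOJ.Optim.KrylovTrustRegion(); callback, maxiters = 200)
```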

3 changes: 2 additions & 1 deletion docs/src/examples/ode/second_order_neural.md
@@ -29,6 +29,7 @@ import OptimizationOptimisers as OPO
import RecursiveArrayTools
import Random
import ComponentArrays as CA
import Mooncake

u0 = Float32[0.0; 2.0]
du0 = Float32[0.0; 0.0]
@@ -61,7 +62,7 @@ callback = function (state, l)
l < 0.01
end

adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss_n_ode(x), adtype)
optprob = OPT.OptimizationProblem(optf, ps)

1 change: 1 addition & 0 deletions docs/src/examples/optimal_control/feedback_control.md
@@ -4,6 +4,7 @@ You can also mix a known differential
equation, so that the parameters and the neural network are estimated
simultaneously!


We will assume that we know the dynamics of the second equation
(linear dynamics), and our goal is to find a neural network that is dependent
on the current state of the dynamical system that will control the second
1 change: 0 additions & 1 deletion docs/src/examples/optimal_control/optimal_control.md
@@ -43,7 +43,6 @@ import Optimization as OPT
import OptimizationOptimJL as OOJ
import OptimizationOptimisers as OPO
import SciMLSensitivity as SMS
import Zygote
import Plots
import Statistics
import Random
7 changes: 4 additions & 3 deletions docs/src/examples/pde/brusselator.md
@@ -156,7 +156,7 @@ First, we have to define and configure the neural network that has to be used fo

```@example bruss
import Lux, Random, Optimization as OPT, OptimizationOptimJL as OOJ,
SciMLSensitivity as SMS, Zygote
SciMLSensitivity as SMS, Mooncake

model = Lux.Chain(Lux.Dense(2 => 16, tanh), Lux.Dense(16 => 1))
rng = Random.default_rng()
Expand Down Expand Up @@ -223,12 +223,13 @@ function loss_fn(ps, _)
end
```

Once the loss function is defined, we use the ADAM optimizer to train the neural network. The optimization problem is defined using SciML's `Optimization.jl` tools, and gradients are computed via automatic differentiation using `AutoZygote()` from `SciMLSensitivity`:
Once the loss function is defined, we use the ADAM optimizer to train the neural network. The optimization problem is defined using SciML's `Optimization.jl` tools, and gradients are computed via automatic differentiation using Mooncake through the `SciMLSensitivity` adjoint chain:

```@example bruss
println("[Training] Starting optimization...")
import OptimizationOptimisers as OPO
optf = OPT.OptimizationFunction(loss_fn, SMS.AutoZygote())
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction(loss_fn, adtype)
optprob = OPT.OptimizationProblem(optf, ps_init)
loss_history = Float32[]

8 changes: 4 additions & 4 deletions docs/src/examples/pde/pde_constrained.md
@@ -6,7 +6,7 @@ This example uses a prediction model to optimize the one-dimensional Heat Equati
```@example pde
import SciMLSensitivity as SMS
import DelimitedFiles, Plots
import OrdinaryDiffEq as ODE, Optimization as OPT, OptimizationPolyalgorithms as OPA, Zygote
import OrdinaryDiffEq as ODE, Optimization as OPT, OptimizationPolyalgorithms as OPA, Mooncake

# Problem setup parameters:
Lx = 10.0
Expand Down Expand Up @@ -92,7 +92,7 @@ cb((; u = ps), loss(ps)) # Testing callback function
Plots.scatter(sol[:, end], label = "Truth", size = (800, 500))
Plots.plot!(PRED[end][:, end], lw = 2, label = "Prediction")

adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)

optprob = OPT.OptimizationProblem(optf, ps)
@@ -107,7 +107,7 @@ res = OPT.solve(optprob, OPA.PolyOpt(), callback = cb)
```@example pde2
import SciMLSensitivity as SMS
import DelimitedFiles, Plots
import OrdinaryDiffEq as ODE, Optimization as OPT, OptimizationPolyalgorithms as OPA, Zygote
import OrdinaryDiffEq as ODE, Optimization as OPT, OptimizationPolyalgorithms as OPA, Mooncake
```

### Parameters
@@ -283,7 +283,7 @@ The resulting best parameters are stored in `res` and `res.u` returns the
parameters that minimize the cost function.

```@example pde2
adtype = OPT.AutoZygote()
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss(x), adtype)

optprob = OPT.OptimizationProblem(optf, ps)
1 change: 1 addition & 0 deletions docs/src/examples/sde/SDE_control.md
@@ -18,6 +18,7 @@ to ultimately prepare and stabilize the qubit in the excited state.
Before getting to the explanation, here's some code to start with. We will
follow a full explanation of the definition and training process:


```@example
# load packages
import SciMLSensitivity as SMS, Optimization as OPT, OptimizationOptimisers as OPO
6 changes: 4 additions & 2 deletions docs/src/examples/sde/optimization_sde.md
@@ -94,7 +94,8 @@ function cb2(st, l)
end
```

We can then use `Optimization.solve` to fit the SDE:
We can then use `Optimization.solve` to fit the SDE.


```@example sde
import Optimization as OPT, Zygote, OptimizationOptimisers as OPO
@@ -178,7 +179,8 @@ end
Let's optimize

```@example sde
adtype = OPT.AutoZygote()
import Mooncake
adtype = OPT.AutoMooncake(; config = Mooncake.Config(; friendly_tangents = true))
optf = OPT.OptimizationFunction((x, p) -> loss_sde(x), adtype)

optprob = OPT.OptimizationProblem(optf, p)
13 changes: 8 additions & 5 deletions docs/src/faq.md
@@ -48,17 +48,20 @@ Enzyme.autodiff(Enzyme.Reverse, Enzyme.Duplicated(_f, _tmp6),
This is exactly the inner core Enzyme call, and if this call fails, that is the issue that
needs to be fixed.

And similarly, for out-of-place functions the Zygote isolation is as follows:
And similarly, for out-of-place functions the Mooncake isolation is as follows:

```julia
import Mooncake
p = prob.p
y = prob.u0
f = prob.f
λ = zero(prob.u0)
_dy, back = Zygote.pullback(y, p) do u, p
vec(f(u, p, t))
end
tmp1, tmp2 = back(λ)
# Build the Mooncake pullback for the inner-rhs evaluation `f(u, p)` and
# apply the cotangent `λ` to recover the seed gradients `tmp1` (wrt `y`)
# and `tmp2` (wrt `p`).
g = (u, p) -> vec(f(u, p, t))
cache = Mooncake.prepare_pullback_cache(g, y, p)
_dy, (_, tmp1, tmp2) = Mooncake.value_and_pullback!!(cache, λ, g, y, p)
```
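If the isolated pullback runs, it can be sanity-checked against a finite-difference directional derivative. A quick sketch, with an arbitrary probe direction `v` and step `ε` (note the snippet above seeds `λ` with zeros, so use a nonzero cotangent for a meaningful comparison):

```julia
import LinearAlgebra

λ = ones(length(y))  # nonzero cotangent
_dy, (_, tmp1, tmp2) = Mooncake.value_and_pullback!!(cache, λ, g, y, p)

# ⟨λ, Jv⟩ from finite differences should match ⟨Jᵀλ, v⟩ = ⟨tmp1, v⟩.
v = randn(length(y))
ε = 1e-6
Jv = (g(y .+ ε .* v, p) .- g(y, p)) ./ ε
isapprox(LinearAlgebra.dot(λ, Jv), LinearAlgebra.dot(tmp1, v); rtol = 1e-3)
```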

## How do I use custom parameter types with adjoint sensitivity analysis?