Skip to content

Conversation

ChrisRackauckas
Copy link
Member

No description provided.

@ChrisRackauckas
Copy link
Member Author

ROBER is benchmarking for type inference issues.

using NonlinearSolve, MINPACK, ModelingToolkit, OrdinaryDiffEqBDF, OrdinaryDiffEqNonlinearSolve, Sundials, LSODA
using OrdinaryDiffEqNonlinearSolve: NonlinearSolveAlg
using ModelingToolkit: t_nounits as t, D_nounits as D
RUS = RadiusUpdateSchemes

@parameters k₁ k₂ k₃
@variables y₁(t) y₂(t) y₃(t)

eqs = [D(y₁) ~ -k₁ * y₁ + k₃ * y₂ * y₃,
    D(y₂) ~ k₁ * y₁ - k₂ * y₂^2 - k₃ * y₂ * y₃,
    D(y₃) ~ k₂ * y₂^2]
rober = ODESystem(eqs, t; name = :rober) |> structural_simplify |> complete
prob = ODEProblem(rober, [y₁, y₂, y₃] .=> [1.0; 0.0; 0.0], (0.0, 1e5), [k₁, k₂, k₃] .=> (0.04, 3e7, 1e4), jac = true)

nlalgs = [
    NonlinearSolveAlg(TrustRegion(autodiff = AutoFiniteDiff()))
    NonlinearSolveAlg(TrustRegion(autodiff = AutoFiniteDiff(), radius_update_scheme = RUS.Bastin))
    NonlinearSolveAlg(NewtonRaphson(autodiff = AutoFiniteDiff()))
    NonlinearSolveAlg(LevenbergMarquardt(autodiff = AutoFiniteDiff()))
    NonlinearSolveAlg(LevenbergMarquardt(autodiff = AutoFiniteDiff()))
    NonlinearSolveAlg(Broyden())
    NonlinearSolveAlg(PseudoTransient(autodiff = AutoFiniteDiff()))
    NonlinearSolveAlg(DFSane())
    NonlinearSolveAlg(CMINPACK(; method=:hybr))
]
sol = solve(prob, FBDF(autodiff=false, nlsolve = nlalgs[3]));
sol = solve(prob, FBDF());

using Plots
plot(sol)
using BenchmarkTools
@btime sol = solve(prob, FBDF(autodiff=false, nlsolve = nlalgs[3]));

@btime sol = solve(prob, FBDF(autodiff=false, nlsolve = nlalgs[1]));
@btime sol = solve(prob, FBDF(autodiff=false));

@btime sol = solve(prob, FBDF());
@btime sol = solve(prob, lsoda());
@btime sol = solve(prob, CVODE_BDF());

sol = solve(prob, FBDF())
sol = solve(prob, CVODE_BDF())
solve(prob, FBDF(autodiff=false, nlsolve = nlalgs[3]));

@profview for i in 1:100 solve(prob, FBDF(autodiff=false, nlsolve = nlalgs[3])) end

OrdinaryDiffEqBDF.nlsolve!(nlsolver, integ, integ.cache, false) 
@inferred SciMLBase.get_du(_cache, _idx)

@which OrdinaryDiffEqBDF.nlsolve!(nlsolver, integ, integ.cache, false)
@which OrdinaryDiffEqBDF.compute_step!(nlsolver, integ)

integ = init(prob, FBDF(autodiff=false, nlsolve = NonlinearSolveAlg(NewtonRaphson(autodiff = AutoFiniteDiff()))))
(;ts, u_history, order, u_corrector, bdf_coeffs, r, nlsolver, weights, terk_tmp, terkp1_tmp, atmp, tmp, equi_ts, u₀, ts_tmp, step_limiter!) = integ.cache;
(;z, tmp, ztmp, γ, α, cache, method) = nlsolver;
(;tstep, invγdt, atmp, ustep )= cache;

@inferred NonlinearSolveBase.Utils.evaluate_f!(nlsolver.cache.cache, nlsolver.cache.cache.u, nlsolver.cache.cache.p)

@which OrdinaryDiffEqNonlinearSolve.step!(nlsolver.cache.cache)
@which NonlinearSolveBase.InternalAPI.step!(nlsolver.cache.cache)
@profview for i in 1:100 OrdinaryDiffEqBDF.nlsolve!(nlsolver, integ, integ.cache, false) end

BRUSS is benchmarking for reuse correctness.

using LinearAlgebra, SparseArrays

const N = 32
const xyd_brusselator = range(0, stop = 1, length = N)
brusselator_f(x, y, t) = (((x - 0.3)^2 + (y - 0.6)^2) <= 0.1^2) * (t >= 1.1) * 5.0
limit(a, N) = a == N + 1 ? 1 : a == 0 ? N : a
function brusselator_2d_loop(du, u, p, t)
    A, B, alpha, dx = p
    alpha = alpha / dx^2
    @inbounds for I in CartesianIndices((N, N))
        i, j = Tuple(I)
        x, y = xyd_brusselator[I[1]], xyd_brusselator[I[2]]
        ip1, im1, jp1, jm1 = limit(i + 1, N), limit(i - 1, N), limit(j + 1, N),
        limit(j - 1, N)
        du[i, j, 1] = alpha * (u[im1, j, 1] + u[ip1, j, 1] + u[i, jp1, 1] + u[i, jm1, 1] -
                       4u[i, j, 1]) +
                      B + u[i, j, 1]^2 * u[i, j, 2] - (A + 1) * u[i, j, 1] +
                      brusselator_f(x, y, t)
        du[i, j, 2] = alpha * (u[im1, j, 2] + u[ip1, j, 2] + u[i, jp1, 2] + u[i, jm1, 2] -
                       4u[i, j, 2]) +
                      A * u[i, j, 1] - u[i, j, 1]^2 * u[i, j, 2]
    end
end
p = (3.4, 1.0, 10.0, step(xyd_brusselator))

function init_brusselator_2d(xyd)
    N = length(xyd)
    u = zeros(N, N, 2)
    for I in CartesianIndices((N, N))
        x = xyd[I[1]]
        y = xyd[I[2]]
        u[I, 1] = 22 * (y * (1 - y))^(3 / 2)
        u[I, 2] = 27 * (x * (1 - x))^(3 / 2)
    end
    u
end
u0 = init_brusselator_2d(xyd_brusselator)
prob2 = ODEProblem(brusselator_2d_loop, u0, (0.0, 11.5), p)

sol = solve(prob2, FBDF(autodiff=false, nlsolve = nlalgs[1]), dt = 1e-6);
sol = solve(prob2, FBDF(autodiff=false));

@btime sol = solve(prob2, FBDF(autodiff=false, nlsolve = nlalgs[3]), dt = 1e-6);
@btime sol = solve(prob2, FBDF(autodiff=false));
@btime sol = solve(prob2, FBDF());
@btime sol = solve(prob2, FBDF(autodiff=false, nlsolve = nlalgs[3]));

@btime sol = solve(prob2, FBDF(autodiff=false, nlsolve = nlalgs[1]), dt=1e-6);

One oddity is that the initializations seem off: this is solved by setting a starting dt, but it should be investigated why that's necessary.

@oscardssmith
Copy link
Member

We really should fix our stiff alg initial dt algorithm...

@ChrisRackauckas ChrisRackauckas mentioned this pull request Aug 10, 2025
12 tasks
@oscardssmith oscardssmith force-pushed the jac_reuse_nonlinearsolve branch from 53963d1 to f6b6c00 Compare October 3, 2025 14:45
@oscardssmith
Copy link
Member

rebased. Looks like we're seeing lots of nonlinear solver convergence failures.

Copy link
Contributor

github-actions bot commented Oct 3, 2025

Benchmark Results (Julia v1)

Time benchmarks
master 514799c... master / 514799c...
construction/linear_N50 27.6 ± 14 μs 27.6 ± 14 μs 1 ± 0.73
construction/lotka_volterra 18.5 ± 0.19 μs 18.8 ± 0.24 μs 0.988 ± 0.016
construction/rober 18.5 ± 0.18 μs 18.6 ± 0.19 μs 0.996 ± 0.014
nonstiff/fitzhugh_nagumo/BS3 0.148 ± 0.0099 ms 0.143 ± 0.01 ms 1.03 ± 0.1
nonstiff/fitzhugh_nagumo/DP5 0.0993 ± 0.0096 ms 0.0952 ± 0.009 ms 1.04 ± 0.14
nonstiff/fitzhugh_nagumo/Tsit5 0.118 ± 0.0087 ms 0.113 ± 0.0072 ms 1.04 ± 0.1
nonstiff/fitzhugh_nagumo/Vern6 0.17 ± 0.0082 ms 0.167 ± 0.0084 ms 1.02 ± 0.071
nonstiff/fitzhugh_nagumo/Vern7 0.116 ± 0.0083 ms 0.113 ± 0.0082 ms 1.03 ± 0.1
nonstiff/lotka_volterra/BS3 0.291 ± 0.011 ms 0.29 ± 0.011 ms 1 ± 0.054
nonstiff/lotka_volterra/DP5 0.0449 ± 0.017 ms 0.0453 ± 0.017 ms 0.99 ± 0.53
nonstiff/lotka_volterra/Tsit5 0.061 ± 0.023 ms 0.0601 ± 0.023 ms 1.02 ± 0.55
nonstiff/lotka_volterra/Vern6 0.0805 ± 0.011 ms 0.0814 ± 0.011 ms 0.988 ± 0.19
nonstiff/lotka_volterra/Vern7 0.0481 ± 0.021 ms 0.0482 ± 0.021 ms 0.999 ± 0.61
nonstiff/pleiades/BS3 0.0866 ± 0.018 s 0.0872 ± 0.014 s 0.993 ± 0.26
nonstiff/pleiades/DP5 1.26 ± 0.093 ms 1.26 ± 0.11 ms 1 ± 0.12
nonstiff/pleiades/Tsit5 15.4 ± 5.9 ms 15.2 ± 5.6 ms 1.01 ± 0.54
nonstiff/pleiades/Vern6 7.34 s 7.31 s 1
nonstiff/pleiades/Vern7 7.88 s 7.87 s 1
scaling/brusselator_2d/16x16 0.304 ± 0.025 s 0.308 ± 0.011 s 0.985 ± 0.089
scaling/brusselator_2d/32x32 7.14 s 7.27 s 0.982
scaling/brusselator_2d/8x8 11.2 ± 0.24 ms 11.2 ± 0.26 ms 0.998 ± 0.032
scaling/linear/N10 0.0372 ± 0.017 ms 0.0376 ± 0.017 ms 0.989 ± 0.63
scaling/linear/N100 0.795 ± 0.018 ms 0.798 ± 0.018 ms 0.996 ± 0.032
scaling/linear/N50 0.214 ± 0.011 ms 0.214 ± 0.012 ms 1 ± 0.076
stiff/pollution/FBDF 0.584 ± 0.013 ms 0.584 ± 0.014 ms 1 ± 0.032
stiff/pollution/KenCarp4 0.549 ± 0.011 ms 0.548 ± 0.011 ms 1 ± 0.028
stiff/pollution/Rodas4 0.777 ± 0.019 ms 0.759 ± 0.014 ms 1.02 ± 0.031
stiff/pollution/Rosenbrock23 1.37 ± 0.035 ms 1.34 ± 0.03 ms 1.02 ± 0.034
stiff/pollution/TRBDF2 0.508 ± 0.011 ms 0.513 ± 0.011 ms 0.99 ± 0.031
stiff/rober/FBDF 0.597 ± 0.01 ms 0.598 ± 0.01 ms 0.999 ± 0.024
stiff/rober/KenCarp4 0.768 ± 0.019 ms 0.769 ± 0.019 ms 0.998 ± 0.035
stiff/rober/Rodas4 0.394 ± 0.0095 ms 0.399 ± 0.0094 ms 0.987 ± 0.033
stiff/rober/Rosenbrock23 0.271 ± 0.0093 ms 0.265 ± 0.0095 ms 1.02 ± 0.051
stiff/rober/TRBDF2 1.72 ± 0.013 ms 1.73 ± 0.014 ms 0.994 ± 0.011
stiff/van_der_pol/FBDF 9.77 ± 0.083 ms 9.73 ± 0.084 ms 1 ± 0.012
stiff/van_der_pol/KenCarp4 4.75 ± 0.064 ms 4.79 ± 0.061 ms 0.992 ± 0.018
stiff/van_der_pol/Rodas4 6.97 ± 0.051 ms 7.56 ± 0.049 ms 0.921 ± 0.009
stiff/van_der_pol/Rosenbrock23 20 ± 0.2 ms 20.4 ± 0.36 ms 0.984 ± 0.02
stiff/van_der_pol/TRBDF2 2.98 ± 0.078 ms 2.92 ± 0.073 ms 1.02 ± 0.037
time_to_load 3.14 ± 0.006 s 3.14 ± 0.01 s 1 ± 0.0038
Memory benchmarks
master 514799c... master / 514799c...
construction/linear_N50 0.071 k allocs: 0.0411 MB 0.071 k allocs: 0.0411 MB 1
construction/lotka_volterra 0.065 k allocs: 2.45 kB 0.065 k allocs: 2.45 kB 1
construction/rober 0.065 k allocs: 2.42 kB 0.065 k allocs: 2.42 kB 1
nonstiff/fitzhugh_nagumo/BS3 3.67 k allocs: 0.163 MB 3.67 k allocs: 0.163 MB 1
nonstiff/fitzhugh_nagumo/DP5 2.62 k allocs: 0.126 MB 2.62 k allocs: 0.126 MB 1
nonstiff/fitzhugh_nagumo/Tsit5 3.98 k allocs: 0.181 MB 3.98 k allocs: 0.181 MB 1
nonstiff/fitzhugh_nagumo/Vern6 4.52 k allocs: 0.207 MB 4.52 k allocs: 0.207 MB 1
nonstiff/fitzhugh_nagumo/Vern7 3.89 k allocs: 0.165 MB 3.89 k allocs: 0.165 MB 1
nonstiff/lotka_volterra/BS3 7.86 k allocs: 0.365 MB 7.86 k allocs: 0.365 MB 1
nonstiff/lotka_volterra/DP5 1.2 k allocs: 0.0536 MB 1.2 k allocs: 0.0536 MB 1
nonstiff/lotka_volterra/Tsit5 2.16 k allocs: 0.0924 MB 2.16 k allocs: 0.0924 MB 1
nonstiff/lotka_volterra/Vern6 2.23 k allocs: 0.0979 MB 2.23 k allocs: 0.0979 MB 1
nonstiff/lotka_volterra/Vern7 1.64 k allocs: 0.073 MB 1.64 k allocs: 0.073 MB 1
nonstiff/pleiades/BS3 0.685 M allocs: 0.0675 GB 0.685 M allocs: 0.0675 GB 1
nonstiff/pleiades/DP5 6.63 k allocs: 0.51 MB 6.63 k allocs: 0.51 MB 1
nonstiff/pleiades/Tsit5 0.186 M allocs: 20.4 MB 0.186 M allocs: 20.4 MB 1
nonstiff/pleiades/Vern6 0.038 G allocs: 4.05 GB 0.038 G allocs: 4.05 GB 1
nonstiff/pleiades/Vern7 0.044 G allocs: 4.63 GB 0.044 G allocs: 4.63 GB 1
scaling/brusselator_2d/16x16 3.57 k allocs: 0.152 GB 3.57 k allocs: 0.152 GB 1
scaling/brusselator_2d/32x32 3.39 k allocs: 2.22 GB 3.39 k allocs: 2.22 GB 1
scaling/brusselator_2d/8x8 2.62 k allocs: 8.76 MB 2.62 k allocs: 8.76 MB 1
scaling/linear/N10 0.752 k allocs: 0.0514 MB 0.752 k allocs: 0.0514 MB 1
scaling/linear/N100 2.27 k allocs: 0.901 MB 2.27 k allocs: 0.901 MB 1
scaling/linear/N50 1.66 k allocs: 0.348 MB 1.66 k allocs: 0.348 MB 1
stiff/pollution/FBDF 1.5 k allocs: 0.288 MB 1.5 k allocs: 0.288 MB 1
stiff/pollution/KenCarp4 0.508 k allocs: 0.134 MB 0.508 k allocs: 0.134 MB 1
stiff/pollution/Rodas4 1.2 k allocs: 0.36 MB 1.2 k allocs: 0.36 MB 1
stiff/pollution/Rosenbrock23 2.58 k allocs: 0.814 MB 2.58 k allocs: 0.814 MB 1
stiff/pollution/TRBDF2 0.779 k allocs: 0.168 MB 0.779 k allocs: 0.168 MB 1
stiff/rober/FBDF 2.87 k allocs: 0.136 MB 2.87 k allocs: 0.136 MB 1
stiff/rober/KenCarp4 1.28 k allocs: 0.0565 MB 1.28 k allocs: 0.0565 MB 1
stiff/rober/Rodas4 1.95 k allocs: 0.0984 MB 1.95 k allocs: 0.0984 MB 1
stiff/rober/Rosenbrock23 2.23 k allocs: 0.108 MB 2.23 k allocs: 0.108 MB 1
stiff/rober/TRBDF2 7.7 k allocs: 0.359 MB 7.7 k allocs: 0.359 MB 1
stiff/van_der_pol/FBDF 0.0444 M allocs: 2.2 MB 0.0444 M allocs: 2.2 MB 1
stiff/van_der_pol/KenCarp4 3.91 k allocs: 0.173 MB 3.91 k allocs: 0.173 MB 1
stiff/van_der_pol/Rodas4 0.0335 M allocs: 1.73 MB 0.0335 M allocs: 1.73 MB 1
stiff/van_der_pol/Rosenbrock23 0.189 M allocs: 8.37 MB 0.189 M allocs: 8.37 MB 1
stiff/van_der_pol/TRBDF2 4.64 k allocs: 0.201 MB 4.64 k allocs: 0.201 MB 1
time_to_load 0.159 k allocs: 11.2 kB 0.159 k allocs: 11.2 kB 1

@oscardssmith
Copy link
Member

reduced MWE:

using NonlinearSolve, OrdinaryDiffEqBDF, OrdinaryDiffEqNonlinearSolve
using OrdinaryDiffEqNonlinearSolve: NonlinearSolveAlg
using LinearAlgebra, SparseArrays

const N = 32
const xyd_brusselator = range(0, stop = 1, length = N)
brusselator_f(x, y, t) = (((x - 0.3)^2 + (y - 0.6)^2) <= 0.1^2) * (t >= 1.1) * 5.0
limit(a, N) = a == N + 1 ? 1 : a == 0 ? N : a
function brusselator_2d_loop(du, u, p, t)
    A, B, alpha, dx = p
    alpha = alpha / dx^2
    @inbounds for I in CartesianIndices((N, N))
        i, j = Tuple(I)
        x, y = xyd_brusselator[I[1]], xyd_brusselator[I[2]]
        ip1, im1, jp1, jm1 = limit(i + 1, N), limit(i - 1, N), limit(j + 1, N),
        limit(j - 1, N)
        du[i, j, 1] = alpha * (u[im1, j, 1] + u[ip1, j, 1] + u[i, jp1, 1] + u[i, jm1, 1] -
                       4u[i, j, 1]) +
                      B + u[i, j, 1]^2 * u[i, j, 2] - (A + 1) * u[i, j, 1] +
                      brusselator_f(x, y, t)
        du[i, j, 2] = alpha * (u[im1, j, 2] + u[ip1, j, 2] + u[i, jp1, 2] + u[i, jm1, 2] -
                       4u[i, j, 2]) +
                      A * u[i, j, 1] - u[i, j, 1]^2 * u[i, j, 2]
    end
end
p = (3.4, 1.0, 10.0, step(xyd_brusselator))

function init_brusselator_2d(xyd)
    N = length(xyd)
    u = zeros(N, N, 2)
    for I in CartesianIndices((N, N))
        x = xyd[I[1]]
        y = xyd[I[2]]
        u[I, 1] = 22 * (y * (1 - y))^(3 / 2)
        u[I, 2] = 27 * (x * (1 - x))^(3 / 2)
    end
    u
end
u0 = init_brusselator_2d(xyd_brusselator)
prob2 = ODEProblem(brusselator_2d_loop, u0, (0.0, 11.5), p)

nlsolve = NonlinearSolveAlg(TrustRegion(autodiff = AutoFiniteDiff()))
integ = solve(prob2, FBDF(autodiff=AutoFiniteDiff(); nlsolve ), dt = 1e-6);
step!(integ, 1e-4)
step!(integ)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants