diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 00000000..cbf9b48a --- /dev/null +++ b/bench/README.md @@ -0,0 +1,23 @@ +# Benchmark: TRON subsolver comparison (CG vs MINRES) + +This benchmark compares `tron(..., subsolver = :cg)` with `tron(..., subsolver = :minres)` using Krylov.jl. + +## Requirements + +- Julia 1.10 (as in Project.toml) +- From the package root, run `julia --project=.` to use the package environment. Ensure Krylov.jl is available in the environment (Project.toml pins Krylov = "0.10.0"). + +## Run + +```sh +julia --project=. bench/bench_tron_minres_vs_cg.jl +``` + +## Output + +- CSV printed to stdout with columns: prob,subsolver,elapsed,iter,status,objective,dual + +## Notes + +- The script uses synthetic logistic-like problems. Adjust sizes in the script if you want larger/smaller tests or longer time limits. +- If Krylov.jl does not support `:minres` with your installed version, you may need to update the environment to a Krylov release with MINRES trust-region support. diff --git a/bench/bench_tron_minres_vs_cg.jl b/bench/bench_tron_minres_vs_cg.jl new file mode 100644 index 00000000..e662ebbc --- /dev/null +++ b/bench/bench_tron_minres_vs_cg.jl @@ -0,0 +1,57 @@ +# Benchmark TRON subsolver: CG vs MINRES +# Usage: julia --project=. bench/bench_tron_minres_vs_cg.jl + +using Pkg, LinearAlgebra, Printf, Dates +Pkg.activate(joinpath(@__DIR__, "..")) + + +using JSOSolvers, Random, ADNLPModels + +# Build a logistic-like smooth convex objective as an ADNLPModel +function make_logistic(n, m; rng = Random.GLOBAL_RNG) + A = randn(rng, m, n) + xtrue = randn(rng, n) + y = sign.(A * xtrue + 0.1 * randn(rng, m)) + f(x) = begin + z = A * x + s = @. log(1 + exp(-y * z)) + sum(s) + end + gradf(x) = begin + z = A * x + sig = @. 1 / (1 + exp(y * z)) + return A' * (-y .* sig) + end + x0 = zeros(n) + l = fill(-Inf, n) + u = fill(Inf, n) + model = ADNLPModel((x->f(x)), x0, l, u; grad = (x->gradf(x))) + return model +end + +function run_one(model; subsolver = :cg, max_time = 10.0) + t0 = time() + stats = tron(model, subsolver = subsolver, max_time = max_time, verbose = 0, subsolver_verbose = 0) + t = time() - t0 + return (subsolver = subsolver, elapsed = t, iter = stats.iter, status = stats.status, objective = stats.objective, dual = stats.dual_feas) +end + +function main() + rng = MersenneTwister(1234) + problems = [make_logistic(1000, 2000, rng = rng), make_logistic(2000, 4000, rng = rng)] + + results = [] + for (i, model) in enumerate(problems) + for sub in (:cg, :minres) + push!(results, merge((prob = i,), run_one(model, subsolver = sub, max_time = 60.0))) + end + end + + # Print summary + println("prob,subsolver,elapsed,iter,status,objective,dual") + for r in results + @printf("%d,%s,%.6f,%d,%s,%.12g,%.6g\n", r[:prob], string(r[:subsolver]), r[:elapsed], r[:iter], string(r[:status]), r[:objective], r[:dual]) + end +end + +main() diff --git a/bench/results_tron_minres_vs_cg.csv b/bench/results_tron_minres_vs_cg.csv new file mode 100644 index 00000000..8ed140b4 --- /dev/null +++ b/bench/results_tron_minres_vs_cg.csv @@ -0,0 +1,5 @@ +prob,subsolver,elapsed,iter,status,objective,dual +1,cg,68.995797,2,max_time,195.150087826,118.665 +1,minres,74.223810,2,max_time,237.635885566,129.697 +2,cg,182.949333,1,max_time,948.148566607,623.416 +2,minres,139.410956,1,max_time,1027.95775894,632.073 diff --git a/src/tron.jl b/src/tron.jl index f2dc5972..da4ee9ef 100644 --- a/src/tron.jl +++ b/src/tron.jl @@ -223,12 +223,19 @@ end kwargs..., ) where {T, V} dict = Dict(kwargs) - subsolver_keys = intersect(keys(dict), tron_keys) - subsolver_kwargs = Dict(k => dict[k] for k in subsolver_keys) - solver = TronSolver(nlp; μ₀ = μ₀, μ₁ = μ₁, σ = σ, subsolver_kwargs...) - for k in subsolver_keys - pop!(dict, k) + + # Extract subsolver if provided (default :cg) and ensure it is consumed here + subsolver = haskey(dict, :subsolver) ? dict[:subsolver] : :cg + if haskey(dict, :subsolver) + pop!(dict, :subsolver) end + + # Construct solver with chosen subsolver. We intentionally do not forward + # additional TRON trust-region kwargs here to keep the call simple; the + # TronSolver constructor will use defaults for those parameters. + solver = TronSolver(nlp; μ₀ = μ₀, μ₁ = μ₁, σ = σ, subsolver = subsolver) + + # Remaining dict contains kwargs meant for solve! (we removed :subsolver) return solve!(solver, nlp; x = x, dict...) end @@ -582,6 +589,46 @@ function cauchy!( return α, s, status end +""" + run_krylov_subsolver!(workspace, A, b; radius, rtol, atol, timemax, verbose) + +Small compatibility wrapper around Krylov.jl's `krylov_solve!` to handle +workspaces that don't accept the `radius` keyword (e.g. Minres). We first +attempt to call with `radius`, and on a MethodError we retry without it. +""" +function run_krylov_subsolver!(workspace, A, b; radius=nothing, rtol=1e-6, atol=0.0, timemax=Inf, verbose=0) + # Try calling with `radius` first (covers CG-like workspaces). + if radius !== nothing + try + return krylov_solve!(workspace, A, b; radius = radius, rtol = rtol, atol = atol, timemax = timemax, verbose = verbose) + catch e + # If the failure is due to an unsupported keyword (MethodError), fall + # back to calling with a callback that enforces the trust-region by + # inspecting the Krylov workspace's current solution `x` at each + # iteration. Any other error is rethrown. + if isa(e, MethodError) + # Krylov callbacks receive the workspace and should return `true` + # to request termination. We build a closure that captures `radius`. + function tr_callback(cb_workspace) + # Some workspaces store the current iterate in `x`. + if !hasfield(typeof(cb_workspace), :x) + error("Krylov workspace of type $(typeof(cb_workspace)) does not have an `x` field. Trust-region constraint cannot be enforced. Please use a compatible workspace type.") + end + xcur = getfield(cb_workspace, :x) + # Stop when the squared norm of the current solution reaches the squared radius (avoids sqrt). + return norm(xcur)^2 >= radius^2 + end + + return krylov_solve!(workspace, A, b; rtol = rtol, atol = atol, timemax = timemax, verbose = verbose, callback = tr_callback) + else + rethrow(e) + end + end + else + return krylov_solve!(workspace, A, b; rtol = rtol, atol = atol, timemax = timemax, verbose = verbose) + end +end + """ projected_newton!(solver, x, H, g, Δ, cgtol, ℓ, u, s, Hs; max_time = Inf, max_cgiter = 50, subsolver_verbose = 0) @@ -639,10 +686,12 @@ function projected_newton!( cg_op_diag[i] = ifix[i] ? 0 : 1 # implictly changes cg_op and so ZHZ end - cg!( + # Use a compatibility wrapper so workspaces with differing keyword + # signatures (MINRES doesn't accept `radius`) are handled gracefully. + run_krylov_subsolver!( cg_solver, ZHZ, - cgs_rhs, + cgs_rhs; radius = Δ, rtol = cgtol, atol = zero(T),