Skip to content
This repository was archived by the owner on Aug 25, 2025. It is now read-only.

Commit 44fb589

Browse files
Add Zygote
1 parent deec17f commit 44fb589

File tree

2 files changed

+194
-5
lines changed

2 files changed

+194
-5
lines changed

ext/OptimizationZygoteExt.jl

Lines changed: 165 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import OptimizationBase.ADTypes: AutoZygote
66
isdefined(Base, :get_extension) ? (using Zygote, Zygote.ForwardDiff) :
77
(using ..Zygote, ..Zygote.ForwardDiff)
88

9-
function OptimizationBase.instantiate_function(f, x, adtype::AutoZygote, p,
9+
function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, adtype::AutoZygote, p,
1010
num_cons = 0)
1111
_f = (θ, args...) -> f(θ, p, args...)[1]
1212
if f.grad === nothing
@@ -83,7 +83,7 @@ function OptimizationBase.instantiate_function(f, x, adtype::AutoZygote, p,
8383
lag_h, f.lag_hess_prototype)
8484
end
8585

86-
function OptimizationBase.instantiate_function(f, cache::OptimizationBase.ReInitCache,
86+
function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache,
8787
adtype::AutoZygote, num_cons = 0)
8888
_f = (θ, args...) -> f(θ, cache.p, args...)[1]
8989
if f.grad === nothing
@@ -160,4 +160,167 @@ function OptimizationBase.instantiate_function(f, cache::OptimizationBase.ReInit
160160
lag_h, f.lag_hess_prototype)
161161
end
162162

163+
164+
function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x, adtype::AutoZygote, p,
165+
num_cons = 0)
166+
_f = (θ, args...) -> f(θ, p, args...)[1]
167+
if f.grad === nothing
168+
grad = function (θ, args...)
169+
val = Zygote.gradient(x -> _f(x, args...), θ)[1]
170+
if val === nothing
171+
return zero(typeof(θ))
172+
else
173+
return val
174+
end
175+
end
176+
else
177+
grad = (θ, args...) -> f.grad(θ, p, args...)
178+
end
179+
180+
if f.hess === nothing
181+
hess = function (θ, args...)
182+
return ForwardDiff.jacobian(θ) do θ
183+
return Zygote.gradient(x -> _f(x, args...), θ)[1]
184+
end
185+
end
186+
else
187+
hess = (θ, args...) -> f.hess(θ, p, args...)
188+
end
189+
190+
if f.hv === nothing
191+
hv = function (H, θ, v, args...)
192+
_θ = ForwardDiff.Dual.(θ, v)
193+
res = grad(_θ, args...)
194+
return getindex.(ForwardDiff.partials.(res), 1)
195+
end
196+
else
197+
hv = f.hv
198+
end
199+
200+
if f.cons === nothing
201+
cons = nothing
202+
else
203+
cons = (θ) -> f.cons(θ, p)
204+
cons_oop = cons
205+
end
206+
207+
if cons !== nothing && f.cons_j === nothing
208+
cons_j = function (θ)
209+
if num_cons > 1
210+
return first(Zygote.jacobian(cons_oop, θ))
211+
else
212+
return first(Zygote.jacobian(cons_oop, θ))[1, :]
213+
end
214+
end
215+
else
216+
cons_j = (θ) -> f.cons_j(θ, p)
217+
end
218+
219+
if cons !== nothing && f.cons_h === nothing
220+
fncs = [(x) -> cons_oop(x)[i] for i in 1:num_cons]
221+
cons_h = function (θ)
222+
return map(1:num_cons) do i
223+
Zygote.hessian(fncs[i], θ)
224+
end
225+
end
226+
else
227+
cons_h = (θ) -> f.cons_h(θ, p)
228+
end
229+
230+
if f.lag_h === nothing
231+
lag_h = nothing # Consider implementing this
232+
else
233+
lag_h = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p)
234+
end
235+
236+
return OptimizationFunction{false}(f.f, adtype; grad = grad, hess = hess, hv = hv,
237+
cons = cons, cons_j = cons_j, cons_h = cons_h,
238+
hess_prototype = f.hess_prototype,
239+
cons_jac_prototype = f.cons_jac_prototype,
240+
cons_hess_prototype = f.cons_hess_prototype,
241+
lag_h, f.lag_hess_prototype)
242+
end
243+
244+
function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, cache::OptimizationBase.ReInitCache,
245+
adtype::AutoZygote, num_cons = 0)
246+
_f = (θ, args...) -> f(θ, cache.p, args...)[1]
247+
p = cache.p
248+
249+
if f.grad === nothing
250+
grad = function (θ, args...)
251+
val = Zygote.gradient(x -> _f(x, args...), θ)[1]
252+
if val === nothing
253+
return zero(typeof(θ))
254+
else
255+
return val
256+
end
257+
end
258+
else
259+
grad = (θ, args...) -> f.grad(θ, p, args...)
260+
end
261+
262+
if f.hess === nothing
263+
hess = function (θ, args...)
264+
return ForwardDiff.jacobian(θ) do θ
265+
Zygote.gradient(x -> _f(x, args...), θ)[1]
266+
end
267+
end
268+
else
269+
hess = (θ, args...) -> f.hess(θ, p, args...)
270+
end
271+
272+
if f.hv === nothing
273+
hv = function (H, θ, v, args...)
274+
_θ = ForwardDiff.Dual.(θ, v)
275+
res = grad(_θ, args...)
276+
return getindex.(ForwardDiff.partials.(res), 1)
277+
end
278+
else
279+
hv = f.hv
280+
end
281+
282+
if f.cons === nothing
283+
cons = nothing
284+
else
285+
cons = (θ) -> f.cons(θ, p)
286+
cons_oop = cons
287+
end
288+
289+
if cons !== nothing && f.cons_j === nothing
290+
cons_j = function (θ)
291+
if num_cons > 1
292+
return first(Zygote.jacobian(cons_oop, θ))
293+
else
294+
return first(Zygote.jacobian(cons_oop, θ))[1, :]
295+
end
296+
end
297+
else
298+
cons_j = (θ) -> f.cons_j(θ, p)
299+
end
300+
301+
if cons !== nothing && f.cons_h === nothing
302+
fncs = [(x) -> cons_oop(x)[i] for i in 1:num_cons]
303+
cons_h = function (θ)
304+
return map(1:num_cons) do i
305+
Zygote.hessian(fncs[i], θ)
306+
end
307+
end
308+
else
309+
cons_h = (θ) -> f.cons_h(θ, p)
310+
end
311+
312+
if f.lag_h === nothing
313+
lag_h = nothing # Consider implementing this
314+
else
315+
lag_h = (θ, σ, μ) -> f.lag_h(θ, σ, μ, p)
316+
end
317+
318+
return OptimizationFunction{false}(f.f, adtype; grad = grad, hess = hess, hv = hv,
319+
cons = cons, cons_j = cons_j, cons_h = cons_h,
320+
hess_prototype = f.hess_prototype,
321+
cons_jac_prototype = f.cons_jac_prototype,
322+
cons_hess_prototype = f.cons_hess_prototype,
323+
lag_h, f.lag_hess_prototype)
324+
end
325+
163326
end

test/adtests.jl

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ optprob.hess(H2, x0)
645645
@test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0]
646646

647647
@test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]]
648-
648+
649649
cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
650650
optf = OptimizationFunction{false}(rosenbrock, OptimizationBase.AutoSparseReverseDiff(), cons = cons)
651651
optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoSparseReverseDiff(),
@@ -664,13 +664,39 @@ optprob.hess(H2, x0)
664664

665665
@test optprob.grad(x0) == G1
666666
@test optprob.hess(x0) == H1
667+
@test optprob.cons(x0) == [0.0]
668+
669+
@test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0]
670+
671+
@test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]]
672+
673+
cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
674+
optf = OptimizationFunction{false}(rosenbrock, OptimizationBase.AutoSparseReverseDiff(true), cons = cons)
675+
optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoSparseReverseDiff(true),
676+
nothing, 2)
677+
678+
@test optprob.grad(x0) == G1
679+
@test Array(optprob.hess(x0)) ≈ H1
667680
@test optprob.cons(x0) == [0.0, 0.0]
668681
@test optprob.cons_j([5.0, 3.0]) ≈ [10.0 6.0; -0.149013 -0.958924] rtol = 1e-6
669682
@test Array.(optprob.cons_h(x0)) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]]
670683

684+
cons = (x, p) -> [x[1]^2 + x[2]^2]
685+
optf = OptimizationFunction{false}(rosenbrock, OptimizationBase.AutoZygote(), cons = cons)
686+
optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoZygote(),
687+
nothing, 1)
688+
689+
@test optprob.grad(x0) == G1
690+
@test optprob.hess(x0) == H1
691+
@test optprob.cons(x0) == [0.0]
692+
693+
@test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0]
694+
695+
@test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]]
696+
671697
cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]]
672-
optf = OptimizationFunction{false}(rosenbrock, OptimizationBase.AutoSparseReverseDiff(true), cons = cons)
673-
optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoSparseReverseDiff(true),
698+
optf = OptimizationFunction{false}(rosenbrock, OptimizationBase.AutoZygote(), cons = cons)
699+
optprob = OptimizationBase.instantiate_function(optf, x0, OptimizationBase.AutoZygote(),
674700
nothing, 2)
675701

676702
@test optprob.grad(x0) == G1

0 commit comments

Comments (0)