From 5a5b2760377e0915bde280ef4e9c79b07a815c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Sat, 19 Jul 2025 23:27:44 +0200 Subject: [PATCH] Implement gradient --- src/reverse_mode.jl | 80 +++++++++++++++++++++++++++++++++++---------- test/ArrayDiff.jl | 17 +++++++--- 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index dc083a4..36a50ed 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -335,27 +335,71 @@ Reverse-mode evaluation of an expression tree given in `f`. * This function assumes that `f.reverse_storage` has been initialized with 0.0. """ function _reverse_eval(f::_SubexpressionStorage) - @assert length(f.reverse_storage) >= length(f.nodes) - @assert length(f.partials_storage) >= length(f.nodes) + @assert length(f.reverse_storage) >= _length(f.sizes) + @assert length(f.partials_storage) >= _length(f.sizes) # f.nodes is already in order such that parents always appear before # children so a forward pass through nodes is a backwards pass through the # tree. - f.reverse_storage[1] = one(Float64) - for k in 2:length(f.nodes) + children_arr = SparseArrays.rowvals(f.adj) + for i in _storage_range(f.sizes, 1) + f.reverse_storage[i] = one(Float64) + end + for k in 1:length(f.nodes) + @show f.reverse_storage node = f.nodes[k] - if node.type == Nonlinear.NODE_VALUE || - node.type == Nonlinear.NODE_LOGIC || - node.type == Nonlinear.NODE_COMPARISON || - node.type == Nonlinear.NODE_PARAMETER + children_indices = SparseArrays.nzrange(f.adj, k) + if node.type == MOI.Nonlinear.NODE_CALL_MULTIVARIATE + if node.index in + eachindex(MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS) + op = MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS[node.index] + if op == :vect + @assert _eachindex(f.sizes, k) == eachindex(children_indices) + for j in eachindex(children_indices) + rev_parent = @s f.reverse_storage[k] + ix = children_arr[children_indices[j]] + # partial is 1 so we can ignore it + @s f.reverse_storage[ix] = rev_parent + end + continue + elseif op == :dot + # Node `k` is scalar, the jacobian w.r.t. each vectorized input + # child is a row vector whose entries are stored in `f.partials_storage` + rev_parent = @s f.reverse_storage[k] + for j in _eachindex(f.sizes, k) + for child_idx in children_indices + ix = children_arr[child_idx] + partial = @j f.partials_storage[ix] + val = ifelse( + rev_parent == 0.0 && !isfinite(partial), + rev_parent, + rev_parent * partial, + ) + @j f.reverse_storage[ix] = val + end + end + continue + end + end + elseif node.type != MOI.Nonlinear.NODE_CALL_UNIVARIATE continue end - rev_parent = f.reverse_storage[node.parent] - partial = f.partials_storage[k] - f.reverse_storage[k] = ifelse( - rev_parent == 0.0 && !isfinite(partial), - rev_parent, - rev_parent * partial, - ) + # Node `k` has same size as its children. + # The Jacobian (between the vectorized versions) is diagonal and the diagonal entries + # are stored in `f.partials_storage` + for j in _eachindex(f.sizes, k) + rev_parent = @j f.reverse_storage[k] + for child_idx in children_indices + ix = children_arr[child_idx] + @assert _size(f.sizes, k) == _size(f.sizes, ix) + partial = @j f.partials_storage[ix] + val = ifelse( + rev_parent == 0.0 && !isfinite(partial), + rev_parent, + rev_parent * partial, + ) + @j f.reverse_storage[ix] = val + end + end end return end @@ -406,12 +450,12 @@ function _extract_reverse_pass_inner( subexpressions::AbstractVector{T}, scale::T, ) where {T} - @assert length(f.reverse_storage) >= length(f.nodes) + @assert length(f.reverse_storage) >= _length(f.sizes) for (k, node) in enumerate(f.nodes) if node.type == Nonlinear.NODE_VARIABLE - output[node.index] += scale * f.reverse_storage[k] + output[node.index] += scale * @s f.reverse_storage[k] elseif node.type == Nonlinear.NODE_SUBEXPRESSION - subexpressions[node.index] += scale * f.reverse_storage[k] + subexpressions[node.index] += scale * @s f.reverse_storage[k] end end return diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index 78c10b2..6dd1f6a 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -32,19 +32,23 @@ function test_objective_dot_univariate() @test sizes.size_offset == [0, 1, 0, 0, 0] @test sizes.size == [1, 1] @test sizes.storage_offset == [0, 1, 2, 3, 4, 5] - @test MOI.eval_objective(evaluator, [1.2]) == 1.2^2 + x = [1.2] + @test MOI.eval_objective(evaluator, x) == x[1]^2 + g = ones(1) + MOI.eval_objective_gradient(evaluator, g, x) + @test g[1] == 2x[1] return end function test_objective_dot_bivariate() model = Nonlinear.Model() x = MOI.VariableIndex(1) - y = MOI.VariableIndex(1) + y = MOI.VariableIndex(2) Nonlinear.set_objective( model, :(dot([$x, $y] - [1, 2], -[1, 2] + [$x, $y])), ) - evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x]) + evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x, y]) MOI.initialize(evaluator, [:Grad]) sizes = evaluator.backend.objective.expr.sizes @test sizes.ndims == [0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0] @@ -52,8 +56,11 @@ function test_objective_dot_bivariate() @test sizes.size == [2, 2, 2, 2, 2, 2, 2] @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23] - g = [NaN] - @test MOI.eval_objective(evaluator, [5, -1]) ≈ 25 + x = [5, -1] + @test MOI.eval_objective(evaluator, x) ≈ 25 + g = ones(2) + MOI.eval_objective_gradient(evaluator, g, x) + @test g == 2(x - [1, 2]) return end