Single policy update (#46)

solliolli · web-flow · commit 0a71431004b9 · 2023-10-03T15:24:45.000+03:00
Implements the single update policy heuristic
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -87,6 +87,13 @@ LocalDecisionStrategy(::Node, ::Vector{VariableRef})
 DecisionStrategy(::DecisionVariables)
 ```
 
+## `heuristics.jl`
+### Single policy update
+```@docs
+randomStrategy
+singlePolicyUpdate
+```
+
 ## `analysis.jl`
 ```@docs
 CompatiblePaths
diff --git a/docs/src/decision-programming/decision-model.md b/docs/src/decision-programming/decision-model.md
@@ -55,6 +55,9 @@ The motivation for using the minimum of these bounds is that it depends on the p
 ## Lazy Probability Cut
 Constraint $(6)$ is a complicating constraint involving all path compatibility variables $x(s)$ and thus adding it directly to the model may slow down the overall solution process. It may be beneficial to instead add it as a *lazy constraint*. In the solver, a lazy constraint is only generated when an incumbent solution violates it. In some instances, this allows the MILP solver to prune nodes of the branch-and-bound tree more efficiently.
 
+## Single Policy Update
+To obtain (hopefully good) starting solutions, the single policy update (SPU) heuristic described in [^3] can be used. The heuristic finds a locally optimal strategy in the sense that the strategy cannot be improved by changing any single local strategy. With large problems, the heuristic can quickly provide a solution that would otherwise take very long to obtain.
+
 
 ## Expected Value
 The **expected value** objective is defined using the path compatibility variables $x(𝐬)$ and their associated path probabilities $p(𝐬)$ and path utilities $\mathcal{U}(𝐬)$.
@@ -144,6 +147,8 @@ where the parameter $w∈[0, 1]$ expresses the decision maker's **risk tolerance
 
 
 ## References
-[^1]: Salo, A., Andelmin, J., & Oliveira, F. (2019). Decision Programming for Multi-Stage Optimization under Uncertainty, 1–35. Retrieved from [http://arxiv.org/abs/1910.09196](http://arxiv.org/abs/1910.09196)
+[^1]: Salo, A., Andelmin, J., & Oliveira, F. (2022). Decision programming for mixed-integer multi-stage optimization under uncertainty. European Journal of Operational Research, 299(2), 550-565.
 
 [^2]: Hölsä, O. (2020). Decision Programming Framework for Evaluating Testing Costs of Disease-Prone Pigs. Retrieved from [http://urn.fi/URN:NBN:fi:aalto-202009295618](http://urn.fi/URN:NBN:fi:aalto-202009295618)
+
+[^3]: Hankimaa, H., Herrala, O., Oliveira, F., & Tollander de Balsch, J. (2023). DecisionProgramming.jl -- A framework for modelling decision problems using mathematical programming. Retrieved from [https://arxiv.org/abs/2307.13299](https://arxiv.org/abs/2307.13299)
diff --git a/docs/src/examples/pig-breeding.md b/docs/src/examples/pig-breeding.md
@@ -181,14 +181,19 @@ EV = expected_value(model, diagram, x_s)
 @objective(model, Max, EV)
 ```
 
-and set up the solver and solve the problem.
+and set up the solver.
 
 ```julia
 optimizer = optimizer_with_attributes(
     () -> Gurobi.Optimizer(Gurobi.Env()),
     "IntFeasTol"      => 1e-9,
 )
 set_optimizer(model, optimizer)
+```
+
+Finally, we use the single policy update heuristic to obtain an initial solution and then solve the problem.
+```julia
+spu = singlePolicyUpdate(diagram, model, z, x_s)
 optimize!(model)
 ```
 
diff --git a/examples/n_monitoring.jl b/examples/n_monitoring.jl
@@ -76,6 +76,7 @@ optimizer = optimizer_with_attributes(
     "IntFeasTol"      => 1e-9,
 )
 set_optimizer(model, optimizer)
+
 optimize!(model)
 
 @info("Extracting results.")
diff --git a/examples/pig_breeding.jl b/examples/pig_breeding.jl
@@ -66,6 +66,9 @@ optimizer = optimizer_with_attributes(
     "IntFeasTol"      => 1e-9,
 )
 set_optimizer(model, optimizer)
+
+spu = singlePolicyUpdate(diagram, model, z, x_s)
+@info("Single policy update found solution $(spu[end][1]) in $(spu[end][2]/1000) seconds.")
 optimize!(model)
 
 @info("Extracting results.")
diff --git a/src/DecisionProgramming.jl b/src/DecisionProgramming.jl
@@ -4,6 +4,7 @@ include("influence_diagram.jl")
 include("decision_model.jl")
 include("random.jl")
 include("analysis.jl")
+include("heuristics.jl")
 include("printing.jl")
 
 export Node,
@@ -56,6 +57,9 @@ export CompatiblePaths,
     value_at_risk,
     conditional_value_at_risk
 
+export randomStrategy,
+    singlePolicyUpdate
+
 export print_decision_strategy,
     print_utility_distribution,
     print_state_probabilities,
diff --git a/src/heuristics.jl b/src/heuristics.jl
@@ -0,0 +1,184 @@
+"""
+    randomStrategy(diagram::InfluenceDiagram)
+
+Generate a random decision strategy for the problem.
+
+For every decision node and every state combination of its information set, one
+decision state is drawn with `rand`.
+
+Returns a tuple `(EU, Z, S_active)`: the expected utility of the strategy, the decision
+strategy itself and a vector of the paths that are compatible with the strategy.
+
+# Arguments
+- `diagram::InfluenceDiagram`: Influence diagram structure.
+
+!!! warning
+    This function does not exclude forbidden paths: the strategy returned by this function might be forbidden if the diagram has forbidden state combinations.
+
+# Examples
+```julia
+objval, Z, S_active = randomStrategy(diagram)
+```
+"""
+function randomStrategy(diagram::InfluenceDiagram)
+
+    # Local decision strategies, one per decision node. The element type is stated
+    # explicitly so that the container is not a `Vector{Any}`.
+    Z_d = LocalDecisionStrategy[]
+
+    # Loop through all decision nodes and set local decision strategies
+    for j in diagram.D
+        I_j = diagram.I_j[j]
+
+        # Generate a matrix of correct dimensions to represent the strategy:
+        # one dimension per information node, the last dimension for node j itself
+        dims = diagram.S[[I_j; j]]
+        data = zeros(Int, Tuple(dims))
+        n_states = size(data)[end]
+
+        # For each information state, choose a random decision state
+        for s_Ij in paths(diagram.S[I_j])
+            data[s_Ij..., rand(1:n_states)] = 1
+        end
+        push!(Z_d, LocalDecisionStrategy(j, data))
+    end
+
+    # Construct a decision strategy and obtain the compatible paths
+    Z = DecisionStrategy(diagram.D, diagram.I_j[diagram.D], Z_d)
+    S_active = CompatiblePaths(diagram, Z)
+
+    # Calculate the expected utility corresponding to the strategy
+    EU = sum(diagram.P(s)*diagram.U(s) for s in S_active)
+
+    return EU, Z, collect(S_active)
+end
+
+
+# Find the best decision state of node `j` for the information state `s_Ij`, keeping the
+# rest of the strategy (represented by the active paths `S_active`) unchanged.
+# Returns a tuple (decision state, expected utility, updated active paths). When several
+# decision states give the same value, the last one evaluated is returned.
+function findBestStrategy(diagram, j, s_Ij, S_active, model, EU)
+    # The direction of optimization decides both the initial incumbent value and
+    # the comparison used below
+    sense = objective_sense(model)
+    if sense == MOI.MIN_SENSE
+        incumbent = (0, Inf, [])
+    elseif sense == MOI.MAX_SENSE
+        incumbent = (0, -Inf, [])
+    else
+        throw("The given model is not a maximization or minimization problem.")
+    end
+    minimizing = sense == MOI.MIN_SENSE
+
+    # Evaluate every decision state of node j in turn
+    for candidate in 1:num_states(diagram, diagram.Names[j])
+        # Expected value and active paths when s_Ij is mapped to `candidate`
+        value, active = get_value(diagram, S_active, j, candidate, s_Ij, EU)
+
+        # Non-strict comparison: a candidate that ties with the incumbent replaces it
+        accepted = minimizing ? value <= incumbent[2] : value >= incumbent[2]
+        if accepted
+            incumbent = (candidate, value, active)
+        end
+    end
+    return incumbent
+end
+
+
+# Evaluate the strategy obtained by mapping the information state `s_Ij` of node `j` to
+# the decision state `s_j`. Only the state of node `j` is changed in the affected paths;
+# all other states of a path are kept as they are.
+# Returns the updated expected utility and a modified copy of the active paths
+# (`S_active` itself is left untouched).
+function get_value(diagram, S_active, j, s_j, s_Ij, EU)
+    info_set = diagram.I_j[j] # Information set of node j
+    updated_paths = copy(S_active)
+    value = EU
+
+    for (pos, path) in enumerate(S_active)
+        # Paths with a different information state are not affected by the change
+        path[info_set] == s_Ij || continue
+
+        # Same path, except that node j now takes the state s_j
+        states = [state for state in path]
+        states[j] = s_j
+        new_path = Tuple(states)
+        updated_paths[pos] = new_path
+
+        # Swap the contribution of the old path for that of the new one
+        value = (value - diagram.P(path)*diagram.U(path)) + diagram.P(new_path)*diagram.U(new_path)
+    end
+
+    return value, updated_paths
+end
+
+# Set the start values of the model variables according to the strategy `Z` and its
+# active paths `S_active`: the decision variable selected by each local strategy and
+# the path compatibility variable of each active path are given the start value 1.
+# Start values of the other variables are not touched.
+function set_MIP_start(diagram, Z, S_active, z, x_s)
+    for (k, _) in enumerate(Z.D)
+        for s_Ij in paths(diagram.S[Z.I_d[k]])
+            z_k = z.z[k]
+            chosen = Z.Z_d[k](s_Ij) # Decision state chosen for this information state
+            set_start_value(z_k[s_Ij..., chosen], 1)
+        end
+    end
+
+    for active_path in S_active
+        set_start_value(x_s[active_path], 1)
+    end
+    return nothing
+end
+
+"""
+    singlePolicyUpdate(diagram::InfluenceDiagram, model::Model, z::DecisionVariables, x_s::PathCompatibilityVariables)
+
+Finds a feasible solution using single policy update and sets the model start values to that solution.
+
+Starting from a random strategy, the heuristic repeatedly goes through every decision node
+and every information state and switches to the decision state that gives the best expected
+utility. It stops when a full sweep yields no improvement, or after 20 sweeps.
+
+Returns a vector of tuples `(value, time, strategy)` consisting of the value of each improved
+solution starting from a random policy, time (in milliseconds) since the function call and
+the decision strategy that gave the improved value.
+The purpose of all this output is to allow us to examine how fast the method finds good solutions.
+
+# Arguments
+- `diagram::InfluenceDiagram`: Influence diagram structure.
+- `model::Model`: The decision model, modelled in JuMP. Its objective sense (minimization or
+  maximization) determines the direction of improvement.
+- `z::DecisionVariables`: The decision variables
+- `x_s::PathCompatibilityVariables`: The path compatibility variables
+
+!!! warning
+    This function does not exclude forbidden paths: the strategies explored by this function might be forbidden if the diagram has forbidden state combinations.
+
+# Examples
+```julia
+solutionhistory = singlePolicyUpdate(diagram, model, z, x_s)
+```
+"""
+function singlePolicyUpdate(diagram::InfluenceDiagram, model::Model, z::DecisionVariables, x_s::PathCompatibilityVariables)
+    t1 = time_ns() # Start time
+    sense = objective_sense(model)
+
+    # Initialize empty values
+    solutionhistory = []
+    lastchange = nothing # (node, information state) of the latest improvement, if any
+
+    # Get an initial (random) solution
+    EU, strategy, S_active = randomStrategy(diagram)
+    push!(solutionhistory, (EU, (time_ns()-t1)/1E6, deepcopy(strategy)))
+
+    # In principle, this always converges, but we set a maximum number of iterations anyway to avoid very long solution times
+    for iter in 1:20
+        # Loop through all nodes
+        for (idx, j) in enumerate(diagram.D)
+            I_j = diagram.I_j[j]
+            # Loop through all information states
+            for s_Ij in paths(diagram.S[I_j])
+                # Check if any improvement has happened since the last time this node and information state was visited
+                # If not, the algorithm terminates with a locally optimal solution. This also covers the case where
+                # the first full sweep found no improvement at all (lastchange is still nothing).
+                if iter >= 2 && (lastchange === nothing || lastchange == (j, s_Ij))
+                    set_MIP_start(diagram, solutionhistory[end][3], S_active, z, x_s)
+                    return solutionhistory
+                end
+
+                # Find the best decision alternative s_j for information state s_Ij.
+                # The candidate paths are kept separate from S_active: they are only adopted if the
+                # candidate is accepted below, so that S_active, strategy and EU always describe the same solution.
+                s_j, bestval, S_active_new = findBestStrategy(diagram, j, s_Ij, S_active, model, EU)
+
+                # If the strategy improved, save the new strategy and its expected utility
+                improved = (sense == MOI.MIN_SENSE && bestval < EU-1E-9) || (sense == MOI.MAX_SENSE && bestval > EU+1E-9)
+                if improved
+                    lastchange = (j, s_Ij)
+                    S_active = S_active_new
+                    localstrategy = strategy.Z_d[idx].data
+                    localstrategy[s_Ij..., :] .= 0
+                    localstrategy[s_Ij..., s_j] = 1
+                    strategy.Z_d[idx] = LocalDecisionStrategy(j, localstrategy)
+                    EU = bestval
+                    # The strategy is modified in place on later improvements, hence the copy
+                    push!(solutionhistory, (EU, (time_ns()-t1)/1E6, deepcopy(strategy)))
+                end
+            end
+        end
+    end
+
+    # Set the best found solution as the MIP start to the model
+    set_MIP_start(diagram, solutionhistory[end][3], S_active, z, x_s)
+
+    return solutionhistory
+end
diff --git a/test/heuristics.jl b/test/heuristics.jl
@@ -0,0 +1,71 @@
+using Test, Logging, Random, JuMP
+using DecisionProgramming
+
+
+@info("Creating a pig farm problem with 3 decision stages")
+const N = 4
+
+@info("Creating the influence diagram.")
+diagram = InfluenceDiagram()
+
+add_node!(diagram, ChanceNode("H1", [], ["ill", "healthy"]))
+for i in 1:N-1
+    # Testing result
+    add_node!(diagram, ChanceNode("T$i", ["H$i"], ["positive", "negative"]))
+    # Decision to treat
+    add_node!(diagram, DecisionNode("D$i", ["T$i"], ["treat", "pass"]))
+    # Cost of treatment
+    add_node!(diagram, ValueNode("C$i", ["D$i"]))
+    # Health of next period
+    add_node!(diagram, ChanceNode("H$(i+1)", ["H$(i)", "D$(i)"], ["ill", "healthy"]))
+end
+add_node!(diagram, ValueNode("MP", ["H$N"]))
+
+generate_arcs!(diagram)
+
+# Add probabilities for node H1
+add_probabilities!(diagram, "H1", [0.1, 0.9])
+
+# Declare probability matrix for health nodes H_2, ..., H_N, which have identical information sets and states
+X_H = ProbabilityMatrix(diagram, "H2")
+X_H["healthy", "pass", :] = [0.2, 0.8]
+X_H["healthy", "treat", :] = [0.1, 0.9]
+X_H["ill", "pass", :] = [0.9, 0.1]
+X_H["ill", "treat", :] = [0.5, 0.5]
+
+# Declare probability matrix for test result nodes T_1, ..., T_N-1
+X_T = ProbabilityMatrix(diagram, "T1")
+X_T["ill", "positive"] = 0.8
+X_T["ill", "negative"] = 0.2
+X_T["healthy", "negative"] = 0.9
+X_T["healthy", "positive"] = 0.1
+
+for i in 1:N-1
+    add_probabilities!(diagram, "T$i", X_T)
+    add_probabilities!(diagram, "H$(i+1)", X_H)
+end
+
+for i in 1:N-1
+    add_utilities!(diagram, "C$i", [-100.0, 0.0])
+end
+
+add_utilities!(diagram, "MP", [300.0, 1000.0])
+
+generate_diagram!(diagram, positive_path_utility = true)
+
+
+@info("Creating the decision model.")
+model = Model()
+z = DecisionVariables(model, diagram)
+x_s = PathCompatibilityVariables(model, diagram, z, probability_cut = false)
+EV = expected_value(model, diagram, x_s)
+@objective(model, Max, EV)
+
+
+# The heuristic starts from a random strategy: seed the random number generator so that
+# every test run explores the same sequence of strategies
+Random.seed!(46)
+spu = singlePolicyUpdate(diagram, model, z, x_s)
+@info("Single policy update found solution $(spu[end][1]) in $(spu[end][2]/1000) seconds.")
+
+@test spu[end][1] <= 726.8121 + 1E-9 # Result is not better than the known optimal value
+spu_pairs = zip(spu[1:end-1], spu[2:end])
+@test all(pair -> pair[1][1] < pair[2][1], spu_pairs) # Solution values increase
+@test all(pair -> pair[1][2] < pair[2][2], spu_pairs) # Times increase
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -4,4 +4,5 @@ using Test
     include("influence_diagram.jl")
     include("random.jl")
     include("decision_model.jl")
+    include("heuristics.jl")
 end

Original file line number	Diff line number	Diff line change
`@@ -76,6 +76,7 @@ optimizer = optimizer_with_attributes(`
`76`	`76`	`"IntFeasTol" => 1e-9,`
`77`	`77`	`)`
`78`	`78`	`set_optimizer(model, optimizer)`
	`79`	`+`
`79`	`80`	`optimize!(model)`
`80`	`81`
`81`	`82`	`@info("Extracting results.")`
Original file line number	Diff line number	Diff line change
`@@ -66,6 +66,9 @@ optimizer = optimizer_with_attributes(`
`66`	`66`	`"IntFeasTol" => 1e-9,`
`67`	`67`	`)`
`68`	`68`	`set_optimizer(model, optimizer)`
	`69`	`+`
	`70`	`+spu = singlePolicyUpdate(diagram, model, z, x_s)`
	`71`	`+@info("Single policy update found solution $(spu[end][1]) in $(spu[end][2]/1000) seconds.")`
`69`	`72`	`optimize!(model)`
`70`	`73`
`71`	`74`	`@info("Extracting results.")`