using LinearAlgebra
using Optim
struct BasicTrustRegion{T <: Real}
    η1::T
    η2::T
    γ1::T
    γ2::T
end
function BTRDefaults()
    return BasicTrustRegion(0.01, 0.9, 0.5, 0.5)
end
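The thresholds $\eta_1 \leq \eta_2$ decide when a candidate is accepted and when an iteration counts as very successful, while $\gamma_1$ and $\gamma_2$ scale the radius after unsuccessful and merely successful iterations, respectively. As an illustration (these values are assumed, not taken from the text), a stricter configuration can be built directly:
# Illustrative alternative parameters (assumed, not from the text):
# accept only if ρ >= 0.1, declare the iteration very successful if ρ >= 0.75.
strict = BasicTrustRegion(0.1, 0.75, 0.25, 0.5)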
The state of the algorithm is declared with the additional keyword mutable, as its content has to be modified from iteration to iteration.
mutable struct BTRState
    iter::Int
    x::Vector
    xcand::Vector
    g::Vector
    step::Vector
    Δ::Float64
    ρ::Float64
    function BTRState()
        return new()
    end
end
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    # If the iteration is successful, update the iterate
    if (state.ρ >= b.η1)
        state.x = copy(state.xcand)
        return true
    else
        return false
    end
end
function updateRadius!(state::BTRState, b::BasicTrustRegion)
    if (state.ρ >= b.η2)
        # very successful iteration: possibly enlarge the radius
        stepnorm = norm(state.step)
        state.Δ = min(1e20, max(4 * stepnorm, state.Δ))
    elseif (state.ρ >= b.η1)
        # successful iteration
        state.Δ *= b.γ2
    else
        # unsuccessful iteration
        state.Δ *= b.γ1
    end
end
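To see the three regimes at work, here is a small illustrative trace; the values of $\rho$ and of the step are assumed for the example, not taken from the text.
# Illustrative only: a very successful iteration enlarges the radius.
b = BTRDefaults()
s = BTRState()
s.Δ = 1.0
s.step = [0.5, 0.0]
s.ρ = 0.95          # ρ >= η2 = 0.9, so Δ becomes max(4*0.5, 1.0) = 2.0
updateRadius!(s, b)
s.Δ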
We now compute the Cauchy step, i.e. the minimizer of the quadratic model along the steepest-descent direction, restricted to the trust region.
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    q = dot(g, H * g)
    normg = norm(g)
    if (q <= 0)
        # the curvature along g is nonpositive
        τ = 1.0
    else
        # the curvature along g is positive
        τ = min((normg * normg * normg) / (q * Δ), 1.0)
    end
    return -τ * g * Δ / normg
end
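A quick sanity check, not part of the original code: by construction, the Cauchy step never leaves the trust region, and with positive curvature and a large radius it stays strictly inside.
# Illustrative check on a convex quadratic (values assumed):
gtest = [1.0, -2.0]
Htest = [2.0 0.0; 0.0 4.0]
stest = CauchyStep(gtest, Htest, 10.0)
norm(stest) <= 10.0    # true; here τ < 1, so the step is interior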
function btr(f::Function, g!::Function, H!::Function,
             x0::Vector, tol::Float64 = 1e-6, verbose::Bool = false)
    b = BTRDefaults()
    state = BTRState()
    state.iter = 0
    state.Δ = 1.0
    state.x = x0
    n = length(x0)
    state.g = zeros(n)
    H = zeros(n, n)
    fx = f(x0)
    g!(state.g, x0)
    H!(H, x0)
    nmax = 100000
    tol2 = tol * tol
    function model(s::Vector, g::Vector, H::Matrix)
        return dot(s, g) + 0.5 * dot(s, H * s)
    end
    if (verbose)
        println(state)
    end
    while (dot(state.g, state.g) > tol2 && state.iter < nmax)
        # Compute the step by approximately minimizing the model
        state.step = CauchyStep(state.g, H, state.Δ)
        state.xcand = state.x + state.step
        # Compute the ratio of the actual reduction to the predicted reduction
        fcand = f(state.xcand)
        state.ρ = (fcand - fx) / (model(state.step, state.g, H))
        if (acceptCandidate!(state, b))
            g!(state.g, state.x)
            H!(H, state.x)
            fx = fcand
        end
        if (verbose)
            println(state)
        end
        updateRadius!(state, b)
        state.iter += 1
    end
    return state
end
function rosenbrock(x::Vector)
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
function rosenbrock_gradient!(storage::Vector, x::Vector)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end
function rosenbrock_hessian!(storage::Matrix, x::Vector)
    storage[1, 1] = 2.0 - 400.0 * x[2] + 1200.0 * x[1]^2
    storage[1, 2] = -400.0 * x[1]
    storage[2, 1] = -400.0 * x[1]
    storage[2, 2] = 200.0
end
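As a safeguard (this check is not part of the original code), the analytic gradient can be compared with a central finite-difference approximation at an arbitrary test point:
# Illustrative check: analytic gradient vs. central finite differences.
xtest = [0.3, 0.7]
gcheck = zeros(2)
rosenbrock_gradient!(gcheck, xtest)
h = 1e-6
fd = [(rosenbrock(xtest + h * e) - rosenbrock(xtest - h * e)) / (2h)
      for e in ([1.0, 0.0], [0.0, 1.0])]
maximum(abs.(gcheck - fd))   # should be of the order of 1e-8 or below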
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, [0,0])
state.Δ
Let's check the first iteration, starting from (0,0). In order to compute the step length, we first have to know the gradient and the Hessian at (0,0).
x = [0; 0]
grad = zeros(2)
hess = zeros(2, 2)
rosenbrock_gradient!(grad, x)
rosenbrock_hessian!(hess, x)
We know that $$ \alpha^* = \min \left\{ \frac{\Delta_k}{\| \nabla f(x_k) \|_2}, \frac{\| \nabla f(x_k) \|_2^2}{\nabla f(x_k)^T H_k \nabla f(x_k)} \right\} $$ The first term of the minimum, with $\Delta_0 = 1$, is
1.0/norm(grad)
grad
The second is
dot(grad,grad)/dot(grad,hess*grad)
Therefore $\alpha^* = 0.5$ and $x_1 = (0,0) - 0.5\,\nabla f(0,0) = (1,0)$. We can easily check this value by looking at the first iterations of the optimization procedure.
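Indeed, with $\nabla f(0,0) = (-2, 0)$:
x1 = x - 0.5 * grad   # the Cauchy point from (0,0): [1.0, 0.0]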
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, [0,0], 1.0, true)
using ForwardDiff
f(x) = x[1]^4 + x[1]^2 + x[1]*x[2] + (1+x[2])^2
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x);
function g!(storage::Vector, x::Vector)
    s = g(x)
    storage[1:length(s)] = s
end
function H!(storage::Matrix, x::Vector)
    s = H(x)
    n, m = size(s)
    storage[1:n, 1:m] = s
end
state = btr(f, g!, H!, [0,0], 1e-6, true)
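As a final check (illustrative, not part of the original code), first-order stationarity at the returned iterate can be verified with the ForwardDiff gradient:
norm(g(state.x))   # should not exceed the tolerance 1e-6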