using BenchmarkTools
using LinearAlgebra
BasicTrustRegion will gather the constants of the method.
struct BasicTrustRegion{T <: Real}
η1:: T
η2:: T
γ1:: T
γ2:: T
end
function BTRDefaults()
return BasicTrustRegion(0.01,0.9,0.5,0.5)
end
# We define a type to store the state at a given iteration.
mutable struct BTRState
iter::Int
x::Vector
xcand::Vector
g::Vector
step::Vector
Δ::Float64
ρ::Float64
tol::Float64
trace
keepTrace::Bool
function BTRState()
state = new()
state.tol = 1e-6
state.keepTrace = false
return state
end
end
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
# If the iteration is successful, update the iterate
if (state.ρ >= b.η1)
return true
else
return false
end
end
function updateRadius!(state::BTRState, b::BasicTrustRegion)
if (state.ρ >= b.η2)
stepnorm = norm(state.step)
state.Δ = min(1e20,max(4*stepnorm,state.Δ))
elseif (state.ρ >= b.η1)
state.Δ *= b.γ2
else
state.Δ *= b.γ1
end
end
function btr(f::Function, g!::Function, H!::Function, Step::Function,
x0::Vector, state:: BTRState = BTRState(), ApproxH::Bool = false, verbose::Bool = false)
b = BTRDefaults()
state.iter = 0
state.x = x0
n=length(x0)
tol2 = state.tol*state.tol
state.g = zeros(n)
# A better initialization procedure should be used with quasi-Newton approximations
# We could rely on some preconditioner.
H = zeros(n,n)+I
fx = f(x0)
g!(x0, state.g)
state.Δ = 0.1*norm(state.g) # 1.0
if (ApproxH)
y = zeros(n)
gcand = zeros(n)
# H!(H, y, state.step)
else
H!(x0, H)
end
nmax = 1000
if (state.keepTrace)
state.trace= x0'
end
function model(s::Vector, g::Vector, H::Matrix)
return dot(s, g)+0.5*dot(s, H*s)
end
while (dot(state.g,state.g) > tol2 && state.iter < nmax)
# Compute the step by approximately minimize the model
state.step = Step(state.g, H, state.Δ)
state.xcand = state.x+state.step
# Compute the actual reduction over the predicted reduction
fcand = f(state.xcand)
state.ρ = (fcand-fx)/(model(state.step, state.g, H))
if (ApproxH)
g!(state.xcand, gcand)
y = gcand-state.g
sy = dot(state.step,y)
# if (sy < 1e-6)
# println(state.iter, ". ", state.ρ, " ", state.Δ, " ", norm(state.step), " ", (model(state.step, state.g, H)), " ", norm(y), " ", sy, " ", norm(state.g))
# end
H = H!(H, y, state.step)
end
if (acceptCandidate!(state, b))
state.x = copy(state.xcand)
if (ApproxH == false)
g!(state.x, state.g)
H!(state.x, H)
else
state.g = copy(gcand)
end
fx = fcand
end
if (state.keepTrace)
state.trace= [state.trace ; state.x']
end
updateRadius!(state, b)
state.iter += 1
end
return state
end
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
q = dot(g,H*g)
normg = norm(g)
if (q <= 0)
τ = 1.0
else
τ = min((normg*normg*normg)/(q*Δ),1.0)
end
return -τ*g*Δ/normg
end
function BFGSUpdate(B::Matrix, y::Vector, s::Vector)
sy = dot(s,y)
if (sy > 1e-8)
Bs = B*s
B = B - (Bs*Bs')/dot(s, Bs) + (y*y')/sy
end
return B
end
function stopCG(normg::Float64, normg0::Float64, k::Int, kmax::Int, χ::Float64 = 0.1, θ::Float64 = 0.5)
if ((k == kmax) || (normg <= normg0*min(χ, normg0^θ)))
return true
else
return false
end
end
function TruncatedCG(g::Vector, H::Matrix, Δ::Float64)
n = length(g)
s = zeros(n)
normg0 = norm(g)
v = g
d = -v
gv = dot(g,v)
norm2d = gv
norm2s = 0
sMd = 0
k = 0
Δ2 = Δ*Δ
while (stopCG(norm(g), normg0, k, n) == false)
Hd = H*d
κ = dot(d,Hd)
# Is the curvature negative in the direction d?
if (κ <= 0)
if (k == 0)
s = d/norm(d)*Δ
else
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
end
break
end
α = gv/κ
# Check is the model minimizer is outside the trust region
norm2s += α*(2*sMd+α*norm2d)
if (norm2s >= Δ2)
if (k == 0)
s = d/norm(d)*Δ
else
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
end
break
end
# The model minimizer is inside the trust region
s += α*d
g += α*Hd
v = g
newgv = dot(g,v)
β = newgv/gv
gv = newgv
d = -v+β*d
sMd = β*(sMd+α*norm2d)
norm2d = gv+β*β*norm2d
k += 1;
end
return s
end
function rosenbrock(x::Vector)
return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
function rosenbrock_gradient!(x::Vector, storage::Vector)
storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
storage[2] = 200.0 * (x[2] - x[1]^2)
end
function rosenbrock_hessian!(x::Vector, storage::Matrix)
storage[1, 1] = 2.0 - 400.0 * x[2] + 1200.0 * x[1]^2
storage[1, 2] = -400.0 * x[1]
storage[2, 1] = -400.0 * x[1]
storage[2, 2] = 200.0
end
defaultState = BTRState()
defaultState.tol
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, CauchyStep, [0,0])
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, TruncatedCG, [0,0])
defaultState.tol = 1e-4
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, CauchyStep, [0,0], defaultState)
state = btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, TruncatedCG, [0,0], defaultState)
@benchmark btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, CauchyStep, [0,0], defaultState)
@benchmark btr(rosenbrock, rosenbrock_gradient!, rosenbrock_hessian!, TruncatedCG, [0,0], defaultState)
state = btr(rosenbrock, rosenbrock_gradient!, BFGSUpdate, TruncatedCG, [0,0], defaultState, true)
@benchmark btr(rosenbrock, rosenbrock_gradient!, BFGSUpdate, TruncatedCG, [0,0], defaultState, true)
state = btr(rosenbrock, rosenbrock_gradient!, BFGSUpdate, CauchyStep, [0,0], defaultState, true)
"Trust-Region Methods", Introduction $$ f(x,y) = -10x^2+10y^2+4\sin(xy)-2x+x^4 $$
using ForwardDiff
defaultState.tol = 1e-6
f(x::Vector) = -10*x[1]^2+10*x[2]^2+4*sin(x[1]*x[2])-2*x[1]+x[1]^4
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x)
function g!(x::Vector, storage::Vector)
s = g(x)
storage[1:length(s)] = s[1:length(s)]
end
function H!(x::Vector, storage::Matrix)
s = H(x)
n, m = size(s)
storage[1:n,1:m] = s[1:length(s)]
end
state = btr(f, g!, H!, CauchyStep, [0,0])
state = btr(f, g!, H!, TruncatedCG, [0,0])
state = btr(f, g!, BFGSUpdate, CauchyStep, [0,0], BTRState(), true)
state = btr(f, g!, BFGSUpdate, TruncatedCG, [0,0], BTRState(), true)
f([2.30663, -0.332309])
state = btr(f, g!, H!, TruncatedCG, [-2.0,-2.0], BTRState())
f([-2.21022, 0.329748])
st = BTRState()
st.keepTrace = true
state = btr(f, g!, BFGSUpdate, TruncatedCG, [-2.0,-2.0], st, true)
st.trace