using LinearAlgebra
The simplest approach consists in exploiting the fact that $\forall\, t > 0$, $$ d = P_{\mathcal{X}}(x - t\nabla f(x)) - x $$ is a descent direction. If we fix the value of $t$, for instance $t = 1$, we can directly adapt our linesearch method, using the Armijo backtracking method.
"""
    linesearch(f, g, h, x0, direction, steplength, δ = 1e-6, stop = stopping, nmax = 1000)

Generic linesearch loop. `direction(f, g, h, x)` returns the search direction
and the gradient at `x`; `steplength(f, dfx, x, d)` returns the step length
along `d`; `stop(d, k, nmax, δ²)` decides termination.

Returns the final iterate, the gradient at that iterate, and the iteration count.
"""
function linesearch(f::Function, g::Function, h::Function,
                    x0::Vector, direction::Function, steplength::Function,
                    δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    k = 0
    # x += α*d below rebinds x to a new array, so x0 itself is never mutated.
    x = x0
    δ2 = δ*δ
    # Compute the initial search direction and gradient.
    # (The original pre-allocated dfx with zeros(n); that value was never read.)
    d, dfx = direction(f, g, h, x)
    while !stop(d, k, nmax, δ2)
        # Compute the step length along d
        α = steplength(f, dfx, x, d)
        # Update the iterate
        x += α*d
        # Compute the next search direction
        d, dfx = direction(f, g, h, x)
        k += 1
    end
    return x, dfx, k
end
"""
    ArmijoStep(f, dfx, x, d, αmax = 1.0, β = 0.1, κ = 0.2)

Armijo backtracking: starting from `αmax`, shrink the step length by the
factor `κ` until the sufficient-decrease condition
`f(x + αd) < f(x) + αβ ∇f(x)ᵀd` holds (or α underflows below 1e-16).
"""
function ArmijoStep(f::Function, dfx::Vector, x::Vector, d::Vector,
                    αmax::Float64 = 1.0, β::Float64 = 0.1, κ::Float64 = 0.2)
    fx = f(x)
    slope = dot(dfx, d)   # directional derivative along d (invariant in the loop)
    α = αmax
    while f(x + α*d) >= fx + α*β*slope && α > 1e-16
        α *= κ
    end
    return α
end
We will consider bound constraints. The projection is then very easy to obtain.
"""
    boundsProjection(x, lower, upper)

Componentwise projection of `x` onto the box `[lower, upper]`.
"""
function boundsProjection(x::Vector, lower::Vector, upper::Vector)
    return [max(min(upper[i], x[i]), lower[i]) for i in eachindex(x)]
end
We test the last function.
# Sanity check of the projection: x has components below, inside, and above [0, 10].
l = [ 0 for i = 1:10 ]
u = [ 10 for i = 1:10 ]
x = [ -5+2.0*i for i = 1:10 ]
z = boundsProjection(x,l,u)
We will consider the example function given in the introduction chapter of "Trust-Region Methods" (Conn, Gould, and Toint, 2000): $$ f(x,y) = -10x^2+10y^2+4\sin(xy)-2x+x^4 $$
using ForwardDiff
# Example objective from the introduction of Conn, Gould & Toint,
# "Trust-Region Methods" (SIAM, 2000): nonconvex, with several stationary points.
function f(x::Vector)
    x1, x2 = x[1], x[2]
    return -10x1^2 + 10x2^2 + 4sin(x1*x2) - 2x1 + x1^4
end
using Plots
plotly()
# Visualize the objective on [-3, 3]² as a 3-D surface and as contour lines.
default(size=(600,600), fc=:heat)
x, y = -3.0:0.1:3.0, -3.0:0.1:3.0
z = Surface((x,y)->f([x,y]), x, y)
surface(x,y,z)
Plots.contour(x,y,z, linealpha = 0.1, levels=2000)
# Gradient and Hessian of f obtained by forward-mode automatic differentiation.
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x)
# In-place gradient: write g(x) into the first length(x) entries of storage.
# Relies on the global closure `g` defined above.
function g!(storage::Vector, x::Vector)
storage[1:length(x)] = g(x)
end
# In-place Hessian: write H(x) into the leading n×n block of storage.
# Relies on the global closure `H` defined above.
function H!(storage::Matrix, x::Vector)
n = length(x)
storage[1:n,1:n] = H(x)
end
"""
    stopping_unconstrained(x, ∇x, k, nmax, tol)

Stop when the squared gradient norm drops below `tol`, or after `nmax`
iterations. (`x` is accepted for signature compatibility but unused.)
"""
function stopping_unconstrained(x::Vector, ∇x::Vector, k::Int, nmax::Int, tol::Float64)
    return dot(∇x, ∇x) < tol || k >= nmax
end
We can define some usual search directions.
"""
    NewtonDirection(f, g, h, x)

Newton search direction `-H⁻¹∇f` at `x`. `g(storage, x)` and `h(storage, x)`
fill the gradient and Hessian in place. Returns the direction and the gradient.
"""
function NewtonDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    H = zeros(n, n) + I
    # BUG FIX: the original called the global g! here instead of the supplied
    # gradient function g, silently ignoring the caller's choice.
    g(∇f, x)
    h(H, x)
    return -H\∇f, ∇f
end
"""
    steepestDirection(f, g, h, x)

Steepest-descent direction `-∇f(x)`. `g(storage, x)` fills the gradient in
place; `f` and `h` are unused but kept for signature compatibility.
Returns the direction and the gradient.
"""
function steepestDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    return -∇f, ∇f
end
"""
    stopping(d, k, nmax, tol)

Default linesearch stopping rule: the squared norm of the direction `d` is
below `tol`, or the iteration budget `nmax` is exhausted.
"""
function stopping(d::Vector, k::Int, nmax::Int, tol::Float64)
    return dot(d, d) < tol || k >= nmax
end
The unconstrained minimization gives
# Unconstrained minimization from the origin: steepest descent + Armijo backtracking.
x, df, k = linesearch(f, g!, H!, [0.0,0.0], steepestDirection, ArmijoStep)
f(x)
Let us consider simple bounds: $$ 0 \leq x[i] \leq 10,\ \forall i $$
# Bounds for the constrained problem: 0 <= x[i] <= 10.
l = [0.0; 0.0]
u = [10.0; 10.0]
# Box constraints l <= x <= u. Calling a Bounds object projects a point
# onto the box.
mutable struct Bounds
    l::Array{Float64,1}   # lower bounds
    u::Array{Float64,1}   # upper bounds
end
# Componentwise projection of x onto [l, u].
function (projectBounds::Bounds)(x::Vector)
    return [max(min(projectBounds.u[i], x[i]), projectBounds.l[i]) for i in eachindex(x)]
end
# Instantiate the projection functor, then relax the upper bounds to +Inf.
projectBounds = Bounds(l, u)
projectBounds.l = [0.0; 0.0]
projectBounds.u = [Inf; Inf]
"""
    projectedDirection(f, g, h, x)

Projected-gradient direction `d = P_X(x - ∇f(x)) - x` (trial step t = 1),
a feasible descent direction for the box constraints held in the global
functor `projectBounds`. Returns the direction and the gradient.
"""
function projectedDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    d = x - ∇f
    d = projectBounds(d)   # NOTE(review): relies on the global feasible set
    d = d - x
    return d, ∇f
end
# Projected-gradient linesearch from (1,1) with Armijo backtracking.
x, df, k = linesearch(f, g!, H!, [1.0,1.0], projectedDirection, ArmijoStep)
x, f(x)
# Project the trial point x + d onto the box, and return the resulting
# feasible step (the caller's d is not mutated: += / -= rebind locally).
function (projectDirection::Bounds)(d::Vector, x::Vector)
    trial = d + x
    projected = max.(min.(projectDirection.u, trial), projectDirection.l)
    return projected - x
end
# Functor instance holding the box used to project candidate steps.
projectDirection = Bounds(l, u)
"""
    linesearchProjection(f, g, h, x0, direction, step, δ = 1e-6, stop = stopping, nmax = 1000)

Projected linesearch: termination is tested on the projected step
`s = P(d)` (global functor `projectDirection`), and `step(f, ∇f, x, d)`
returns the feasible step itself, which is added to the iterate.
"""
function linesearchProjection(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, step::Function,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    x = x0
    δ2 = δ*δ
    k = 0
    # Initial direction, gradient, and projected step.
    d, ∇f = direction(f, g, h, x)
    s = projectDirection(d, x)
    while !stop(s, k, nmax, δ2)
        # step() returns the (projected) step to take, not a scalar length.
        s = step(f, ∇f, x, d)
        x += s
        d, ∇f = direction(f, g, h, x)
        s = projectDirection(d, x)
        k += 1
    end
    return x, ∇f, k
end
"""
    GoldsteinStep(f, ∇f, x, d, αmax = 1.0, β = 0.1, κ = 0.2)

Backtracking with projection: every trial step `α*d` is projected onto the
feasible box (global functor `projectDirection`) before the sufficient-decrease
test is evaluated on the projected step `s`. Returns the feasible step `s`.
"""
function GoldsteinStep(f::Function, ∇f::Vector, x::Vector, d::Vector,
                       αmax::Float64 = 1.0, β::Float64 = 0.1, κ::Float64 = 0.2)
    fx = f(x)
    α = αmax
    s = projectDirection(α*d, x)
    while f(x + s) >= fx + β*dot(∇f, s) && α > 1e-16
        α *= κ
        s = projectDirection(α*d, x)
    end
    return s
end
# Projected linesearch with the Goldstein-style projected backtracking step.
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, GoldsteinStep)
k, x, f(x)
The choice of $\alpha_{\max}$ influences the algorithm behavior. We will define a structure allowing us to easily modify the parameters of the linesearch.
# Marker supertype for step-length strategies.
abstract type Step end
# Backtracking linesearch parameters: initial trial length αmax,
# sufficient-decrease coefficient β, and backtracking factor κ.
mutable struct LineSearchStep <: Step
αmax::Float64
β::Float64
κ::Float64
end
# Default parameters, matching GoldsteinStep's defaults.
goldsteinstep = LineSearchStep(1.0, 0.1, 0.2)
# Functor version of GoldsteinStep: the backtracking parameters are read from
# the LineSearchStep object, and every trial step α*d is projected onto the
# feasible box (global functor projectDirection) before the sufficient-decrease
# test is evaluated. Returns the projected step s.
function (goldsteinstep::LineSearchStep)(f::Function, ∇f::Vector, x::Vector, d::Vector)
    fx = f(x)
    α = goldsteinstep.αmax
    s = projectDirection(α*d, x)
    while f(x + s) >= fx + goldsteinstep.β*dot(∇f, s) && α > 1e-16
        α *= goldsteinstep.κ
        s = projectDirection(α*d, x)
    end
    return s
end
"""
    linesearchProjection(f, g, h, x0, direction, step::Step, δ = 1e-6, stop = stopping, nmax = 1000)

Overload taking a `Step` functor (e.g. a `LineSearchStep`) so that the
backtracking parameters can be tuned without redefining the step function.
Otherwise identical to the `step::Function` method.
"""
function linesearchProjection(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, step::Step,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    x = x0
    δ2 = δ*δ
    k = 0
    # Initial direction, gradient, and projected step.
    d, ∇f = direction(f, g, h, x)
    s = projectDirection(d, x)
    while !stop(s, k, nmax, δ2)
        # The Step functor returns the (projected) step to take.
        s = step(f, ∇f, x, d)
        x += s
        d, ∇f = direction(f, g, h, x)
        s = projectDirection(d, x)
        k += 1
    end
    return x, ∇f, k
end
# List the available methods, then study the influence of αmax on convergence.
methods(linesearchProjection)
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 2.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 1000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 200000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
typeof(goldsteinstep)
We should be able to adjust $\alpha_{\max}$ automatically. The trust-region framework is particularly well suited for this.
Projection is not truncation!
# Steepest-descent direction truncated at the feasible-box boundary:
# αmax is shrunk until every component of x - α∇f respects its bound.
# Returns the truncated direction and the gradient.
function (truncatedDirection::Bounds)(f::Function, g::Function, h::Function, x::Vector, αmax = 1.0)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    α = αmax
    for i = 1:n
        if ∇f[i] > 0
            # Moving toward the lower bound: enforce x[i] - α∇f[i] >= l[i].
            α = min(α, (x[i]-truncatedDirection.l[i])/∇f[i])
        elseif ∇f[i] < 0
            # Moving toward the upper bound: enforce x[i] - α∇f[i] <= u[i].
            α = min(α, (x[i]-truncatedDirection.u[i])/∇f[i])
        end
    end
    d = -∇f*α
    return d, ∇f
end
# Truncate a given direction d so that x + d remains inside the box [l, u]:
# the scaling α starts at αmax and shrinks until every component is feasible.
function (truncatedDirection::Bounds)(x::Vector, d::Vector, αmax = 1.0)
    α = αmax
    for i in eachindex(x)
        if d[i] < 0
            # Decreasing component: enforce x[i] + αd[i] >= l[i].
            α = min(α, (truncatedDirection.l[i]-x[i])/d[i])
        elseif d[i] > 0
            # Increasing component: enforce x[i] + αd[i] <= u[i].
            α = min(α, (truncatedDirection.u[i]-x[i])/d[i])
        end
    end
    return α*d
end
# Compare the three directions at x = (1,1): unconstrained steepest descent,
# projected direction, and truncated direction.
x = [1,1]
d, dfx = steepestDirection(f,g!,H!,x)
d, dfx = projectedDirection(f,g!,H!,x)
x+d
truncatedDirection = Bounds(l, u)
d, dfx = truncatedDirection(f,g!,H!,x)
x+d
"""
    linesearchTruncation(f, g, h, x0, direction, steplength, δ = 1e-6, stop = stopping, nmax = 1000)

Linesearch where the direction is truncated at the feasible-box boundary
(global functor `truncatedDirection`) instead of projected. Steps become very
small as soon as the iterate approaches an active bound.
"""
function linesearchTruncation(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, steplength::Function,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    k = 0
    x = x0
    δ2 = δ*δ
    # Initial direction, gradient, and truncated step.
    d, ∇f = direction(f, g, h, x)
    s = truncatedDirection(x, d)
    while !stop(s, k, nmax, δ2)
        # BUG FIX: the original called steplength(f, dfx, x, d) where dfx was
        # undefined in this scope and silently resolved to a stale global.
        α = steplength(f, ∇f, x, d)
        # Update the iterate along the truncated step.
        # NOTE(review): the step length was computed along d but applied to s;
        # confirm this combination is intended.
        x += α*s
        d, ∇f = direction(f, g, h, x)
        s = truncatedDirection(x, d)
        k += 1
    end
    return x, ∇f, k
end
# Truncated linesearch from (1,1); stalls near the boundary (see note below).
x, dfx, k = linesearchTruncation(f, g!, H!, [1.0,1.0], steepestDirection, ArmijoStep)
f(x)
The method stopped before we have converged to a solution, as we are limited to very small steps as soon as we are close to the boundary.
We first reproduce the basic trust-region algorithm
# Classical trust-region constants: acceptance thresholds η1 ≤ η2 and
# radius shrink factors γ1 (failure) and γ2 (mere acceptance).
struct BasicTrustRegion{T <: Real}
    η1::T
    η2::T
    γ1::T
    γ2::T
end

# Standard default parameter values for the basic trust-region algorithm.
BTRDefaults() = BasicTrustRegion(0.01, 0.9, 0.5, 0.5)
# Placeholder step type for the unconstrained case (carries no parameters).
struct UnconstrainedStep <: Step
end
# Mutable state carried across iterations of the basic trust-region algorithm.
mutable struct BTRState
iter::Int        # iteration counter
x::Vector        # current iterate
xcand::Vector    # candidate iterate x + step
g::Vector        # gradient at x
H::Matrix        # Hessian (or quasi-Newton approximation) at x
step::Vector     # last computed step
Δ::Float64       # trust-region radius
ρ::Float64       # achieved-over-predicted reduction ratio
tol::Float64     # convergence tolerance
trace            # optional matrix of iterates, one row per iteration
keepTrace::Bool  # whether to record the trace
# Default constructor: tol = 1e-6, tracing disabled; the remaining
# fields are initialized by btr().
function BTRState()
state = new()
state.tol = 1e-6
state.keepTrace = false
return state
end
end
# An iteration is successful — and the candidate accepted — when the achieved
# reduction is at least the fraction η1 of the predicted reduction.
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    return state.ρ >= b.η1
end
# Update the trust-region radius Δ according to the performance ratio ρ.
function updateRadius!(state::BTRState, b::BasicTrustRegion)
    if state.ρ >= b.η2
        # Very successful step: allow the radius to grow (capped at 1e20).
        stepnorm = norm(state.step)
        state.Δ = min(1e20, max(4*stepnorm, state.Δ))
    elseif state.ρ >= b.η1
        # Merely successful: shrink moderately.
        state.Δ *= b.γ2
    else
        # Unsuccessful: shrink.
        state.Δ *= b.γ1
    end
end
# One BFGS update of the Hessian approximation B from the pair (s, y),
# where s is the step and y the gradient difference.
function BFGSUpdate(B::Matrix, y::Vector, s::Vector)
    Bs = B*s
    rankTwoCorrection = (y*y')/dot(s, y) - (Bs*Bs')/dot(s, Bs)
    return B + rankTwoCorrection
end
# Truncated-CG stopping rule: iteration cap reached, or the residual satisfies
# the inexact-Newton forcing condition ‖g‖ ≤ ‖g0‖·min(χ, ‖g0‖^θ).
function stopCG(normg::Float64, normg0::Float64, k::Int, kmax::Int, χ::Float64 = 0.1, θ::Float64 = 0.5)
    return k == kmax || normg <= normg0*min(χ, normg0^θ)
end
# A default state instance (tol = 1e-6, no trace).
defaultState = BTRState()
# Unconstrained stopping rule: gradient norm below tol, or iteration budget
# exhausted. The budget, previously hard-coded inside the body, is now an
# optional parameter (default 1000, backward compatible with stop(state)).
function btrStopUnconstrained(state::BTRState, nmax::Int = 1000)
    return dot(state.g, state.g) <= state.tol*state.tol || state.iter >= nmax
end
# Step-size stopping rule: stop when the last step is shorter than tol.
function btrStopStep(state::BTRState)
    return dot(state.step, state.step) < state.tol*state.tol
end
# Basic trust-region algorithm.
#   f, g!, H!  : objective, in-place gradient, in-place Hessian (or, when
#                ApproxH is true, H! is a quasi-Newton update B -> B').
#   Step       : function computing the (approximate) model minimizer
#                from the current BTRState.
#   stop       : termination test on the state.
# Returns the final BTRState.
function btr(f::Function, g!::Function, H!::Function, Step::Function,
x0::Vector, state:: BTRState = BTRState(), stop:: Function = btrStopUnconstrained,
ApproxH::Bool = false, verbose::Bool = false)
b = BTRDefaults()
state.iter = 0
state.x = x0
n=length(x0)
state.step = zeros(n)
# Sentinel so that step-based stopping rules do not fire before the first step.
state.step[1] = +Inf
state.g = zeros(n)
# A better initialization procedure should be used with quasi-Newton approximations
# We could rely on some preconditioner.
state.H = zeros(n,n)+I
fx = f(x0)
g!(state.g, x0)
# Initial radius proportional to the gradient norm.
state.Δ = 0.1*norm(state.g) # 1.0
if (ApproxH)
# Buffers for the quasi-Newton update: y = gradient difference.
y = zeros(n)
gcand = zeros(n)
# H!(H, y, state.step)
else
H!(state.H, x0)
end
if (state.keepTrace)
state.trace= x0'
end
# Quadratic model of the reduction: m(s) = gᵀs + ½ sᵀHs.
function model(s::Vector, g::Vector, H::Matrix)
return dot(s, g)+0.5*dot(s, H*s)
end
while (!stop(state))
# Compute the step by approximately minimizing the model
state.step = Step(state)
state.xcand = state.x+state.step
# Compute the actual reduction over the predicted reduction
fcand = f(state.xcand)
state.ρ = (fcand-fx)/(model(state.step, state.g, state.H))
if (ApproxH)
# Quasi-Newton: update the Hessian approximation from (step, y),
# even when the candidate is rejected.
g!(gcand, state.xcand)
y = gcand-state.g
sy = dot(state.step,y)
# if (sy < 1e-6)
# println(state.iter, ". ", state.ρ, " ", state.Δ, " ", norm(state.step), " ", (model(state.step, state.g, H)), " ", norm(y), " ", sy, " ", norm(state.g))
# end
state.H = H!(state.H, y, state.step)
end
if (acceptCandidate!(state, b))
state.x = copy(state.xcand)
if (ApproxH == false)
g!(state.g, state.x)
H!(state.H, state.x)
else
# Gradient at the candidate was already computed for the update.
state.g = copy(gcand)
end
fx = fcand
end
if (state.keepTrace)
state.trace= [state.trace ; state.x']
end
updateRadius!(state, b)
state.iter += 1
end
return state
end
# Cauchy step: minimizer of the quadratic model along -g within the ball of
# radius Δ. With negative curvature along g, go to the boundary (τ = 1).
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    curvature = dot(g, H*g)
    normg = norm(g)
    τ = curvature <= 0 ? 1.0 : min(normg^3/(curvature*Δ), 1.0)
    return -τ*g*Δ/normg
end
# Convenience wrapper: Cauchy step from the current trust-region state.
function CauchyStep(state:: BTRState)
return CauchyStep(state.g, state.H, state.Δ)
end
# Steihaug-Toint truncated conjugate gradient: approximately minimize the
# model m(s) = gᵀs + ½ sᵀHs inside the ball ‖s‖ <= Δ. CG is stopped early
# at the boundary or when negative curvature is encountered.
# The recurrences track sMd = sᵀd, norm2s = ‖s‖², norm2d = ‖d‖² incrementally.
function TruncatedCG(g::Vector, H::Matrix, Δ::Float64)
n = length(g)
s = zeros(n)
normg0 = norm(g)
# v = g: no preconditioner is applied.
v = g
d = -v
gv = dot(g,v)
norm2d = gv
norm2s = 0
sMd = 0
k = 0
Δ2 = Δ*Δ
# g is rebound (g += α*Hd) inside the loop, so it plays the role of the
# CG residual; the caller's array is not mutated.
while (stopCG(norm(g), normg0, k, n) == false)
Hd = H*d
κ = dot(d,Hd)
# Is the curvature negative in the direction d?
if (κ <= 0)
# Follow d to the trust-region boundary: σ solves ‖s + σd‖ = Δ.
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
break
end
α = gv/κ
# Check is the model minimizer is outside the trust region
norm2s += α*(2*sMd+α*norm2d)
if (norm2s >= Δ2)
# Truncate at the boundary: σ solves ‖s + σd‖ = Δ.
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
break
end
# The model minimizer is inside the trust region
s += α*d
g += α*Hd
v = g
newgv = dot(g,v)
β = newgv/gv
gv = newgv
d = -v+β*d
sMd = β*(sMd+α*norm2d)
norm2d = gv+β*β*norm2d
k += 1;
end
return s
end
# Convenience wrapper: truncated-CG step from the current trust-region state.
function TruncatedCG(state:: BTRState)
return TruncatedCG(state.g, state.H, state.Δ)
end
# Run the basic trust-region method with the Cauchy step, then with truncated CG.
state = btr(f, g!, H!, CauchyStep, [1.0,1.0])
state = btr(f, g!, H!, TruncatedCG, [1.0,1.0])
We can compute the generalized Cauchy point using Algorithm 12.2.2 in Trust-Region methods.
In progress...
# Parameters for the generalized Cauchy point search on bound constraints
# (Algorithm 12.2.2 in Conn, Gould & Toint, "Trust-Region Methods", 2000).
mutable struct BoundsConstrainedStep <: Step
    l:: Vector # lower bounds
    u:: Vector # Upper bounds
    β1:: Float64 # sufficient model-decrease coefficient
    β2:: Float64 # excessive model-decrease coefficient (step too short)
    μ:: Float64  # fraction of the radius below which the step is too small
    ν:: Float64  # projected-gradient tolerance
    function BoundsConstrainedStep(l::Vector, u::Vector,
                                   β1::Float64 = 0.1, β2::Float64 = 0.75,
                                   ν::Float64 = 0.2, μ::Float64 = 0.1)
        bcs = new()
        bcs.l = copy(l)
        bcs.u = copy(u)
        bcs.β1 = β1
        # BUG FIX: β2 and ν were never stored, leaving those Float64 fields
        # with undefined contents when the functor later read GCP.β2 / GCP.ν.
        bcs.β2 = β2
        bcs.ν = ν
        bcs.μ = μ
        return bcs
    end
end
# Build a generalized-Cauchy-point step object for the box [l, u].
GCP = BoundsConstrainedStep(l,u)
typeof(GCP) == BoundsConstrainedStep
# Project the direction d onto the tangent cone of the box [l, u] at x:
# components pointing outside an (almost) active bound are zeroed.
# Mutates and returns d.
function projectTangentConeBounds!(x::Vector, d::Vector, l::Vector, u::Vector, tol::Float64 = 1e-12)
    for (i, xi) in enumerate(x)
        di = d[i]
        # At (or within tol of) the lower bound: forbid decreasing components.
        if xi - l[i] < tol
            di = max(0, di)
        end
        # At (or within tol of) the upper bound: forbid increasing components.
        if u[i] - xi < tol
            di = min(0, di)
        end
        d[i] = di
    end
    return d
end
# Quadratic model value m(s) = gᵀs + ½ sᵀHs (without the constant f(x) term).
model(s::Vector, g::Vector, H::Matrix) = dot(s, g) + 0.5*dot(s, H*s)
# Generalized Cauchy point along the projected-gradient path,
# Algorithm 12.2.2 in Conn, Gould, and Toint, "Trust-region methods", SIAM, 2000.
# There is still some bug somewhere
# Bisection on the path parameter t: a trial step is rejected as too long when
# it leaves the trust region or decreases the model insufficiently, and as too
# short when it is well inside the region with room to decrease further.
function (GCP::BoundsConstrainedStep)(x:: Vector, g::Vector, H::Matrix, Δ::Float64)
tmin = 0
tmax = Inf
# Initial path parameter: the unconstrained Cauchy scale.
t = Δ/norm(g)
Δ2 = Δ*Δ
j = 0
s = zeros(length(x)) # prevents s to be a local variable of the while loop.
cont = true
# The condition on j is set as a crude safeguard.
while (cont && (j < 50))
p = boundsProjection(x-t*g, GCP.l, GCP.u) # compute the point on the projected gradient path
s = p-x
mk = model(s, g, H)
gs = dot(g,s)
# println(j, " ", tmin, ", ", tmax, " s = ", s, " ", x, x+s, -t*g)
# Step too long: outside the trust region, or insufficient model decrease.
if ((dot(s,s) > Δ2) || (mk > GCP.β1*gs))
tmax = t
# Step too short: well inside the region, ample decrease, and the
# projected gradient at p is still significant.
elseif ((dot(s,s) < GCP.μ*GCP.μ*Δ2) && (mk < GCP.β2*gs) &&
(norm(projectTangentConeBounds!(p,-g,GCP.l,GCP.u)) > GCP.ν*abs(gs)/Δ))
tmin = t
else
# Acceptable generalized Cauchy point.
cont = false
return s
end
# Bisect once an upper bound on t is known; otherwise expand.
if tmax < Inf
t = (tmin+tmax)/2
else
t *= 2;
end
j += 1
end
return s
end
# Manual check of the generalized Cauchy point at an iterate near the boundary.
Δ = state.Δ = 1.1
x = state.x = [1, 0.9]
l = [0.0, 0.0]
u = [10.0, 10.0]
g(x), H(x), Δ
s = GCP(x, g(x), H(x), Δ)
state.g = g(x)
state.H = H(x)
# Adapter: use the generalized Cauchy point (global GCP) as a BTR step.
function boundsstep(state:: BTRState)
s = GCP(state.x, state.g, state.H, state.Δ)
# NOTE(review): debugging output — consider removing once the GCP is validated.
println("Step = ", s, state.x, state.x+s)
return s
end
# One manual step, then a full trust-region run with the step-size stopping rule.
s = boundsstep(state)
state = btr(f, g!, H!, boundsstep, [1.0,1.0], state, btrStopStep)
The stopping criteria should be improved.
# Final iterate of the bound-constrained trust-region run.
state.x
In progress...
Another possibility is to project the gradient, and when a constraint becomes active, to continue the search along the set of active constraints. See https://wiki.mcs.anl.gov/leyffer/images/0/01/07-bndCons.pdf
For the projected gradient, we can first determine the active set at the current iterate, incorporating information about the current search direction.
# Determine which variables are free at x given the search direction:
# a variable is fixed (active) when it sits at a bound (within tol) and the
# direction points outside the feasible box. Returns the indicator vector
# and the number of free variables.
function activeSetBounds(x:: Vector, direction:: Vector, b::Bounds, tol = 10*eps(Float64))
n = length(x)
# Initialize the number of free variables
nfree = n # nfree will contain the number of free variables
# free designates which variables are free and which are fixed at a bound:
# free[i] = 1 if x[i] is free, and free[i] = 0 otherwise.
free = ones(n)
for i=1:n
if (((abs(b.l[i]-x[i]) <= tol) && direction[i] < 0) || ((abs(x[i]-b.u[i]) <= tol) && direction[i] > 0))
free[i] = 0
nfree -= 1
end
end
return free, nfree
end
# Example: at x = (0, 1) with d = (-1, -1), x[1] is blocked at its lower bound.
l = [0; 0]
u = [1; 1]
b = Bounds(l, u)
d = [-1; -1]
x = [0; 1]
free, nfree = activeSetBounds(x, d, b)
Knowing the active set, we can compute the restricted direction.
# Reduced direction: zero the components of d corresponding to active
# (non-free) variables, keeping the free components unchanged.
reducedDirectionBounds(d::Vector, free::Vector) = d .* free
# Reduced direction for the active set computed above.
rd = reducedDirectionBounds(d, free)
We can use the reduced gradient to compute the Cauchy point. Recall the computation of the Cauchy step in the unconstrained case.
# Cauchy step (repeated here for reference): minimizer of the quadratic model
# along -g restricted to the ball of radius Δ. To use the reduced gradient,
# pass it in place of g.
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    normg = norm(g)
    curv = dot(g, H*g)
    if curv <= 0
        # Negative curvature along g: go all the way to the boundary.
        τ = 1.0
    else
        τ = min(normg^3/(curv*Δ), 1.0)
    end
    return -τ*g*Δ/normg
end
We will now examine the curvature with the reduced gradient instead of the gradient.
If the curvature is negative, we have to search along the projected gradient a step that does not lead outside the trust region.
If the curvature is positive, we will check whether the model minimizer lies inside the intersection of the feasible set and the trust region. If it does not, we again have to search along the projected gradient. In the context of conjugate gradient, if the trust-region boundary has not been reached yet, we update the set of free variables and compute the next search direction.
Projected conjugate gradient: https://epubs.siam.org/doi/pdf/10.1137/S1052623498345075
TO DO!