Projection methods

In [1]:
using LinearAlgebra

Projected gradient

Basic linesearch algorithm

The simplest approach consists to exploit the fact that $\forall\, t > 0$, $$ d = P_{\mathcal{X}}(x - t\nabla f(x)) - x $$ is a descent direction. If we fix the value of $t$, for instance $t = 1$, we can directly adapt our linesearch method, using the Armijo backtracking method.

In [2]:
function linesearch(f::Function, g::Function, h:: Function,
        x0::Vector, direction::Function, steplength::Function,
        δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)

    k = 0
    x = x0
    δ2 = δ*δ
    n = length(x)

    dfx = zeros(n)

    # Compute the search direction
    d, dfx = direction(f,g,h,x)

    while (stop(d, k, nmax, δ2) == false)
        # Compute the step length along d
        α = steplength(f,dfx,x,d)
        # Update the iterate
        x += α*d
        # Compute the search direction
        d, dfx = direction(f,g,h,x)
        k += 1
    end

    return x, dfx, k
end
Out[2]:
linesearch (generic function with 4 methods)
In [3]:
function ArmijoStep(f::Function, dfx::Vector, x::Vector, d:: Vector,
    αmax:: Float64 = 1.0, β:: Float64 =0.1, κ:: Float64 =0.2)
    
    α = αmax
    
    fx = f(x)
    fxcand = f(x+α*d)

    while ((fxcand >= fx+α*β*dot(dfx,d)) && (α > 1e-16))
        α *= κ
        fxcand = f(x+α*d)        
    end
    
    return α
end
Out[3]:
ArmijoStep (generic function with 4 methods)

We will consider bounds constraints. The projection is then very easy to obtain.

In [4]:
function boundsProjection(x::Vector, lower::Vector, upper::Vector)
    return max.(min.(upper,x),lower)
end
Out[4]:
boundsProjection (generic function with 1 method)

We test the last function.

In [5]:
l = [ 0 for i = 1:10 ]
u = [ 10 for i = 1:10 ]
x = [ -5+2.0*i for i = 1:10 ]
z = boundsProjection(x,l,u)
Out[5]:
10-element Array{Float64,1}:
  0.0
  0.0
  1.0
  3.0
  5.0
  7.0
  9.0
 10.0
 10.0
 10.0

Example

We will consider the example function given in the introduction chapter of "Trust-Region Methods" (Conn, Gould, and Toint, 2000): $$ f(x,y) = -10x^2+10y^2+4\sin(xy)-2x+x^4 $$

In [6]:
using ForwardDiff

f(x::Vector) = -10*x[1]^2+10*x[2]^2+4*sin(x[1]*x[2])-2*x[1]+x[1]^4
Out[6]:
f (generic function with 1 method)
In [7]:
using Plots
plotly()
┌ Info: For saving to png with the Plotly backend ORCA has to be installed.
└ @ Plots C:\Users\slash\.julia\packages\Plots\GzWxB\src\backends.jl:363
Out[7]:
Plots.PlotlyBackend()
In [8]:
default(size=(600,600), fc=:heat)
x, y = -3.0:0.1:3.0, -3.0:0.1:3.0
z = Surface((x,y)->f([x,y]), x, y)
surface(x,y,z)
Out[8]:
Plots.jl
In [9]:
Plots.contour(x,y,z, linealpha = 0.1, levels=2000)
Out[9]:
Plots.jl
In [10]:
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x)

function g!(storage::Vector, x::Vector)
    storage[1:length(x)] = g(x)
end

function H!(storage::Matrix, x::Vector)
    n = length(x)
    storage[1:n,1:n] = H(x)
end
Out[10]:
H! (generic function with 1 method)
In [11]:
function stopping_unconstrained(x::Vector, ∇x::Vector, k::Int, nmax::Int, tol::Float64)
    if (dot(∇x,∇x) < tol)
        return true;
    elseif (k >= nmax)
        return true;
    else
        return false;
    end
end
Out[11]:
stopping_unconstrained (generic function with 1 method)

We can define some usual search directions.

In [12]:
function NewtonDirection(f::Function, g:: Function, h:: Function, x::Vector)
    n = length(x)
    ∇f = ones(n)
    H = zeros(n,n)+I
    g!(∇f,x)
    h(H,x)
    return -H\∇f, ∇f
end
Out[12]:
NewtonDirection (generic function with 1 method)
In [13]:
function steepestDirection(f::Function, g:: Function, h:: Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    g!(∇f,x)
    return -∇f, ∇f
end
Out[13]:
steepestDirection (generic function with 1 method)
In [14]:
function stopping(d::Vector, k::Int, nmax::Int, tol::Float64)
    if (dot(d,d) < tol)
        return true;
    elseif (k >= nmax)
        return true;
    else
        return false;
    end
end
Out[14]:
stopping (generic function with 1 method)

The unconstrained minimization gives

In [15]:
x, df, k = linesearch(f, g!, H!, [0.0,0.0], steepestDirection, ArmijoStep)
Out[15]:
([2.3066301125619457, -0.3323086499407665], [-6.694542946661386e-7, -5.345261122613465e-8], 58)
In [16]:
f(x)
Out[16]:
-31.180733385187978

Constraints

Let consider simple bounds: $$ 0 \leq x[i] \leq 10,\ \forall i $$

In [17]:
l = [0.0; 0.0]
u = [10.0; 10.0]
Out[17]:
2-element Array{Float64,1}:
 10.0
 10.0
In [18]:
mutable struct Bounds
    l::Array{Float64,1}
    u::Array{Float64,1}
end

function (projectBounds::Bounds)(x::Vector)
    return max.(min.(projectBounds.u,x),projectBounds.l)
end
In [19]:
projectBounds = Bounds(l, u)
Out[19]:
Bounds([0.0, 0.0], [10.0, 10.0])
In [20]:
projectBounds.l = [0.0; 0.0]
projectBounds.u = [Inf; Inf]
Out[20]:
2-element Array{Float64,1}:
 Inf
 Inf
In [21]:
function projectedDirection(f::Function, g:: Function, h:: Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    g!(∇f,x)

    d = x-∇f
    d = projectBounds(d)
    d = d-x

    return d, ∇f
end
Out[21]:
projectedDirection (generic function with 1 method)
In [22]:
x, df, k = linesearch(f, g!, H!, [1.0,1.0], projectedDirection, ArmijoStep)
Out[22]:
([2.284484124446568, 0.0], [-7.202996954447372e-7, 9.137936497786272], 43)
In [23]:
x, f(x)
Out[23]:
([2.284484124446568, 0.0], -29.521065172290175)

Projected path

In [24]:
function (projectDirection::Bounds)(d::Vector, x::Vector)
    d += x
    d = max.(min.(projectDirection.u,d),projectDirection.l)
    d -= x

    return d
end
In [25]:
projectDirection = Bounds(l, u)
Out[25]:
Bounds([0.0, 0.0], [10.0, 10.0])
In [26]:
function linesearchProjection(f::Function, g::Function, h:: Function,
        x0::Vector, direction::Function, step::Function,
        δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)

    k = 0
    x = x0
    δ2 = δ*δ
    n = length(x)

    ∇f = zeros(n)

    # Compute the search direction
    d, ∇f = direction(f,g,h,x)
    s = projectDirection(d, x)

    while (stop(s, k, nmax, δ2) == false)
        # Compute the step length along d
        s = step(f,∇f,x,d)
        # Update the iterate
        x += s
        # Compute the search direction
        d, ∇f = direction(f,g,h,x)
        s = projectDirection(d, x)
        k += 1
    end

    return x, ∇f, k
end
Out[26]:
linesearchProjection (generic function with 4 methods)
In [27]:
function GoldsteinStep(f::Function, ∇f::Vector, x::Vector, d:: Vector,
    αmax:: Float64 = 1.0, β:: Float64 =0.1, κ:: Float64 =0.2)

    α = αmax
    
    fx = f(x)
    # Project d
    s = α*d
    s = projectDirection(s, x)
    fxcand = f(x+s)

    while ((fxcand >= fx+β*dot(∇f,s)) && (α > 1e-16))
        α *= κ
        # Project d        
        s = α*d
        s = projectDirection(s, x)
        fxcand = f(x+s)
    end
    
    return s
end
Out[27]:
GoldsteinStep (generic function with 4 methods)
In [28]:
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, GoldsteinStep)
Out[28]:
([2.2844841247197536, 0.0], [-7.086547810786215e-7, 9.137936498879014], 42)
In [29]:
k, x, f(x)
Out[29]:
(42, [2.2844841247197536, 0.0], -29.521065172290175)

The choice of $\alpha$max influences the algorithm behavior. We will define a structure allowing to easily modify the parameters of the linearsearch.

In [30]:
abstract type Step end

mutable struct LineSearchStep <: Step 
    αmax::Float64
    β::Float64
    κ::Float64
end

goldsteinstep = LineSearchStep(1.0, 0.1, 0.2)
Out[30]:
LineSearchStep(1.0, 0.1, 0.2)
In [31]:
function (goldsteinstep::LineSearchStep)(f::Function, ∇f::Vector, x::Vector, d:: Vector)

    α = goldsteinstep.αmax
    
    fx = f(x)
    # Project d
    s = α*d
    s = projectDirection(s, x)
    fxcand = f(x+s)

    while ((fxcand >= fx+goldsteinstep.β*dot(∇f,s)) && (α > 1e-16))
        α *= goldsteinstep.κ
        # Project d        
        s = α*d
        s = projectDirection(s, x)
        fxcand = f(x+s)
    end
    
    return s
end
In [32]:
function linesearchProjection(f::Function, g::Function, h:: Function,
        x0::Vector, direction::Function, step::Step,
        δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)

    k = 0
    x = x0
    δ2 = δ*δ
    n = length(x)

    ∇f = zeros(n)

    # Compute the search direction
    d, ∇f = direction(f,g,h,x)
    s = projectDirection(d, x)

    while (stop(s, k, nmax, δ2) == false)
        # Compute the step length along d
        s = step(f,∇f,x,d)
        # Update the iterate
        x += s
        # Compute the search direction
        d, ∇f = direction(f,g,h,x)
        s = projectDirection(d, x)
        k += 1
    end

    return x, ∇f, k
end
Out[32]:
linesearchProjection (generic function with 8 methods)
In [33]:
methods(linesearchProjection)
Out[33]:
# 8 methods for generic function linesearchProjection:
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Function) in Main at In[26]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Function, δ::Float64) in Main at In[26]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Function, δ::Float64, stop::Function) in Main at In[26]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Function, δ::Float64, stop::Function, nmax::Int64) in Main at In[26]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Step) in Main at In[32]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Step, δ::Float64) in Main at In[32]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Step, δ::Float64, stop::Function) in Main at In[32]:5
  • linesearchProjection(f::Function, g::Function, h::Function, x0::Array{T,1} where T, direction::Function, step::Step, δ::Float64, stop::Function, nmax::Int64) in Main at In[32]:5
In [34]:
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
Out[34]:
([2.2844841247197536, 0.0], [-7.086547810786215e-7, 9.137936498879014], 42)
In [35]:
goldsteinstep.αmax = 2.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
Out[35]:
([2.2844841221111016, 0.0], [-8.198522607472114e-7, 9.137936488444407], 13)
In [36]:
goldsteinstep.αmax = 1000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
Out[36]:
([2.284484129661535, 0.0], [-4.980043826208203e-7, 9.13793651864614], 23)
In [37]:
goldsteinstep.αmax = 200000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
Out[37]:
([2.2844841268133513, 0.0], [-6.19412226399163e-7, 9.137936507253405], 9)
In [38]:
typeof(goldsteinstep)
Out[38]:
LineSearchStep

We should be able to automatically adjust αmax. The trust-region framework is particularly adapted for this.

Truncation

Projection is not truncation!

In [39]:
function (truncatedDirection::Bounds)(f::Function, g:: Function, h:: Function, x::Vector, αmax = 1.0)
    n = length(x)
    ∇f = zeros(n)
    g!(∇f,x)

    α = αmax
    for i = 1:n
        if ∇f[i] > 0
            α = min(α,(x[i]-truncatedDirection.l[i])/∇f[i])
        elseif ∇f[i] < 0
            α = min(α,(x[i]-truncatedDirection.u[i])/∇f[i])
        end
    end
    
    d = -∇f*α

    return d, ∇f
end
In [40]:
function (truncatedDirection::Bounds)(x::Vector, d::Vector, αmax = 1.0)
    n = length(x)

    α = αmax
    for i = 1:n
        if d[i] < 0
            α = min(α,(truncatedDirection.l[i]-x[i])/d[i])
        elseif d[i] > 0
            α = min(α,(truncatedDirection.u[i]-x[i])/d[i])
        end
    end
    
    d *= α

    return d
end
In [41]:
x = [1,1]
d, dfx = steepestDirection(f,g!,H!,x)
Out[41]:
([15.83879077652744, -22.16120922347256], [-15.83879077652744, 22.16120922347256])
In [42]:
d, dfx = projectedDirection(f,g!,H!,x)
Out[42]:
([15.83879077652744, -1.0], [-15.83879077652744, 22.16120922347256])
In [43]:
x+d
Out[43]:
2-element Array{Float64,1}:
 16.83879077652744
  0.0             
In [44]:
truncatedDirection = Bounds(l, u)
d, dfx = truncatedDirection(f,g!,H!,x)
Out[44]:
([0.7147078761275993, -1.0], [-15.83879077652744, 22.16120922347256])
In [45]:
x+d
Out[45]:
2-element Array{Float64,1}:
 1.7147078761275993
 0.0               
In [46]:
function linesearchTruncation(f::Function, g::Function, h:: Function,
        x0::Vector, direction::Function, steplength::Function,
        δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)

    k = 0
    x = x0
    δ2 = δ*δ
    n = length(x)

    ∇f = zeros(n)

    # Compute the search direction
    d, ∇f = direction(f,g,h,x)
    s = truncatedDirection(x, d)
    println(s)

    while (stop(s, k, nmax, δ2) == false)
        # Compute the step length along d
        α = steplength(f,dfx,x,d)
        # Update the iterate
        x += α*s
        # Compute the search direction
        d, ∇f = direction(f,g,h,x)
        s = truncatedDirection(x, d)
        k += 1
    end

    return x, ∇f, k
end
Out[46]:
linesearchTruncation (generic function with 4 methods)
In [47]:
x, dfx, k = linesearchTruncation(f, g!, H!, [1.0,1.0], steepestDirection, ArmijoStep)
[0.7147078761275993, -1.0]
Out[47]:
([2.063773419593784, 7.048843440897225e-7], [-8.115694776214774, 8.255107776053283], 347)
In [48]:
f(x)
Out[48]:
-28.57869819250214

The method stopped before we have converged to a solution, as we are limited to very small steps as soon as we are close to the boundary.

Projected gradient and trust-region

We first reproduce the basic trust-region algorithm

In [49]:
struct BasicTrustRegion{T <: Real}
    η1:: T
    η2:: T
    γ1:: T
    γ2:: T
end

function BTRDefaults()
    return BasicTrustRegion(0.01,0.9,0.5,0.5)
end

struct UnconstrainedStep <: Step
end

mutable struct BTRState

    iter::Int
    x::Vector
    xcand::Vector
    g::Vector
    H::Matrix
    step::Vector
    Δ::Float64
    ρ::Float64

    tol::Float64

    trace
    keepTrace::Bool
    
    function BTRState()
        state = new()
        state.tol = 1e-6
        state.keepTrace = false
        return state
    end
end


function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    # If the iteration is successful, update the iterate
    if (state.ρ >= b.η1)
        return true
    else
        return false
    end
end

function updateRadius!(state::BTRState, b::BasicTrustRegion)
    if (state.ρ >= b.η2)
        stepnorm = norm(state.step)
        state.Δ = min(1e20,max(4*stepnorm,state.Δ))
    elseif (state.ρ >= b.η1)
        state.Δ *= b.γ2
    else
        state.Δ *= b.γ1
    end
end

function BFGSUpdate(B::Matrix, y::Vector, s::Vector)
    Bs = B*s
    return B - (Bs*Bs')/dot(s, Bs) + (y*y')/dot(s,y)
end

function stopCG(normg::Float64, normg0::Float64, k::Int, kmax::Int, χ::Float64 = 0.1, θ::Float64 = 0.5)
    if ((k == kmax) || (normg <= normg0*min(χ, normg0^θ)))
        return true
    else
        return false
    end
end

defaultState = BTRState()
Out[49]:
BTRState(861389456, #undef, #undef, #undef, #undef, #undef, 4.25574116e-315, 1.625147046e-315, 1.0e-6, #undef, false)
In [74]:
function btrStopUnconstrained(state::BTRState)
    nmax = 1000

    if ((dot(state.g,state.g) <= state.tol*state.tol) || (state.iter >= nmax))
        return true
    end
    
    return false
end
Out[74]:
btrStopUnconstrained (generic function with 1 method)
In [98]:
function btrStopStep(state::BTRState)
    if (dot(state.step, state.step) < state.tol*state.tol)
        return true
    end
    
    return false
end
Out[98]:
btrStopStep (generic function with 1 method)
In [86]:
function btr(f::Function, g!::Function, H!::Function, Step::Function,
    x0::Vector, state:: BTRState = BTRState(), stop:: Function = btrStopUnconstrained,
    ApproxH::Bool = false, verbose::Bool = false)
    
    b = BTRDefaults()
    state.iter = 0
    state.x = x0
    n=length(x0)

    state.step = zeros(n)
    state.step[1] = +Inf

    state.g = zeros(n)
    # A better initialization procedure should be used with quasi-Newton approximations
    # We could rely on some preconditioner.
    state.H = zeros(n,n)+I
    
    fx = f(x0)
    g!(state.g, x0)
    state.Δ = 0.1*norm(state.g) # 1.0

    if (ApproxH)
        y = zeros(n)
        gcand = zeros(n)
        # H!(H, y, state.step)
    else
        H!(state.H, x0)
    end
    
    if (state.keepTrace)
        state.trace= x0'
    end
    
    function model(s::Vector, g::Vector, H::Matrix)
        return dot(s, g)+0.5*dot(s, H*s)
    end
    
    while (!stop(state))
        # Compute the step by approximately minimize the model
        state.step = Step(state)
        state.xcand = state.x+state.step
        
        # Compute the actual reduction over the predicted reduction
        fcand = f(state.xcand)
        state.ρ = (fcand-fx)/(model(state.step, state.g, state.H))

       if (ApproxH)
            g!(gcand, state.xcand)
            y = gcand-state.g
            sy = dot(state.step,y)
#            if (sy < 1e-6)
#                println(state.iter, ". ", state.ρ, " ", state.Δ, " ", norm(state.step), " ", (model(state.step, state.g, H)), " ", norm(y), " ", sy, " ", norm(state.g))
#            end
            state.H = H!(state.H, y, state.step)
        end

        if (acceptCandidate!(state, b))
            state.x = copy(state.xcand)
            if (ApproxH == false)
                g!(state.g, state.x)
                H!(state.H, state.x)
            else
                state.g = copy(gcand)
            end
            fx = fcand
        end

        if (state.keepTrace)
            state.trace= [state.trace ; state.x']
        end
        
        updateRadius!(state, b)
        state.iter += 1
    end
    
    return state
end
Out[86]:
btr (generic function with 5 methods)
In [87]:
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    q = dot(g,H*g)
    normg = norm(g)
    if (q <= 0)
        τ = 1.0
    else
        τ = min((normg*normg*normg)/(q*Δ),1.0)
    end
    return -τ*g*Δ/normg
end

function CauchyStep(state:: BTRState)
    return CauchyStep(state.g, state.H, state.Δ)
end
Out[87]:
CauchyStep (generic function with 2 methods)
In [88]:
function TruncatedCG(g::Vector, H::Matrix, Δ::Float64)
    n = length(g)
    s = zeros(n)

    normg0 = norm(g)
    v = g
    d = -v
    gv = dot(g,v)
    norm2d = gv
    norm2s = 0
    sMd = 0
    k = 0
    Δ2 = Δ*Δ

    while (stopCG(norm(g), normg0, k, n) == false)
        Hd = H*d
        κ = dot(d,Hd)
 
        # Is the curvature negative in the direction d?
        if (κ <= 0)
            σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
            s += σ*d
            break
        end

        α = gv/κ

        # Check is the model minimizer is outside the trust region
        norm2s += α*(2*sMd+α*norm2d)
        if (norm2s >= Δ2)
            σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
            s += σ*d
            break
        end

        # The model minimizer is inside the trust region
        s += α*d
        g += α*Hd
        v = g
        newgv = dot(g,v)
        β = newgv/gv
        gv = newgv
        d = -v+β*d
        sMd = β*(sMd+α*norm2d)
        norm2d = gv+β*β*norm2d
        
        k += 1;
    end
    
    return s
end

function TruncatedCG(state:: BTRState)
    return TruncatedCG(state.g, state.H, state.Δ)
end
Out[88]:
TruncatedCG (generic function with 2 methods)
In [89]:
state = btr(f, g!, H!, CauchyStep, [1.0,1.0])
Out[89]:
BTRState(8, [2.306630130114104, -0.33230864878785144], [2.306630130114104, -0.33230864878785144], [1.063943741996809e-7, -1.2961098860841958e-10], [44.15289727064744 0.754635329776236; 0.754635329776236 34.761904044961156], [1.6874772340955503e-10, 1.3888848506182687e-7], 0.4460773308500046, 0.9748060332431441, 1.0e-6, #undef, false)
In [90]:
state = btr(f, g!, H!, TruncatedCG, [1.0,1.0])
Out[90]:
BTRState(7, [2.3066301277034675, -0.3323086487317943], [2.3066301277034675, -0.3323086487317943], [8.526512829121202e-14, -1.0746958878371515e-13], [44.15289713679731 0.7546353369937435; 0.7546353369937435 34.76190399984316], [-3.547643308201812e-8, 6.156113901275451e-8], 0.8064365595032105, 1.0039546332276776, 1.0e-6, #undef, false)

We can compute the generalized Cauchy point using Algorithm 12.2.2 in Trust-Region methods.

In progress...

In [57]:
mutable struct BoundsConstrainedStep <: Step
    l:: Vector     # lower bounds
    u:: Vector     # Upper bounds
    β1:: Float64
    β2:: Float64
    μ:: Float64
    ν:: Float64
    
    function BoundsConstrainedStep(l::Vector, u::Vector,
                β1::Float64 = 0.1, β2::Float64 = 0.75, ν::Float64 = 0.2, μ::Float64 = 0.1)
        bcs = new()
        bcs.l = copy(l)
        bcs.u = copy(u)
        bcs.β1 = β1
        bcs.μ = μ
        return bcs
    end
end
In [58]:
GCP = BoundsConstrainedStep(l,u)

typeof(GCP) == BoundsConstrainedStep
Out[58]:
true
In [59]:
function projectTangentConeBounds!(x::Vector, d::Vector, l::Vector, u::Vector, tol::Float64 = 1e-12)
    for i = 1:length(x)
        if (x[i]-l[i] < tol)
            d[i] = max(0, d[i])
        end
        if (u[i]-x[i] < tol)
            d[i] = min(0, d[i])
        end
    end
    
    return d
end
Out[59]:
projectTangentConeBounds! (generic function with 2 methods)
In [60]:
function model(s::Vector, g::Vector, H::Matrix)
    return dot(s, g)+0.5*dot(s, H*s)
end
Out[60]:
model (generic function with 1 method)
In [101]:
# Algorithm 12.2.2 in Conn, Gould, and Toint, "Trust-region methods", SIAM, 2000.

# There is still some bug somewhere
function (GCP::BoundsConstrainedStep)(x:: Vector, g::Vector, H::Matrix, Δ::Float64)
    tmin = 0
    tmax = Inf
    t = Δ/norm(g)
    Δ2 = Δ*Δ
    j = 0
    s = zeros(length(x))   # prevents s to be a local variable of the while loop.
    
    cont = true
    
    # The condition on j is set as a crude safeguard.
    while (cont && (j < 50))
        p = boundsProjection(x-t*g, GCP.l, GCP.u)  # compute the point on the projected gradient path
        s = p-x
        mk = model(s, g, H)
        gs = dot(g,s)

   #     println(j, " ", tmin, ", ", tmax, " s = ", s, " ", x, x+s, -t*g)

        if ((dot(s,s) > Δ2) || (mk > GCP.β1*gs))
            tmax = t
        elseif ((dot(s,s) < GCP.μ*GCP.μ*Δ2) && (mk < GCP.β2*gs) &&
            (norm(projectTangentConeBounds!(p,-g,GCP.l,GCP.u)) > GCP.ν*abs(gs)/Δ))
            tmin = t
        else
            cont = false
            return s
        end
        
        if tmax < Inf
            t = (tmin+tmax)/2
        else
            t *= 2;
        end
        j += 1
    end

    return s
end
In [92]:
Δ = state.Δ = 1.1
x = state.x = [1, 0.9]
l = [0.0, 0.0]
u = [10.0, 10.0]
g(x), H(x), Δ
Out[92]:
([-15.762204114225607, 20.48643987308266], [-10.537979187193045 -0.33353700157628285; -0.33353700157628285 16.866692361490067], 1.1)
In [93]:
s = GCP(x, g(x), H(x), Δ)
Out[93]:
2-element Array{Float64,1}:
  0.670772847559197 
 -0.8718163722810705
In [94]:
state.g = g(x)
state.H = H(x)
Out[94]:
2×2 Array{Float64,2}:
 -10.538     -0.333537
  -0.333537  16.8667  
In [95]:
function boundsstep(state:: BTRState)
    s = GCP(state.x, state.g, state.H, state.Δ)
    println("Step = ", s, state.x, state.x+s)
    return s
end
Out[95]:
boundsstep (generic function with 1 method)
In [96]:
s = boundsstep(state)
Step = [0.670772847559197, -0.8718163722810705][1.0, 0.9][1.670772847559197, 0.02818362771892957]
Out[96]:
2-element Array{Float64,1}:
  0.670772847559197 
 -0.8718163722810705
In [99]:
state = btr(f, g!, H!, boundsstep, [1.0,1.0], state, btrStopStep)
Step = [1.5838790776527443, -1.0][1.0, 1.0][2.5838790776527443, 0.0]
Step = [-0.2823029589804307, 0.0][2.5838790776527443, 0.0][2.3015761186723136, 0.0]
Step = [-0.030433030322345456, 0.0][2.3015761186723136, 0.0][2.271143088349968, 0.0]
Step = [0.024222701948566794, 0.0][2.271143088349968, 0.0][2.295365790298535, 0.0]
Step = [-0.019451362156669205, 0.0][2.295365790298535, 0.0][2.2759144281418657, 0.0]
Step = [0.015511229554155292, 0.0][2.2759144281418657, 0.0][2.291425657696021, 0.0]
Step = [-0.012439332659284652, 0.0][2.291425657696021, 0.0][2.2789863250367364, 0.0]
Step = [0.009931251677202546, 0.0][2.2789863250367364, 0.0][2.288917576713939, 0.0]
Step = [-0.007957530259744772, 0.0][2.288917576713939, 0.0][2.280960046454194, 0.0]
Step = [0.006357798032613271, 0.0][2.280960046454194, 0.0][2.2873178444868074, 0.0]
Step = [-0.005091396295545891, 0.0][2.2873178444868074, 0.0][2.2822264481912615, 0.0]
Step = [0.004069761509173819, 0.0][2.2822264481912615, 0.0][2.2862962097004353, 0.0]
Step = [-0.0032579287516747435, 0.0][2.2862962097004353, 0.0][2.2830382809487606, 0.0]
Step = [0.0026049721994607644, 0.0][2.2830382809487606, 0.0][2.2856432531482214, 0.0]
Step = [-0.0020848477387129805, 0.0][2.2856432531482214, 0.0][2.2835584054095084, 0.0]
Step = [0.0016673176558024139, 0.0][2.2835584054095084, 0.0][2.285225723065311, 0.0]
Step = [-0.001334210945256764, 0.0][2.285225723065311, 0.0][2.283891512120054, 0.0]
Step = [0.0010671394080961072, 0.0][2.283891512120054, 0.0][2.28495865152815, 0.0]
Step = [-0.0008538578055690138, 0.0][2.28495865152815, 0.0][2.284104793722581, 0.0]
Step = [0.0006829923681261896, 0.0][2.284104793722581, 0.0][2.2847877860907073, 0.0]
Step = [-0.0005464538433286315, 0.0][2.2847877860907073, 0.0][2.2842413322473787, 0.0]
Step = [0.00043712463984846295, 0.0][2.2842413322473787, 0.0][2.284678456887227, 0.0]
Step = [-0.00034972427555457486, 0.0][2.284678456887227, 0.0][2.2843287326116726, 0.0]
Step = [0.00027976368197801804, 0.0][2.2843287326116726, 0.0][2.2846084962936506, 0.0]
Step = [-0.00022382100912876624, 0.0][2.2846084962936506, 0.0][2.284384675284522, 0.0]
Step = [0.00017905036198850866, 0.0][2.284384675284522, 0.0][2.2845637256465103, 0.0]
Step = [-0.00014324441221313933, 0.0][2.2845637256465103, 0.0][2.284420481234297, 0.0]
Step = [0.00011459289007520468, 0.0][2.284420481234297, 0.0][2.2845350741243724, 0.0]
Step = [-9.167600084269978e-5, 0.0][2.2845350741243724, 0.0][2.2844433981235297, 0.0]
Step = [7.333971953471519e-5, 0.0][2.2844433981235297, 0.0][2.2845167378430644, 0.0]
Step = [-5.867246739388321e-5, 0.0][2.2845167378430644, 0.0][2.2844580653756705, 0.0]
Step = [4.693753110096566e-5, 0.0][2.2844580653756705, 0.0][2.2845050029067715, 0.0]
Step = [-3.755030823837657e-5, 0.0][2.2845050029067715, 0.0][2.284467452598533, 0.0]
Step = [3.0040065219338885e-5, 0.0][2.284467452598533, 0.0][2.2844974926637525, 0.0]
Step = [-2.403216824120591e-5, 0.0][2.2844974926637525, 0.0][2.2844734604955113, 0.0]
Step = [1.922566030465589e-5, 0.0][2.2844734604955113, 0.0][2.284492686155816, 0.0]
Step = [-1.538057578498453e-5, 0.0][2.284492686155816, 0.0][2.284477305580031, 0.0]
Step = [1.230443019961669e-5, 0.0][2.284477305580031, 0.0][2.2844896100102305, 0.0]
Step = [-9.84356363353811e-6, 0.0][2.2844896100102305, 0.0][2.284479766446597, 0.0]
Step = [7.874838443999721e-6, 0.0][2.284479766446597, 0.0][2.284487641285041, 0.0]
Step = [-6.299878731574893e-6, 0.0][2.284487641285041, 0.0][2.2844813414063094, 0.0]
Step = [5.039897880809718e-6, 0.0][2.2844813414063094, 0.0][2.2844863813041902, 0.0]
Step = [-4.031921572167363e-6, 0.0][2.2844863813041902, 0.0][2.284482349382618, 0.0]
Step = [3.2255351674059796e-6, 0.0][2.284482349382618, 0.0][2.2844855749177855, 0.0]
Step = [-2.5804294723208443e-6, 0.0][2.2844855749177855, 0.0][2.284482994488313, 0.0]
Step = [2.0643427212085896e-6, 0.0][2.284482994488313, 0.0][2.2844850588310344, 0.0]
Step = [-1.6514747254170459e-6, 0.0][2.2844850588310344, 0.0][2.284483407356309, 0.0]
Step = [1.3211794298584323e-6, 0.0][2.284483407356309, 0.0][2.284484728535739, 0.0]
Step = [-1.056943768062979e-6, 0.0][2.284484728535739, 0.0][2.2844836715919707, 0.0]
Step = [8.455548705654792e-7, 0.0][2.2844836715919707, 0.0][2.2844845171468413, 0.0]
Out[99]:
BTRState(50, [2.2844845171468413, 0.0], [2.2844845171468413, 0.0], [1.6019108386444714e-5, 9.137938068587365], [42.62643410900364 4.0; 4.0 20.0], [8.455548705654792e-7, 0.0], 1.3619714456525291, 0.9966994103976368, 1.0e-6, #undef, false)

The stopping criteria should be improved.

In [100]:
state.x
Out[100]:
2-element Array{Float64,1}:
 2.2844845171468413
 0.0               

In progress...

Another possibility is to project the gradient, and when a constraint becomes active, to continue the search along the set of active constraints. See https://wiki.mcs.anl.gov/leyffer/images/0/01/07-bndCons.pdf

For the projected gradient, we can first determine the active set at the current iterate, incorporating information about the current search direction.

In [69]:
function activeSetBounds(x:: Vector, direction:: Vector, b::Bounds, tol = 10*eps(Float64))
    n = length(x)
    # Initialize the number of free variables
    nfree = n  # nfree will contain the number of free variables
    # free designs which variables are free, and which variables are constrained to a bound.
    # free[i] = 1 if x[i] is free, and free[0] otherwise.
    free = ones(n)
    
    for i=1:n
        if (((abs(b.l[i]-x[i]) <= tol) && direction[i] < 0) || ((abs(x[i]-b.u[i]) <= tol) && direction[i] > 0))
            free[i] = 0
            nfree -= 1
        end
    end
    
    return free, nfree
end
Out[69]:
activeSetBounds (generic function with 2 methods)
In [70]:
l = [0; 0]
u = [1; 1]
b = Bounds(l, u)
d = [-1; -1]
x = [0; 1]
free, nfree = activeSetBounds(x, d, b)
Out[70]:
([0.0, 1.0], 1)

Knowing the active set, we can compute the restricted direction.

In [71]:
function reducedDirectionBounds(d:: Vector, free:: Vector)
    return d.*free
end
Out[71]:
reducedDirectionBounds (generic function with 1 method)
In [72]:
rd = reducedDirectionBounds(d, free)
Out[72]:
2-element Array{Float64,1}:
 -0.0
 -1.0

We can use the reduced gradient to compute the Cauchy point. Recall the computation of the Cauchy step in the unconstrained case.

In [73]:
function CauchyStep(g::Vector,H::Matrix,Δ::Float64)
    q = dot(g,H*g)
    normg = norm(g)

    if (q <= 0)
        τ = 1.0
    else
        τ = min((normg*normg*normg)/(q*Δ),1.0)
    end

    return -τ*g*Δ/normg
end
Out[73]:
CauchyStep (generic function with 2 methods)

We will now examine the curvature with the reduced gradient instead of the gradient.

If the curvature is negative, we have to search along the projected gradient a step that does not lead outside the trust region.

If the curvature is positive, we will check if the model minimization is inside the intersection of the feasible set and the trust-region. If it is not, we again have to search along the projected gradient. In the context of conjugated gradient, if the trust-region boundary has not been reached yet, we update the set of free variables and computed the next search direction.

TO DO!

In [ ]: