using LinearAlgebra
The simplest approach consists in exploiting the fact that $\forall\, t > 0$, $$ d = P_{\mathcal{X}}(x - t\nabla f(x)) - x $$ is a descent direction. If we fix the value of $t$, for instance $t = 1$, we can directly adapt our linesearch method, using the Armijo backtracking method.
"""
    linesearch(f, g, h, x0, direction, steplength, δ = 1e-6, stop = stopping, nmax = 1000)

Generic linesearch loop. `direction(f, g, h, x)` returns the search direction
and the gradient at `x`; `steplength(f, dfx, x, d)` returns the step length
along `d`; `stop(d, k, nmax, δ²)` decides termination.

Returns the final iterate, the gradient at that iterate, and the iteration count.
"""
function linesearch(f::Function, g::Function, h::Function,
                    x0::Vector, direction::Function, steplength::Function,
                    δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    k = 0
    # x += α*d below rebinds x to a new array, so x0 itself is never mutated.
    x = x0
    δ2 = δ*δ
    # Compute the initial search direction and gradient.
    # (The original pre-allocated dfx with zeros(n); that value was never read.)
    d, dfx = direction(f, g, h, x)
    while !stop(d, k, nmax, δ2)
        # Compute the step length along d
        α = steplength(f, dfx, x, d)
        # Update the iterate
        x += α*d
        # Compute the next search direction
        d, dfx = direction(f, g, h, x)
        k += 1
    end
    return x, dfx, k
end
"""
    ArmijoStep(f, dfx, x, d, αmax = 1.0, β = 0.1, κ = 0.2)

Armijo backtracking: starting from `αmax`, shrink the step length by the
factor `κ` until the sufficient-decrease condition
`f(x + αd) < f(x) + αβ ∇f(x)ᵀd` holds (or α underflows below 1e-16).
"""
function ArmijoStep(f::Function, dfx::Vector, x::Vector, d::Vector,
                    αmax::Float64 = 1.0, β::Float64 = 0.1, κ::Float64 = 0.2)
    fx = f(x)
    slope = dot(dfx, d)   # directional derivative along d (invariant in the loop)
    α = αmax
    while f(x + α*d) >= fx + α*β*slope && α > 1e-16
        α *= κ
    end
    return α
end
We will consider bound constraints. The projection is then very easy to obtain.
"""
    boundsProjection(x, lower, upper)

Componentwise projection of `x` onto the box `[lower, upper]`.
"""
function boundsProjection(x::Vector, lower::Vector, upper::Vector)
    return [max(min(upper[i], x[i]), lower[i]) for i in eachindex(x)]
end
We test the last function.
# Sanity check of the projection: x has components below, inside, and above [0, 10].
l = [ 0 for i = 1:10 ]
u = [ 10 for i = 1:10 ]
x = [ -5+2.0*i for i = 1:10 ]
z = boundsProjection(x,l,u)
We will consider the example function given in the introduction chapter of "Trust-Region Methods" (Conn, Gould, and Toint, 2000): $$ f(x,y) = -10x^2+10y^2+4\sin(xy)-2x+x^4 $$
using ForwardDiff
# Example objective from the introduction of Conn, Gould & Toint,
# "Trust-Region Methods" (SIAM, 2000): nonconvex, with several stationary points.
function f(x::Vector)
    x1, x2 = x[1], x[2]
    return -10x1^2 + 10x2^2 + 4sin(x1*x2) - 2x1 + x1^4
end
using Plots
plotly()
# Visualize the objective on [-3, 3]² as a 3-D surface and as contour lines.
default(size=(600,600), fc=:heat)
x, y = -3.0:0.1:3.0, -3.0:0.1:3.0
z = Surface((x,y)->f([x,y]), x, y)
surface(x,y,z)
Plots.contour(x,y,z, linealpha = 0.1, levels=2000)
# Gradient and Hessian of f obtained by forward-mode automatic differentiation.
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x)
# In-place gradient: write g(x) into the first length(x) entries of storage.
# Relies on the global closure `g` defined above.
function g!(storage::Vector, x::Vector)
storage[1:length(x)] = g(x)
end
# In-place Hessian: write H(x) into the leading n×n block of storage.
# Relies on the global closure `H` defined above.
function H!(storage::Matrix, x::Vector)
n = length(x)
storage[1:n,1:n] = H(x)
end
"""
    stopping_unconstrained(x, ∇x, k, nmax, tol)

Stop when the squared gradient norm drops below `tol`, or after `nmax`
iterations. (`x` is accepted for signature compatibility but unused.)
"""
function stopping_unconstrained(x::Vector, ∇x::Vector, k::Int, nmax::Int, tol::Float64)
    return dot(∇x, ∇x) < tol || k >= nmax
end
We can define some usual search directions.
"""
    NewtonDirection(f, g, h, x)

Newton search direction `-H⁻¹∇f` at `x`. `g(storage, x)` and `h(storage, x)`
fill the gradient and Hessian in place. Returns the direction and the gradient.
"""
function NewtonDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    H = zeros(n, n) + I
    # BUG FIX: the original called the global g! here instead of the supplied
    # gradient function g, silently ignoring the caller's choice.
    g(∇f, x)
    h(H, x)
    return -H\∇f, ∇f
end
"""
    steepestDirection(f, g, h, x)

Steepest-descent direction `-∇f(x)`. `g(storage, x)` fills the gradient in
place; `f` and `h` are unused but kept for signature compatibility.
Returns the direction and the gradient.
"""
function steepestDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    return -∇f, ∇f
end
"""
    stopping(d, k, nmax, tol)

Default linesearch stopping rule: the squared norm of the direction `d` is
below `tol`, or the iteration budget `nmax` is exhausted.
"""
function stopping(d::Vector, k::Int, nmax::Int, tol::Float64)
    return dot(d, d) < tol || k >= nmax
end
The unconstrained minimization gives
# Unconstrained minimization from the origin: steepest descent + Armijo backtracking.
x, df, k = linesearch(f, g!, H!, [0.0,0.0], steepestDirection, ArmijoStep)
f(x)
Let us consider simple bounds: $$ 0 \leq x[i] \leq 10,\ \forall i $$
# Bounds for the constrained problem: 0 <= x[i] <= 10.
l = [0.0; 0.0]
u = [10.0; 10.0]
# Box constraints l <= x <= u. Calling a Bounds object projects a point
# onto the box.
mutable struct Bounds
    l::Array{Float64,1}   # lower bounds
    u::Array{Float64,1}   # upper bounds
end
# Componentwise projection of x onto [l, u].
function (projectBounds::Bounds)(x::Vector)
    return [max(min(projectBounds.u[i], x[i]), projectBounds.l[i]) for i in eachindex(x)]
end
# Instantiate the projection functor, then relax the upper bounds to +Inf.
projectBounds = Bounds(l, u)
projectBounds.l = [0.0; 0.0]
projectBounds.u = [Inf; Inf]
"""
    projectedDirection(f, g, h, x)

Projected-gradient direction `d = P_X(x - ∇f(x)) - x` (trial step t = 1),
a feasible descent direction for the box constraints held in the global
functor `projectBounds`. Returns the direction and the gradient.
"""
function projectedDirection(f::Function, g::Function, h::Function, x::Vector)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    d = x - ∇f
    d = projectBounds(d)   # NOTE(review): relies on the global feasible set
    d = d - x
    return d, ∇f
end
# Projected-gradient linesearch from (1,1) with Armijo backtracking.
x, df, k = linesearch(f, g!, H!, [1.0,1.0], projectedDirection, ArmijoStep)
x, f(x)
# Project the trial point x + d onto the box, and return the resulting
# feasible step (the caller's d is not mutated: += / -= rebind locally).
function (projectDirection::Bounds)(d::Vector, x::Vector)
    trial = d + x
    projected = max.(min.(projectDirection.u, trial), projectDirection.l)
    return projected - x
end
# Functor instance holding the box used to project candidate steps.
projectDirection = Bounds(l, u)
"""
    linesearchProjection(f, g, h, x0, direction, step, δ = 1e-6, stop = stopping, nmax = 1000)

Projected linesearch: termination is tested on the projected step
`s = P(d)` (global functor `projectDirection`), and `step(f, ∇f, x, d)`
returns the feasible step itself, which is added to the iterate.
"""
function linesearchProjection(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, step::Function,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    x = x0
    δ2 = δ*δ
    k = 0
    # Initial direction, gradient, and projected step.
    d, ∇f = direction(f, g, h, x)
    s = projectDirection(d, x)
    while !stop(s, k, nmax, δ2)
        # step() returns the (projected) step to take, not a scalar length.
        s = step(f, ∇f, x, d)
        x += s
        d, ∇f = direction(f, g, h, x)
        s = projectDirection(d, x)
        k += 1
    end
    return x, ∇f, k
end
"""
    GoldsteinStep(f, ∇f, x, d, αmax = 1.0, β = 0.1, κ = 0.2)

Backtracking with projection: every trial step `α*d` is projected onto the
feasible box (global functor `projectDirection`) before the sufficient-decrease
test is evaluated on the projected step `s`. Returns the feasible step `s`.
"""
function GoldsteinStep(f::Function, ∇f::Vector, x::Vector, d::Vector,
                       αmax::Float64 = 1.0, β::Float64 = 0.1, κ::Float64 = 0.2)
    fx = f(x)
    α = αmax
    s = projectDirection(α*d, x)
    while f(x + s) >= fx + β*dot(∇f, s) && α > 1e-16
        α *= κ
        s = projectDirection(α*d, x)
    end
    return s
end
# Projected linesearch with the Goldstein-style projected backtracking step.
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, GoldsteinStep)
k, x, f(x)
The choice of $\alpha_{\max}$ influences the algorithm behavior. We will define a structure allowing us to easily modify the parameters of the linesearch.
# Marker supertype for step-length strategies.
abstract type Step end
# Backtracking linesearch parameters: initial trial length αmax,
# sufficient-decrease coefficient β, and backtracking factor κ.
mutable struct LineSearchStep <: Step
αmax::Float64
β::Float64
κ::Float64
end
# Default parameters, matching GoldsteinStep's defaults.
goldsteinstep = LineSearchStep(1.0, 0.1, 0.2)
# Functor version of GoldsteinStep: the backtracking parameters are read from
# the LineSearchStep object, and every trial step α*d is projected onto the
# feasible box (global functor projectDirection) before the sufficient-decrease
# test is evaluated. Returns the projected step s.
function (goldsteinstep::LineSearchStep)(f::Function, ∇f::Vector, x::Vector, d::Vector)
    fx = f(x)
    α = goldsteinstep.αmax
    s = projectDirection(α*d, x)
    while f(x + s) >= fx + goldsteinstep.β*dot(∇f, s) && α > 1e-16
        α *= goldsteinstep.κ
        s = projectDirection(α*d, x)
    end
    return s
end
"""
    linesearchProjection(f, g, h, x0, direction, step::Step, δ = 1e-6, stop = stopping, nmax = 1000)

Overload taking a `Step` functor (e.g. a `LineSearchStep`) so that the
backtracking parameters can be tuned without redefining the step function.
Otherwise identical to the `step::Function` method.
"""
function linesearchProjection(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, step::Step,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    x = x0
    δ2 = δ*δ
    k = 0
    # Initial direction, gradient, and projected step.
    d, ∇f = direction(f, g, h, x)
    s = projectDirection(d, x)
    while !stop(s, k, nmax, δ2)
        # The Step functor returns the (projected) step to take.
        s = step(f, ∇f, x, d)
        x += s
        d, ∇f = direction(f, g, h, x)
        s = projectDirection(d, x)
        k += 1
    end
    return x, ∇f, k
end
# List the available methods, then study the influence of αmax on convergence.
methods(linesearchProjection)
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 2.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 1000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
goldsteinstep.αmax = 200000.0
x, dfx, k = linesearchProjection(f, g!, H!, [1.0,1.0], steepestDirection, goldsteinstep)
typeof(goldsteinstep)
We should be able to adjust $\alpha_{\max}$ automatically. The trust-region framework is particularly well suited for this.
Projection is not truncation!
# Steepest-descent direction truncated at the feasible-box boundary:
# αmax is shrunk until every component of x - α∇f respects its bound.
# Returns the truncated direction and the gradient.
function (truncatedDirection::Bounds)(f::Function, g::Function, h::Function, x::Vector, αmax = 1.0)
    n = length(x)
    ∇f = zeros(n)
    # BUG FIX: the original called the global g! instead of the supplied
    # gradient function g.
    g(∇f, x)
    α = αmax
    for i = 1:n
        if ∇f[i] > 0
            # Moving toward the lower bound: enforce x[i] - α∇f[i] >= l[i].
            α = min(α, (x[i]-truncatedDirection.l[i])/∇f[i])
        elseif ∇f[i] < 0
            # Moving toward the upper bound: enforce x[i] - α∇f[i] <= u[i].
            α = min(α, (x[i]-truncatedDirection.u[i])/∇f[i])
        end
    end
    d = -∇f*α
    return d, ∇f
end
# Truncate a given direction d so that x + d remains inside the box [l, u]:
# the scaling α starts at αmax and shrinks until every component is feasible.
function (truncatedDirection::Bounds)(x::Vector, d::Vector, αmax = 1.0)
    α = αmax
    for i in eachindex(x)
        if d[i] < 0
            # Decreasing component: enforce x[i] + αd[i] >= l[i].
            α = min(α, (truncatedDirection.l[i]-x[i])/d[i])
        elseif d[i] > 0
            # Increasing component: enforce x[i] + αd[i] <= u[i].
            α = min(α, (truncatedDirection.u[i]-x[i])/d[i])
        end
    end
    return α*d
end
# Compare the three directions at x = (1,1): unconstrained steepest descent,
# projected direction, and truncated direction.
x = [1,1]
d, dfx = steepestDirection(f,g!,H!,x)
d, dfx = projectedDirection(f,g!,H!,x)
x+d
truncatedDirection = Bounds(l, u)
d, dfx = truncatedDirection(f,g!,H!,x)
x+d
"""
    linesearchTruncation(f, g, h, x0, direction, steplength, δ = 1e-6, stop = stopping, nmax = 1000)

Linesearch where the direction is truncated at the feasible-box boundary
(global functor `truncatedDirection`) instead of projected. Steps become very
small as soon as the iterate approaches an active bound.
"""
function linesearchTruncation(f::Function, g::Function, h::Function,
                              x0::Vector, direction::Function, steplength::Function,
                              δ::Float64 = 1e-6, stop::Function = stopping, nmax::Int64 = 1000)
    k = 0
    x = x0
    δ2 = δ*δ
    # Initial direction, gradient, and truncated step.
    d, ∇f = direction(f, g, h, x)
    s = truncatedDirection(x, d)
    while !stop(s, k, nmax, δ2)
        # BUG FIX: the original called steplength(f, dfx, x, d) where dfx was
        # undefined in this scope and silently resolved to a stale global.
        α = steplength(f, ∇f, x, d)
        # Update the iterate along the truncated step.
        # NOTE(review): the step length was computed along d but applied to s;
        # confirm this combination is intended.
        x += α*s
        d, ∇f = direction(f, g, h, x)
        s = truncatedDirection(x, d)
        k += 1
    end
    return x, ∇f, k
end
# Truncated linesearch from (1,1); stalls near the boundary (see note below).
x, dfx, k = linesearchTruncation(f, g!, H!, [1.0,1.0], steepestDirection, ArmijoStep)
f(x)
The method stopped before we have converged to a solution, as we are limited to very small steps as soon as we are close to the boundary.
We first reproduce the basic trust-region algorithm
# Classical trust-region constants: acceptance thresholds η1 ≤ η2 and
# radius shrink factors γ1 (failure) and γ2 (mere acceptance).
struct BasicTrustRegion{T <: Real}
    η1::T
    η2::T
    γ1::T
    γ2::T
end

# Standard default parameter values for the basic trust-region algorithm.
BTRDefaults() = BasicTrustRegion(0.01, 0.9, 0.5, 0.5)
# Placeholder step type for the unconstrained case (carries no parameters).
struct UnconstrainedStep <: Step
end
# Mutable state carried across iterations of the basic trust-region algorithm.
mutable struct BTRState
iter::Int        # iteration counter
x::Vector        # current iterate
xcand::Vector    # candidate iterate x + step
g::Vector        # gradient at x
H::Matrix        # Hessian (or quasi-Newton approximation) at x
step::Vector     # last computed step
Δ::Float64       # trust-region radius
ρ::Float64       # achieved-over-predicted reduction ratio
tol::Float64     # convergence tolerance
trace            # optional matrix of iterates, one row per iteration
keepTrace::Bool  # whether to record the trace
# Default constructor: tol = 1e-6, tracing disabled; the remaining
# fields are initialized by btr().
function BTRState()
state = new()
state.tol = 1e-6
state.keepTrace = false
return state
end
end
# An iteration is successful — and the candidate accepted — when the achieved
# reduction is at least the fraction η1 of the predicted reduction.
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    return state.ρ >= b.η1
end
# Update the trust-region radius Δ according to the performance ratio ρ.
function updateRadius!(state::BTRState, b::BasicTrustRegion)
    if state.ρ >= b.η2
        # Very successful step: allow the radius to grow (capped at 1e20).
        stepnorm = norm(state.step)
        state.Δ = min(1e20, max(4*stepnorm, state.Δ))
    elseif state.ρ >= b.η1
        # Merely successful: shrink moderately.
        state.Δ *= b.γ2
    else
        # Unsuccessful: shrink.
        state.Δ *= b.γ1
    end
end
# One BFGS update of the Hessian approximation B from the pair (s, y),
# where s is the step and y the gradient difference.
function BFGSUpdate(B::Matrix, y::Vector, s::Vector)
    Bs = B*s
    rankTwoCorrection = (y*y')/dot(s, y) - (Bs*Bs')/dot(s, Bs)
    return B + rankTwoCorrection
end
# Truncated-CG stopping rule: iteration cap reached, or the residual satisfies
# the inexact-Newton forcing condition ‖g‖ ≤ ‖g0‖·min(χ, ‖g0‖^θ).
function stopCG(normg::Float64, normg0::Float64, k::Int, kmax::Int, χ::Float64 = 0.1, θ::Float64 = 0.5)
    return k == kmax || normg <= normg0*min(χ, normg0^θ)
end
# A default state instance (tol = 1e-6, no trace).
defaultState = BTRState()
# Unconstrained stopping rule: gradient norm below tol, or iteration budget
# exhausted. The budget, previously hard-coded inside the body, is now an
# optional parameter (default 1000, backward compatible with stop(state)).
function btrStopUnconstrained(state::BTRState, nmax::Int = 1000)
    return dot(state.g, state.g) <= state.tol*state.tol || state.iter >= nmax
end
# Step-size stopping rule: stop when the last step is shorter than tol.
function btrStopStep(state::BTRState)
    return dot(state.step, state.step) < state.tol*state.tol
end
# Basic trust-region algorithm.
#   f, g!, H!  : objective, in-place gradient, in-place Hessian (or, when
#                ApproxH is true, H! is a quasi-Newton update B -> B').
#   Step       : function computing the (approximate) model minimizer
#                from the current BTRState.
#   stop       : termination test on the state.
# Returns the final BTRState.
function btr(f::Function, g!::Function, H!::Function, Step::Function,
x0::Vector, state:: BTRState = BTRState(), stop:: Function = btrStopUnconstrained,
ApproxH::Bool = false, verbose::Bool = false)
b = BTRDefaults()
state.iter = 0
state.x = x0
n=length(x0)
state.step = zeros(n)
# Sentinel so that step-based stopping rules do not fire before the first step.
state.step[1] = +Inf
state.g = zeros(n)
# A better initialization procedure should be used with quasi-Newton approximations
# We could rely on some preconditioner.
state.H = zeros(n,n)+I
fx = f(x0)
g!(state.g, x0)
# Initial radius proportional to the gradient norm.
state.Δ = 0.1*norm(state.g) # 1.0
if (ApproxH)
# Buffers for the quasi-Newton update: y = gradient difference.
y = zeros(n)
gcand = zeros(n)
# H!(H, y, state.step)
else
H!(state.H, x0)
end
if (state.keepTrace)
state.trace= x0'
end
# Quadratic model of the reduction: m(s) = gᵀs + ½ sᵀHs.
function model(s::Vector, g::Vector, H::Matrix)
return dot(s, g)+0.5*dot(s, H*s)
end
while (!stop(state))
# Compute the step by approximately minimizing the model
state.step = Step(state)
state.xcand = state.x+state.step
# Compute the actual reduction over the predicted reduction
fcand = f(state.xcand)
state.ρ = (fcand-fx)/(model(state.step, state.g, state.H))
if (ApproxH)
# Quasi-Newton: update the Hessian approximation from (step, y),
# even when the candidate is rejected.
g!(gcand, state.xcand)
y = gcand-state.g
sy = dot(state.step,y)
# if (sy < 1e-6)
# println(state.iter, ". ", state.ρ, " ", state.Δ, " ", norm(state.step), " ", (model(state.step, state.g, H)), " ", norm(y), " ", sy, " ", norm(state.g))
# end
state.H = H!(state.H, y, state.step)
end
if (acceptCandidate!(state, b))
state.x = copy(state.xcand)
if (ApproxH == false)
g!(state.g, state.x)
H!(state.H, state.x)
else
# Gradient at the candidate was already computed for the update.
state.g = copy(gcand)
end
fx = fcand
end
if (state.keepTrace)
state.trace= [state.trace ; state.x']
end
updateRadius!(state, b)
state.iter += 1
end
return state
end
# Cauchy step: minimizer of the quadratic model along -g within the ball of
# radius Δ. With negative curvature along g, go to the boundary (τ = 1).
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    curvature = dot(g, H*g)
    normg = norm(g)
    τ = curvature <= 0 ? 1.0 : min(normg^3/(curvature*Δ), 1.0)
    return -τ*g*Δ/normg
end
# Convenience wrapper: Cauchy step from the current trust-region state.
function CauchyStep(state:: BTRState)
return CauchyStep(state.g, state.H, state.Δ)
end
# Steihaug-Toint truncated conjugate gradient: approximately minimize the
# model m(s) = gᵀs + ½ sᵀHs inside the ball ‖s‖ <= Δ. CG is stopped early
# at the boundary or when negative curvature is encountered.
# The recurrences track sMd = sᵀd, norm2s = ‖s‖², norm2d = ‖d‖² incrementally.
function TruncatedCG(g::Vector, H::Matrix, Δ::Float64)
n = length(g)
s = zeros(n)
normg0 = norm(g)
# v = g: no preconditioner is applied.
v = g
d = -v
gv = dot(g,v)
norm2d = gv
norm2s = 0
sMd = 0
k = 0
Δ2 = Δ*Δ
# g is rebound (g += α*Hd) inside the loop, so it plays the role of the
# CG residual; the caller's array is not mutated.
while (stopCG(norm(g), normg0, k, n) == false)
Hd = H*d
κ = dot(d,Hd)
# Is the curvature negative in the direction d?
if (κ <= 0)
# Follow d to the trust-region boundary: σ solves ‖s + σd‖ = Δ.
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
break
end
α = gv/κ
# Check is the model minimizer is outside the trust region
norm2s += α*(2*sMd+α*norm2d)
if (norm2s >= Δ2)
# Truncate at the boundary: σ solves ‖s + σd‖ = Δ.
σ = (-sMd+sqrt(sMd*sMd+norm2d*(Δ2-dot(s,s))))/norm2d
s += σ*d
break
end
# The model minimizer is inside the trust region
s += α*d
g += α*Hd
v = g
newgv = dot(g,v)
β = newgv/gv
gv = newgv
d = -v+β*d
sMd = β*(sMd+α*norm2d)
norm2d = gv+β*β*norm2d
k += 1;
end
return s
end
# Convenience wrapper: truncated-CG step from the current trust-region state.
function TruncatedCG(state:: BTRState)
return TruncatedCG(state.g, state.H, state.Δ)
end
# Run the basic trust-region method with the Cauchy step, then with truncated CG.
state = btr(f, g!, H!, CauchyStep, [1.0,1.0])
state = btr(f, g!, H!, TruncatedCG, [1.0,1.0])
We can compute the generalized Cauchy point using Algorithm 12.2.2 in Trust-Region methods.
In progress...
# Parameters for the generalized Cauchy point search on bound constraints
# (Algorithm 12.2.2 in Conn, Gould & Toint, "Trust-Region Methods", 2000).
mutable struct BoundsConstrainedStep <: Step
    l:: Vector # lower bounds
    u:: Vector # Upper bounds
    β1:: Float64 # sufficient model-decrease coefficient
    β2:: Float64 # excessive model-decrease coefficient (step too short)
    μ:: Float64  # fraction of the radius below which the step is too small
    ν:: Float64  # projected-gradient tolerance
    function BoundsConstrainedStep(l::Vector, u::Vector,
                                   β1::Float64 = 0.1, β2::Float64 = 0.75,
                                   ν::Float64 = 0.2, μ::Float64 = 0.1)
        bcs = new()
        bcs.l = copy(l)
        bcs.u = copy(u)
        bcs.β1 = β1
        # BUG FIX: β2 and ν were never stored, leaving those Float64 fields
        # with undefined contents when the functor later read GCP.β2 / GCP.ν.
        bcs.β2 = β2
        bcs.ν = ν
        bcs.μ = μ
        return bcs
    end
end
# Build a generalized-Cauchy-point step object for the box [l, u].
GCP = BoundsConstrainedStep(l,u)
typeof(GCP) == BoundsConstrainedStep
# Project the direction d onto the tangent cone of the box [l, u] at x:
# components pointing outside an (almost) active bound are zeroed.
# Mutates and returns d.
function projectTangentConeBounds!(x::Vector, d::Vector, l::Vector, u::Vector, tol::Float64 = 1e-12)
    for (i, xi) in enumerate(x)
        di = d[i]
        # At (or within tol of) the lower bound: forbid decreasing components.
        if xi - l[i] < tol
            di = max(0, di)
        end
        # At (or within tol of) the upper bound: forbid increasing components.
        if u[i] - xi < tol
            di = min(0, di)
        end
        d[i] = di
    end
    return d
end
# Quadratic model value m(s) = gᵀs + ½ sᵀHs (without the constant f(x) term).
model(s::Vector, g::Vector, H::Matrix) = dot(s, g) + 0.5*dot(s, H*s)
# Generalized Cauchy point along the projected-gradient path,
# Algorithm 12.2.2 in Conn, Gould, and Toint, "Trust-region methods", SIAM, 2000.
# There is still some bug somewhere
# Bisection on the path parameter t: a trial step is rejected as too long when
# it leaves the trust region or decreases the model insufficiently, and as too
# short when it is well inside the region with room to decrease further.
function (GCP::BoundsConstrainedStep)(x:: Vector, g::Vector, H::Matrix, Δ::Float64)
tmin = 0
tmax = Inf
# Initial path parameter: the unconstrained Cauchy scale.
t = Δ/norm(g)
Δ2 = Δ*Δ
j = 0
s = zeros(length(x)) # prevents s to be a local variable of the while loop.
cont = true
# The condition on j is set as a crude safeguard.
while (cont && (j < 50))
p = boundsProjection(x-t*g, GCP.l, GCP.u) # compute the point on the projected gradient path
s = p-x
mk = model(s, g, H)
gs = dot(g,s)
# println(j, " ", tmin, ", ", tmax, " s = ", s, " ", x, x+s, -t*g)
# Step too long: outside the trust region, or insufficient model decrease.
if ((dot(s,s) > Δ2) || (mk > GCP.β1*gs))
tmax = t
# Step too short: well inside the region, ample decrease, and the
# projected gradient at p is still significant.
elseif ((dot(s,s) < GCP.μ*GCP.μ*Δ2) && (mk < GCP.β2*gs) &&
(norm(projectTangentConeBounds!(p,-g,GCP.l,GCP.u)) > GCP.ν*abs(gs)/Δ))
tmin = t
else
# Acceptable generalized Cauchy point.
cont = false
return s
end
# Bisect once an upper bound on t is known; otherwise expand.
if tmax < Inf
t = (tmin+tmax)/2
else
t *= 2;
end
j += 1
end
return s
end
# Manual check of the generalized Cauchy point at an iterate near the boundary.
Δ = state.Δ = 1.1
x = state.x = [1, 0.9]
l = [0.0, 0.0]
u = [10.0, 10.0]
g(x), H(x), Δ
s = GCP(x, g(x), H(x), Δ)
state.g = g(x)
state.H = H(x)
# Adapter: use the generalized Cauchy point (global GCP) as a BTR step.
function boundsstep(state:: BTRState)
s = GCP(state.x, state.g, state.H, state.Δ)
# NOTE(review): debugging output — consider removing once the GCP is validated.
println("Step = ", s, state.x, state.x+s)
return s
end
# One manual step, then a full trust-region run with the step-size stopping rule.
s = boundsstep(state)
state = btr(f, g!, H!, boundsstep, [1.0,1.0], state, btrStopStep)
The stopping criteria should be improved.
# Final iterate of the bound-constrained trust-region run.
state.x
In progress...
Another possibility is to project the gradient, and when a constraint becomes active, to continue the search along the set of active constraints. See https://wiki.mcs.anl.gov/leyffer/images/0/01/07-bndCons.pdf
For the projected gradient, we can first determine the active set at the current iterate, incorporating information about the current search direction.
# Determine which variables are free at x given the search direction:
# a variable is fixed (active) when it sits at a bound (within tol) and the
# direction points outside the feasible box. Returns the indicator vector
# and the number of free variables.
function activeSetBounds(x:: Vector, direction:: Vector, b::Bounds, tol = 10*eps(Float64))
n = length(x)
# Initialize the number of free variables
nfree = n # nfree will contain the number of free variables
# free designates which variables are free and which are fixed at a bound:
# free[i] = 1 if x[i] is free, and free[i] = 0 otherwise.
free = ones(n)
for i=1:n
if (((abs(b.l[i]-x[i]) <= tol) && direction[i] < 0) || ((abs(x[i]-b.u[i]) <= tol) && direction[i] > 0))
free[i] = 0
nfree -= 1
end
end
return free, nfree
end
# Example: at x = (0, 1) with d = (-1, -1), x[1] is blocked at its lower bound.
l = [0; 0]
u = [1; 1]
b = Bounds(l, u)
d = [-1; -1]
x = [0; 1]
free, nfree = activeSetBounds(x, d, b)
Knowing the active set, we can compute the restricted direction.
# Reduced direction: zero the components of d corresponding to active
# (non-free) variables, keeping the free components unchanged.
reducedDirectionBounds(d::Vector, free::Vector) = d .* free
# Reduced direction for the active set computed above.
rd = reducedDirectionBounds(d, free)
We can use the reduced gradient to compute the Cauchy point. Recall the computation of the Cauchy step in the unconstrained case.
# Cauchy step (repeated here for reference): minimizer of the quadratic model
# along -g restricted to the ball of radius Δ. To use the reduced gradient,
# pass it in place of g.
function CauchyStep(g::Vector, H::Matrix, Δ::Float64)
    normg = norm(g)
    curv = dot(g, H*g)
    if curv <= 0
        # Negative curvature along g: go all the way to the boundary.
        τ = 1.0
    else
        τ = min(normg^3/(curv*Δ), 1.0)
    end
    return -τ*g*Δ/normg
end
We will now examine the curvature with the reduced gradient instead of the gradient.
If the curvature is negative, we have to search along the projected gradient a step that does not lead outside the trust region.
If the curvature is positive, we will check whether the model minimizer lies inside the intersection of the feasible set and the trust region. If it does not, we again have to search along the projected gradient. In the context of conjugate gradient, if the trust-region boundary has not been reached yet, we update the set of free variables and compute the next search direction.
Projected conjugate gradient: https://epubs.siam.org/doi/pdf/10.1137/S1052623498345075
TO DO!