Penalty and barrier methods

In [ ]:
using BenchmarkTools
using ForwardDiff
In [ ]:
include("btr.jl")

Penalty methods

First example

$$ \min_x f(x) = x_1^2+x_1\sin(x_2)+4x_2^2 $$
In [ ]:
f(x) = x[1]^2+x[1]*sin(x[2])+4x[2]^2
In [ ]:
using Plots
#plotly()
gr()

default(size=(600,600), fc=:heat)
x, y = -1.0:0.05:1.0, -1.0:0.05:1.0
z = Surface((x,y)->f([x,y]), x, y)
surface(x,y,z, linealpha = 1)
In [ ]:
Plots.contour(x,y,z, linealpha = 0.5, levels=1200)
In [ ]:
g = x -> ForwardDiff.gradient(f, x);
H = x -> ForwardDiff.hessian(f, x)

function g!(storage::Vector, x::Vector)
    storage[1:length(x)] = g(x)
end

function H!(storage::Matrix, x::Vector)
    n = length(x)
    storage[1:n,1:n] = H(x)
end
In [ ]:
state = btr(f, g!, H!, TruncatedCG, [1.0,1.0])
In [ ]:
state.x

It is easy to see that $(0,0)$ is indeed a first-order critical point.
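The gradient is $$ \nabla f(x) = \begin{pmatrix} 2x_1 + \sin(x_2) \\ x_1\cos(x_2) + 8x_2 \end{pmatrix}, $$ which vanishes at the origin.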

In [ ]:
g(state.x)

Now introduce the equality constraint $$ (x_1-1)^2+(x_2-1)^2 = 1. $$

In [ ]:
c(x) = (x[1]-1.0)^2+(x[2]-1.0)^2-1.0
gc = x -> ForwardDiff.gradient(c, x);
Hc = x -> ForwardDiff.hessian(c, x)

function gc!(storage::Vector, x::Vector)
    storage[1:length(x)] = gc(x)
end

function Hc!(storage::Matrix, x::Vector)
    n = length(x)
    storage[1:n,1:n] = Hc(x)
end
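The quadratic penalty function associated with the equality constraint $c(x) = 0$ is $$ \Phi_\mu(x) = f(x) + \frac{1}{2\mu}\, c(x)^2, $$ which we minimize for decreasing values of the penalty parameter $\mu > 0$.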
In [ ]:
μ = 1.0
Φ(x) = f(x)+1/(2*μ)*(c(x)*c(x))   # quadratic penalty; μ is a global, so updating μ changes Φ
In [ ]:
gΦ = x -> ForwardDiff.gradient(Φ, x);
HΦ = x -> ForwardDiff.hessian(Φ, x)

function gΦ!(storage::Vector, x::Vector)
    storage[1:length(x)] = gΦ(x)
end

function HΦ!(storage::Matrix, x::Vector)
    n = length(x)
    storage[1:n,1:n] = HΦ(x)
end
In [ ]:
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, [0.0,0.0])
In [ ]:
state.x
In [ ]:
f(state.x)
In [ ]:
c(state.x)
In [ ]:
Φ(state.x)
In [ ]:
μ = 0.1
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, [0.0,0.0])
In [ ]:
state.x
In [ ]:
f(state.x)
In [ ]:
c(state.x)
In [ ]:
μ = 0.01
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, [0.0,0.0])
In [ ]:
c(state.x)
In [ ]:
μ = 0.00001
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, [0.0,0.0])
In [ ]:
c(state.x)
In [ ]:
μ = 0.00001
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, [0.0,1.0])
In [ ]:
f(state.x)
In [ ]:
state.x
In [ ]:
μ = 1.0
α = 0.1
x0 = [0.0,1.0]
state = btr(Φ, gΦ!, HΦ!, TruncatedCG, x0)
println(state.x)
while (μ > 1e-6)
    μ *= α                 # decrease the penalty parameter
    x0 = state.x           # warm start from the previous solution
    state = btr(Φ, gΦ!, HΦ!, TruncatedCG, x0)
    println(state.x)
end
In [ ]:
state.x
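Comparing the stationarity condition of the penalty subproblem, $\nabla \Phi_\mu(x_\mu) = \nabla f(x_\mu) + \frac{c(x_\mu)}{\mu}\nabla c(x_\mu) = 0$, with $\nabla f(x) + \lambda \nabla c(x) = 0$ suggests the first-order multiplier estimate $\lambda_\mu = c(x_\mu)/\mu$. As a quick sketch, using the state.x and μ left by the loop above:
In [ ]:
λ = c(state.x)/μ   # first-order multiplier estimate from the quadratic penalty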

Second example

\begin{align*} \min_x \ & -5x_1^2 + x_2^2 \\ \mbox{s.t. } & x_1 = 1 \end{align*}
$$ \Phi(x,\mu) = -5x_1^2 + x_2^2+\frac{1}{2\mu} (x_1-1)^2 $$
$$ \nabla_x \Phi(x,\mu) = \begin{pmatrix} -10x_1 +\frac{1}{\mu} (x_1-1) \\ 2x_2 \end{pmatrix} $$
$$ \nabla_x \Phi(x,\mu) = 0 $$

iff $$ \begin{cases} -10x_1 +\frac{1}{\mu} (x_1-1) = 0 \\ x_2 = 0. \end{cases} $$ Multiplying the first equation by $\mu$ gives $$ (1-10\mu)\, x_1 = 1, $$ which is equivalent to $$ x_1 = \frac{1}{1-10\mu} $$ if $10\mu \ne 1$. If $10\mu = 1$, the first equation becomes $$ -10x_1 + 10(x_1-1) = -10 \ne 0, $$ so the gradient has no zero in that case.

$$ \nabla_{xx}^2 \Phi(x,\mu) = \begin{pmatrix} -10+\frac{1}{\mu} & 0 \\ 0 & 2 \end{pmatrix} $$

If $\mu < 0.1$, then $-10+1/\mu > 0$. Then $\nabla^2 \Phi(x, \mu)$ is positive definite. Thus the zero of the gradient is a minimizer.

If $\mu > 0.1$, then $-10+1/\mu < 0$. Then $\nabla^2 \Phi(x, \mu)$ is indefinite. Thus the zero of the gradient is a saddle point.

If $\mu = 0.1$, then there is no zero of the gradient.
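As a quick sanity check of the formulas above (no solver needed), we can evaluate the closed-form minimizer $x_1(\mu) = \frac{1}{1-10\mu}$, $x_2(\mu) = 0$ for decreasing values of $\mu < 0.1$; it approaches the constrained solution $(1, 0)$.
In [ ]:
# Closed-form minimizer of Φ(⋅,μ) for μ < 0.1 (see the derivation above).
for μ in [0.05, 0.01, 1e-3, 1e-4, 1e-6]
    println("μ = ", μ, ",  x₁(μ) = ", 1/(1-10μ))
end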

In [ ]:
using LinearAlgebra

μ = 1.0
H = [ -10+1/μ 0 ; 0 2 ]   # note: this reuses the name H, shadowing the Hessian function defined earlier
eigen(H)
In [ ]:
μ = 1.0e-10
H = [ -10+1/μ 0 ; 0 2 ]
eigen(H)
In [ ]:
cond(H)
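The eigenvalues of this Hessian are $-10+1/\mu$ and $2$, so its condition number behaves like $\frac{1}{2\mu}$ as $\mu \to 0$ (about $5\times 10^{9}$ for $\mu = 10^{-10}$): the penalty subproblems become increasingly ill-conditioned.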

Barrier methods

\begin{align*} \min_x \ & -x+1 \\ \mbox{s.t. } & x \leq 1 \end{align*}
$$ L(x,\lambda) = -x+1+\lambda(x-1) $$

The KKT conditions are \begin{align*} -1+\lambda & = 0 \\ x-1 & \leq 0 \\ \lambda (x-1) & = 0\\ \lambda & \geq 0 \end{align*} and we obtain $(x^*, \lambda^*) = (1,1)$.
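For strictly feasible points ($x < 1$), the logarithmic barrier function is $$ B_\mu(x) = f(x) - \mu \log(-g(x)) = -x + 1 - \mu \log(1-x). $$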

In [ ]:
fb(x) = -x[1]+1                   # objective
gi(x) = x[1]-1                    # inequality constraint, gi(x) ≤ 0
B(x) = fb(x) - μ*log(1-x[1])      # logarithmic barrier, defined for x[1] < 1
$$ 0 = \nabla B(x) = \nabla f(x) - \mu \frac{-\nabla g(x)}{-g(x)} = \nabla f(x) - \mu \frac{\nabla g(x)}{g(x)} $$
$$ -1-\frac{\mu}{x-1} = 0 \quad\Longrightarrow\quad x-1 = -\mu \quad\Longrightarrow\quad x(\mu) = 1-\mu $$
$$ \lambda_{\mu} = -\frac{\mu}{x(\mu)-1} = -\frac{\mu}{1-\mu-1} = 1 $$
In [ ]:
gB = x -> ForwardDiff.gradient(B, x);
HB = x -> ForwardDiff.hessian(B, x)

function gB!(storage::Vector, x::Vector)
    storage[1:length(x)] = gB(x)
end

function HB!(storage::Matrix, x::Vector)
    n = length(x)
    storage[1:n,1:n] = HB(x)
end
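With $\mu = 1$, the derivation above gives $x(\mu) = 1-\mu = 0$, so the barrier gradient should vanish at $x = 0$.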
In [ ]:
μ = 1.0
gB([0.0])
In [ ]:
state = btr(B, gB!, HB!, TruncatedCG, [0.0])
In [ ]:
state.x
In [ ]:
μ = 0.1
btr(B, gB!, HB!, TruncatedCG, [0.0])

We have to restrict the step! A simple way is to reduce the trust-region radius whenever the trial point leaves the domain of the barrier. We can achieve this with Julia's exception mechanism: catch the DomainError raised by log and return $+\infty$, so that the step is rejected and the radius decreased.

In [ ]:
# Note: this redefines the function f used at the beginning of the notebook;
# it only serves to illustrate catching a DomainError and returning +Inf.
f(x) = try
    log(x)
catch
    +Inf
end
In [ ]:
f(10)
In [ ]:
f(-10)
In [ ]:
function Bexception(x)
    try
        val = fb(x) - μ*log(1-x[1])
        return val
    catch
        return +Inf     # outside the domain: the step will be rejected
    end
end
In [ ]:
state = btr(Bexception, gB!, HB!, TruncatedCG, [0.0])
In [ ]:
state.x
In [ ]:
μ = 0.1
x = [0.0]
while (μ > 1e-10)
    state = btr(Bexception, gB!, HB!, TruncatedCG, x)
    x = state.x
    λ = -μ/gi(x)                    # multiplier estimate λ_μ = -μ/g(x(μ))
    println("x = ", x, ", λ = ", λ)
    μ = 0.1*μ                       # decrease the barrier parameter
end
In [ ]:
x
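As predicted by the derivation above, $x(\mu) = 1-\mu \to 1$ while the multiplier estimate $\lambda_\mu = -\mu/(x(\mu)-1) = 1$ for every $\mu$, consistent with the KKT pair $(x^*, \lambda^*) = (1,1)$.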