_trustregion_exact.py
15.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
"""Nearly exact trust-region optimization subproblem."""
import numpy as np
from scipy.linalg import (norm, get_lapack_funcs, solve_triangular,
cho_solve)
from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
__all__ = ['_minimize_trustregion_exact',
'estimate_smallest_singular_value',
'singular_leading_submatrix',
'IterativeSubproblem']
def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None,
**trust_region_options):
"""
Minimization of scalar function of one or more variables using
a nearly exact trust-region algorithm.
Options
-------
initial_tr_radius : float
Initial trust-region radius.
max_tr_radius : float
Maximum value of the trust-region radius. No steps that are longer
than this value will be proposed.
eta : float
Trust region related acceptance stringency for proposed steps.
gtol : float
Gradient norm must be less than ``gtol`` before successful
termination.
"""
if jac is None:
raise ValueError('Jacobian is required for trust region '
'exact minimization.')
if hess is None:
raise ValueError('Hessian matrix is required for trust region '
'exact minimization.')
return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
subproblem=IterativeSubproblem,
**trust_region_options)
def estimate_smallest_singular_value(U):
"""Given upper triangular matrix ``U`` estimate the smallest singular
value and the correspondent right singular vector in O(n**2) operations.
Parameters
----------
U : ndarray
Square upper triangular matrix.
Returns
-------
s_min : float
Estimated smallest singular value of the provided matrix.
z_min : ndarray
Estimatied right singular vector.
Notes
-----
The procedure is based on [1]_ and is done in two steps. First, it finds
a vector ``e`` with components selected from {+1, -1} such that the
solution ``w`` from the system ``U.T w = e`` is as large as possible.
Next it estimate ``U v = w``. The smallest singular value is close
to ``norm(w)/norm(v)`` and the right singular vector is close
to ``v/norm(v)``.
The estimation will be better more ill-conditioned is the matrix.
References
----------
.. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H.
An estimate for the condition number of a matrix. 1979.
SIAM Journal on Numerical Analysis, 16(2), 368-375.
"""
U = np.atleast_2d(U)
m, n = U.shape
if m != n:
raise ValueError("A square triangular matrix should be provided.")
# A vector `e` with components selected from {+1, -1}
# is selected so that the solution `w` to the system
# `U.T w = e` is as large as possible. Implementation
# based on algorithm 3.5.1, p. 142, from reference [2]
# adapted for lower triangular matrix.
p = np.zeros(n)
w = np.empty(n)
# Implemented according to: Golub, G. H., Van Loan, C. F. (2013).
# "Matrix computations". Forth Edition. JHU press. pp. 140-142.
for k in range(n):
wp = (1-p[k]) / U.T[k, k]
wm = (-1-p[k]) / U.T[k, k]
pp = p[k+1:] + U.T[k+1:, k]*wp
pm = p[k+1:] + U.T[k+1:, k]*wm
if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1):
w[k] = wp
p[k+1:] = pp
else:
w[k] = wm
p[k+1:] = pm
# The system `U v = w` is solved using backward substitution.
v = solve_triangular(U, w)
v_norm = norm(v)
w_norm = norm(w)
# Smallest singular value
s_min = w_norm / v_norm
# Associated vector
z_min = v / v_norm
return s_min, z_min
def gershgorin_bounds(H):
"""
Given a square matrix ``H`` compute upper
and lower bounds for its eigenvalues (Gregoshgorin Bounds).
Defined ref. [1].
References
----------
.. [1] Conn, A. R., Gould, N. I., & Toint, P. L.
Trust region methods. 2000. Siam. pp. 19.
"""
H_diag = np.diag(H)
H_diag_abs = np.abs(H_diag)
H_row_sums = np.sum(np.abs(H), axis=1)
lb = np.min(H_diag + H_diag_abs - H_row_sums)
ub = np.max(H_diag - H_diag_abs + H_row_sums)
return lb, ub
def singular_leading_submatrix(A, U, k):
"""
Compute term that makes the leading ``k`` by ``k``
submatrix from ``A`` singular.
Parameters
----------
A : ndarray
Symmetric matrix that is not positive definite.
U : ndarray
Upper triangular matrix resulting of an incomplete
Cholesky decomposition of matrix ``A``.
k : int
Positive integer such that the leading k by k submatrix from
`A` is the first non-positive definite leading submatrix.
Returns
-------
delta : float
Amount that should be added to the element (k, k) of the
leading k by k submatrix of ``A`` to make it singular.
v : ndarray
A vector such that ``v.T B v = 0``. Where B is the matrix A after
``delta`` is added to its element (k, k).
"""
# Compute delta
delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]
n = len(A)
# Inicialize v
v = np.zeros(n)
v[k-1] = 1
# Compute the remaining values of v by solving a triangular system.
if k != 1:
v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])
return delta, v
class IterativeSubproblem(BaseQuadraticSubproblem):
"""Quadratic subproblem solved by nearly exact iterative method.
Notes
-----
This subproblem solver was based on [1]_, [2]_ and [3]_,
which implement similar algorithms. The algorithm is basically
that of [1]_ but ideas from [2]_ and [3]_ were also used.
References
----------
.. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods",
Siam, pp. 169-200, 2000.
.. [2] J. Nocedal and S. Wright, "Numerical optimization",
Springer Science & Business Media. pp. 83-91, 2006.
.. [3] J.J. More and D.C. Sorensen, "Computing a trust region step",
SIAM Journal on Scientific and Statistical Computing, vol. 4(3),
pp. 553-572, 1983.
"""
# UPDATE_COEFF appears in reference [1]_
# in formula 7.3.14 (p. 190) named as "theta".
# As recommended there it value is fixed in 0.01.
UPDATE_COEFF = 0.01
EPS = np.finfo(float).eps
def __init__(self, x, fun, jac, hess, hessp=None,
k_easy=0.1, k_hard=0.2):
super(IterativeSubproblem, self).__init__(x, fun, jac, hess)
# When the trust-region shrinks in two consecutive
# calculations (``tr_radius < previous_tr_radius``)
# the lower bound ``lambda_lb`` may be reused,
# facilitating the convergence. To indicate no
# previous value is known at first ``previous_tr_radius``
# is set to -1 and ``lambda_lb`` to None.
self.previous_tr_radius = -1
self.lambda_lb = None
self.niter = 0
# ``k_easy`` and ``k_hard`` are parameters used
# to determine the stop criteria to the iterative
# subproblem solver. Take a look at pp. 194-197
# from reference _[1] for a more detailed description.
self.k_easy = k_easy
self.k_hard = k_hard
# Get Lapack function for cholesky decomposition.
# The implemented SciPy wrapper does not return
# the incomplete factorization needed by the method.
self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))
# Get info about Hessian
self.dimension = len(self.hess)
self.hess_gershgorin_lb,\
self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
self.hess_inf = norm(self.hess, np.Inf)
self.hess_fro = norm(self.hess, 'fro')
# A constant such that for vectors smaler than that
# backward substituition is not reliable. It was stabilished
# based on Golub, G. H., Van Loan, C. F. (2013).
# "Matrix computations". Forth Edition. JHU press., p.165.
self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf
def _initial_values(self, tr_radius):
"""Given a trust radius, return a good initial guess for
the damping factor, the lower bound and the upper bound.
The values were chosen accordingly to the guidelines on
section 7.3.8 (p. 192) from [1]_.
"""
# Upper bound for the damping factor
lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb,
self.hess_fro,
self.hess_inf))
# Lower bound for the damping factor
lambda_lb = max(0, -min(self.hess.diagonal()),
self.jac_mag/tr_radius - min(self.hess_gershgorin_ub,
self.hess_fro,
self.hess_inf))
# Improve bounds with previous info
if tr_radius < self.previous_tr_radius:
lambda_lb = max(self.lambda_lb, lambda_lb)
# Initial guess for the damping factor
if lambda_lb == 0:
lambda_initial = 0
else:
lambda_initial = max(np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
return lambda_initial, lambda_lb, lambda_ub
def solve(self, tr_radius):
"""Solve quadratic subproblem"""
lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
n = self.dimension
hits_boundary = True
already_factorized = False
self.niter = 0
while True:
# Compute Cholesky factorization
if already_factorized:
already_factorized = False
else:
H = self.hess+lambda_current*np.eye(n)
U, info = self.cholesky(H, lower=False,
overwrite_a=False,
clean=True)
self.niter += 1
# Check if factorization succeeded
if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
# Successful factorization
# Solve `U.T U p = s`
p = cho_solve((U, False), -self.jac)
p_norm = norm(p)
# Check for interior convergence
if p_norm <= tr_radius and lambda_current == 0:
hits_boundary = False
break
# Solve `U.T w = p`
w = solve_triangular(U, p, trans='T')
w_norm = norm(w)
# Compute Newton step accordingly to
# formula (4.44) p.87 from ref [2]_.
delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius
lambda_new = lambda_current + delta_lambda
if p_norm < tr_radius: # Inside boundary
s_min, z_min = estimate_smallest_singular_value(U)
ta, tb = self.get_boundaries_intersections(p, z_min,
tr_radius)
# Choose `step_len` with the smallest magnitude.
# The reason for this choice is explained at
# ref [3]_, p. 6 (Immediately before the formula
# for `tau`).
step_len = min([ta, tb], key=abs)
# Compute the quadratic term (p.T*H*p)
quadratic_term = np.dot(p, np.dot(H, p))
# Check stop criteria
relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2)
if relative_error <= self.k_hard:
p += step_len * z_min
break
# Update uncertanty bounds
lambda_ub = lambda_current
lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Compute Cholesky factorization
H = self.hess + lambda_new*np.eye(n)
c, info = self.cholesky(H, lower=False,
overwrite_a=False,
clean=True)
# Check if the factorization have succeeded
#
if info == 0: # Successful factorization
# Update damping factor
lambda_current = lambda_new
already_factorized = True
else: # Unsuccessful factorization
# Update uncertanty bounds
lambda_lb = max(lambda_lb, lambda_new)
# Update damping factor
lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
else: # Outside boundary
# Check stop criteria
relative_error = abs(p_norm - tr_radius) / tr_radius
if relative_error <= self.k_easy:
break
# Update uncertanty bounds
lambda_lb = lambda_current
# Update damping factor
lambda_current = lambda_new
elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
# jac_mag very close to zero
# Check for interior convergence
if lambda_current == 0:
p = np.zeros(n)
hits_boundary = False
break
s_min, z_min = estimate_smallest_singular_value(U)
step_len = tr_radius
# Check stop criteria
if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2:
p = step_len * z_min
break
# Update uncertanty bounds
lambda_ub = lambda_current
lambda_lb = max(lambda_lb, lambda_current - s_min**2)
# Update damping factor
lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
else: # Unsuccessful factorization
# Compute auxiliary terms
delta, v = singular_leading_submatrix(H, U, info)
v_norm = norm(v)
# Update uncertanty interval
lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)
# Update damping factor
lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
self.lambda_lb = lambda_lb
self.lambda_current = lambda_current
self.previous_tr_radius = tr_radius
return p, hits_boundary