-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhomework_4_debug.py
More file actions
146 lines (111 loc) · 3.93 KB
/
homework_4_debug.py
File metadata and controls
146 lines (111 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import matplotlib.pyplot as plt
def rosenbrock_fun(xk):
    """Evaluate the 2-D Rosenbrock function at xk = (x1, x2).

    f(x1, x2) = 100 * (x2 - x1^2)^2 + (1 - x1)^2, minimized at (1, 1).
    """
    a, b = xk
    valley = (b - a ** 2) ** 2  # distance from the parabolic valley x2 = x1^2
    shift = (1 - a) ** 2        # distance from x1 = 1 along the valley
    return 100 * valley + shift
def rosenbrock_gradient(xk):
    """Return the gradient [df/dx1, df/dx2] of the Rosenbrock function at xk."""
    a, b = xk
    # Partial derivatives of 100*(x2 - x1^2)^2 + (1 - x1)^2.
    grad_a = -400 * b * a + 400 * (a ** 3) - 2 + 2 * a
    grad_b = 200 * b - 200 * (a ** 2)
    return np.array([grad_a, grad_b])
def rosenbrock_hessian(xk):
    """Return the 2x2 Hessian matrix of the Rosenbrock function at xk.

    Layout: [[d2f/dx1^2,    d2f/dx1dx2],
             [d2f/dx1dx2,   d2f/dx2^2 ]]
    """
    a, b = xk
    mixed = -400 * a  # symmetric off-diagonal entry
    return np.array(
        [[-400 * b + 1200 * (a ** 2) + 2, mixed],
         [mixed, 200]],
        dtype=float,  # match the float64 array the original zeros-based build produced
    )
def mk_fun(xk, pk):
    """Second-order Taylor model m_k(p) = f(xk) + g.p + 0.5 * p.B.p of f at xk."""
    grad = rosenbrock_gradient(xk)
    Bk = rosenbrock_hessian(xk)
    linear_term = np.dot(pk, grad)
    quadratic_term = 0.5 * np.dot(pk, np.matmul(Bk, pk))
    return rosenbrock_fun(xk) + linear_term + quadratic_term
def rho_k(xk, pk):
    """Ratio of actual to predicted reduction:
    rho_k = (f(xk) - f(xk + pk)) / (m_k(0) - m_k(pk))."""
    actual_reduction = rosenbrock_fun(xk) - rosenbrock_fun(xk + pk)
    predicted_reduction = mk_fun(xk, [0, 0]) - mk_fun(xk, pk)
    return actual_reduction / predicted_reduction
def get_pk_fs(gradient, hessian):
    """Return the full-step Newton search direction p = -H^{-1} g.

    Improvement over the original: solves the linear system H p = -g with
    np.linalg.solve instead of forming the explicit inverse — more accurate
    and cheaper, with identical results for well-conditioned H.

    Parameters:
        gradient: gradient vector g at the current iterate.
        hessian: Hessian matrix H at the current iterate.

    Returns:
        The Newton step as a NumPy array.

    Raises:
        numpy.linalg.LinAlgError: if the Hessian is singular (the original
        inv-based version raised the same exception).
    """
    return np.linalg.solve(hessian, -np.asarray(gradient))
def find_tau(pj, dj, delta):
    """Return tau >= 0 such that ||pj + tau*dj|| = delta.

    Solves the quadratic (d.d) tau^2 + 2 (d.p) tau + (p.p - delta^2) = 0,
    which comes from expanding ||pj + tau*dj||^2 = delta^2.

    Parameters:
        pj: current point (inside or on the trust region).
        dj: search direction.
        delta: trust-region radius.

    Returns:
        The non-negative root tau, or False when no real non-negative root
        exists (return type kept for backward compatibility with callers
        that print the result).
    """
    djp = np.dot(dj, pj)
    djd = np.dot(dj, dj)
    pjp = np.dot(pj, pj)
    # Guard the discriminant explicitly: the original took sqrt of a possibly
    # negative number (RuntimeWarning + NaN) and fell through to False via
    # NaN comparisons; this makes the no-crossing case explicit.
    disc = (2 * djp) ** 2 - 4 * djd * (pjp - delta ** 2)
    if disc < 0:
        return False
    sqrt_disc = np.sqrt(disc)
    res1 = (-2 * djp + sqrt_disc) / (2 * djd)  # larger root
    res2 = (-2 * djp - sqrt_disc) / (2 * djd)  # smaller root
    if res1 >= 0 and res2 < 0:
        return res1
    if res2 >= 0 and res1 < 0:
        return res2
    if res1 >= 0 and res2 >= 0:
        # BUG FIX: the original returned False here, which the Steihaug caller
        # then used as tau (False * d == 0), silently producing a zero step.
        # Take the forward (larger) root so the step reaches the boundary.
        return res1
    return False
def steihaug(x0, delta, eps=1e-6):
    """Steihaug-Toint truncated conjugate-gradient trust-region subproblem solver.

    Approximately minimizes the quadratic model m(p) = f(x0) + g.p + 0.5 p.B.p,
    with g = rosenbrock_gradient(x0) and B = rosenbrock_hessian(x0), subject
    to ||p|| <= delta. CG iterations stop early on negative curvature or when
    the iterate leaves the trust region, returning the boundary intersection.

    Parameters:
        x0: current outer iterate (length-2 array-like).
        delta: trust-region radius.
        eps: relative residual tolerance for the CG stopping test.

    Returns:
        Approximate minimizer p of the model within the trust region.
    """
    pk = np.zeros(2)                  # CG iterate for the step; starts at the origin
    r0 = rosenbrock_gradient(x0)      # initial residual equals the gradient at x0
    rk = np.copy(r0)
    d = -np.copy(r0)                  # first search direction: steepest descent
    xk = np.copy(x0)
    Bk = rosenbrock_hessian(xk)       # model Hessian, fixed for the whole inner solve
    while True:
        # negative curvature
        if d.T @ Bk @ d <= 0:
            # Model is unbounded below along d: step to the trust-region boundary.
            # find tau that satisfies ||pj + tau*dj|| = delta
            print("Steihaug - negative curvature")
            tau = find_tau(pk, d, delta)
            print("tau = ", tau)
            return pk + tau * d
        # Standard CG step length along d.
        alpha = np.dot(rk, rk) / (d.T @ Bk @ d)
        p_next = pk + alpha * d
        # step outside trust region.
        if np.linalg.norm(p_next) >= delta:
            # Truncate the step at the boundary instead of leaving the region.
            # find tau such that ||pj + tau*dj|| = delta
            tau = find_tau(pk, d, delta)
            print("Steihaug - reached the trust-region boundary")
            print("tau = ", tau)
            return pk + tau * d
        # Residual update: r_{j+1} = r_j + alpha * B d_j.
        r_next = rk + alpha * Bk @ d
        # Converged relative to the initial residual norm.
        if np.linalg.norm(r_next) <= eps * np.linalg.norm(r0):
            print("Steihaug - met the stopping test.")
            return p_next
        # Fletcher-Reeves-style beta, then update direction and carry state forward.
        b = np.dot(r_next, r_next) / np.dot(rk, rk)
        d = -r_next + b * d
        rk = np.copy(r_next)
        pk = np.copy(p_next)
def trust_region(x0, delta=1, delta_max=300, eta=1e-3, tol=1e-8):
    """Trust-region minimization of the Rosenbrock function, with the
    subproblem solved approximately by the Steihaug method.

    Parameters:
        x0: starting point (length-2 array-like).
        delta: initial trust-region radius.
        delta_max: upper bound on the radius.
        eta: acceptance threshold for the reduction ratio rho_k.
        tol: stop when ||grad f(xk)|| <= tol.

    Returns:
        (xk, k, pk, xk_vec): final iterate, iteration count, last computed
        step, and the list of iterates (x0 plus one entry per iteration).

    Fixes over the original:
      - removed the unused `delta_prev` local and the no-op `else: xk = xk`;
      - `pk` is pre-initialized so the return statement cannot raise
        NameError when x0 already satisfies the tolerance.
    """
    k = 0
    xk = x0
    xk_vec = [x0]
    pk = np.zeros(2)  # safe value if the loop body never executes
    # Iterate until first-order optimality is satisfied.
    while np.linalg.norm(rosenbrock_gradient(xk)) > tol:
        print("iteration #", k)
        # Approximate subproblem solution via the Steihaug method.
        pk = steihaug(xk, delta, eps=1e-6)
        # Ratio of actual to model-predicted reduction.
        rk = rho_k(xk, pk)
        if rk < 0.25:
            # Poor agreement between model and function: shrink the region.
            delta = 0.25 * delta
        elif rk > 0.75 and np.linalg.norm(pk) - delta < 1e-8:
            # Very good agreement and the step reached the boundary: expand.
            delta = min(2 * delta, delta_max)
        # Accept the step only when the achieved reduction is meaningful.
        if rk > eta:
            xk = xk + pk
        k += 1
        print("f = ", rosenbrock_fun(xk))
        print("||gradient(f(x))|| = ", np.linalg.norm(rosenbrock_gradient(xk)))
        print("xk = ", xk)
        print("delta = ", delta)
        print("\n")
        xk_vec.append(xk)
    return xk, k, pk, xk_vec
if __name__ == "__main__":
    # Run the trust-region solver from the homework's starting point.
    x_star, n_iter, last_step, history = trust_region([2.8, 4.])