 
 namespace fdapde {
 
-// implementation of Broyden–Fletcher–Goldfarb–Shanno algorithm for unconstrained nonlinear optimization
-template <int N, typename... Args> class BFGS {
+template <int N> class BFGS {
    private:
     using vector_t = std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, 1>, Eigen::Matrix<double, N, 1>>;
     using matrix_t =
       std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, Dynamic>, Eigen::Matrix<double, N, N>>;
 
-    std::tuple<Args...> callbacks_;
     vector_t optimum_;
-    double value_;       // objective value at optimum
+    double value_;                 // objective value at optimum
+    int n_iter_ = 0;               // current iteration number
+    std::vector<double> values_;   // explored objective values during optimization
+    matrix_t inv_hessian_;
+
     int max_iter_;       // maximum number of iterations before forced stop
-    int n_iter_ = 0;     // current iteration number
     double tol_;         // tolerance on error before forced stop
     double step_;        // update step
    public:
     vector_t x_old, x_new, update, grad_old, grad_new;
-    matrix_t inv_hessian;
     double h;
     // constructor
-    BFGS() = default;
-    BFGS(int max_iter, double tol, double step)
-        requires(sizeof...(Args) != 0)
-        : max_iter_(max_iter), tol_(tol), step_(step) { }
-    BFGS(int max_iter, double tol, double step, Args&&... callbacks) :
-        callbacks_(std::make_tuple(std::forward<Args>(callbacks)...)), max_iter_(max_iter), tol_(tol), step_(step) { }
-    // copy semantic
-    BFGS(const BFGS& other) :
-        callbacks_(other.callbacks_), max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_) { }
+    BFGS() : max_iter_(500), tol_(1e-5), step_(1e-2) { }
+    BFGS(int max_iter, double tol, double step) : max_iter_(max_iter), tol_(tol), step_(step) { }
+    BFGS(const BFGS& other) : max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_) { }
     BFGS& operator=(const BFGS& other) {
         max_iter_ = other.max_iter_;
         tol_ = other.tol_;
         step_ = other.step_;
-        callbacks_ = other.callbacks_;
         return *this;
     }
-    template <typename ObjectiveT, typename... Functor>
-        requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...)
-    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) {
+    template <typename ObjectiveT, typename... Callbacks>
+    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) {
         fdapde_static_assert(
           std::is_same<decltype(std::declval<ObjectiveT>().operator()(vector_t())) FDAPDE_COMMA double>::value,
-          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE);
+          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE);
+        constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
+        std::tuple<Callbacks...> callbacks_ {callbacks...};
         bool stop = false;   // asserted true in case of forced stop
-        vector_t zero;
-        double error = 0;
-        auto grad = objective.gradient();
-        n_iter_ = 0;
+        double error = std::numeric_limits<double>::max();
+        int size = N == Dynamic ? x0.rows() : N;
+        auto grad = objective.gradient();
         h = step_;
-        x_old = x0, x_new = x0;
+        n_iter_ = 0;
+        x_old = x0, x_new = vector_t::Constant(size, NaN);
+        grad_old = grad(x_old), grad_new = vector_t::Constant(size, NaN);
         if constexpr (N == Dynamic) {   // inv_hessian approximated with identity matrix
-            inv_hessian = matrix_t::Identity(x0.rows(), x0.rows());
-            zero = vector_t::Zero(x0.rows());
+            inv_hessian_ = matrix_t::Identity(size, size);
         } else {
-            inv_hessian = matrix_t::Identity();
-            zero = vector_t::Zero();
+            inv_hessian_ = matrix_t::Identity();
         }
-        grad_old = grad(x_old);
-        if (grad_old.isApprox(zero)) {   // already at stationary point
-            optimum_ = x_old;
-            value_ = objective(optimum_);
-            if constexpr (sizeof...(Functor) == 1) { (func(value_), ...); }
-            return optimum_;
-        }
+        update = -inv_hessian_ * grad_old;
+        stop |= internals::exec_grad_hooks(*this, objective, callbacks_);
         error = grad_old.norm();
+        values_.push_back(objective(x_old));
 
         while (n_iter_ < max_iter_ && error > tol_ && !stop) {
-            // compute update direction
-            update = -inv_hessian * grad_old;
-            stop |= execute_pre_update_step(*this, objective, callbacks_);
+            stop |= internals::exec_adapt_hooks(*this, objective, callbacks_);
             // update along descent direction
             x_new = x_old + h * update;
             grad_new = grad(x_new);
-            if (grad_new.isApprox(zero)) {   // already at stationary point
-                optimum_ = x_old;
-                value_ = objective(optimum_);
-                if constexpr (sizeof...(Functor) == 1) { (func(value_), ...); }
-                return optimum_;
-            }
             // update inverse hessian approximation
             vector_t delta_x = x_new - x_old;
             vector_t delta_grad = grad_new - grad_old;
             double xg = delta_x.dot(delta_grad);
-            vector_t hx = inv_hessian * delta_grad;
+            vector_t hx = inv_hessian_ * delta_grad;
 
             matrix_t U = (1 + (delta_grad.dot(hx)) / xg) * ((delta_x * delta_x.transpose()) / xg);
             matrix_t V = ((hx * delta_x.transpose() + delta_x * hx.transpose())) / xg;
-            inv_hessian += (U - V);
+            inv_hessian_ += (U - V);
             // prepare next iteration
-            if constexpr (sizeof...(Functor) == 1) { (func(objective(x_old)), ...); }
-            error = grad_new.norm();
+            update = -inv_hessian_ * grad_new;
             stop |=
-              (execute_post_update_step(*this, objective, callbacks_) || execute_stopping_criterion(*this, objective));
+              (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective));
             x_old = x_new;
             grad_old = grad_new;
+            error = grad_new.norm();
+            values_.push_back(objective(x_old));
             n_iter_++;
         }
         optimum_ = x_old;
-        value_ = objective(optimum_);
-        if constexpr (sizeof...(Functor) == 1) { (func(value_), ...); }
+        value_ = values_.back();
         return optimum_;
     }
-    // getters
-    vector_t optimum() const { return optimum_; }
+    // observers
+    const vector_t& optimum() const { return optimum_; }
     double value() const { return value_; }
     int n_iter() const { return n_iter_; }
+    const std::vector<double>& values() const { return values_; }
 };
 
 }   // namespace fdapde
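
Usage note: with this change callbacks are forwarded per call to optimize() instead of being stored in the optimizer, so a plain run takes no callback arguments at all. The snippet below is a minimal sketch of how the reworked interface could be driven; the quadratic_field type, the main() driver, and the include lines are illustrative assumptions, not part of this changeset, and it presumes Dynamic is the library's dynamic-size tag as used in the diff.

#include <Eigen/Dense>
#include <iostream>
// plus the fdaPDE header defining fdapde::BFGS (exact path depends on the library layout)

// hypothetical objective f(x) = 0.5 * x^T x, exposing the two operations the optimizer relies on:
// operator()(vector) -> double and gradient() returning a callable vector -> vector
struct quadratic_field {
    double operator()(const Eigen::VectorXd& x) const { return 0.5 * x.squaredNorm(); }
    auto gradient() const {
        return [](const Eigen::VectorXd& x) -> Eigen::VectorXd { return x; };
    }
};

int main() {
    fdapde::BFGS<fdapde::Dynamic> opt(50, 1e-6, 1.0);             // max_iter, tol, step
    Eigen::VectorXd x0 = Eigen::VectorXd::Ones(4);
    Eigen::VectorXd xmin = opt.optimize(quadratic_field(), x0);   // no callbacks: fixed-step BFGS run
    std::cout << "f_min = " << opt.value() << " after " << opt.n_iter() << " iterations\n";
    return 0;
}

For this particular quadratic a unit step is already exact, so no step-control callback is needed; in general the trailing optimize() arguments are where a line-search or other adaptation policy would be passed, which appears to be the role of the internals::exec_adapt_hooks call at the top of the iteration loop.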