Line data Source code
1 : /*
2 : * File: LBFGSBuffer.h
3 : * Author: Pantelis Sopasakis, Lorenzo Stella
4 : *
5 : * Created on April 13, 2016, 2:32 PM
6 : *
7 : * This file (LBFGSBuffer.h) as well as the corresponding implementation
8 : * is a C++-translation of the original source code of L. Stella (libLBFGS) which
9 : * can be found at https://github.com/lostella/libLBFGS.
10 : *
11 : * ForBES is free software: you can redistribute it and/or modify
12 : * it under the terms of the GNU Lesser General Public License as published by
13 : * the Free Software Foundation, either version 3 of the License, or
14 : * (at your option) any later version.
15 : *
16 : * ForBES is distributed in the hope that it will be useful,
17 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : * GNU Lesser General Public License for more details.
20 : *
21 : * You should have received a copy of the GNU Lesser General Public License
22 : * along with ForBES. If not, see <http://www.gnu.org/licenses/>.
23 : */
24 :
25 : #ifndef LBFGSBUFFER_H
26 : #define LBFGSBUFFER_H
27 :
28 :
29 : #include "Matrix.h"
30 : #include "MatrixFactory.h"
31 :
32 :
33 : /**
34 : * \class LBFGSBuffer
35 : * \brief Buffer for the Limited Memory BFGS algorithm
36 : * \version version 0.1
37 : * \ingroup FBSolver-group
38 : *
39 : * LBFGSBuffer is a finite length buffer for pairs \f$(s_k, y_k)\f$ which
40 : * are necessary for the computation of \f$r_k = H_k q_k\f$ in the LBFGS algorithm.
41 : *
42 : * \todo Implement generalized inner product so that this class supports general
43 : * matrices and not only vectors.
44 : */
45 : class LBFGSBuffer {
46 : public:
47 :
48 : /**
49 : *
50 : * This constant signals the <em>end of buffer</em> (EOB) while
51 : * accessing past (buffered) values.
52 : */
53 : static const long LBFGS_BUFFER_EOB;
54 :
55 : /**
56 : * Constructs a new instance of LBFGSBuffer for vectors of size \c n
57 : * and using a memory length \c mem.
58 : *
59 : * @param n size of x
60 : * @param mem memory
61 : */
62 : LBFGSBuffer(size_t n, size_t mem);
63 :
64 : /**
65 : * Default destructor.
66 : */
67 : virtual ~LBFGSBuffer();
68 :
69 : /**
70 : * Buffered values of \f$s_{k} = x_{k+1} - x_{k}\f$
71 : * @return pointer to buffered data for all \c s
72 : *
73 : * \sa #cursor
74 : * \sa #push
75 : */
76 : Matrix* get_S() const;
77 :
78 : /**
79 : * Buffered values of \f$y_{k} = \nabla f_{k+1} - \nabla f_{k}\f$
80 : * @return pointer to buffered data for all \c y
81 : *
82 : * \sa #cursor
83 : * \sa #push
84 : */
85 : Matrix* get_Y() const;
86 :
87 :
88 : /**
89 : * Buffered values of \f$\psi_{k} = \langle s_k, y_k \rangle\f$
90 : * @return pointer to buffered data for all \c y
91 : *
92 : * \sa #cursor
93 : * \sa #push
94 : */
95 : Matrix* get_Ys() const;
96 :
97 : /**
98 : * Internal buffer used for values \f$\alpha_k\f$.
99 : *
100 : * @return pointer to buffered values of alpha
101 : *
102 : * \sa #cursor
103 : * \sa #push
104 : */
105 : Matrix* get_alphas() const;
106 :
107 : /**
108 : * Current cursor position.
109 : *
110 : * For instance, if you need to access the current stored vector \f$y_k\f$
111 : * you can use the following piece of code:
112 : *
113 : * \code{.cpp}
114 : * LBFGSBuffer * buffer = new LBFGSBuffer( ... );
115 : * long c = buffer.cursor();
116 : * if (c != LBFGSBuffer::LBFGS_BUFFER_EOB){
117 : * Matrix * Y = buffer->get_Y(); // Y cache
118 : * Matrix yk = MatrixFactory::ShallowSubVector(*Y, c); // Current y
119 : * }
120 : * \endcode
121 : *
122 : * @return cursor position
123 : */
124 : long cursor() const;
125 :
126 : /**
127 : * Provide a new pair \f$(s,y)\f$ to the buffer. This method updates the
128 : * buffers of \c s, \c y and of their inner product.
129 : *
130 : * \note The buffer stores <em>hard copies</em> of the pairs (s,y) that are
131 : * provided to it.
132 : *
133 : * @param s the update difference \f$s_{k} = x_{k+1} - x_{k}\f$
134 : * @param y the gradient update different \f$y_{k} = \nabla f_{k+1} - \nabla f_{k}\f$
135 : *
136 : * @return returns \link ForBESUtils::STATUS_OK STATUS_OK\endlink on success
137 : */
138 : int push(Matrix * s, Matrix * y);
139 :
140 : /**
141 : * Resets all buffers.
142 : * @return
143 : */
144 : int reset();
145 :
146 : /**
147 : * Perform the two-loop update and computes
148 : * \f[
149 : * r_k = H_k q_k,
150 : * \f]
151 : * for a given vector \f$q_k\f$ without explicitly forming or using \f$H_k\f$.
152 : *
153 : * Details about the algorithm can be found in J. Nocedal and S.J. Wright,
154 : * <em>Numerical Optimization</em>, Second edition, Springer, 2006 (see Alg.
155 : * 7.4).
156 : *
157 : * @param q (input) given vector \f$q_k\f$
158 : *
159 : * @param r (output) result, that is \f$r_k = H_k q_k\f$.
160 : *
161 : * @param gamma0 (input) Initial guess of the Hessian at the first iteration
162 : * which is given by \f$H_k^0 = \gamma_0^0 I\f$. A possible choice is computed
163 : * by #hessian_estimate.
164 : *
165 : * @return Returns \link ForBESUtils::STATUS_OK STATUS_OK\endlink on success
166 : *
167 : * \sa #hessian_estimate
168 : */
169 : int update(const Matrix * q, Matrix * r, double & gamma0);
170 :
171 :
172 : /**
173 : * Computes an estimation of the Hessian using the buffered info.
174 : *
175 : * The Hessian approximation is a scaled-identity matrix of the form
176 : * \f$H = \gamma I\f$; this method returns the scalar \f$\gamma\f$.
177 : *
178 : * For details see Equation (7.20) in J. Nocedal and S.J. Wright,
179 : * <em>Numerical Optimization</em>, Second edition, Springer, 2006.
180 : *
181 : * @return gamma (Hessian approximation)
182 : *
183 : * \sa #update
184 : */
185 : double hessian_estimate();
186 :
187 : /**
188 : * Returns the current size of the buffer.
189 : *
190 : * @return buffer size
191 : *
192 : */
193 : size_t get_current_mem() const;
194 :
195 : /**
196 : * In order to access the \f$k-j\f$ cached vectors \f$y_{k-j}\f$ and
197 : * \f$s_{k-j}\f$, one first needs to retrieve their index in the cache
198 : * which is returned by this method
199 : *
200 : * @param j past index (j > 0)
201 : * @return cache index
202 : */
203 108 : inline long get_k_minus_j(size_t j) {
204 108 : register long t = m_idx - j;
205 108 : if (t >= 0) {
206 55 : return t;
207 53 : } else if (m_current_mem < m_mem || j >= m_mem) {
208 39 : return LBFGS_BUFFER_EOB;
209 : } else {
210 14 : return (m_mem + t);
211 : }
212 : }
213 :
214 : private:
215 :
216 : size_t m_mem; /**< memory length */
217 : size_t m_current_mem; /**< current memory (the buffer may not be full) */
218 : long m_idx; /**< current index */
219 : Matrix * m_S; /**< Buffer of past s_k */
220 : Matrix * m_Y; /**< Buffer of past y_k */
221 : Matrix * m_Ys; /**< Buffer of past values of y_k'*s_k */
222 : Matrix * m_alphas; /**< The alphas in the two-loop recursion (loop 1) */
223 :
224 :
225 : };
226 :
227 : #endif /* LBFGSBUFFER_H */
228 :
|