conspire/math/optimize/gradient_descent/mod.rs

#[cfg(test)]
mod test;

use super::{
    super::{Jacobian, Matrix, Scalar, Solution, Tensor, Vector},
    BacktrackingLineSearch, EqualityConstraint, FirstOrderOptimization, LineSearch,
    OptimizationError, ZerothOrderRootFinding,
};
use crate::ABS_TOL;
use std::{
    fmt::{self, Debug, Formatter},
    ops::Mul,
};

/// Factor by which the dual method cuts back the step size after a failed inner solve.
const CUTBACK_FACTOR: Scalar = 0.8;
/// Complement of the cutback factor, `1.0 - CUTBACK_FACTOR`.
const CUTBACK_FACTOR_MINUS_ONE: Scalar = 1.0 - CUTBACK_FACTOR;
/// Step size used before an estimate from successive iterates is available.
const INITIAL_STEP_SIZE: Scalar = 1e-2;

/// The method of gradient descent.
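///
/// A minimal construction sketch; the import path below is an assumption
/// rather than taken from this module, so the example is not compiled as a
/// doctest.
///
/// ```ignore
/// use conspire::math::optimize::GradientDescent;
///
/// // Tighten the absolute tolerance and allow more steps than the defaults.
/// let optimizer = GradientDescent {
///     abs_tol: 1e-10,
///     max_steps: 500,
///     ..Default::default()
/// };
/// ```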
pub struct GradientDescent {
    /// Absolute error tolerance.
    pub abs_tol: Scalar,
    /// Whether to use the Lagrangian dual method for linear equality constraints.
    pub dual: bool,
    /// Line search algorithm.
    pub line_search: LineSearch,
    /// Maximum number of steps.
    pub max_steps: usize,
    /// Relative error tolerance.
    pub rel_tol: Option<Scalar>,
}

impl<J, X> BacktrackingLineSearch<J, X> for GradientDescent {
    fn get_line_search(&self) -> &LineSearch {
        &self.line_search
    }
}

impl Debug for GradientDescent {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "GradientDescent {{ abs_tol: {:?}, dual: {:?}, line_search: {}, max_steps: {:?}, rel_tol: {:?} }}",
            self.abs_tol, self.dual, self.line_search, self.max_steps, self.rel_tol
        )
    }
}

impl Default for GradientDescent {
    fn default() -> Self {
        Self {
            abs_tol: ABS_TOL,
            dual: false,
            line_search: LineSearch::None,
            max_steps: 250,
            rel_tol: None,
        }
    }
}

impl<X> ZerothOrderRootFinding<X> for GradientDescent
where
    X: Jacobian + Solution,
    for<'a> &'a X: Mul<Scalar, Output = X>,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    fn root(
        &self,
        function: impl FnMut(&X) -> Result<X, String>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizationError> {
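        // Root finding treats `function` as the residual, so there is no
        // objective to evaluate: the panicking closures below stand in for the
        // objective that a backtracking line search would need, which is why
        // line search must be disabled here.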
        match equality_constraint {
            EqualityConstraint::Fixed(indices) => constrained_fixed(
                self,
                |_: &X| panic!("No line search in root finding."),
                function,
                initial_guess,
                indices,
            ),
            EqualityConstraint::Linear(constraint_matrix, constraint_rhs) => {
                if self.dual {
                    constrained_dual(
                        self,
                        function,
                        initial_guess,
                        constraint_matrix,
                        constraint_rhs,
                    )
                } else {
                    constrained(
                        self,
                        function,
                        initial_guess,
                        constraint_matrix,
                        constraint_rhs,
                    )
                }
            }
            EqualityConstraint::None => unconstrained(
                self,
                |_: &X| panic!("No line search in root finding."),
                function,
                initial_guess,
                None,
            ),
        }
    }
}

impl<X> FirstOrderOptimization<Scalar, X> for GradientDescent
where
    X: Jacobian + Solution,
    for<'a> &'a X: Mul<Scalar, Output = X>,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    fn minimize(
        &self,
        function: impl FnMut(&X) -> Result<Scalar, String>,
        jacobian: impl FnMut(&X) -> Result<X, String>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizationError> {
        match equality_constraint {
            EqualityConstraint::Fixed(indices) => {
                constrained_fixed(self, function, jacobian, initial_guess, indices)
            }
            EqualityConstraint::Linear(constraint_matrix, constraint_rhs) => {
                if self.dual {
                    constrained_dual(
                        self,
                        jacobian,
                        initial_guess,
                        constraint_matrix,
                        constraint_rhs,
                    )
                } else {
                    constrained(
                        self,
                        jacobian,
                        initial_guess,
                        constraint_matrix,
                        constraint_rhs,
                    )
                }
            }
            EqualityConstraint::None => {
                unconstrained(self, function, jacobian, initial_guess, None)
            }
        }
    }
}

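/// Unconstrained gradient descent with a step size estimated from successive
/// iterates and an optional backtracking line search. The optional
/// `linear_equality_constraint` carries a pre-assembled `(matrix, multipliers)`
/// pair so the dual method can reuse this routine on the Lagrangian gradient.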
fn unconstrained<X>(
    gradient_descent: &GradientDescent,
    mut function: impl FnMut(&X) -> Result<Scalar, String>,
    mut jacobian: impl FnMut(&X) -> Result<X, String>,
    initial_guess: X,
    linear_equality_constraint: Option<(&Matrix, &Vector)>,
) -> Result<X, OptimizationError>
where
    X: Jacobian + Solution,
    for<'a> &'a X: Mul<Scalar, Output = X>,
{
    let constraint = linear_equality_constraint
        .map(|(constraint_matrix, multipliers)| multipliers * constraint_matrix);
    let mut residual;
    let mut residual_change = initial_guess.clone() * 0.0;
    let mut solution = initial_guess.clone();
    let mut solution_change = solution.clone();
    let mut step_size = INITIAL_STEP_SIZE;
    let mut step_trial;
    for _ in 0..=gradient_descent.max_steps {
        residual = if let Some(ref extra) = constraint {
            jacobian(&solution)? - extra
        } else {
            jacobian(&solution)?
        };
        if residual.norm_inf() < gradient_descent.abs_tol {
            return Ok(solution);
        } else {
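            // Estimate the step size from successive iterates: with
            // dx = x_k - x_{k-1} and dg = g_k - g_{k-1}, the trial step
            // |dg·dx| / (dg·dg) matches the second Barzilai-Borwein step.
            // The in-place subtractions below produce -dx and -dg, whose
            // signs cancel in the ratio.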
            solution_change -= &solution;
            residual_change -= &residual;
            step_trial =
                residual_change.full_contraction(&solution_change) / residual_change.norm_squared();
            if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                step_size = step_trial.abs()
            }
            step_size = gradient_descent.backtracking_line_search(
                &mut function,
                &mut jacobian,
                &solution,
                &residual,
                &residual,
                step_size,
            )?;
            residual_change = residual.clone();
            solution_change = solution.clone();
            solution -= residual * step_size;
        }
    }
    Err(OptimizationError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}

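/// Gradient descent with fixed degrees of freedom: residual entries at the
/// given `indices` are zeroed each iteration, so those components of the
/// initial guess are never updated.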
fn constrained_fixed<X>(
    gradient_descent: &GradientDescent,
    mut function: impl FnMut(&X) -> Result<Scalar, String>,
    mut jacobian: impl FnMut(&X) -> Result<X, String>,
    initial_guess: X,
    indices: Vec<usize>,
) -> Result<X, OptimizationError>
where
    X: Jacobian + Solution,
    for<'a> &'a X: Mul<Scalar, Output = X>,
{
    let mut relative_scale = 0.0;
    let mut residual: X;
    let mut residual_change = initial_guess.clone() * 0.0;
    let mut residual_norm;
    let mut solution = initial_guess.clone();
    let mut solution_change = solution.clone();
    let mut step_size = INITIAL_STEP_SIZE;
    let mut step_trial;
    for iteration in 0..=gradient_descent.max_steps {
        residual = jacobian(&solution)?;
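        // Zero the residual at the fixed degrees of freedom so that the
        // corresponding entries of the solution are left untouched.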
        residual.zero_out(&indices);
        residual_norm = residual.norm_inf();
        if gradient_descent.rel_tol.is_some() && iteration == 0 {
            relative_scale = residual_norm
        }
        if residual_norm < gradient_descent.abs_tol {
            return Ok(solution);
        } else if let Some(rel_tol) = gradient_descent.rel_tol
            && residual_norm / relative_scale < rel_tol
        {
            return Ok(solution);
        } else {
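            // Same step-size estimate from successive iterates as in
            // `unconstrained`.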
            solution_change -= &solution;
            residual_change -= &residual;
            step_trial =
                residual_change.full_contraction(&solution_change) / residual_change.norm_squared();
            if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                step_size = step_trial.abs()
            }
            step_size = gradient_descent.backtracking_line_search(
                &mut function,
                &mut jacobian,
                &solution,
                &residual,
                &residual,
                step_size,
            )?;
            residual_change = residual.clone();
            solution_change = solution.clone();
            solution -= residual * step_size;
        }
    }
    Err(OptimizationError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}

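/// First-order primal-dual iteration for a linear equality constraint
/// `Ax = b`: gradient descent on the solution paired with gradient ascent on
/// the Lagrange multipliers of `L(x, λ) = f(x) - λ·(Ax - b)`.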
fn constrained<X>(
    gradient_descent: &GradientDescent,
    mut jacobian: impl FnMut(&X) -> Result<X, String>,
    initial_guess: X,
    constraint_matrix: Matrix,
    constraint_rhs: Vector,
) -> Result<X, OptimizationError>
where
    X: Jacobian,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    if !matches!(gradient_descent.line_search, LineSearch::None) {
        panic!("Line search needs the exact penalty function in constrained optimization.")
    }
    let mut residual_solution;
    let mut residual_solution_change = initial_guess.clone() * 0.0;
    let mut solution = initial_guess.clone();
    let mut solution_change = solution.clone();
    let mut step_size_solution = INITIAL_STEP_SIZE;
    let mut step_trial_solution;
    let num_constraints = constraint_rhs.len();
    let mut residual_multipliers;
    let mut residual_multipliers_change = Vector::zero(num_constraints);
    let mut multipliers = Vector::zero(num_constraints);
    let mut multipliers_change = Vector::zero(num_constraints);
    let mut step_size_multipliers = INITIAL_STEP_SIZE;
    let mut step_trial_multipliers;
    let mut step_size;
    for _ in 0..=gradient_descent.max_steps {
        residual_solution = jacobian(&solution)? - &multipliers * &constraint_matrix;
        residual_multipliers = &constraint_rhs - &constraint_matrix * &solution;
        if residual_solution.norm_inf() < gradient_descent.abs_tol
            && residual_multipliers.norm_inf() < gradient_descent.abs_tol
        {
            return Ok(solution);
        } else {
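            // Estimate separate step sizes for the solution and the
            // multipliers from their respective iterate histories; the
            // smaller of the two is used for both updates below.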
            solution_change -= &solution;
            residual_solution_change -= &residual_solution;
            step_trial_solution = residual_solution_change.full_contraction(&solution_change)
                / residual_solution_change.norm_squared();
            if step_trial_solution.abs() > 0.0 && !step_trial_solution.is_nan() {
                step_size_solution = step_trial_solution.abs()
            }
            residual_solution_change = residual_solution.clone();
            solution_change = solution.clone();
            multipliers_change -= &multipliers;
            residual_multipliers_change -= &residual_multipliers;
            step_trial_multipliers = residual_multipliers_change
                .full_contraction(&multipliers_change)
                / residual_multipliers_change.norm_squared();
            if step_trial_multipliers.abs() > 0.0 && !step_trial_multipliers.is_nan() {
                step_size_multipliers = step_trial_multipliers.abs()
            }
            residual_multipliers_change = residual_multipliers.clone();
            multipliers_change = multipliers.clone();
            step_size = step_size_solution.min(step_size_multipliers);
            solution -= residual_solution * step_size;
            multipliers += residual_multipliers * step_size;
        }
    }
    Err(OptimizationError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}

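/// Dual (Uzawa-like) iteration for a linear equality constraint `Ax = b`:
/// each outer step minimizes the Lagrangian in the solution variables with
/// the multipliers held fixed, then takes an ascent step on the multipliers
/// along the constraint residual `b - Ax`.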
fn constrained_dual<X>(
    gradient_descent: &GradientDescent,
    mut jacobian: impl FnMut(&X) -> Result<X, String>,
    initial_guess: X,
    constraint_matrix: Matrix,
    constraint_rhs: Vector,
) -> Result<X, OptimizationError>
where
    X: Jacobian + Solution,
    for<'a> &'a X: Mul<Scalar, Output = X>,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    if !matches!(gradient_descent.line_search, LineSearch::None) {
        panic!("Line search needs the exact penalty function in constrained optimization.")
    }
    let num_constraints = constraint_rhs.len();
    let mut multipliers = Vector::zero(num_constraints);
    let mut multipliers_change = multipliers.clone();
    let mut residual;
    let mut residual_change = Vector::zero(num_constraints);
    let mut solution = initial_guess;
    let mut step_size = INITIAL_STEP_SIZE;
    let mut step_trial;
    for _ in 0..=gradient_descent.max_steps {
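        // Inner solve: minimize in the solution variables with the current
        // multipliers held fixed.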
        if let Ok(result) = unconstrained(
            gradient_descent,
            |_: &X| {
                panic!("Line search needs the exact penalty function in constrained optimization.")
            },
            &mut jacobian,
            solution.clone(),
            Some((&constraint_matrix, &multipliers)),
        ) {
            solution = result;
            residual = &constraint_rhs - &constraint_matrix * &solution;
            if residual.norm_inf() < gradient_descent.abs_tol {
                return Ok(solution);
            } else {
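                // Ascent step on the multipliers along the constraint
                // residual, with the step size estimated from successive
                // multiplier iterates.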
                multipliers_change -= &multipliers;
                residual_change -= &residual;
                step_trial = residual_change.full_contraction(&multipliers_change)
                    / residual_change.norm_squared();
                if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                    step_size = step_trial.abs()
                }
                residual_change = residual.clone();
                multipliers_change = multipliers.clone();
                multipliers += residual * step_size;
            }
        } else {
            // The inner solve failed: pull the multipliers part of the way
            // back toward their previous values and shrink the step size,
            // much as a line-search cutback (cf. LineSearch::Error) would.
            multipliers -= (multipliers.clone() - &multipliers_change) * CUTBACK_FACTOR_MINUS_ONE;
            step_size *= CUTBACK_FACTOR;
        }
    }
    Err(OptimizationError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}
397}