conspire/math/optimize/gradient_descent/mod.rs

#[cfg(test)]
mod test;

use super::{
    super::{Tensor, TensorRank0},
    EqualityConstraint, FirstOrderOptimization, OptimizeError, ZerothOrderRootFinding,
};
use crate::ABS_TOL;

/// The method of gradient descent.
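///
/// # Example
///
/// A minimal construction sketch (the import path is assumed from this module's
/// location and may differ from the crate's public re-exports):
///
/// ```ignore
/// use conspire::math::optimize::GradientDescent;
///
/// // Default settings use the crate-wide ABS_TOL and at most 250 steps.
/// let solver = GradientDescent::default();
///
/// // The fields are public, so the tolerance and step limit can be set directly.
/// let tighter = GradientDescent {
///     abs_tol: 1e-10,
///     max_steps: 1_000,
/// };
/// ```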
#[derive(Debug)]
pub struct GradientDescent {
    /// Absolute error tolerance.
    pub abs_tol: TensorRank0,
    /// Maximum number of steps.
    pub max_steps: usize,
}

impl Default for GradientDescent {
    fn default() -> Self {
        Self {
            abs_tol: ABS_TOL,
            max_steps: 250,
        }
    }
}

impl<X> ZerothOrderRootFinding<X> for GradientDescent
where
    X: Tensor,
{
    fn root(
        &self,
        function: impl Fn(&X) -> Result<X, OptimizeError>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizeError> {
        match equality_constraint {
            EqualityConstraint::Linear(_constraint_matrix, _constraint_rhs) => {
                unimplemented!("This may work with gradient ascent on multipliers, or not at all.")
            }
            EqualityConstraint::None => {
                let mut residual;
                let mut residual_change = initial_guess.clone() * 0.0;
                let mut solution = initial_guess;
                let mut solution_change = solution.clone();
                let mut step_size = 1e-2;
                let mut step_trial;
                for _ in 0..self.max_steps {
                    residual = function(&solution)?;
                    if residual.norm() < self.abs_tol {
                        return Ok(solution);
                    } else {
                        // Differences between the previous and current iterates.
                        solution_change -= &solution;
                        residual_change -= &residual;
                        // Short Barzilai-Borwein-type step: (dx*dr)/(dr*dr).
                        step_trial = residual_change.full_contraction(&solution_change)
                            / residual_change.norm_squared();
                        if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                            step_size = step_trial.abs()
                        } else {
                            // Degenerate differences (e.g. the first pass): grow the step instead.
                            step_size *= 1.1
                        }
                        residual_change = residual.clone();
                        solution_change = solution.clone();
                        solution -= residual * step_size;
                    }
                }
            }
        }
        Err(OptimizeError::MaximumStepsReached(
            self.max_steps,
            format!("{:?}", &self),
        ))
    }
}

impl<F, X> FirstOrderOptimization<F, X> for GradientDescent
where
    X: Tensor,
{
    fn minimize(
        &self,
        _function: impl Fn(&X) -> Result<F, OptimizeError>,
        jacobian: impl Fn(&X) -> Result<X, OptimizeError>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizeError> {
        match equality_constraint {
            EqualityConstraint::Linear(_constraint_matrix, _constraint_rhs) => {
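                // One possible (untested) reading of "gradient ascent on multipliers"
                // in the message below: for the constraint A*x = b, work with the
                // Lagrangian f(x) + l*(A*x - b) and alternate a descent step on x,
                //     x <- x - step * (grad f(x) + A^T * l),
                // with an ascent step on the multipliers l,
                //     l <- l + step * (A*x - b),
                // stopping once both the Lagrangian gradient and the constraint
                // violation are small.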
                unimplemented!("This may work with gradient ascent on multipliers, or not at all.")
            }
            EqualityConstraint::None => {
                //
                // How should the step size be chosen: the short Barzilai-Borwein step
                // (used below, dx*dg/dg*dg) or the long one (dx*dx/dx*dg)?
                // Should different options for calculating the step size be allowed,
                // e.g. backtracking line search with (1) a simple decrease check,
                // (2) the Armijo condition (sufficient decrease), (3) the Wolfe
                // conditions, or (4) a trust region? See the sketch below.
                // Those methods might also be abstracted for reuse elsewhere, e.g. in a
                // nonlinear conjugate gradient solver, which in turn admits different
                // formulas for beta.
                //
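                // For illustration only, a backtracking Armijo line search might look
                // like the following hypothetical (unused) helper. It assumes an
                // objective that returns a TensorRank0 and relies only on the Tensor
                // operations already used in this module:
                //
                //     fn armijo_step<X: Tensor>(
                //         f: impl Fn(&X) -> Result<TensorRank0, OptimizeError>,
                //         x: &X,
                //         gradient: &X,
                //         mut alpha: TensorRank0,
                //     ) -> Result<TensorRank0, OptimizeError> {
                //         let (c_1, shrink) = (1e-4, 0.5);
                //         let f_0 = f(x)?;
                //         // For the steepest-descent direction d = -g, the directional
                //         // derivative g*d equals -|g|^2.
                //         let slope = gradient.norm_squared();
                //         for _ in 0..50 {
                //             let mut trial = x.clone();
                //             trial -= gradient.clone() * alpha;
                //             // Armijo condition: f(x + alpha*d) <= f(x) + c_1*alpha*(g*d).
                //             if f(&trial)? <= f_0 - c_1 * alpha * slope {
                //                 break;
                //             }
                //             alpha *= shrink;
                //         }
                //         Ok(alpha)
                //     }
                //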
                let mut residual;
                let mut residual_change = initial_guess.clone() * 0.0;
                let mut solution = initial_guess;
                let mut solution_change = solution.clone();
                let mut step_size = 1e-2;
                let mut step_trial;
                for _ in 0..self.max_steps {
                    // Here the residual driven toward zero is the gradient of the
                    // objective, as returned by `jacobian`.
                    residual = jacobian(&solution)?;
                    if residual.norm() < self.abs_tol {
                        return Ok(solution);
                    } else {
                        solution_change -= &solution;
                        residual_change -= &residual;
                        // Short Barzilai-Borwein-type step, as discussed above.
                        step_trial = residual_change.full_contraction(&solution_change)
                            / residual_change.norm_squared();
                        if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                            step_size = step_trial.abs()
                        } else {
                            step_size *= 1.1
                        }
                        residual_change = residual.clone();
                        solution_change = solution.clone();
                        solution -= residual * step_size;
                    }
                }
            }
        }
        Err(OptimizeError::MaximumStepsReached(
            self.max_steps,
            format!("{:?}", &self),
        ))
    }
}