conspire/math/optimize/gradient_descent/mod.rs

#[cfg(test)]
mod test;

use super::{
    super::{Jacobian, Matrix, Scalar, Tensor, TensorVec, Vector},
    EqualityConstraint, FirstOrderOptimization, LineSearch, OptimizeError, ZerothOrderRootFinding,
};
use crate::ABS_TOL;
use std::ops::Mul;

/// The method of gradient descent.
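///
/// The step size is adapted from the differences of successive iterates and
/// residuals (a Barzilai-Borwein-type estimate), and linear equality
/// constraints are handled by an outer dual ascent on the multipliers.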
#[derive(Debug)]
pub struct GradientDescent {
    /// Absolute error tolerance.
    pub abs_tol: Scalar,
    /// Line search algorithm.
    pub line_search: Option<LineSearch>,
    /// Maximum number of steps.
    pub max_steps: usize,
}

impl Default for GradientDescent {
    fn default() -> Self {
        Self {
            abs_tol: ABS_TOL,
            line_search: None,
            max_steps: 250,
        }
    }
}

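// CUTBACK_FACTOR shrinks the dual step size (and CUTBACK_FACTOR_MINUS_ONE rolls
// the multipliers part of the way back) when the inner descent fails to
// converge; INITIAL_STEP_SIZE seeds both loops before an adaptive estimate is
// available.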
const CUTBACK_FACTOR: Scalar = 0.8;
const CUTBACK_FACTOR_MINUS_ONE: Scalar = 1.0 - CUTBACK_FACTOR;
const INITIAL_STEP_SIZE: Scalar = 1e-2;

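// Zeroth-order root finding: the supplied closure is treated as the residual
// itself, so no separate jacobian is required; a linear equality constraint is
// handled by dual ascent, otherwise plain descent is used.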
impl<X> ZerothOrderRootFinding<X> for GradientDescent
where
    X: Jacobian,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    fn root(
        &self,
        function: impl Fn(&X) -> Result<X, OptimizeError>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizeError> {
        match equality_constraint {
            EqualityConstraint::Linear(constraint_matrix, constraint_rhs) => dual_ascent(
                self,
                function,
                initial_guess,
                constraint_matrix,
                constraint_rhs,
            ),
            EqualityConstraint::None => descent(self, function, initial_guess, None),
        }
    }
}

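// First-order minimization: only the jacobian closure is used, since without a
// line search the objective value itself is never evaluated.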
impl<F, X> FirstOrderOptimization<F, X> for GradientDescent
where
    X: Jacobian,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    fn minimize(
        &self,
        _function: impl Fn(&X) -> Result<F, OptimizeError>,
        jacobian: impl Fn(&X) -> Result<X, OptimizeError>,
        initial_guess: X,
        equality_constraint: EqualityConstraint,
    ) -> Result<X, OptimizeError> {
        match equality_constraint {
            EqualityConstraint::Linear(constraint_matrix, constraint_rhs) => dual_ascent(
                self,
                jacobian,
                initial_guess,
                constraint_matrix,
                constraint_rhs,
            ),
            EqualityConstraint::None => descent(self, jacobian, initial_guess, None),
        }
    }
}

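// Gradient descent on the residual for fixed multipliers: iterates until the
// infinity norm of the residual falls below the absolute tolerance or the
// maximum number of steps is reached.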
fn descent<X>(
    gradient_descent: &GradientDescent,
    jacobian: impl Fn(&X) -> Result<X, OptimizeError>,
    initial_guess: X,
    linear_equality_constraint: Option<(&Matrix, &Vector)>,
) -> Result<X, OptimizeError>
where
    X: Jacobian,
{
    if gradient_descent.line_search.is_some() {
        unimplemented!();
    }
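    // For fixed multipliers, the constraint contributes a constant term to the
    // residual, so it is precomputed outside the loop.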
    let constraint = linear_equality_constraint
        .map(|(constraint_matrix, multipliers)| multipliers * constraint_matrix);
    let mut residual;
    let mut residual_change = initial_guess.clone() * 0.0;
    let mut solution = initial_guess.clone();
    let mut solution_change = solution.clone();
    let mut step_size = INITIAL_STEP_SIZE;
    let mut step_trial;
    for _ in 0..gradient_descent.max_steps {
        residual = if let Some(ref extra) = constraint {
            jacobian(&solution)? - extra
        } else {
            jacobian(&solution)?
        };
        if residual.norm_inf() < gradient_descent.abs_tol {
            return Ok(solution);
        } else {
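            // Adapt the step size from the differences of successive solutions
            // and residuals (a Barzilai-Borwein-type estimate), keeping the
            // previous step size when the estimate is zero or NaN.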
            solution_change -= &solution;
            residual_change -= &residual;
            step_trial =
                residual_change.full_contraction(&solution_change) / residual_change.norm_squared();
            if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                step_size = step_trial.abs()
            }
            residual_change = residual.clone();
            solution_change = solution.clone();
            solution -= residual * step_size;
        }
    }
    Err(OptimizeError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}

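// Dual ascent for a linear equality constraint Ax = b: for fixed multipliers,
// the inner descent updates the solution; the multipliers are then advanced
// along the constraint residual b - Ax with their own adaptive step size.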
fn dual_ascent<X>(
    gradient_descent: &GradientDescent,
    jacobian: impl Fn(&X) -> Result<X, OptimizeError>,
    initial_guess: X,
    constraint_matrix: Matrix,
    constraint_rhs: Vector,
) -> Result<X, OptimizeError>
where
    X: Jacobian,
    for<'a> &'a Matrix: Mul<&'a X, Output = Vector>,
{
    if gradient_descent.line_search.is_some() {
        panic!("Line search needs the exact penalty function in constrained optimization.");
    }
    let num_constraints = constraint_rhs.len();
    let mut multipliers = Vector::zero(num_constraints);
    let mut multipliers_change = multipliers.clone();
    let mut residual;
    let mut residual_change = Vector::zero(num_constraints);
    let mut solution = initial_guess;
    let mut step_size = INITIAL_STEP_SIZE;
    let mut step_trial;
    for _ in 0..gradient_descent.max_steps {
        if let Ok(result) = descent(
            gradient_descent,
            &jacobian,
            solution.clone(),
            Some((&constraint_matrix, &multipliers)),
        ) {
            solution = result;
            residual = &constraint_rhs - &constraint_matrix * &solution;
            if residual.norm_inf() < gradient_descent.abs_tol {
                return Ok(solution);
            } else {
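                // Adapt the dual step size from the differences of successive
                // multipliers and constraint residuals, mirroring the estimate
                // used in the inner descent.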
                multipliers_change -= &multipliers;
                residual_change -= &residual;
                step_trial = residual_change.full_contraction(&multipliers_change)
                    / residual_change.norm_squared();
                if step_trial.abs() > 0.0 && !step_trial.is_nan() {
                    step_size = step_trial.abs()
                }
                residual_change = residual.clone();
                multipliers_change = multipliers.clone();
                multipliers += residual * step_size;
            }
        } else {
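            // The inner descent failed to converge: roll the multipliers part
            // of the way back toward their previous values and shrink the dual
            // step size before trying again.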
            multipliers -= (multipliers.clone() - &multipliers_change) * CUTBACK_FACTOR_MINUS_ONE;
            step_size *= CUTBACK_FACTOR;
        }
    }
    Err(OptimizeError::MaximumStepsReached(
        gradient_descent.max_steps,
        format!("{gradient_descent:?}"),
    ))
}