From 21850fecd037c94151d3cc19952977fc9e16bf05 Mon Sep 17 00:00:00 2001
From: Pratik Fandade
Date: Thu, 24 Oct 2024 04:03:43 -0400
Subject: [PATCH] Adding logistic regression & optimizing the gradient descent
 algorithm

---
 DIRECTORY.md                                |  1 +
 src/machine_learning/logistic_regression.rs | 62 +++++++++++++++++++
 src/machine_learning/mod.rs                 |  2 +
 .../optimization/gradient_descent.rs        |  2 +-
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 src/machine_learning/logistic_regression.rs

diff --git a/DIRECTORY.md b/DIRECTORY.md
index f4e1fa0e58c..8559cb34c93 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -156,6 +156,7 @@
   * [Cholesky](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/cholesky.rs)
   * [K Means](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/k_means.rs)
   * [Linear Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/linear_regression.rs)
+  * [Logistic Regression](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/logistic_regression.rs)
   * Loss Function
     * [Average Margin Ranking Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/average_margin_ranking_loss.rs)
     * [Hinge Loss](https://github.com/TheAlgorithms/Rust/blob/master/src/machine_learning/loss_function/hinge_loss.rs)
diff --git a/src/machine_learning/logistic_regression.rs b/src/machine_learning/logistic_regression.rs
new file mode 100644
index 00000000000..cae85814671
--- /dev/null
+++ b/src/machine_learning/logistic_regression.rs
@@ -0,0 +1,62 @@
+use super::optimization::gradient_descent;
+use std::f64::consts::E;
+
+/// Returns the weights after performing logistic regression on the input data points.
+pub fn logistic_regression(
+    data_points: Vec<(Vec<f64>, f64)>,
+    iterations: usize,
+    learning_rate: f64,
+) -> Option<Vec<f64>> {
+    if data_points.is_empty() {
+        return None;
+    }
+
+    let num_features = data_points[0].0.len();
+    let mut params = vec![0.0; num_features];
+
+    let derivative_fn = |params: &[f64]| derivative(params, &data_points);
+
+    gradient_descent(derivative_fn, &mut params, learning_rate, iterations as i32);
+
+    Some(params)
+}
+
+fn derivative(params: &[f64], data_points: &[(Vec<f64>, f64)]) -> Vec<f64> {
+    let num_features = params.len();
+    let mut gradients = vec![0.0; num_features];
+
+    for (features, y_i) in data_points {
+        let z = params.iter().zip(features).map(|(p, x)| p * x).sum::<f64>();
+        let prediction = 1.0 / (1.0 + E.powf(-z));
+
+        for (i, x_i) in features.iter().enumerate() {
+            gradients[i] += (prediction - y_i) * x_i;
+        }
+    }
+
+    gradients
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_logistic_regression() {
+        let data = vec![
+            (vec![0.0, 0.0], 0.0),
+            (vec![1.0, 1.0], 1.0),
+            (vec![2.0, 2.0], 1.0),
+        ];
+        let result = logistic_regression(data, 10000, 0.1);
+        assert!(result.is_some());
+        let params = result.unwrap();
+        assert!((params[0] - 6.902976808251308).abs() < 1e-6);
+        assert!((params[1] - 6.902976808251308).abs() < 1e-6);
+    }
+
+    #[test]
+    fn test_empty_list_logistic_regression() {
+        assert_eq!(logistic_regression(vec![], 10000, 0.1), None);
+    }
+}
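The `derivative` helper computes the gradient of the binary cross-entropy loss: each data point contributes (sigmoid(w . x) - y) * x_i to the i-th partial derivative, which is exactly what `gradient_descent` needs to minimize the log-loss. As a quick way to exercise the new public API, here is a minimal usage sketch. It is not part of the diff; it assumes the crate is imported under its package name `the_algorithms_rust`, and the 0.5 decision threshold is an illustrative choice:

    use the_algorithms_rust::machine_learning::logistic_regression;

    fn main() {
        // Same toy data as the unit test above.
        let data = vec![
            (vec![0.0, 0.0], 0.0),
            (vec![1.0, 1.0], 1.0),
            (vec![2.0, 2.0], 1.0),
        ];

        // 10000 gradient-descent iterations with learning rate 0.1.
        let weights = logistic_regression(data, 10000, 0.1).expect("data is non-empty");

        // Score a new point: sigmoid(w . x) above 0.5 is read as class 1.
        let x = [1.5, 1.5];
        let z: f64 = weights.iter().zip(x.iter()).map(|(w, v)| w * v).sum();
        let p = 1.0 / (1.0 + (-z).exp());
        println!("p(y = 1 | x) = {p:.3}");
    }

Note that the model fits no intercept term, so callers who need a bias can append a constant 1.0 feature to every data point.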
diff --git a/src/machine_learning/mod.rs b/src/machine_learning/mod.rs
index c77fd65116b..534326d2121 100644
--- a/src/machine_learning/mod.rs
+++ b/src/machine_learning/mod.rs
@@ -1,12 +1,14 @@
 mod cholesky;
 mod k_means;
 mod linear_regression;
+mod logistic_regression;
 mod loss_function;
 mod optimization;
 
 pub use self::cholesky::cholesky;
 pub use self::k_means::k_means;
 pub use self::linear_regression::linear_regression;
+pub use self::logistic_regression::logistic_regression;
 pub use self::loss_function::average_margin_ranking_loss;
 pub use self::loss_function::hinge_loss;
 pub use self::loss_function::huber_loss;
diff --git a/src/machine_learning/optimization/gradient_descent.rs b/src/machine_learning/optimization/gradient_descent.rs
index 6701a688d15..fd322a23ff3 100644
--- a/src/machine_learning/optimization/gradient_descent.rs
+++ b/src/machine_learning/optimization/gradient_descent.rs
@@ -23,7 +23,7 @@
 /// A reference to the optimized parameter vector `x`.
 
 pub fn gradient_descent(
-    derivative_fn: fn(&[f64]) -> Vec<f64>,
+    derivative_fn: impl Fn(&[f64]) -> Vec<f64>,
     x: &mut Vec<f64>,
     learning_rate: f64,
     num_iterations: i32,
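The widened bound above is what lets `logistic_regression` compile: its closure `|params: &[f64]| derivative(params, &data_points)` captures `data_points`, and only non-capturing closures coerce to a bare `fn` pointer. Because `impl Fn` is monomorphized, plain function pointers from existing callers still satisfy the new bound at no runtime cost. A standalone sketch of the distinction (hypothetical helper names, not part of the diff):

    // Old style: accepts only plain function pointers.
    fn takes_fn_ptr(f: fn(&[f64]) -> Vec<f64>) -> Vec<f64> {
        f(&[1.0, 2.0])
    }

    // New style: accepts any callable, including capturing closures.
    fn takes_impl_fn(f: impl Fn(&[f64]) -> Vec<f64>) -> Vec<f64> {
        f(&[1.0, 2.0])
    }

    fn main() {
        // A closure that captures nothing coerces to a `fn` pointer.
        let double = |xs: &[f64]| xs.iter().map(|x| x * 2.0).collect::<Vec<f64>>();
        assert_eq!(takes_fn_ptr(double), vec![2.0, 4.0]);

        // A closure that captures its environment (like the one built over
        // `data_points` in `logistic_regression`) only satisfies `impl Fn`.
        let scale = 3.0;
        let scaled = |xs: &[f64]| xs.iter().map(|x| x * scale).collect::<Vec<f64>>();
        // takes_fn_ptr(scaled); // would not compile: closure captures `scale`
        assert_eq!(takes_impl_fn(scaled), vec![3.0, 6.0]);
    }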