From 7b6f7f58d6a30df4810bd64723890bda706f8db3 Mon Sep 17 00:00:00 2001 From: sarah <> Date: Sat, 16 Sep 2023 14:37:16 +0200 Subject: [PATCH] panic docs and rank-deficiency robustness for fullpivlu and colpivqr --- faer-cholesky/src/ldlt_diagonal/compute.rs | 11 +- faer-cholesky/src/ldlt_diagonal/mod.rs | 15 +- faer-cholesky/src/ldlt_diagonal/solve.rs | 98 +++++++++- faer-cholesky/src/ldlt_diagonal/update.rs | 27 +++ faer-cholesky/src/llt/compute.rs | 6 +- faer-cholesky/src/llt/inverse.rs | 8 +- faer-cholesky/src/llt/mod.rs | 13 +- faer-cholesky/src/llt/reconstruct.rs | 6 + faer-cholesky/src/llt/solve.rs | 64 ++++-- faer-cholesky/src/llt/update.rs | 30 ++- faer-lu/src/full_pivoting/compute.rs | 205 ++++++++++++++++++-- faer-lu/src/full_pivoting/inverse.rs | 4 +- faer-lu/src/full_pivoting/reconstruct.rs | 4 +- faer-lu/src/full_pivoting/solve.rs | 9 +- faer-lu/src/partial_pivoting/compute.rs | 4 +- faer-lu/src/partial_pivoting/inverse.rs | 4 +- faer-lu/src/partial_pivoting/reconstruct.rs | 4 +- faer-lu/src/partial_pivoting/solve.rs | 9 +- faer-qr/src/col_pivoting/compute.rs | 181 ++++++++++++++++- faer-qr/src/col_pivoting/inverse.rs | 4 +- faer-qr/src/col_pivoting/reconstruct.rs | 4 +- faer-qr/src/col_pivoting/solve.rs | 9 +- faer-qr/src/no_pivoting/compute.rs | 2 +- faer-qr/src/no_pivoting/inverse.rs | 4 +- faer-qr/src/no_pivoting/reconstruct.rs | 4 +- faer-qr/src/no_pivoting/solve.rs | 9 +- 26 files changed, 656 insertions(+), 82 deletions(-) diff --git a/faer-cholesky/src/ldlt_diagonal/compute.rs b/faer-cholesky/src/ldlt_diagonal/compute.rs index f9c4b24e..793fabc7 100644 --- a/faer-cholesky/src/ldlt_diagonal/compute.rs +++ b/faer-cholesky/src/ldlt_diagonal/compute.rs @@ -291,14 +291,17 @@ fn cholesky_in_place_impl( /// /// # Warning /// -/// The Cholesky decomposition may have poor numerical stability properties when used with non -/// positive definite matrices. In the general case, it is recommended to first permute the matrix -/// using [`crate::compute_cholesky_permutation`] and -/// [`permute_rows_and_cols_symmetric`](faer_core::permutation::permute_rows_and_cols_symmetric_lower). +/// The Cholesky decomposition with diagonal may have poor numerical stability properties when used +/// with non positive definite matrices. In the general case, it is recommended to first permute +/// (and conjugate when necessary) the rows and columns of the matrix using the permutation obtained +/// from [`crate::compute_cholesky_permutation`]. /// /// # Panics /// /// Panics if the input matrix is not square. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`raw_cholesky_in_place_req`]). #[track_caller] #[inline] pub fn raw_cholesky_in_place( diff --git a/faer-cholesky/src/ldlt_diagonal/mod.rs b/faer-cholesky/src/ldlt_diagonal/mod.rs index e104c89e..b01c3df1 100644 --- a/faer-cholesky/src/ldlt_diagonal/mod.rs +++ b/faer-cholesky/src/ldlt_diagonal/mod.rs @@ -3,9 +3,9 @@ //! where $D$ is a diagonal matrix, and $L$ is a unit lower triangular matrix. //! //! The Cholesky decomposition with diagonal may have poor numerical stability properties when used -//! with non positive definite matrices. In the general case, it is recommended to first permute the -//! matrix using [`crate::compute_cholesky_permutation`] and -//! [`permute_rows_and_cols_symmetric`](faer_core::permutation::permute_rows_and_cols_symmetric_lower). +//! with non positive definite matrices. In the general case, it is recommended to first permute +//! (and conjugate when necessary) the rows and columns of the matrix using the permutation obtained +//! from [`crate::compute_cholesky_permutation`]. pub mod compute; pub mod solve; @@ -239,8 +239,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [1, 3], + Parallelism::None, DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::None).unwrap(), )), ); @@ -268,8 +269,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [0, 2], + Parallelism::None, DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::None).unwrap(), )), ); @@ -297,8 +299,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [0, 2, 3], + Parallelism::None, DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::None).unwrap(), )), ); diff --git a/faer-cholesky/src/ldlt_diagonal/solve.rs b/faer-cholesky/src/ldlt_diagonal/solve.rs index d4483a6f..7f707fe3 100644 --- a/faer-cholesky/src/ldlt_diagonal/solve.rs +++ b/faer-cholesky/src/ldlt_diagonal/solve.rs @@ -1,9 +1,61 @@ -use dyn_stack::DynStack; -use faer_core::{solve, zipped, ComplexField, Conj, MatMut, MatRef, Parallelism}; +use dyn_stack::{DynStack, SizeOverflow, StackReq}; +use faer_core::{solve, zipped, ComplexField, Conj, Entity, MatMut, MatRef, Parallelism}; use reborrow::*; use assert2::assert; +/// Computes the size and alignment of required workspace for solving a linear system defined by a +/// matrix in place, given its diagonal LDLT decomposition. +pub fn solve_in_place_req( + cholesky_dimension: usize, + rhs_ncols: usize, + parallelism: Parallelism, +) -> Result { + let _ = cholesky_dimension; + let _ = rhs_ncols; + let _ = parallelism; + Ok(StackReq::default()) +} + +/// Computes the size and alignment of required workspace for solving a linear system defined by a +/// matrix out of place, given its diagonal LDLT decomposition. +pub fn solve_req( + cholesky_dimension: usize, + rhs_ncols: usize, + parallelism: Parallelism, +) -> Result { + let _ = cholesky_dimension; + let _ = rhs_ncols; + let _ = parallelism; + Ok(StackReq::default()) +} + +/// Computes the size and alignment of required workspace for solving a linear system defined by +/// the transpose of a matrix in place, given its diagonal LDLT decomposition. +pub fn solve_transpose_in_place_req( + cholesky_dimension: usize, + rhs_ncols: usize, + parallelism: Parallelism, +) -> Result { + let _ = cholesky_dimension; + let _ = rhs_ncols; + let _ = parallelism; + Ok(StackReq::default()) +} + +/// Computes the size and alignment of required workspace for solving a linear system defined by +/// the transpose of a matrix out of place, given its diagonal LDLT decomposition. +pub fn solve_transpose_req( + cholesky_dimension: usize, + rhs_ncols: usize, + parallelism: Parallelism, +) -> Result { + let _ = cholesky_dimension; + let _ = rhs_ncols; + let _ = parallelism; + Ok(StackReq::default()) +} + /// Given the Cholesky factors of a matrix $A$ and a matrix $B$ stored in `rhs`, this function /// computes the solution of the linear system: /// $$\text{Op}_A(A)X = B.$$ @@ -11,6 +63,16 @@ use assert2::assert; /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `rhs`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factors` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_in_place_req`]). #[track_caller] pub fn solve_in_place_with_conj( cholesky_factors: MatRef<'_, E>, @@ -60,6 +122,16 @@ pub fn solve_in_place_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `rhs`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factors` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). #[track_caller] pub fn solve_transpose_in_place_with_conj( cholesky_factors: MatRef<'_, E>, @@ -88,6 +160,17 @@ pub fn solve_transpose_in_place_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `dst`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factors` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// * `dst` must have `n` rows, and the same number of columns as `rhs`. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_req`]). #[track_caller] pub fn solve_transpose_with_conj( dst: MatMut<'_, E>, @@ -109,6 +192,17 @@ pub fn solve_transpose_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `dst`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factors` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// * `dst` must have `n` rows, and the same number of columns as `rhs`. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_req`]). #[track_caller] pub fn solve_with_conj( dst: MatMut<'_, E>, diff --git a/faer-cholesky/src/ldlt_diagonal/update.rs b/faer-cholesky/src/ldlt_diagonal/update.rs index c9ea3ba9..94e279b6 100644 --- a/faer-cholesky/src/ldlt_diagonal/update.rs +++ b/faer-cholesky/src/ldlt_diagonal/update.rs @@ -637,6 +637,14 @@ impl<'a, E: ComplexField> RankRUpdate<'a, E> { /// /// The matrix $W$ and the vector $\alpha$ are clobbered, meaning that the values they contain after /// the function is called are unspecified. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// * `cholesky_factors` must be square of dimension `n`. +/// * `w` must have `n` rows. +/// * `alpha` must have one column. +/// * `alpha` must have the same number of rows as the number of columns in `w`. #[track_caller] pub fn rank_r_update_clobber( cholesky_factors: MatMut<'_, E>, @@ -726,7 +734,9 @@ pub(crate) fn rank_update_indices( pub fn delete_rows_and_cols_clobber_req( dim: usize, number_of_rows_to_remove: usize, + parallelism: Parallelism, ) -> Result { + let _ = parallelism; let r = number_of_rows_to_remove; StackReq::try_all_of([temp_mat_req::(dim, r)?, temp_mat_req::(r, 1)?]) } @@ -740,12 +750,22 @@ pub fn delete_rows_and_cols_clobber_req( /// /// The indices are clobbered, meaning that the values that the slice contains after the function /// is called are unspecified. +/// +/// # Panics +/// +/// Panics if the matrix is not square, the indices are out of bounds, or the list of indices +/// contains duplicates. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`delete_rows_and_cols_clobber_req`]). #[track_caller] pub fn delete_rows_and_cols_clobber( cholesky_factors: MatMut<'_, E>, indices: &mut [usize], + parallelism: Parallelism, stack: DynStack<'_>, ) { + let _ = parallelism; let n = cholesky_factors.nrows(); let r = indices.len(); assert!(cholesky_factors.ncols() == n); @@ -824,6 +844,13 @@ pub fn insert_rows_and_cols_clobber_req( /// /// The inserted matrix is clobbered, meaning that the values it contains after the function /// is called are unspecified. +/// +/// # Panics +/// +/// Panics if the index is out of bounds or the matrix dimensions are invalid. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`insert_rows_and_cols_clobber_req`]). #[track_caller] pub fn insert_rows_and_cols_clobber( cholesky_factors_extended: MatMut<'_, E>, diff --git a/faer-cholesky/src/llt/compute.rs b/faer-cholesky/src/llt/compute.rs index 90ec8964..df9c5c04 100644 --- a/faer-cholesky/src/llt/compute.rs +++ b/faer-cholesky/src/llt/compute.rs @@ -185,8 +185,10 @@ fn cholesky_in_place_impl( /// /// # Panics /// -/// - Panics if the input matrix is not square. -/// - Panics if the provided memory in `stack` is insufficient. +/// Panics if the input matrix is not square. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`cholesky_in_place_req`]). #[track_caller] #[inline] pub fn cholesky_in_place( diff --git a/faer-cholesky/src/llt/inverse.rs b/faer-cholesky/src/llt/inverse.rs index cadc33ea..a8f441b0 100644 --- a/faer-cholesky/src/llt/inverse.rs +++ b/faer-cholesky/src/llt/inverse.rs @@ -65,7 +65,9 @@ pub fn invert_lower_in_place_req( /// # Panics /// /// - Panics if `cholesky_factor` is not a square matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`invert_lower_req`]). #[track_caller] pub fn invert_lower_in_place( cholesky_factor: MatMut<'_, E>, @@ -83,7 +85,9 @@ pub fn invert_lower_in_place( /// /// - Panics if `cholesky_factor` is not a square matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`invert_lower_in_place_req`]). #[track_caller] pub fn invert_lower( dst: MatMut<'_, E>, diff --git a/faer-cholesky/src/llt/mod.rs b/faer-cholesky/src/llt/mod.rs index f060f485..28818d38 100644 --- a/faer-cholesky/src/llt/mod.rs +++ b/faer-cholesky/src/llt/mod.rs @@ -243,8 +243,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [1, 3], + Parallelism::Rayon(8), DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::Rayon(8)).unwrap(), )), ); @@ -270,8 +271,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [0, 2], + Parallelism::Rayon(8), DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::Rayon(8)).unwrap(), )), ); @@ -297,8 +299,9 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [0, 2, 3], + Parallelism::Rayon(8), DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n, r).unwrap(), + delete_rows_and_cols_clobber_req::(n, r, Parallelism::Rayon(8)).unwrap(), )), ); @@ -335,8 +338,10 @@ mod tests { delete_rows_and_cols_clobber( a.as_mut(), &mut [position, position + 1], + Parallelism::Rayon(8), DynStack::new(&mut GlobalMemBuffer::new( - delete_rows_and_cols_clobber_req::(n + r, r).unwrap(), + delete_rows_and_cols_clobber_req::(n + r, r, Parallelism::Rayon(8)) + .unwrap(), )), ); diff --git a/faer-cholesky/src/llt/reconstruct.rs b/faer-cholesky/src/llt/reconstruct.rs index e2fae8c1..b6fd7350 100644 --- a/faer-cholesky/src/llt/reconstruct.rs +++ b/faer-cholesky/src/llt/reconstruct.rs @@ -30,6 +30,9 @@ pub fn reconstruct_lower_in_place_req( /// /// - Panics if `cholesky_factor` is not a square matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`reconstruct_lower_req`]). #[track_caller] pub fn reconstruct_lower( dst: MatMut<'_, E>, @@ -60,6 +63,9 @@ pub fn reconstruct_lower( /// # Panics /// /// - Panics if `cholesky_factor` is not a square matrix. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`reconstruct_lower_in_place_req`]). #[track_caller] pub fn reconstruct_lower_in_place( cholesky_factor: MatMut<'_, E>, diff --git a/faer-cholesky/src/llt/solve.rs b/faer-cholesky/src/llt/solve.rs index 6a6a1867..df946100 100644 --- a/faer-cholesky/src/llt/solve.rs +++ b/faer-cholesky/src/llt/solve.rs @@ -62,31 +62,41 @@ pub fn solve_transpose_req( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `rhs`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factor` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_in_place_req`]). #[track_caller] pub fn solve_in_place_with_conj( - cholesky_factors: MatRef<'_, E>, + cholesky_factor: MatRef<'_, E>, conj_lhs: Conj, rhs: MatMut<'_, E>, parallelism: Parallelism, stack: DynStack<'_>, ) { let _ = &stack; - let n = cholesky_factors.nrows(); + let n = cholesky_factor.nrows(); - assert!(cholesky_factors.nrows() == cholesky_factors.ncols()); + assert!(cholesky_factor.nrows() == cholesky_factor.ncols()); assert!(rhs.nrows() == n); let mut rhs = rhs; solve::solve_lower_triangular_in_place_with_conj( - cholesky_factors, + cholesky_factor, conj_lhs, rhs.rb_mut(), parallelism, ); solve::solve_upper_triangular_in_place_with_conj( - cholesky_factors.transpose(), + cholesky_factor.transpose(), conj_lhs.compose(Conj::Yes), rhs.rb_mut(), parallelism, @@ -100,10 +110,21 @@ pub fn solve_in_place_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `dst`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factor` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// * `dst` must have `n` rows, and the same number of columns as `rhs`. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_req`]). #[track_caller] pub fn solve_with_conj( dst: MatMut<'_, E>, - cholesky_factors: MatRef<'_, E>, + cholesky_factor: MatRef<'_, E>, conj_lhs: Conj, rhs: MatRef<'_, E>, parallelism: Parallelism, @@ -111,7 +132,7 @@ pub fn solve_with_conj( ) { let mut dst = dst; zipped!(dst.rb_mut(), rhs).for_each(|mut dst, src| dst.write(src.read())); - solve_in_place_with_conj(cholesky_factors, conj_lhs, dst, parallelism, stack) + solve_in_place_with_conj(cholesky_factor, conj_lhs, dst, parallelism, stack) } /// Given the Cholesky factor of a matrix $A$ and a matrix $B$ stored in `rhs`, this function @@ -121,9 +142,19 @@ pub fn solve_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `rhs`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factor` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). #[track_caller] pub fn solve_transpose_in_place_with_conj( - cholesky_factors: MatRef<'_, E>, + cholesky_factor: MatRef<'_, E>, conj_lhs: Conj, rhs: MatMut<'_, E>, parallelism: Parallelism, @@ -131,7 +162,7 @@ pub fn solve_transpose_in_place_with_conj( ) { // (L L.*).T = conj(L L.*) solve_in_place_with_conj( - cholesky_factors, + cholesky_factor, conj_lhs.compose(Conj::Yes), rhs, parallelism, @@ -146,10 +177,21 @@ pub fn solve_transpose_in_place_with_conj( /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`. /// /// The solution of the linear system is stored in `dst`. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// +/// * `cholesky_factor` must be square of dimension `n`. +/// * `rhs` must have `n` rows. +/// * `dst` must have `n` rows, and the same number of columns as `rhs`. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_req`]). #[track_caller] pub fn solve_transpose_with_conj( dst: MatMut<'_, E>, - cholesky_factors: MatRef<'_, E>, + cholesky_factor: MatRef<'_, E>, conj_lhs: Conj, rhs: MatRef<'_, E>, parallelism: Parallelism, @@ -157,5 +199,5 @@ pub fn solve_transpose_with_conj( ) { let mut dst = dst; zipped!(dst.rb_mut(), rhs).for_each(|mut dst, src| dst.write(src.read())); - solve_transpose_in_place_with_conj(cholesky_factors, conj_lhs, dst, parallelism, stack) + solve_transpose_in_place_with_conj(cholesky_factor, conj_lhs, dst, parallelism, stack) } diff --git a/faer-cholesky/src/llt/update.rs b/faer-cholesky/src/llt/update.rs index deaca6e6..1bbb4a52 100644 --- a/faer-cholesky/src/llt/update.rs +++ b/faer-cholesky/src/llt/update.rs @@ -905,6 +905,14 @@ impl<'a, E: ComplexField> RankRUpdate<'a, E> { /// /// The matrix $W$ and the vector $\alpha$ are clobbered, meaning that the values they contain after /// the function is called are unspecified. +/// +/// # Panics +/// +/// Panics if any of these conditions is violated: +/// * `cholesky_factor` must be square of dimension `n`. +/// * `w` must have `n` rows. +/// * `alpha` must have one column. +/// * `alpha` must have the same number of rows as the number of columns in `w`. #[track_caller] pub fn rank_r_update_clobber( cholesky_factor: MatMut<'_, E>, @@ -917,6 +925,7 @@ pub fn rank_r_update_clobber( assert!(cholesky_factor.ncols() == n); assert!(w.nrows() == n); assert!(alpha.nrows() == k); + assert!(alpha.ncols() == 1); if n == 0 { return Ok(()); @@ -937,7 +946,9 @@ pub fn rank_r_update_clobber( pub fn delete_rows_and_cols_clobber_req( dim: usize, number_of_rows_to_remove: usize, + parallelism: Parallelism, ) -> Result { + let _ = parallelism; let r = number_of_rows_to_remove; StackReq::try_all_of([temp_mat_req::(dim, r)?, temp_mat_req::(r, 1)?]) } @@ -949,14 +960,24 @@ pub fn delete_rows_and_cols_clobber_req( /// /// The result is stored in the top left corner (with dimension `n - r`) of `cholesky_factor`. /// -/// The indices are clobbered, meaning that the values that the slice contains after the function +/// The indices are clobbered, meaning that the values in the slice after the function /// is called are unspecified. +/// +/// # Panics +/// +/// Panics if the matrix is not square, the indices are out of bounds, or the list of indices +/// contains duplicates. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`delete_rows_and_cols_clobber_req`]). #[track_caller] pub fn delete_rows_and_cols_clobber( cholesky_factor: MatMut<'_, E>, indices: &mut [usize], + parallelism: Parallelism, stack: DynStack<'_>, ) { + let _ = parallelism; let n = cholesky_factor.nrows(); let r = indices.len(); assert!(cholesky_factor.ncols() == n); @@ -1036,6 +1057,13 @@ pub fn insert_rows_and_cols_clobber_req( /// /// The inserted matrix is clobbered, meaning that the values it contains after the function /// is called are unspecified. +/// +/// # Panics +/// +/// Panics if the index is out of bounds or the matrix dimensions are invalid. +/// +/// This can also panic if the provided memory in `stack` is insufficient (see +/// [`insert_rows_and_cols_clobber_req`]). #[track_caller] pub fn insert_rows_and_cols_clobber( cholesky_factor_extended: MatMut<'_, E>, diff --git a/faer-lu/src/full_pivoting/compute.rs b/faer-lu/src/full_pivoting/compute.rs index b458bed8..6d914cf2 100644 --- a/faer-lu/src/full_pivoting/compute.rs +++ b/faer-lu/src/full_pivoting/compute.rs @@ -1036,21 +1036,28 @@ fn lu_in_place_unblocked( ) -> usize { let m = matrix.nrows(); let n = matrix.ncols(); + let size = Ord::min(m, n); - debug_assert!(row_transpositions.len() == m); - debug_assert!(col_transpositions.len() == n); + debug_assert!(row_transpositions.len() == size); + debug_assert!(col_transpositions.len() == size); if n == 0 || m == 0 { return 0; } - let size = ::min(m, n); - let mut n_transpositions = 0; - let (mut max_row, mut max_col, _) = best_in_matrix(matrix.rb()); + let (mut max_row, mut max_col, mut biggest) = best_in_matrix(matrix.rb()); for k in 0..size { + if biggest < E::Real::zero_threshold().unwrap() { + for idx in k..size { + row_transpositions[idx] = idx; + col_transpositions[idx] = idx; + } + break; + } + row_transpositions[k] = max_row; col_transpositions[k] = max_col; @@ -1091,7 +1098,7 @@ fn lu_in_place_unblocked( match parallelism { Parallelism::None => { - (max_row, max_col, _) = rank_one_update_and_best_in_matrix( + (max_row, max_col, biggest) = rank_one_update_and_best_in_matrix( bottom_right, bottom_left.col(k).rb(), top_right.row(k).rb(), @@ -1100,13 +1107,13 @@ fn lu_in_place_unblocked( _ => { let n_threads = parallelism_degree(parallelism); - let mut biggest = vec![(0_usize, 0_usize, E::Real::zero()); n_threads]; + let mut biggest_vec = vec![(0_usize, 0_usize, E::Real::zero()); n_threads]; let lhs = bottom_left.col(k).into_const(); let rhs = top_right.row(k).into_const(); { - let biggest = Ptr(biggest.as_mut_ptr()); + let biggest = Ptr(biggest_vec.as_mut_ptr()); for_each_raw( n_threads, @@ -1127,13 +1134,14 @@ fn lu_in_place_unblocked( max_row = 0; max_col = 0; let mut biggest_value = E::Real::zero(); - for (row, col, value) in biggest { + for (row, col, value) in biggest_vec { if value > biggest_value { max_row = row; max_col = col; biggest_value = value; } } + biggest = biggest_value; } }; max_row += k + 1; @@ -1195,10 +1203,10 @@ fn default_disable_parallelism(m: usize, n: usize) -> bool { /// # Panics /// /// - Panics if the length of the row permutation slices is not equal to the number of rows of the -/// matrix +/// matrix. /// - Panics if the length of the column permutation slices is not equal to the number of columns of -/// the matrix -/// - Panics if the provided memory in `stack` is insufficient. +/// the matrix. +/// - Panics if the provided memory in `stack` is insufficient (see [`lu_in_place_req`]). pub fn lu_in_place<'out, E: ComplexField>( matrix: MatMut<'_, E>, row_perm: &'out mut [usize], @@ -1216,13 +1224,15 @@ pub fn lu_in_place<'out, E: ComplexField>( let _ = parallelism; let m = matrix.nrows(); let n = matrix.ncols(); + let size = Ord::min(m, n); + assert!(row_perm.len() == m); assert!(row_perm_inv.len() == m); assert!(col_perm.len() == n); assert!(col_perm_inv.len() == n); - let (mut row_transpositions, stack) = stack.make_with(m, |i| i); - let (mut col_transpositions, _) = stack.make_with(n, |i| i); + let (mut row_transpositions, stack) = stack.make_with(size, |_| 0usize); + let (mut col_transpositions, _) = stack.make_with(size, |_| 0usize); let n_transpositions = if matrix.row_stride().abs() < matrix.col_stride().abs() { lu_in_place_unblocked( @@ -1439,4 +1449,171 @@ mod tests { compute_lu_row_major_generic::(random_c64, 1e-6); compute_lu_row_major_generic::(random_c32, 1e-2); } + + #[test] + fn test_lu_c32_zeros() { + for (m, n) in [ + (2, 4), + (2, 2), + (3, 3), + (4, 2), + (2, 1), + (4, 4), + (20, 20), + (20, 2), + (2, 20), + (40, 20), + (20, 40), + ] { + let mat = Mat::with_dims(m, n, |_i, _j| c32::zero()); + for parallelism in [Parallelism::None, Parallelism::Rayon(0)] { + let mut mat = mat.clone(); + let mat_orig = mat.clone(); + + let mut mat = mat.as_mut(); + let mat_orig = mat_orig.as_ref(); + + let mut row_perm = vec![0; m]; + let mut row_perm_inv = vec![0; m]; + let mut col_perm = vec![0; n]; + let mut col_perm_inv = vec![0; n]; + + let (_, row_perm, col_perm) = lu_in_place( + mat.rb_mut(), + &mut row_perm, + &mut row_perm_inv, + &mut col_perm, + &mut col_perm_inv, + parallelism, + make_stack!(lu_in_place_req::( + m, + n, + Parallelism::None, + Default::default() + )), + Default::default(), + ); + let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb()); + + for i in 0..m { + for j in 0..n { + assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4); + } + } + } + } + } + + #[test] + fn test_lu_c32_ones() { + for (m, n) in [ + (2, 4), + (2, 2), + (3, 3), + (4, 2), + (2, 1), + (4, 4), + (20, 20), + (20, 2), + (2, 20), + (40, 20), + (20, 40), + ] { + let mat = Mat::with_dims(m, n, |_i, _j| c32::one()); + for parallelism in [Parallelism::None, Parallelism::Rayon(0)] { + let mut mat = mat.clone(); + let mat_orig = mat.clone(); + + let mut mat = mat.as_mut(); + let mat_orig = mat_orig.as_ref(); + + let mut row_perm = vec![0; m]; + let mut row_perm_inv = vec![0; m]; + let mut col_perm = vec![0; n]; + let mut col_perm_inv = vec![0; n]; + + let (_, row_perm, col_perm) = lu_in_place( + mat.rb_mut(), + &mut row_perm, + &mut row_perm_inv, + &mut col_perm, + &mut col_perm_inv, + parallelism, + make_stack!(lu_in_place_req::( + m, + n, + Parallelism::None, + Default::default() + )), + Default::default(), + ); + let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb()); + + for i in 0..m { + for j in 0..n { + assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4); + } + } + } + } + } + + #[test] + fn test_lu_c32_rank_2() { + for (m, n) in [ + (2, 4), + (2, 2), + (3, 3), + (4, 2), + (2, 1), + (4, 4), + (20, 20), + (20, 2), + (2, 20), + (40, 20), + (20, 40), + ] { + let u0 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random())); + let v0 = Mat::with_dims(1, n, |_, _| c32::new(random(), random())); + let u1 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random())); + let v1 = Mat::with_dims(1, n, |_, _| c32::new(random(), random())); + + let mat = u0 * v0 + u1 * v1; + for parallelism in [Parallelism::None, Parallelism::Rayon(0)] { + let mut mat = mat.clone(); + let mat_orig = mat.clone(); + + let mut mat = mat.as_mut(); + let mat_orig = mat_orig.as_ref(); + + let mut row_perm = vec![0; m]; + let mut row_perm_inv = vec![0; m]; + let mut col_perm = vec![0; n]; + let mut col_perm_inv = vec![0; n]; + + let (_, row_perm, col_perm) = lu_in_place( + mat.rb_mut(), + &mut row_perm, + &mut row_perm_inv, + &mut col_perm, + &mut col_perm_inv, + parallelism, + make_stack!(lu_in_place_req::( + m, + n, + Parallelism::None, + Default::default() + )), + Default::default(), + ); + let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb()); + + for i in 0..m { + for j in 0..n { + assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4); + } + } + } + } + } } diff --git a/faer-lu/src/full_pivoting/inverse.rs b/faer-lu/src/full_pivoting/inverse.rs index f747e272..0d04343b 100644 --- a/faer-lu/src/full_pivoting/inverse.rs +++ b/faer-lu/src/full_pivoting/inverse.rs @@ -93,7 +93,7 @@ fn invert_impl( /// - Panics if the row permutation doesn't have the same dimension as the matrix. /// - Panics if the column permutation doesn't have the same dimension as the matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]). #[track_caller] pub fn invert( dst: MatMut<'_, T>, @@ -125,7 +125,7 @@ pub fn invert( /// - Panics if the LU factors are not a square matrix. /// - Panics if the row permutation doesn't have the same dimension as the matrix. /// - Panics if the column permutation doesn't have the same dimension as the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]). #[track_caller] pub fn invert_in_place( lu_factors: MatMut<'_, T>, diff --git a/faer-lu/src/full_pivoting/reconstruct.rs b/faer-lu/src/full_pivoting/reconstruct.rs index 09d81cd7..6aaf7f44 100644 --- a/faer-lu/src/full_pivoting/reconstruct.rs +++ b/faer-lu/src/full_pivoting/reconstruct.rs @@ -102,7 +102,7 @@ fn reconstruct_impl( /// - Panics if the column permutation doesn't have the same dimension as the number of columns of /// the matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]). #[track_caller] pub fn reconstruct( dst: MatMut<'_, T>, @@ -133,7 +133,7 @@ pub fn reconstruct( /// matrix. /// - Panics if the column permutation doesn't have the same dimension as the number of columns of /// the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]). #[track_caller] pub fn reconstruct_in_place( lu_factors: MatMut<'_, T>, diff --git a/faer-lu/src/full_pivoting/solve.rs b/faer-lu/src/full_pivoting/solve.rs index b5c6f15c..c5dd23c1 100644 --- a/faer-lu/src/full_pivoting/solve.rs +++ b/faer-lu/src/full_pivoting/solve.rs @@ -165,7 +165,7 @@ pub fn solve_transpose_req( /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]). pub fn solve( dst: MatMut<'_, T>, lu_factors: MatRef<'_, T>, @@ -202,7 +202,7 @@ pub fn solve( /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]). pub fn solve_in_place( lu_factors: MatRef<'_, T>, conj_lhs: Conj, @@ -239,7 +239,7 @@ pub fn solve_in_place( /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]). pub fn solve_transpose( dst: MatMut<'_, T>, lu_factors: MatRef<'_, T>, @@ -276,7 +276,8 @@ pub fn solve_transpose( /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). pub fn solve_transpose_in_place( lu_factors: MatRef<'_, T>, conj_lhs: Conj, diff --git a/faer-lu/src/partial_pivoting/compute.rs b/faer-lu/src/partial_pivoting/compute.rs index fb392f95..bc50474b 100644 --- a/faer-lu/src/partial_pivoting/compute.rs +++ b/faer-lu/src/partial_pivoting/compute.rs @@ -485,8 +485,8 @@ pub fn lu_in_place_req( /// # Panics /// /// - Panics if the length of the permutation slices is not equal to the number of rows of the -/// matrix, or if the provided memory in `stack` is insufficient. -/// - Panics if the provided memory in `stack` is insufficient. +/// matrix. +/// - Panics if the provided memory in `stack` is insufficient (see [`lu_in_place_req`]). pub fn lu_in_place<'out, E: ComplexField>( matrix: MatMut<'_, E>, perm: &'out mut [usize], diff --git a/faer-lu/src/partial_pivoting/inverse.rs b/faer-lu/src/partial_pivoting/inverse.rs index f37460e7..e04a0e10 100644 --- a/faer-lu/src/partial_pivoting/inverse.rs +++ b/faer-lu/src/partial_pivoting/inverse.rs @@ -91,7 +91,7 @@ pub fn invert_req( /// - Panics if the LU factors are not a square matrix. /// - Panics if the row permutation doesn't have the same dimension as the matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]). #[track_caller] pub fn invert( dst: MatMut<'_, E>, @@ -115,7 +115,7 @@ pub fn invert( /// - Panics if the LU factors are not a square matrix. /// - Panics if the row permutation doesn't have the same dimension as the matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]). #[track_caller] pub fn invert_in_place( lu_factors: MatMut<'_, E>, diff --git a/faer-lu/src/partial_pivoting/reconstruct.rs b/faer-lu/src/partial_pivoting/reconstruct.rs index 5ade0cfd..1b6a2acc 100644 --- a/faer-lu/src/partial_pivoting/reconstruct.rs +++ b/faer-lu/src/partial_pivoting/reconstruct.rs @@ -77,7 +77,7 @@ fn reconstruct_impl( /// - Panics if the row permutation doesn't have the same dimension as the number of rows of the /// matrix. /// - Panics if the destination shape doesn't match the shape of the matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]). #[track_caller] pub fn reconstruct( dst: MatMut<'_, E>, @@ -98,7 +98,7 @@ pub fn reconstruct( /// /// - Panics if the row permutation doesn't have the same dimension as the number of rows of the /// matrix. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]). #[track_caller] pub fn reconstruct_in_place( lu_factors: MatMut<'_, E>, diff --git a/faer-lu/src/partial_pivoting/solve.rs b/faer-lu/src/partial_pivoting/solve.rs index 759b2a20..2f019604 100644 --- a/faer-lu/src/partial_pivoting/solve.rs +++ b/faer-lu/src/partial_pivoting/solve.rs @@ -162,7 +162,7 @@ pub fn solve_transpose_req( /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]). pub fn solve( dst: MatMut<'_, E>, lu_factors: MatRef<'_, E>, @@ -196,7 +196,7 @@ pub fn solve( /// - Panics if `lu_factors` is not a square matrix. /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]). pub fn solve_in_place( lu_factors: MatRef<'_, E>, conj_lhs: Conj, @@ -230,7 +230,7 @@ pub fn solve_in_place( /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]). pub fn solve_transpose( dst: MatMut<'_, E>, lu_factors: MatRef<'_, E>, @@ -263,7 +263,8 @@ pub fn solve_transpose( /// - Panics if `lu_factors` is not a square matrix. /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). pub fn solve_transpose_in_place( lu_factors: MatRef<'_, E>, conj_lhs: Conj, diff --git a/faer-qr/src/col_pivoting/compute.rs b/faer-qr/src/col_pivoting/compute.rs index 9df10f52..08fa2f7b 100644 --- a/faer-qr/src/col_pivoting/compute.rs +++ b/faer-qr/src/col_pivoting/compute.rs @@ -687,7 +687,7 @@ pub fn qr_in_place_req( /// - Panics if the block size is zero. /// - Panics if the length of `col_perm` and `col_perm_inv` is not equal to the number of columns /// of `matrix`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`qr_in_place_req`]). pub fn qr_in_place<'out, E: ComplexField>( matrix: MatMut<'_, E>, householder_factor: MatMut<'_, E>, @@ -1044,4 +1044,183 @@ mod tests { } } } + + #[test] + fn test_qr_c32_zeros() { + for parallelism in [Parallelism::None, Parallelism::Rayon(8)] { + for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] { + let mut mat = Mat::::zeros(m, n); + let mat_orig = mat.clone(); + let size = m.min(n); + let blocksize = 8; + let mut householder = Mat::zeros(blocksize, size); + let mut perm = vec![0; n]; + let mut perm_inv = vec![0; n]; + + qr_in_place( + mat.as_mut(), + householder.as_mut(), + &mut perm, + &mut perm_inv, + parallelism, + make_stack!(qr_in_place_req::( + m, + n, + blocksize, + parallelism, + Default::default() + )), + Default::default(), + ); + + let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref()); + let mut qr = Mat::zeros(m, n); + let mut qhq = Mat::zeros(m, m); + matmul( + qr.as_mut(), + q.as_ref(), + r.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + matmul( + qhq.as_mut(), + q.as_ref().adjoint(), + q.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + for j in 0..n { + for i in 0..m { + assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4); + } + } + } + } + } + + #[test] + fn test_qr_c32_ones() { + for parallelism in [Parallelism::None, Parallelism::Rayon(8)] { + for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] { + let mut mat = Mat::::with_dims(m, n, |_, _| c32::one()); + let mat_orig = mat.clone(); + let size = m.min(n); + let blocksize = 8; + let mut householder = Mat::zeros(blocksize, size); + let mut perm = vec![0; n]; + let mut perm_inv = vec![0; n]; + + qr_in_place( + mat.as_mut(), + householder.as_mut(), + &mut perm, + &mut perm_inv, + parallelism, + make_stack!(qr_in_place_req::( + m, + n, + blocksize, + parallelism, + Default::default() + )), + Default::default(), + ); + + let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref()); + let mut qr = Mat::zeros(m, n); + let mut qhq = Mat::zeros(m, m); + matmul( + qr.as_mut(), + q.as_ref(), + r.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + matmul( + qhq.as_mut(), + q.as_ref().adjoint(), + q.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + for j in 0..n { + for i in 0..m { + assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4); + } + } + } + } + } + + #[test] + fn test_qr_c32_rank_2() { + for parallelism in [Parallelism::None, Parallelism::Rayon(8)] { + for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] { + let u0 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random())); + let v0 = Mat::with_dims(1, n, |_, _| c32::new(random(), random())); + let u1 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random())); + let v1 = Mat::with_dims(1, n, |_, _| c32::new(random(), random())); + + let mut mat = u0 * v0 + u1 * v1; + let mat_orig = mat.clone(); + let size = m.min(n); + let blocksize = 8; + let mut householder = Mat::zeros(blocksize, size); + let mut perm = vec![0; n]; + let mut perm_inv = vec![0; n]; + + qr_in_place( + mat.as_mut(), + householder.as_mut(), + &mut perm, + &mut perm_inv, + parallelism, + make_stack!(qr_in_place_req::( + m, + n, + blocksize, + parallelism, + Default::default() + )), + Default::default(), + ); + + let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref()); + let mut qr = Mat::zeros(m, n); + let mut qhq = Mat::zeros(m, m); + matmul( + qr.as_mut(), + q.as_ref(), + r.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + matmul( + qhq.as_mut(), + q.as_ref().adjoint(), + q.as_ref(), + None, + c32::one(), + Parallelism::Rayon(8), + ); + + for j in 0..n { + for i in 0..m { + assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4); + } + } + } + } + } } diff --git a/faer-qr/src/col_pivoting/inverse.rs b/faer-qr/src/col_pivoting/inverse.rs index 533bcfe0..42de4dac 100644 --- a/faer-qr/src/col_pivoting/inverse.rs +++ b/faer-qr/src/col_pivoting/inverse.rs @@ -19,7 +19,7 @@ use reborrow::*; /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `dst` doesn't have the same shape as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]). #[track_caller] pub fn invert( dst: MatMut<'_, E>, @@ -68,7 +68,7 @@ pub fn invert( /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]). #[track_caller] pub fn invert_in_place( qr_factors: MatMut<'_, E>, diff --git a/faer-qr/src/col_pivoting/reconstruct.rs b/faer-qr/src/col_pivoting/reconstruct.rs index 3083f353..1867f6a9 100644 --- a/faer-qr/src/col_pivoting/reconstruct.rs +++ b/faer-qr/src/col_pivoting/reconstruct.rs @@ -17,7 +17,7 @@ use reborrow::*; /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `dst` doesn't have the same shape as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]). #[track_caller] pub fn reconstruct( dst: MatMut<'_, E>, @@ -67,7 +67,7 @@ pub fn reconstruct( /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]). #[track_caller] pub fn reconstruct_in_place( qr_factors: MatMut<'_, E>, diff --git a/faer-qr/src/col_pivoting/solve.rs b/faer-qr/src/col_pivoting/solve.rs index f2c8254a..9e857283 100644 --- a/faer-qr/src/col_pivoting/solve.rs +++ b/faer-qr/src/col_pivoting/solve.rs @@ -78,7 +78,7 @@ pub fn solve_transpose_req( /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]). #[track_caller] pub fn solve_in_place( qr_factors: MatRef<'_, T>, @@ -118,7 +118,8 @@ pub fn solve_in_place( /// - Panics if the block size is zero. /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). #[track_caller] pub fn solve_transpose_in_place( qr_factors: MatRef<'_, T>, @@ -159,7 +160,7 @@ pub fn solve_transpose_in_place( /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]). #[track_caller] pub fn solve( dst: MatMut<'_, T>, @@ -202,7 +203,7 @@ pub fn solve( /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]). #[track_caller] pub fn solve_transpose( dst: MatMut<'_, T>, diff --git a/faer-qr/src/no_pivoting/compute.rs b/faer-qr/src/no_pivoting/compute.rs index 839623bf..8809e095 100644 --- a/faer-qr/src/no_pivoting/compute.rs +++ b/faer-qr/src/no_pivoting/compute.rs @@ -331,7 +331,7 @@ fn qr_in_place_blocked( /// - Panics if the number of columns of the householder factor is not equal to the minimum of the /// number of rows and the number of columns of the input matrix. /// - Panics if the block size is zero. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`qr_in_place_req`]). #[track_caller] pub fn qr_in_place( matrix: MatMut<'_, E>, diff --git a/faer-qr/src/no_pivoting/inverse.rs b/faer-qr/src/no_pivoting/inverse.rs index 06f4d53c..89784dc7 100644 --- a/faer-qr/src/no_pivoting/inverse.rs +++ b/faer-qr/src/no_pivoting/inverse.rs @@ -17,7 +17,7 @@ use reborrow::*; /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `dst` doesn't have the same shape as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]). #[track_caller] pub fn invert( dst: MatMut<'_, E>, @@ -62,7 +62,7 @@ pub fn invert( /// - Panics if the number of columns of `householder_factor` isn't the same as the minimum of the /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]). #[track_caller] pub fn invert_in_place( qr_factors: MatMut<'_, E>, diff --git a/faer-qr/src/no_pivoting/reconstruct.rs b/faer-qr/src/no_pivoting/reconstruct.rs index 7cd9efc1..c3c5b920 100644 --- a/faer-qr/src/no_pivoting/reconstruct.rs +++ b/faer-qr/src/no_pivoting/reconstruct.rs @@ -15,7 +15,7 @@ use reborrow::*; /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `dst` doesn't have the same shape as `qr_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]). #[track_caller] pub fn reconstruct( dst: MatMut<'_, E>, @@ -60,7 +60,7 @@ pub fn reconstruct( /// - Panics if the number of columns of `householder_factor` isn't the same as the minimum of the /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]). #[track_caller] pub fn reconstruct_in_place( qr_factors: MatMut<'_, E>, diff --git a/faer-qr/src/no_pivoting/solve.rs b/faer-qr/src/no_pivoting/solve.rs index f70489d2..fab80e2e 100644 --- a/faer-qr/src/no_pivoting/solve.rs +++ b/faer-qr/src/no_pivoting/solve.rs @@ -72,7 +72,7 @@ pub fn solve_transpose_req( /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]). #[track_caller] pub fn solve_in_place( qr_factors: MatRef<'_, E>, @@ -119,7 +119,8 @@ pub fn solve_in_place( /// number of rows and the number of columns of `qr_factors`. /// - Panics if the block size is zero. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see +/// [`solve_transpose_in_place_req`]). #[track_caller] pub fn solve_transpose_in_place( qr_factors: MatRef<'_, E>, @@ -173,7 +174,7 @@ pub fn solve_transpose_in_place( /// - Panics if the block size is zero. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]). #[track_caller] pub fn solve( dst: MatMut<'_, E>, @@ -212,7 +213,7 @@ pub fn solve( /// - Panics if the block size is zero. /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`. /// - Panics if `rhs` and `dst` don't have the same shape. -/// - Panics if the provided memory in `stack` is insufficient. +/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]). #[track_caller] pub fn solve_transpose( dst: MatMut<'_, E>,