From 7b6f7f58d6a30df4810bd64723890bda706f8db3 Mon Sep 17 00:00:00 2001
From: sarah <>
Date: Sat, 16 Sep 2023 14:37:16 +0200
Subject: [PATCH] panic docs and rank-deficiency robustness for fullpivlu and
 colpivqr

---
 faer-cholesky/src/ldlt_diagonal/compute.rs  |  11 +-
 faer-cholesky/src/ldlt_diagonal/mod.rs      |  15 +-
 faer-cholesky/src/ldlt_diagonal/solve.rs    |  98 +++++++++-
 faer-cholesky/src/ldlt_diagonal/update.rs   |  27 +++
 faer-cholesky/src/llt/compute.rs            |   6 +-
 faer-cholesky/src/llt/inverse.rs            |   8 +-
 faer-cholesky/src/llt/mod.rs                |  13 +-
 faer-cholesky/src/llt/reconstruct.rs        |   6 +
 faer-cholesky/src/llt/solve.rs              |  64 ++++--
 faer-cholesky/src/llt/update.rs             |  30 ++-
 faer-lu/src/full_pivoting/compute.rs        | 205 ++++++++++++++++++--
 faer-lu/src/full_pivoting/inverse.rs        |   4 +-
 faer-lu/src/full_pivoting/reconstruct.rs    |   4 +-
 faer-lu/src/full_pivoting/solve.rs          |   9 +-
 faer-lu/src/partial_pivoting/compute.rs     |   4 +-
 faer-lu/src/partial_pivoting/inverse.rs     |   4 +-
 faer-lu/src/partial_pivoting/reconstruct.rs |   4 +-
 faer-lu/src/partial_pivoting/solve.rs       |   9 +-
 faer-qr/src/col_pivoting/compute.rs         | 181 ++++++++++++++++-
 faer-qr/src/col_pivoting/inverse.rs         |   4 +-
 faer-qr/src/col_pivoting/reconstruct.rs     |   4 +-
 faer-qr/src/col_pivoting/solve.rs           |   9 +-
 faer-qr/src/no_pivoting/compute.rs          |   2 +-
 faer-qr/src/no_pivoting/inverse.rs          |   4 +-
 faer-qr/src/no_pivoting/reconstruct.rs      |   4 +-
 faer-qr/src/no_pivoting/solve.rs            |   9 +-
 26 files changed, 656 insertions(+), 82 deletions(-)

diff --git a/faer-cholesky/src/ldlt_diagonal/compute.rs b/faer-cholesky/src/ldlt_diagonal/compute.rs
index f9c4b24e..793fabc7 100644
--- a/faer-cholesky/src/ldlt_diagonal/compute.rs
+++ b/faer-cholesky/src/ldlt_diagonal/compute.rs
@@ -291,14 +291,17 @@ fn cholesky_in_place_impl<E: ComplexField>(
 ///
 /// # Warning
 ///
-/// The Cholesky decomposition may have poor numerical stability properties when used with non
-/// positive definite matrices. In the general case, it is recommended to first permute the matrix
-/// using [`crate::compute_cholesky_permutation`] and
-/// [`permute_rows_and_cols_symmetric`](faer_core::permutation::permute_rows_and_cols_symmetric_lower).
+/// The Cholesky decomposition with diagonal may have poor numerical stability properties when used
+/// with non positive definite matrices. In the general case, it is recommended to first permute
+/// (and conjugate when necessary) the rows and columns of the matrix using the permutation obtained
+/// from [`crate::compute_cholesky_permutation`].
 ///
 /// # Panics
 ///
 /// Panics if the input matrix is not square.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`raw_cholesky_in_place_req`]).
 #[track_caller]
 #[inline]
 pub fn raw_cholesky_in_place<E: ComplexField>(
diff --git a/faer-cholesky/src/ldlt_diagonal/mod.rs b/faer-cholesky/src/ldlt_diagonal/mod.rs
index e104c89e..b01c3df1 100644
--- a/faer-cholesky/src/ldlt_diagonal/mod.rs
+++ b/faer-cholesky/src/ldlt_diagonal/mod.rs
@@ -3,9 +3,9 @@
 //! where $D$ is a diagonal matrix, and $L$ is a unit lower triangular matrix.
 //!
 //! The Cholesky decomposition with diagonal may have poor numerical stability properties when used
-//! with non positive definite matrices. In the general case, it is recommended to first permute the
-//! matrix using [`crate::compute_cholesky_permutation`] and
-//! [`permute_rows_and_cols_symmetric`](faer_core::permutation::permute_rows_and_cols_symmetric_lower).
+//! with non positive definite matrices. In the general case, it is recommended to first permute
+//! (and conjugate when necessary) the rows and columns of the matrix using the permutation obtained
+//! from [`crate::compute_cholesky_permutation`].
 
 pub mod compute;
 pub mod solve;
@@ -239,8 +239,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [1, 3],
+                Parallelism::None,
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::None).unwrap(),
                 )),
             );
 
@@ -268,8 +269,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [0, 2],
+                Parallelism::None,
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::None).unwrap(),
                 )),
             );
 
@@ -297,8 +299,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [0, 2, 3],
+                Parallelism::None,
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::None).unwrap(),
                 )),
             );
 
diff --git a/faer-cholesky/src/ldlt_diagonal/solve.rs b/faer-cholesky/src/ldlt_diagonal/solve.rs
index d4483a6f..7f707fe3 100644
--- a/faer-cholesky/src/ldlt_diagonal/solve.rs
+++ b/faer-cholesky/src/ldlt_diagonal/solve.rs
@@ -1,9 +1,61 @@
-use dyn_stack::DynStack;
-use faer_core::{solve, zipped, ComplexField, Conj, MatMut, MatRef, Parallelism};
+use dyn_stack::{DynStack, SizeOverflow, StackReq};
+use faer_core::{solve, zipped, ComplexField, Conj, Entity, MatMut, MatRef, Parallelism};
 use reborrow::*;
 
 use assert2::assert;
 
+/// Computes the size and alignment of required workspace for solving a linear system defined by a
+/// matrix in place, given its diagonal LDLT decomposition.
+pub fn solve_in_place_req<E: Entity>(
+    cholesky_dimension: usize,
+    rhs_ncols: usize,
+    parallelism: Parallelism,
+) -> Result<StackReq, SizeOverflow> {
+    let _ = cholesky_dimension;
+    let _ = rhs_ncols;
+    let _ = parallelism;
+    Ok(StackReq::default())
+}
+
+/// Computes the size and alignment of required workspace for solving a linear system defined by a
+/// matrix out of place, given its diagonal LDLT decomposition.
+pub fn solve_req<E: Entity>(
+    cholesky_dimension: usize,
+    rhs_ncols: usize,
+    parallelism: Parallelism,
+) -> Result<StackReq, SizeOverflow> {
+    let _ = cholesky_dimension;
+    let _ = rhs_ncols;
+    let _ = parallelism;
+    Ok(StackReq::default())
+}
+
+/// Computes the size and alignment of required workspace for solving a linear system defined by
+/// the transpose of a matrix in place, given its diagonal LDLT decomposition.
+pub fn solve_transpose_in_place_req<E: Entity>(
+    cholesky_dimension: usize,
+    rhs_ncols: usize,
+    parallelism: Parallelism,
+) -> Result<StackReq, SizeOverflow> {
+    let _ = cholesky_dimension;
+    let _ = rhs_ncols;
+    let _ = parallelism;
+    Ok(StackReq::default())
+}
+
+/// Computes the size and alignment of required workspace for solving a linear system defined by
+/// the transpose of a matrix out of place, given its diagonal LDLT decomposition.
+pub fn solve_transpose_req<E: Entity>(
+    cholesky_dimension: usize,
+    rhs_ncols: usize,
+    parallelism: Parallelism,
+) -> Result<StackReq, SizeOverflow> {
+    let _ = cholesky_dimension;
+    let _ = rhs_ncols;
+    let _ = parallelism;
+    Ok(StackReq::default())
+}
+
 /// Given the Cholesky factors of a matrix $A$ and a matrix $B$ stored in `rhs`, this function
 /// computes the solution of the linear system:
 /// $$\text{Op}_A(A)X = B.$$
@@ -11,6 +63,16 @@ use assert2::assert;
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `rhs`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factors` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_in_place_req`]).
 #[track_caller]
 pub fn solve_in_place_with_conj<E: ComplexField>(
     cholesky_factors: MatRef<'_, E>,
@@ -60,6 +122,16 @@ pub fn solve_in_place_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `rhs`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factors` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_transpose_in_place_req`]).
 #[track_caller]
 pub fn solve_transpose_in_place_with_conj<E: ComplexField>(
     cholesky_factors: MatRef<'_, E>,
@@ -88,6 +160,17 @@ pub fn solve_transpose_in_place_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `dst`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factors` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+/// * `dst` must have `n` rows, and the same number of columns as `rhs`.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_transpose_req`]).
 #[track_caller]
 pub fn solve_transpose_with_conj<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -109,6 +192,17 @@ pub fn solve_transpose_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `dst`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factors` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+/// * `dst` must have `n` rows, and the same number of columns as `rhs`.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_req`]).
 #[track_caller]
 pub fn solve_with_conj<E: ComplexField>(
     dst: MatMut<'_, E>,
diff --git a/faer-cholesky/src/ldlt_diagonal/update.rs b/faer-cholesky/src/ldlt_diagonal/update.rs
index c9ea3ba9..94e279b6 100644
--- a/faer-cholesky/src/ldlt_diagonal/update.rs
+++ b/faer-cholesky/src/ldlt_diagonal/update.rs
@@ -637,6 +637,14 @@ impl<'a, E: ComplexField> RankRUpdate<'a, E> {
 ///
 /// The matrix $W$ and the vector $\alpha$ are clobbered, meaning that the values they contain after
 /// the function is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+/// * `cholesky_factors` must be square of dimension `n`.
+/// * `w` must have `n` rows.
+/// * `alpha` must have one column.
+/// * `alpha` must have the same number of rows as the number of columns in `w`.
 #[track_caller]
 pub fn rank_r_update_clobber<E: ComplexField>(
     cholesky_factors: MatMut<'_, E>,
@@ -726,7 +734,9 @@ pub(crate) fn rank_update_indices(
 pub fn delete_rows_and_cols_clobber_req<E: Entity>(
     dim: usize,
     number_of_rows_to_remove: usize,
+    parallelism: Parallelism,
 ) -> Result<StackReq, SizeOverflow> {
+    let _ = parallelism;
     let r = number_of_rows_to_remove;
     StackReq::try_all_of([temp_mat_req::<E>(dim, r)?, temp_mat_req::<E>(r, 1)?])
 }
@@ -740,12 +750,22 @@ pub fn delete_rows_and_cols_clobber_req<E: Entity>(
 ///
 /// The indices are clobbered, meaning that the values that the slice contains after the function
 /// is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if the matrix is not square, the indices are out of bounds, or the list of indices
+/// contains duplicates.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`delete_rows_and_cols_clobber_req`]).
 #[track_caller]
 pub fn delete_rows_and_cols_clobber<E: ComplexField>(
     cholesky_factors: MatMut<'_, E>,
     indices: &mut [usize],
+    parallelism: Parallelism,
     stack: DynStack<'_>,
 ) {
+    let _ = parallelism;
     let n = cholesky_factors.nrows();
     let r = indices.len();
     assert!(cholesky_factors.ncols() == n);
@@ -824,6 +844,13 @@ pub fn insert_rows_and_cols_clobber_req<E: Entity>(
 ///
 /// The inserted matrix is clobbered, meaning that the values it contains after the function
 /// is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if the index is out of bounds or the matrix dimensions are invalid.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`insert_rows_and_cols_clobber_req`]).
 #[track_caller]
 pub fn insert_rows_and_cols_clobber<E: ComplexField>(
     cholesky_factors_extended: MatMut<'_, E>,
diff --git a/faer-cholesky/src/llt/compute.rs b/faer-cholesky/src/llt/compute.rs
index 90ec8964..df9c5c04 100644
--- a/faer-cholesky/src/llt/compute.rs
+++ b/faer-cholesky/src/llt/compute.rs
@@ -185,8 +185,10 @@ fn cholesky_in_place_impl<E: ComplexField>(
 ///
 /// # Panics
 ///
-/// - Panics if the input matrix is not square.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// Panics if the input matrix is not square.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`cholesky_in_place_req`]).
 #[track_caller]
 #[inline]
 pub fn cholesky_in_place<E: ComplexField>(
diff --git a/faer-cholesky/src/llt/inverse.rs b/faer-cholesky/src/llt/inverse.rs
index cadc33ea..a8f441b0 100644
--- a/faer-cholesky/src/llt/inverse.rs
+++ b/faer-cholesky/src/llt/inverse.rs
@@ -65,7 +65,9 @@ pub fn invert_lower_in_place_req<E: Entity>(
 /// # Panics
 ///
 /// - Panics if `cholesky_factor` is not a square matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`invert_lower_req`]).
 #[track_caller]
 pub fn invert_lower_in_place<E: ComplexField>(
     cholesky_factor: MatMut<'_, E>,
@@ -83,7 +85,9 @@ pub fn invert_lower_in_place<E: ComplexField>(
 ///
 /// - Panics if `cholesky_factor` is not a square matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`invert_lower_in_place_req`]).
 #[track_caller]
 pub fn invert_lower<E: ComplexField>(
     dst: MatMut<'_, E>,
diff --git a/faer-cholesky/src/llt/mod.rs b/faer-cholesky/src/llt/mod.rs
index f060f485..28818d38 100644
--- a/faer-cholesky/src/llt/mod.rs
+++ b/faer-cholesky/src/llt/mod.rs
@@ -243,8 +243,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [1, 3],
+                Parallelism::Rayon(8),
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::Rayon(8)).unwrap(),
                 )),
             );
 
@@ -270,8 +271,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [0, 2],
+                Parallelism::Rayon(8),
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::Rayon(8)).unwrap(),
                 )),
             );
 
@@ -297,8 +299,9 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [0, 2, 3],
+                Parallelism::Rayon(8),
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<E>(n, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<E>(n, r, Parallelism::Rayon(8)).unwrap(),
                 )),
             );
 
@@ -335,8 +338,10 @@ mod tests {
             delete_rows_and_cols_clobber(
                 a.as_mut(),
                 &mut [position, position + 1],
+                Parallelism::Rayon(8),
                 DynStack::new(&mut GlobalMemBuffer::new(
-                    delete_rows_and_cols_clobber_req::<f64>(n + r, r).unwrap(),
+                    delete_rows_and_cols_clobber_req::<f64>(n + r, r, Parallelism::Rayon(8))
+                        .unwrap(),
                 )),
             );
 
diff --git a/faer-cholesky/src/llt/reconstruct.rs b/faer-cholesky/src/llt/reconstruct.rs
index e2fae8c1..b6fd7350 100644
--- a/faer-cholesky/src/llt/reconstruct.rs
+++ b/faer-cholesky/src/llt/reconstruct.rs
@@ -30,6 +30,9 @@ pub fn reconstruct_lower_in_place_req<E: Entity>(
 ///
 /// - Panics if `cholesky_factor` is not a square matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`reconstruct_lower_req`]).
 #[track_caller]
 pub fn reconstruct_lower<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -60,6 +63,9 @@ pub fn reconstruct_lower<E: ComplexField>(
 /// # Panics
 ///
 /// - Panics if `cholesky_factor` is not a square matrix.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`reconstruct_lower_in_place_req`]).
 #[track_caller]
 pub fn reconstruct_lower_in_place<E: ComplexField>(
     cholesky_factor: MatMut<'_, E>,
diff --git a/faer-cholesky/src/llt/solve.rs b/faer-cholesky/src/llt/solve.rs
index 6a6a1867..df946100 100644
--- a/faer-cholesky/src/llt/solve.rs
+++ b/faer-cholesky/src/llt/solve.rs
@@ -62,31 +62,41 @@ pub fn solve_transpose_req<E: Entity>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `rhs`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factor` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_in_place_req`]).
 #[track_caller]
 pub fn solve_in_place_with_conj<E: ComplexField>(
-    cholesky_factors: MatRef<'_, E>,
+    cholesky_factor: MatRef<'_, E>,
     conj_lhs: Conj,
     rhs: MatMut<'_, E>,
     parallelism: Parallelism,
     stack: DynStack<'_>,
 ) {
     let _ = &stack;
-    let n = cholesky_factors.nrows();
+    let n = cholesky_factor.nrows();
 
-    assert!(cholesky_factors.nrows() == cholesky_factors.ncols());
+    assert!(cholesky_factor.nrows() == cholesky_factor.ncols());
     assert!(rhs.nrows() == n);
 
     let mut rhs = rhs;
 
     solve::solve_lower_triangular_in_place_with_conj(
-        cholesky_factors,
+        cholesky_factor,
         conj_lhs,
         rhs.rb_mut(),
         parallelism,
     );
 
     solve::solve_upper_triangular_in_place_with_conj(
-        cholesky_factors.transpose(),
+        cholesky_factor.transpose(),
         conj_lhs.compose(Conj::Yes),
         rhs.rb_mut(),
         parallelism,
@@ -100,10 +110,21 @@ pub fn solve_in_place_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `dst`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factor` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+/// * `dst` must have `n` rows, and the same number of columns as `rhs`.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_req`]).
 #[track_caller]
 pub fn solve_with_conj<E: ComplexField>(
     dst: MatMut<'_, E>,
-    cholesky_factors: MatRef<'_, E>,
+    cholesky_factor: MatRef<'_, E>,
     conj_lhs: Conj,
     rhs: MatRef<'_, E>,
     parallelism: Parallelism,
@@ -111,7 +132,7 @@ pub fn solve_with_conj<E: ComplexField>(
 ) {
     let mut dst = dst;
     zipped!(dst.rb_mut(), rhs).for_each(|mut dst, src| dst.write(src.read()));
-    solve_in_place_with_conj(cholesky_factors, conj_lhs, dst, parallelism, stack)
+    solve_in_place_with_conj(cholesky_factor, conj_lhs, dst, parallelism, stack)
 }
 
 /// Given the Cholesky factor of a matrix $A$ and a matrix $B$ stored in `rhs`, this function
@@ -121,9 +142,19 @@ pub fn solve_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `rhs`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factor` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_transpose_in_place_req`]).
 #[track_caller]
 pub fn solve_transpose_in_place_with_conj<E: ComplexField>(
-    cholesky_factors: MatRef<'_, E>,
+    cholesky_factor: MatRef<'_, E>,
     conj_lhs: Conj,
     rhs: MatMut<'_, E>,
     parallelism: Parallelism,
@@ -131,7 +162,7 @@ pub fn solve_transpose_in_place_with_conj<E: ComplexField>(
 ) {
     // (L L.*).T = conj(L L.*)
     solve_in_place_with_conj(
-        cholesky_factors,
+        cholesky_factor,
         conj_lhs.compose(Conj::Yes),
         rhs,
         parallelism,
@@ -146,10 +177,21 @@ pub fn solve_transpose_in_place_with_conj<E: ComplexField>(
 /// $\text{Op}_A$ is either the identity or the conjugation depending on the value of `conj_lhs`.  
 ///
 /// The solution of the linear system is stored in `dst`.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+///
+/// * `cholesky_factor` must be square of dimension `n`.
+/// * `rhs` must have `n` rows.
+/// * `dst` must have `n` rows, and the same number of columns as `rhs`.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`solve_transpose_req`]).
 #[track_caller]
 pub fn solve_transpose_with_conj<E: ComplexField>(
     dst: MatMut<'_, E>,
-    cholesky_factors: MatRef<'_, E>,
+    cholesky_factor: MatRef<'_, E>,
     conj_lhs: Conj,
     rhs: MatRef<'_, E>,
     parallelism: Parallelism,
@@ -157,5 +199,5 @@ pub fn solve_transpose_with_conj<E: ComplexField>(
 ) {
     let mut dst = dst;
     zipped!(dst.rb_mut(), rhs).for_each(|mut dst, src| dst.write(src.read()));
-    solve_transpose_in_place_with_conj(cholesky_factors, conj_lhs, dst, parallelism, stack)
+    solve_transpose_in_place_with_conj(cholesky_factor, conj_lhs, dst, parallelism, stack)
 }
diff --git a/faer-cholesky/src/llt/update.rs b/faer-cholesky/src/llt/update.rs
index deaca6e6..1bbb4a52 100644
--- a/faer-cholesky/src/llt/update.rs
+++ b/faer-cholesky/src/llt/update.rs
@@ -905,6 +905,14 @@ impl<'a, E: ComplexField> RankRUpdate<'a, E> {
 ///
 /// The matrix $W$ and the vector $\alpha$ are clobbered, meaning that the values they contain after
 /// the function is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if any of these conditions is violated:
+/// * `cholesky_factor` must be square of dimension `n`.
+/// * `w` must have `n` rows.
+/// * `alpha` must have one column.
+/// * `alpha` must have the same number of rows as the number of columns in `w`.
 #[track_caller]
 pub fn rank_r_update_clobber<E: ComplexField>(
     cholesky_factor: MatMut<'_, E>,
@@ -917,6 +925,7 @@ pub fn rank_r_update_clobber<E: ComplexField>(
     assert!(cholesky_factor.ncols() == n);
     assert!(w.nrows() == n);
     assert!(alpha.nrows() == k);
+    assert!(alpha.ncols() == 1);
 
     if n == 0 {
         return Ok(());
@@ -937,7 +946,9 @@ pub fn rank_r_update_clobber<E: ComplexField>(
 pub fn delete_rows_and_cols_clobber_req<E: Entity>(
     dim: usize,
     number_of_rows_to_remove: usize,
+    parallelism: Parallelism,
 ) -> Result<StackReq, SizeOverflow> {
+    let _ = parallelism;
     let r = number_of_rows_to_remove;
     StackReq::try_all_of([temp_mat_req::<E>(dim, r)?, temp_mat_req::<E>(r, 1)?])
 }
@@ -949,14 +960,24 @@ pub fn delete_rows_and_cols_clobber_req<E: Entity>(
 ///
 /// The result is stored in the top left corner (with dimension `n - r`) of `cholesky_factor`.
 ///
-/// The indices are clobbered, meaning that the values that the slice contains after the function
+/// The indices are clobbered, meaning that the values in the slice after the function
 /// is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if the matrix is not square, the indices are out of bounds, or the list of indices
+/// contains duplicates.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`delete_rows_and_cols_clobber_req`]).
 #[track_caller]
 pub fn delete_rows_and_cols_clobber<E: ComplexField>(
     cholesky_factor: MatMut<'_, E>,
     indices: &mut [usize],
+    parallelism: Parallelism,
     stack: DynStack<'_>,
 ) {
+    let _ = parallelism;
     let n = cholesky_factor.nrows();
     let r = indices.len();
     assert!(cholesky_factor.ncols() == n);
@@ -1036,6 +1057,13 @@ pub fn insert_rows_and_cols_clobber_req<E: Entity>(
 ///
 /// The inserted matrix is clobbered, meaning that the values it contains after the function
 /// is called are unspecified.
+///
+/// # Panics
+///
+/// Panics if the index is out of bounds or the matrix dimensions are invalid.
+///
+/// This can also panic if the provided memory in `stack` is insufficient (see
+/// [`insert_rows_and_cols_clobber_req`]).
 #[track_caller]
 pub fn insert_rows_and_cols_clobber<E: ComplexField>(
     cholesky_factor_extended: MatMut<'_, E>,
diff --git a/faer-lu/src/full_pivoting/compute.rs b/faer-lu/src/full_pivoting/compute.rs
index b458bed8..6d914cf2 100644
--- a/faer-lu/src/full_pivoting/compute.rs
+++ b/faer-lu/src/full_pivoting/compute.rs
@@ -1036,21 +1036,28 @@ fn lu_in_place_unblocked<E: ComplexField>(
 ) -> usize {
     let m = matrix.nrows();
     let n = matrix.ncols();
+    let size = Ord::min(m, n);
 
-    debug_assert!(row_transpositions.len() == m);
-    debug_assert!(col_transpositions.len() == n);
+    debug_assert!(row_transpositions.len() == size);
+    debug_assert!(col_transpositions.len() == size);
 
     if n == 0 || m == 0 {
         return 0;
     }
 
-    let size = <usize as Ord>::min(m, n);
-
     let mut n_transpositions = 0;
 
-    let (mut max_row, mut max_col, _) = best_in_matrix(matrix.rb());
+    let (mut max_row, mut max_col, mut biggest) = best_in_matrix(matrix.rb());
 
     for k in 0..size {
+        if biggest < E::Real::zero_threshold().unwrap() {
+            for idx in k..size {
+                row_transpositions[idx] = idx;
+                col_transpositions[idx] = idx;
+            }
+            break;
+        }
+
         row_transpositions[k] = max_row;
         col_transpositions[k] = max_col;
 
@@ -1091,7 +1098,7 @@ fn lu_in_place_unblocked<E: ComplexField>(
 
         match parallelism {
             Parallelism::None => {
-                (max_row, max_col, _) = rank_one_update_and_best_in_matrix(
+                (max_row, max_col, biggest) = rank_one_update_and_best_in_matrix(
                     bottom_right,
                     bottom_left.col(k).rb(),
                     top_right.row(k).rb(),
@@ -1100,13 +1107,13 @@ fn lu_in_place_unblocked<E: ComplexField>(
             _ => {
                 let n_threads = parallelism_degree(parallelism);
 
-                let mut biggest = vec![(0_usize, 0_usize, E::Real::zero()); n_threads];
+                let mut biggest_vec = vec![(0_usize, 0_usize, E::Real::zero()); n_threads];
 
                 let lhs = bottom_left.col(k).into_const();
                 let rhs = top_right.row(k).into_const();
 
                 {
-                    let biggest = Ptr(biggest.as_mut_ptr());
+                    let biggest = Ptr(biggest_vec.as_mut_ptr());
 
                     for_each_raw(
                         n_threads,
@@ -1127,13 +1134,14 @@ fn lu_in_place_unblocked<E: ComplexField>(
                 max_row = 0;
                 max_col = 0;
                 let mut biggest_value = E::Real::zero();
-                for (row, col, value) in biggest {
+                for (row, col, value) in biggest_vec {
                     if value > biggest_value {
                         max_row = row;
                         max_col = col;
                         biggest_value = value;
                     }
                 }
+                biggest = biggest_value;
             }
         };
         max_row += k + 1;
@@ -1195,10 +1203,10 @@ fn default_disable_parallelism(m: usize, n: usize) -> bool {
 /// # Panics
 ///
 /// - Panics if the length of the row permutation slices is not equal to the number of rows of the
-///   matrix
+///   matrix.
 /// - Panics if the length of the column permutation slices is not equal to the number of columns of
-///   the matrix
-/// - Panics if the provided memory in `stack` is insufficient.
+///   the matrix.
+/// - Panics if the provided memory in `stack` is insufficient (see [`lu_in_place_req`]).
 pub fn lu_in_place<'out, E: ComplexField>(
     matrix: MatMut<'_, E>,
     row_perm: &'out mut [usize],
@@ -1216,13 +1224,15 @@ pub fn lu_in_place<'out, E: ComplexField>(
     let _ = parallelism;
     let m = matrix.nrows();
     let n = matrix.ncols();
+    let size = Ord::min(m, n);
+
     assert!(row_perm.len() == m);
     assert!(row_perm_inv.len() == m);
     assert!(col_perm.len() == n);
     assert!(col_perm_inv.len() == n);
 
-    let (mut row_transpositions, stack) = stack.make_with(m, |i| i);
-    let (mut col_transpositions, _) = stack.make_with(n, |i| i);
+    let (mut row_transpositions, stack) = stack.make_with(size, |_| 0usize);
+    let (mut col_transpositions, _) = stack.make_with(size, |_| 0usize);
 
     let n_transpositions = if matrix.row_stride().abs() < matrix.col_stride().abs() {
         lu_in_place_unblocked(
@@ -1439,4 +1449,171 @@ mod tests {
         compute_lu_row_major_generic::<c64>(random_c64, 1e-6);
         compute_lu_row_major_generic::<c32>(random_c32, 1e-2);
     }
+
+    #[test]
+    fn test_lu_c32_zeros() {
+        for (m, n) in [
+            (2, 4),
+            (2, 2),
+            (3, 3),
+            (4, 2),
+            (2, 1),
+            (4, 4),
+            (20, 20),
+            (20, 2),
+            (2, 20),
+            (40, 20),
+            (20, 40),
+        ] {
+            let mat = Mat::with_dims(m, n, |_i, _j| c32::zero());
+            for parallelism in [Parallelism::None, Parallelism::Rayon(0)] {
+                let mut mat = mat.clone();
+                let mat_orig = mat.clone();
+
+                let mut mat = mat.as_mut();
+                let mat_orig = mat_orig.as_ref();
+
+                let mut row_perm = vec![0; m];
+                let mut row_perm_inv = vec![0; m];
+                let mut col_perm = vec![0; n];
+                let mut col_perm_inv = vec![0; n];
+
+                let (_, row_perm, col_perm) = lu_in_place(
+                    mat.rb_mut(),
+                    &mut row_perm,
+                    &mut row_perm_inv,
+                    &mut col_perm,
+                    &mut col_perm_inv,
+                    parallelism,
+                    make_stack!(lu_in_place_req::<f64>(
+                        m,
+                        n,
+                        Parallelism::None,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+                let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb());
+
+                for i in 0..m {
+                    for j in 0..n {
+                        assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_lu_c32_ones() {
+        for (m, n) in [
+            (2, 4),
+            (2, 2),
+            (3, 3),
+            (4, 2),
+            (2, 1),
+            (4, 4),
+            (20, 20),
+            (20, 2),
+            (2, 20),
+            (40, 20),
+            (20, 40),
+        ] {
+            let mat = Mat::with_dims(m, n, |_i, _j| c32::one());
+            for parallelism in [Parallelism::None, Parallelism::Rayon(0)] {
+                let mut mat = mat.clone();
+                let mat_orig = mat.clone();
+
+                let mut mat = mat.as_mut();
+                let mat_orig = mat_orig.as_ref();
+
+                let mut row_perm = vec![0; m];
+                let mut row_perm_inv = vec![0; m];
+                let mut col_perm = vec![0; n];
+                let mut col_perm_inv = vec![0; n];
+
+                let (_, row_perm, col_perm) = lu_in_place(
+                    mat.rb_mut(),
+                    &mut row_perm,
+                    &mut row_perm_inv,
+                    &mut col_perm,
+                    &mut col_perm_inv,
+                    parallelism,
+                    make_stack!(lu_in_place_req::<f64>(
+                        m,
+                        n,
+                        Parallelism::None,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+                let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb());
+
+                for i in 0..m {
+                    for j in 0..n {
+                        assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_lu_c32_rank_2() {
+        for (m, n) in [
+            (2, 4),
+            (2, 2),
+            (3, 3),
+            (4, 2),
+            (2, 1),
+            (4, 4),
+            (20, 20),
+            (20, 2),
+            (2, 20),
+            (40, 20),
+            (20, 40),
+        ] {
+            let u0 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random()));
+            let v0 = Mat::with_dims(1, n, |_, _| c32::new(random(), random()));
+            let u1 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random()));
+            let v1 = Mat::with_dims(1, n, |_, _| c32::new(random(), random()));
+
+            let mat = u0 * v0 + u1 * v1;
+            for parallelism in [Parallelism::None, Parallelism::Rayon(0)] {
+                let mut mat = mat.clone();
+                let mat_orig = mat.clone();
+
+                let mut mat = mat.as_mut();
+                let mat_orig = mat_orig.as_ref();
+
+                let mut row_perm = vec![0; m];
+                let mut row_perm_inv = vec![0; m];
+                let mut col_perm = vec![0; n];
+                let mut col_perm_inv = vec![0; n];
+
+                let (_, row_perm, col_perm) = lu_in_place(
+                    mat.rb_mut(),
+                    &mut row_perm,
+                    &mut row_perm_inv,
+                    &mut col_perm,
+                    &mut col_perm_inv,
+                    parallelism,
+                    make_stack!(lu_in_place_req::<f64>(
+                        m,
+                        n,
+                        Parallelism::None,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+                let reconstructed = reconstruct_matrix(mat.rb(), row_perm.rb(), col_perm.rb());
+
+                for i in 0..m {
+                    for j in 0..n {
+                        assert!((mat_orig.read(i, j).sub(&reconstructed.read(i, j))).abs() < 1e-4);
+                    }
+                }
+            }
+        }
+    }
 }
diff --git a/faer-lu/src/full_pivoting/inverse.rs b/faer-lu/src/full_pivoting/inverse.rs
index f747e272..0d04343b 100644
--- a/faer-lu/src/full_pivoting/inverse.rs
+++ b/faer-lu/src/full_pivoting/inverse.rs
@@ -93,7 +93,7 @@ fn invert_impl<T: ComplexField>(
 /// - Panics if the row permutation doesn't have the same dimension as the matrix.
 /// - Panics if the column permutation doesn't have the same dimension as the matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]).
 #[track_caller]
 pub fn invert<T: ComplexField>(
     dst: MatMut<'_, T>,
@@ -125,7 +125,7 @@ pub fn invert<T: ComplexField>(
 /// - Panics if the LU factors are not a square matrix.
 /// - Panics if the row permutation doesn't have the same dimension as the matrix.
 /// - Panics if the column permutation doesn't have the same dimension as the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]).
 #[track_caller]
 pub fn invert_in_place<T: ComplexField>(
     lu_factors: MatMut<'_, T>,
diff --git a/faer-lu/src/full_pivoting/reconstruct.rs b/faer-lu/src/full_pivoting/reconstruct.rs
index 09d81cd7..6aaf7f44 100644
--- a/faer-lu/src/full_pivoting/reconstruct.rs
+++ b/faer-lu/src/full_pivoting/reconstruct.rs
@@ -102,7 +102,7 @@ fn reconstruct_impl<T: ComplexField>(
 /// - Panics if the column permutation doesn't have the same dimension as the number of columns of
 ///   the matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]).
 #[track_caller]
 pub fn reconstruct<T: ComplexField>(
     dst: MatMut<'_, T>,
@@ -133,7 +133,7 @@ pub fn reconstruct<T: ComplexField>(
 ///   matrix.
 /// - Panics if the column permutation doesn't have the same dimension as the number of columns of
 ///   the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]).
 #[track_caller]
 pub fn reconstruct_in_place<T: ComplexField>(
     lu_factors: MatMut<'_, T>,
diff --git a/faer-lu/src/full_pivoting/solve.rs b/faer-lu/src/full_pivoting/solve.rs
index b5c6f15c..c5dd23c1 100644
--- a/faer-lu/src/full_pivoting/solve.rs
+++ b/faer-lu/src/full_pivoting/solve.rs
@@ -165,7 +165,7 @@ pub fn solve_transpose_req<T: Entity>(
 /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]).
 pub fn solve<T: ComplexField>(
     dst: MatMut<'_, T>,
     lu_factors: MatRef<'_, T>,
@@ -202,7 +202,7 @@ pub fn solve<T: ComplexField>(
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]).
 pub fn solve_in_place<T: ComplexField>(
     lu_factors: MatRef<'_, T>,
     conj_lhs: Conj,
@@ -239,7 +239,7 @@ pub fn solve_in_place<T: ComplexField>(
 /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]).
 pub fn solve_transpose<T: ComplexField>(
     dst: MatMut<'_, T>,
     lu_factors: MatRef<'_, T>,
@@ -276,7 +276,8 @@ pub fn solve_transpose<T: ComplexField>(
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `col_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see
+///   [`solve_transpose_in_place_req`]).
 pub fn solve_transpose_in_place<T: ComplexField>(
     lu_factors: MatRef<'_, T>,
     conj_lhs: Conj,
diff --git a/faer-lu/src/partial_pivoting/compute.rs b/faer-lu/src/partial_pivoting/compute.rs
index fb392f95..bc50474b 100644
--- a/faer-lu/src/partial_pivoting/compute.rs
+++ b/faer-lu/src/partial_pivoting/compute.rs
@@ -485,8 +485,8 @@ pub fn lu_in_place_req<E: Entity>(
 /// # Panics
 ///
 /// - Panics if the length of the permutation slices is not equal to the number of rows of the
-/// matrix, or if the provided memory in `stack` is insufficient.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// matrix.
+/// - Panics if the provided memory in `stack` is insufficient (see [`lu_in_place_req`]).
 pub fn lu_in_place<'out, E: ComplexField>(
     matrix: MatMut<'_, E>,
     perm: &'out mut [usize],
diff --git a/faer-lu/src/partial_pivoting/inverse.rs b/faer-lu/src/partial_pivoting/inverse.rs
index f37460e7..e04a0e10 100644
--- a/faer-lu/src/partial_pivoting/inverse.rs
+++ b/faer-lu/src/partial_pivoting/inverse.rs
@@ -91,7 +91,7 @@ pub fn invert_req<E: Entity>(
 /// - Panics if the LU factors are not a square matrix.
 /// - Panics if the row permutation doesn't have the same dimension as the matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]).
 #[track_caller]
 pub fn invert<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -115,7 +115,7 @@ pub fn invert<E: ComplexField>(
 /// - Panics if the LU factors are not a square matrix.
 /// - Panics if the row permutation doesn't have the same dimension as the matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]).
 #[track_caller]
 pub fn invert_in_place<E: ComplexField>(
     lu_factors: MatMut<'_, E>,
diff --git a/faer-lu/src/partial_pivoting/reconstruct.rs b/faer-lu/src/partial_pivoting/reconstruct.rs
index 5ade0cfd..1b6a2acc 100644
--- a/faer-lu/src/partial_pivoting/reconstruct.rs
+++ b/faer-lu/src/partial_pivoting/reconstruct.rs
@@ -77,7 +77,7 @@ fn reconstruct_impl<E: ComplexField>(
 /// - Panics if the row permutation doesn't have the same dimension as the number of rows of the
 /// matrix.
 /// - Panics if the destination shape doesn't match the shape of the matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]).
 #[track_caller]
 pub fn reconstruct<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -98,7 +98,7 @@ pub fn reconstruct<E: ComplexField>(
 ///
 /// - Panics if the row permutation doesn't have the same dimension as the number of rows of the
 /// matrix.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]).
 #[track_caller]
 pub fn reconstruct_in_place<E: ComplexField>(
     lu_factors: MatMut<'_, E>,
diff --git a/faer-lu/src/partial_pivoting/solve.rs b/faer-lu/src/partial_pivoting/solve.rs
index 759b2a20..2f019604 100644
--- a/faer-lu/src/partial_pivoting/solve.rs
+++ b/faer-lu/src/partial_pivoting/solve.rs
@@ -162,7 +162,7 @@ pub fn solve_transpose_req<E: Entity>(
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]).
 pub fn solve<E: ComplexField>(
     dst: MatMut<'_, E>,
     lu_factors: MatRef<'_, E>,
@@ -196,7 +196,7 @@ pub fn solve<E: ComplexField>(
 /// - Panics if `lu_factors` is not a square matrix.
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]).
 pub fn solve_in_place<E: ComplexField>(
     lu_factors: MatRef<'_, E>,
     conj_lhs: Conj,
@@ -230,7 +230,7 @@ pub fn solve_in_place<E: ComplexField>(
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]).
 pub fn solve_transpose<E: ComplexField>(
     dst: MatMut<'_, E>,
     lu_factors: MatRef<'_, E>,
@@ -263,7 +263,8 @@ pub fn solve_transpose<E: ComplexField>(
 /// - Panics if `lu_factors` is not a square matrix.
 /// - Panics if `row_perm` doesn't have the same dimension as `lu_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see
+///   [`solve_transpose_in_place_req`]).
 pub fn solve_transpose_in_place<E: ComplexField>(
     lu_factors: MatRef<'_, E>,
     conj_lhs: Conj,
diff --git a/faer-qr/src/col_pivoting/compute.rs b/faer-qr/src/col_pivoting/compute.rs
index 9df10f52..08fa2f7b 100644
--- a/faer-qr/src/col_pivoting/compute.rs
+++ b/faer-qr/src/col_pivoting/compute.rs
@@ -687,7 +687,7 @@ pub fn qr_in_place_req<E: Entity>(
 /// - Panics if the block size is zero.
 /// - Panics if the length of `col_perm` and `col_perm_inv` is not equal to the number of columns
 /// of `matrix`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`qr_in_place_req`]).
 pub fn qr_in_place<'out, E: ComplexField>(
     matrix: MatMut<'_, E>,
     householder_factor: MatMut<'_, E>,
@@ -1044,4 +1044,183 @@ mod tests {
             }
         }
     }
+
+    #[test]
+    fn test_qr_c32_zeros() {
+        for parallelism in [Parallelism::None, Parallelism::Rayon(8)] {
+            for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] {
+                let mut mat = Mat::<c32>::zeros(m, n);
+                let mat_orig = mat.clone();
+                let size = m.min(n);
+                let blocksize = 8;
+                let mut householder = Mat::zeros(blocksize, size);
+                let mut perm = vec![0; n];
+                let mut perm_inv = vec![0; n];
+
+                qr_in_place(
+                    mat.as_mut(),
+                    householder.as_mut(),
+                    &mut perm,
+                    &mut perm_inv,
+                    parallelism,
+                    make_stack!(qr_in_place_req::<c32>(
+                        m,
+                        n,
+                        blocksize,
+                        parallelism,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+
+                let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref());
+                let mut qr = Mat::zeros(m, n);
+                let mut qhq = Mat::zeros(m, m);
+                matmul(
+                    qr.as_mut(),
+                    q.as_ref(),
+                    r.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                matmul(
+                    qhq.as_mut(),
+                    q.as_ref().adjoint(),
+                    q.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                for j in 0..n {
+                    for i in 0..m {
+                        assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_qr_c32_ones() {
+        for parallelism in [Parallelism::None, Parallelism::Rayon(8)] {
+            for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] {
+                let mut mat = Mat::<c32>::with_dims(m, n, |_, _| c32::one());
+                let mat_orig = mat.clone();
+                let size = m.min(n);
+                let blocksize = 8;
+                let mut householder = Mat::zeros(blocksize, size);
+                let mut perm = vec![0; n];
+                let mut perm_inv = vec![0; n];
+
+                qr_in_place(
+                    mat.as_mut(),
+                    householder.as_mut(),
+                    &mut perm,
+                    &mut perm_inv,
+                    parallelism,
+                    make_stack!(qr_in_place_req::<c32>(
+                        m,
+                        n,
+                        blocksize,
+                        parallelism,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+
+                let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref());
+                let mut qr = Mat::zeros(m, n);
+                let mut qhq = Mat::zeros(m, m);
+                matmul(
+                    qr.as_mut(),
+                    q.as_ref(),
+                    r.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                matmul(
+                    qhq.as_mut(),
+                    q.as_ref().adjoint(),
+                    q.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                for j in 0..n {
+                    for i in 0..m {
+                        assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4);
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_qr_c32_rank_2() {
+        for parallelism in [Parallelism::None, Parallelism::Rayon(8)] {
+            for (m, n) in [(2, 2), (2, 4), (4, 2), (4, 4), (63, 63)] {
+                let u0 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random()));
+                let v0 = Mat::with_dims(1, n, |_, _| c32::new(random(), random()));
+                let u1 = Mat::with_dims(m, 1, |_, _| c32::new(random(), random()));
+                let v1 = Mat::with_dims(1, n, |_, _| c32::new(random(), random()));
+
+                let mut mat = u0 * v0 + u1 * v1;
+                let mat_orig = mat.clone();
+                let size = m.min(n);
+                let blocksize = 8;
+                let mut householder = Mat::zeros(blocksize, size);
+                let mut perm = vec![0; n];
+                let mut perm_inv = vec![0; n];
+
+                qr_in_place(
+                    mat.as_mut(),
+                    householder.as_mut(),
+                    &mut perm,
+                    &mut perm_inv,
+                    parallelism,
+                    make_stack!(qr_in_place_req::<c32>(
+                        m,
+                        n,
+                        blocksize,
+                        parallelism,
+                        Default::default()
+                    )),
+                    Default::default(),
+                );
+
+                let (q, r) = reconstruct_factors(mat.as_ref(), householder.as_ref());
+                let mut qr = Mat::zeros(m, n);
+                let mut qhq = Mat::zeros(m, m);
+                matmul(
+                    qr.as_mut(),
+                    q.as_ref(),
+                    r.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                matmul(
+                    qhq.as_mut(),
+                    q.as_ref().adjoint(),
+                    q.as_ref(),
+                    None,
+                    c32::one(),
+                    Parallelism::Rayon(8),
+                );
+
+                for j in 0..n {
+                    for i in 0..m {
+                        assert_approx_eq!(qr.read(i, j), mat_orig.read(i, perm[j]), 1e-4);
+                    }
+                }
+            }
+        }
+    }
 }
diff --git a/faer-qr/src/col_pivoting/inverse.rs b/faer-qr/src/col_pivoting/inverse.rs
index 533bcfe0..42de4dac 100644
--- a/faer-qr/src/col_pivoting/inverse.rs
+++ b/faer-qr/src/col_pivoting/inverse.rs
@@ -19,7 +19,7 @@ use reborrow::*;
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `dst` doesn't have the same shape as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]).
 #[track_caller]
 pub fn invert<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -68,7 +68,7 @@ pub fn invert<E: ComplexField>(
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]).
 #[track_caller]
 pub fn invert_in_place<E: ComplexField>(
     qr_factors: MatMut<'_, E>,
diff --git a/faer-qr/src/col_pivoting/reconstruct.rs b/faer-qr/src/col_pivoting/reconstruct.rs
index 3083f353..1867f6a9 100644
--- a/faer-qr/src/col_pivoting/reconstruct.rs
+++ b/faer-qr/src/col_pivoting/reconstruct.rs
@@ -17,7 +17,7 @@ use reborrow::*;
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `dst` doesn't have the same shape as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]).
 #[track_caller]
 pub fn reconstruct<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -67,7 +67,7 @@ pub fn reconstruct<E: ComplexField>(
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]).
 #[track_caller]
 pub fn reconstruct_in_place<E: ComplexField>(
     qr_factors: MatMut<'_, E>,
diff --git a/faer-qr/src/col_pivoting/solve.rs b/faer-qr/src/col_pivoting/solve.rs
index f2c8254a..9e857283 100644
--- a/faer-qr/src/col_pivoting/solve.rs
+++ b/faer-qr/src/col_pivoting/solve.rs
@@ -78,7 +78,7 @@ pub fn solve_transpose_req<T: Entity>(
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]).
 #[track_caller]
 pub fn solve_in_place<T: ComplexField>(
     qr_factors: MatRef<'_, T>,
@@ -118,7 +118,8 @@ pub fn solve_in_place<T: ComplexField>(
 /// - Panics if the block size is zero.
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see
+///   [`solve_transpose_in_place_req`]).
 #[track_caller]
 pub fn solve_transpose_in_place<T: ComplexField>(
     qr_factors: MatRef<'_, T>,
@@ -159,7 +160,7 @@ pub fn solve_transpose_in_place<T: ComplexField>(
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]).
 #[track_caller]
 pub fn solve<T: ComplexField>(
     dst: MatMut<'_, T>,
@@ -202,7 +203,7 @@ pub fn solve<T: ComplexField>(
 /// - Panics if `col_perm` doesn't have the same dimension as `qr_factors`.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]).
 #[track_caller]
 pub fn solve_transpose<T: ComplexField>(
     dst: MatMut<'_, T>,
diff --git a/faer-qr/src/no_pivoting/compute.rs b/faer-qr/src/no_pivoting/compute.rs
index 839623bf..8809e095 100644
--- a/faer-qr/src/no_pivoting/compute.rs
+++ b/faer-qr/src/no_pivoting/compute.rs
@@ -331,7 +331,7 @@ fn qr_in_place_blocked<E: ComplexField>(
 /// - Panics if the number of columns of the householder factor is not equal to the minimum of the
 /// number of rows and the number of columns of the input matrix.
 /// - Panics if the block size is zero.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`qr_in_place_req`]).
 #[track_caller]
 pub fn qr_in_place<E: ComplexField>(
     matrix: MatMut<'_, E>,
diff --git a/faer-qr/src/no_pivoting/inverse.rs b/faer-qr/src/no_pivoting/inverse.rs
index 06f4d53c..89784dc7 100644
--- a/faer-qr/src/no_pivoting/inverse.rs
+++ b/faer-qr/src/no_pivoting/inverse.rs
@@ -17,7 +17,7 @@ use reborrow::*;
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `dst` doesn't have the same shape as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_req`]).
 #[track_caller]
 pub fn invert<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -62,7 +62,7 @@ pub fn invert<E: ComplexField>(
 /// - Panics if the number of columns of `householder_factor` isn't the same as the minimum of the
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`invert_in_place_req`]).
 #[track_caller]
 pub fn invert_in_place<E: ComplexField>(
     qr_factors: MatMut<'_, E>,
diff --git a/faer-qr/src/no_pivoting/reconstruct.rs b/faer-qr/src/no_pivoting/reconstruct.rs
index 7cd9efc1..c3c5b920 100644
--- a/faer-qr/src/no_pivoting/reconstruct.rs
+++ b/faer-qr/src/no_pivoting/reconstruct.rs
@@ -15,7 +15,7 @@ use reborrow::*;
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `dst` doesn't have the same shape as `qr_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_req`]).
 #[track_caller]
 pub fn reconstruct<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -60,7 +60,7 @@ pub fn reconstruct<E: ComplexField>(
 /// - Panics if the number of columns of `householder_factor` isn't the same as the minimum of the
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`reconstruct_in_place_req`]).
 #[track_caller]
 pub fn reconstruct_in_place<E: ComplexField>(
     qr_factors: MatMut<'_, E>,
diff --git a/faer-qr/src/no_pivoting/solve.rs b/faer-qr/src/no_pivoting/solve.rs
index f70489d2..fab80e2e 100644
--- a/faer-qr/src/no_pivoting/solve.rs
+++ b/faer-qr/src/no_pivoting/solve.rs
@@ -72,7 +72,7 @@ pub fn solve_transpose_req<E: Entity>(
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_in_place_req`]).
 #[track_caller]
 pub fn solve_in_place<E: ComplexField>(
     qr_factors: MatRef<'_, E>,
@@ -119,7 +119,8 @@ pub fn solve_in_place<E: ComplexField>(
 /// number of rows and the number of columns of `qr_factors`.
 /// - Panics if the block size is zero.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see
+///   [`solve_transpose_in_place_req`]).
 #[track_caller]
 pub fn solve_transpose_in_place<E: ComplexField>(
     qr_factors: MatRef<'_, E>,
@@ -173,7 +174,7 @@ pub fn solve_transpose_in_place<E: ComplexField>(
 /// - Panics if the block size is zero.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_req`]).
 #[track_caller]
 pub fn solve<E: ComplexField>(
     dst: MatMut<'_, E>,
@@ -212,7 +213,7 @@ pub fn solve<E: ComplexField>(
 /// - Panics if the block size is zero.
 /// - Panics if `rhs` doesn't have the same number of rows as the dimension of `lu_factors`.
 /// - Panics if `rhs` and `dst` don't have the same shape.
-/// - Panics if the provided memory in `stack` is insufficient.
+/// - Panics if the provided memory in `stack` is insufficient (see [`solve_transpose_req`]).
 #[track_caller]
 pub fn solve_transpose<E: ComplexField>(
     dst: MatMut<'_, E>,