From ebbe731d6f04136b0c306c415e9d5fdaff0730ef Mon Sep 17 00:00:00 2001 From: Jacob Mischka Date: Fri, 8 Oct 2021 20:51:00 -0500 Subject: [PATCH] Format columns with decimal alignment with constraints only once (#92) Instead of formatting the cells in order to compute their widths, then constraining the maximum width, then formatting the cells again for display, we now only format the cells once, in full, and pass along the width constraints. By doing so, we have more knowledge about the column width at format time, and we're able to align decimals. Closes #87 --- src/datatype.rs | 77 +++++++++++++++++++++++++++++++++-------- src/main.rs | 92 ++++++++++++++++++++----------------------------- 2 files changed, 100 insertions(+), 69 deletions(-) diff --git a/src/datatype.rs b/src/datatype.rs index d26e5c9..471de48 100644 --- a/src/datatype.rs +++ b/src/datatype.rs @@ -111,22 +111,79 @@ pub fn infer_type_from_string(text: &str) -> ValueType { } } -pub fn trunc_strings(vec_col: &[&str], width: usize) -> Vec { +pub fn format_strings( + vec_col: &[&str], + lower_column_width: usize, + upper_column_width: usize, +) -> Vec { let ellipsis = '\u{2026}'; - vec_col + + let strings_and_fracts: Vec<(String, usize, usize)> = vec_col .iter() .map(|&string| format_if_na(string)) - // add .map(|string| format_if_num(&string)) .map(|string| { + // the string, and the length of its fractional digits if any + let (lhs, rhs) = if is_double(&string) { + let mut split = string.split('.'); + ( + split.next().map(|lhs| lhs.len()).unwrap_or_default(), + split.next().map(|rhs| rhs.len()).unwrap_or_default(), + ) + } else { + (0, 0) + }; + (string, lhs, rhs) + }) + .collect(); + + let max_fract: usize = strings_and_fracts + .iter() + .map(|(_, _, fract)| *fract) + .max() + .unwrap_or_default(); + let max_whole: usize = strings_and_fracts + .iter() + .map(|(_, whole, _)| *whole) + .max() + .unwrap_or_default(); + + let strings_and_widths: Vec<(String, usize)> = strings_and_fracts + .into_iter() + .map(|(mut string, whole, fract)| { + if max_fract > 0 && is_double(&string) { + if whole < max_whole { + let mut s = String::new(); + s.push_str(&" ".repeat(max_whole - whole)); + s.push_str(&string); + string = s; + } + + string.push_str(&" ".repeat(max_fract - fract)); + } let len = string.chars().count(); - if len > width { - let (rv, _) = string.unicode_truncate(width - 1); + // the string and its length + (string, len) + }) + .collect(); + + let max_width: usize = strings_and_widths + .iter() + .map(|(_, width)| *width) + .max() + .unwrap_or_default() + .clamp(lower_column_width, upper_column_width); + + strings_and_widths + .into_iter() + .map(|(string, len)| { + if len > max_width { + let (rv, _) = string.unicode_truncate(max_width - 1); let spacer: &str = &" "; let string_and_ellipses = [rv.to_string(), ellipsis.to_string()].join(""); [string_and_ellipses, spacer.to_string()].join("") } else { - let add_space = width - len + 1; + let add_space = max_width - len + 1; let borrowed_string: &str = &" ".repeat(add_space); [string, "".to_string()].join(borrowed_string) } @@ -134,14 +191,6 @@ pub fn trunc_strings(vec_col: &[&str], width: usize) -> Vec { .collect() } -pub fn header_len_str(vec_col: &[&str]) -> Vec { - vec_col - .iter() - .map(|&string| format_if_num(&string)) - .map(|string| string.chars().count()) - .collect::>() -} - pub fn format_if_na(text: &str) -> String { // todo add repeat strings for NA let missing_string_value = "NA"; diff --git a/src/main.rs b/src/main.rs index 0b36100..bb1f962 100644 --- a/src/main.rs +++ b/src/main.rs @@ -208,10 +208,10 @@ fn main() { let lower_column_width_defined = !(opt.lower_column_width == 2); let upper_column_width_defined = !(opt.lower_column_width == 20); let lower_column_width = match (&config, lower_column_width_defined) { - (Some(x), false) => &x.lower_column_width, - (Some(_x), true) => &opt.lower_column_width, - (None, false) => &opt.lower_column_width, - (None, true) => &opt.lower_column_width, + (Some(x), false) => x.lower_column_width, + (Some(_x), true) => opt.lower_column_width, + (None, false) => opt.lower_column_width, + (None, true) => opt.lower_column_width, }; let lower_column_width = if lower_column_width.to_owned() < 2 { panic!("lower-column-width must be larger than 2") @@ -219,10 +219,10 @@ fn main() { lower_column_width }; let upper_column_width = match (&config, upper_column_width_defined) { - (Some(x), false) => &x.upper_column_width, - (Some(_x), true) => &opt.upper_column_width, - (None, false) => &opt.upper_column_width, - (None, true) => &opt.upper_column_width, + (Some(x), false) => x.upper_column_width, + (Some(_x), true) => opt.upper_column_width, + (None, false) => opt.upper_column_width, + (None, true) => opt.upper_column_width, }; let upper_column_width = if upper_column_width <= lower_column_width { panic!("upper-column-width must be larger than lower-column-width") @@ -366,41 +366,11 @@ fn main() { } // vector of formatted values - let mut vf: Vec> = vec![vec!["#".to_string(); rows as usize]; cols as usize]; + let vf: Vec> = v + .iter() + .map(|col| datatype::format_strings(col, lower_column_width, upper_column_width)) + .collect(); - // get max width in columns - let mut col_largest_width = Vec::new(); - for column in &v { - let size: usize = datatype::header_len_str(&column).into_iter().max().unwrap(); - col_largest_width.push(size); - } - if debug_mode { - println!("{:?}", "col_largest_width"); - println!("{:?}", col_largest_width); - } - - // column width must be between the specified sizes - col_largest_width.iter_mut().for_each(|width| { - *width = (*width).clamp(lower_column_width.to_owned(), upper_column_width.to_owned()) - }); - - if debug_mode { - println!("{:?}", "col_largest_width post-proc"); - println!("{:?}", col_largest_width); - } - - // format datatypes spaces - // let mut vec_format_datatypes: Vec<_> = vec!["#"; cols as usize]; - //for i in 0..cols { - // let add_space = col_largest_width[i] - vec_datatypes[i].len(); - // let borrowed_string = " ".repeat(add_space); - // let string = vec_datatypes[i].to_string(); - //} - - // make vector of formatted values - for i in 0..cols { - vf[i] = datatype::trunc_strings(&v[i], col_largest_width[i]); - } if debug_mode { println!("{:?}", "Transposed Vector of Elements"); println!("{:?}", v); @@ -656,7 +626,11 @@ mod tests { let col_largest_width_post_proc: Vec = vec![16, 13, 4, 10]; let mut vf: Vec> = vec![vec!["#".to_string(); 13 as usize]; 4 as usize]; for i in 0..col_largest_width_post_proc.len() { - vf[i] = datatype::trunc_strings(&v[i], col_largest_width_post_proc[i]); + vf[i] = datatype::format_strings( + &v[i], + col_largest_width_post_proc[i], + col_largest_width_post_proc[i], + ); } assert_eq!( @@ -680,16 +654,16 @@ mod tests { ], [ "value ", - "0.00000001 ", - "0.0000001 ", - "0.000001 ", - "0.00001 ", - "0.0001 ", - "0.001 ", - "0.01 ", - "0.1 ", - "1 ", - "10 ", + " 0.00000001 ", + " 0.0000001 ", + " 0.000001 ", + " 0.00001 ", + " 0.0001 ", + " 0.001 ", + " 0.01 ", + " 0.1 ", + " 1 ", + " 10 ", "100 ", "NA ", "2/ 2.5 Gallon " @@ -729,7 +703,11 @@ mod tests { let col_largest_width_post_proc: Vec = vec![4, 4, 4, 4]; let mut vf: Vec> = vec![vec!["#".to_string(); 3 as usize]; 4 as usize]; for i in 0..col_largest_width_post_proc.len() { - vf[i] = datatype::trunc_strings(&v[i], col_largest_width_post_proc[i]); + vf[i] = datatype::format_strings( + &v[i], + col_largest_width_post_proc[i], + col_largest_width_post_proc[i], + ); } assert_eq!( @@ -757,7 +735,11 @@ mod tests { let col_largest_width_post_proc: Vec = vec![7, 10, 20, 7, 7, 7, 7]; let mut vf: Vec> = vec![vec!["#".to_string(); 2 as usize]; 7 as usize]; for i in 0..col_largest_width_post_proc.len() { - vf[i] = datatype::trunc_strings(&v[i], col_largest_width_post_proc[i]); + vf[i] = datatype::format_strings( + &v[i], + col_largest_width_post_proc[i], + col_largest_width_post_proc[i], + ); } assert_eq!(