From 1a3bfb632fa1331f17b0f555f749ae26e138c0e3 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 6 Mar 2024 21:41:39 +0000 Subject: [PATCH] Array display --- bench-vortex/src/lib.rs | 5 +- vortex-alp/src/array.rs | 9 +-- vortex-array/src/array/bool/mod.rs | 3 +- vortex-array/src/array/chunked/mod.rs | 12 ++-- vortex-array/src/array/constant/mod.rs | 2 +- vortex-array/src/array/mod.rs | 10 ++- vortex-array/src/array/primitive/mod.rs | 2 +- vortex-array/src/array/sparse/mod.rs | 8 +-- vortex-array/src/array/struct_/mod.rs | 3 +- vortex-array/src/array/typed/mod.rs | 2 +- vortex-array/src/array/varbin/mod.rs | 6 +- vortex-array/src/array/varbinview/mod.rs | 14 ++-- vortex-array/src/formatter.rs | 84 ++++++++++++++++++------ vortex-dict/src/dict.rs | 6 +- vortex-fastlanes/src/bitpacking/mod.rs | 10 ++- vortex-fastlanes/src/for/mod.rs | 4 +- vortex-ree/src/ree.rs | 6 +- vortex-roaring/src/boolean/mod.rs | 2 +- vortex-roaring/src/integer/mod.rs | 2 +- vortex-zigzag/src/zigzag.rs | 3 +- 20 files changed, 111 insertions(+), 82 deletions(-) diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 6ceb56714c..d5b1db0744 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -60,6 +60,7 @@ mod test { use vortex::compress::{CompressConfig, CompressCtx}; use vortex::dtype::DType; use vortex::error::{VortexError, VortexResult}; + use vortex::formatter::display_tree; use crate::enumerate_arrays; @@ -91,7 +92,6 @@ mod test { .unwrap(); } - #[ignore] #[test] fn compression_ratio() { setup_logger(); @@ -114,6 +114,9 @@ mod test { chunked.chunks().len() ); let array = chunked.boxed(); + + println!("{}", display_tree(array.as_ref())); + let cfg = CompressConfig::new( HashSet::from_iter(enumerate_arrays().iter().map(|e| (*e).id())), HashSet::default(), diff --git a/vortex-alp/src/array.rs b/vortex-alp/src/array.rs index 978af89680..7ebfc5a001 100644 --- a/vortex-alp/src/array.rs +++ b/vortex-alp/src/array.rs @@ -140,12 +140,9 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for ALPArray { impl ArrayDisplay for ALPArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln(format!("exponents: {:?}", self.exponents()))?; - if let Some(p) = self.patches() { - f.writeln("patches:")?; - f.indent(|indent| indent.array(p.as_ref()))?; - } - f.indent(|indent| indent.array(self.encoded())) + f.property("exponents", format!("{:?}", self.exponents()))?; + f.child("encoded", self.encoded())?; + f.maybe_child("patches", self.patches()) } } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 65cd19be96..2899dbd324 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -179,7 +179,8 @@ impl ArrayDisplay for BoolArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { let true_count = self.stats().get_or_compute_or(0usize, &Stat::TrueCount); let false_count = self.len() - true_count; - f.writeln(format!("n_true: {}, n_false: {}", true_count, false_count)) + f.property("n_true", true_count)?; + f.property("n_false", false_count) } } diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index 6618b5a206..27413d2a73 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -193,14 +193,10 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for ChunkedArray { impl ArrayDisplay for ChunkedArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("chunks:")?; - f.indent(|indent| { - for chunk in self.chunks() { - indent - .new_total_size(chunk.nbytes(), |new_total| new_total.array(chunk.as_ref()))?; - } - Ok(()) - }) + for (i, c) in self.chunks().iter().enumerate() { + f.child(&format!("{}", i), c.as_ref())? + } + Ok(()) } } diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index 9becc699c9..093a93cd96 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -111,7 +111,7 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for ConstantArray { impl ArrayDisplay for ConstantArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln(format!("{}", self.scalar())) + f.property("scalar", self.scalar()) } } diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index e364fa2765..941e01dedb 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -18,7 +18,7 @@ use crate::compress::EncodingCompression; use crate::compute::ArrayCompute; use crate::dtype::{DType, Nullability}; use crate::error::{VortexError, VortexResult}; -use crate::formatter::{ArrayDisplay, ArrayFormatter}; +use crate::formatter::ArrayDisplay; use crate::serde::{ArraySerde, EncodingSerde}; use crate::stats::Stats; @@ -187,6 +187,12 @@ impl<'a> From<&'a dyn Array> for ArrayKind<'a> { impl Display for dyn Array { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - ArrayFormatter::new(f, "".to_string(), self.nbytes()).array(self) + write!( + f, + "{}({}, len={})", + self.encoding().id(), + self.dtype(), + self.len() + ) } } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index ee2ae84606..63d6798a2a 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -294,7 +294,7 @@ impl FromIterator> for PrimitiveArray { impl ArrayDisplay for PrimitiveArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { match_each_native_ptype!(self.ptype(), |$P| { - f.writeln(format!("{:?}{}", + f.property("values", format!("{:?}{}", &self.buffer().typed_data::<$P>()[..min(10, self.len())], if self.len() > 10 { "..." } else { "" })) }) diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index ab757dff4f..6a0e7ad943 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -199,11 +199,9 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for SparseArray { impl ArrayDisplay for SparseArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln(format!("offset: {}", self.indices_offset()))?; - f.writeln("indices:")?; - f.indent(|indented| indented.array(self.indices()))?; - f.writeln("values:")?; - f.indent(|indented| indented.array(self.values())) + f.property("offset", self.indices_offset())?; + f.child("indices", self.indices())?; + f.child("values", self.values()) } } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index d007925194..fe5b552b6c 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -198,8 +198,7 @@ impl ArrayDisplay for StructArray { unreachable!() }; for (name, field) in n.iter().zip(self.fields()) { - f.writeln(format!("{}:", &**name))?; - f.indent(|indented| indented.array(field.as_ref()))?; + f.child(name, field.as_ref())?; } Ok(()) } diff --git a/vortex-array/src/array/typed/mod.rs b/vortex-array/src/array/typed/mod.rs index 30ce3041fb..0a7515af6d 100644 --- a/vortex-array/src/array/typed/mod.rs +++ b/vortex-array/src/array/typed/mod.rs @@ -146,7 +146,7 @@ impl Encoding for TypedEncoding { impl ArrayDisplay for TypedArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.indent(|indented| indented.array(self.untyped_array())) + f.child("untyped", self.untyped_array()) } } diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index e2e60c29d4..0f7a24fccb 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -320,10 +320,8 @@ impl Encoding for VarBinEncoding { impl ArrayDisplay for VarBinArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("offsets:")?; - f.indent(|ind| ind.array(self.offsets()))?; - f.writeln("bytes:")?; - f.indent(|ind| ind.array(self.bytes())) + f.child("offsets", self.offsets())?; + f.child("bytes", self.bytes()) } } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index fd5ad7a6c5..c731684249 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -339,15 +339,11 @@ impl Encoding for VarBinViewEncoding { impl ArrayDisplay for VarBinViewArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("views:")?; - f.indent(|ind| ind.array(self.views()))?; - f.writeln("data:")?; - f.indent(|ind| { - for d in self.data() { - ind.array(d.as_ref())?; - } - Ok(()) - }) + f.child("views", self.views())?; + for (i, d) in self.data().iter().enumerate() { + f.child(&format!("data_{}", i), d.as_ref())?; + } + Ok(()) } } diff --git a/vortex-array/src/formatter.rs b/vortex-array/src/formatter.rs index 9126a0cfc3..543ee7557f 100644 --- a/vortex-array/src/formatter.rs +++ b/vortex-array/src/formatter.rs @@ -1,22 +1,45 @@ -use std::fmt::Formatter; +use std::fmt; +use std::fmt::{Display, Write}; use humansize::{format_size, DECIMAL}; use crate::array::Array; pub trait ArrayDisplay { - fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result; + fn fmt(&self, fmt: &'_ mut ArrayFormatter) -> fmt::Result; +} + +pub struct ArrayFormatterWrapper<'a>(&'a dyn Array); + +impl<'a> ArrayFormatterWrapper<'a> { + pub fn new(array: &'a dyn Array) -> ArrayFormatterWrapper<'a> { + ArrayFormatterWrapper(array) + } +} + +impl<'a, 'b: 'a> Display for ArrayFormatterWrapper<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let array = self.0; + let mut array_fmt = ArrayFormatter::new(fmt, "".to_string(), array.nbytes()); + array_fmt.child("root", array) + } +} + +pub fn display_tree(array: &dyn Array) -> String { + let mut string = String::new(); + write!(string, "{}", ArrayFormatterWrapper(array)).unwrap(); + string } pub struct ArrayFormatter<'a, 'b: 'a> { - fmt: &'a mut Formatter<'b>, + fmt: &'a mut fmt::Formatter<'b>, indent: String, total_size: usize, } impl<'a, 'b: 'a> ArrayFormatter<'a, 'b> { - pub fn new( - fmt: &'a mut Formatter<'b>, + fn new( + fmt: &'a mut fmt::Formatter<'b>, indent: String, total_size: usize, ) -> ArrayFormatter<'a, 'b> { @@ -27,25 +50,34 @@ impl<'a, 'b: 'a> ArrayFormatter<'a, 'b> { } } - pub fn array(&mut self, array: &dyn Array) -> std::fmt::Result { - self.writeln(format!( - "{}({}), len={}, nbytes={} ({:.2}%)", - array.encoding().id(), - array.dtype(), - array.len(), + pub fn property(&mut self, name: &str, value: T) -> fmt::Result { + writeln!(self.fmt, "{}{}: {}", self.indent, name, value) + } + + pub fn child(&mut self, name: &str, array: &dyn Array) -> fmt::Result { + writeln!( + self.fmt, + "{}{}: {} nbytes={} ({:.2}%)", + self.indent, + name, + array, format_size(array.nbytes(), DECIMAL), 100f64 * array.nbytes() as f64 / self.total_size as f64 - ))?; - ArrayDisplay::fmt(array, self) + )?; + self.indent(|indent| ArrayDisplay::fmt(array, indent)) } - pub fn writeln>(&mut self, str: T) -> std::fmt::Result { - writeln!(self.fmt, "{}{}", self.indent, str.as_ref()) + pub fn maybe_child(&mut self, name: &str, array: Option<&dyn Array>) -> fmt::Result { + if let Some(array) = array { + self.child(&format!("{}?", name), array) + } else { + writeln!(self.fmt, "{}{}: None", self.indent, name) + } } - pub fn indent(&mut self, indented: F) -> std::fmt::Result + fn indent(&mut self, indented: F) -> fmt::Result where - F: FnOnce(&mut ArrayFormatter) -> std::fmt::Result, + F: FnOnce(&mut ArrayFormatter) -> fmt::Result, { let original_ident = self.indent.clone(); self.indent += " "; @@ -54,9 +86,9 @@ impl<'a, 'b: 'a> ArrayFormatter<'a, 'b> { res } - pub fn new_total_size(&mut self, total: usize, new_total: F) -> std::fmt::Result + pub fn new_total_size(&mut self, total: usize, new_total: F) -> fmt::Result where - F: FnOnce(&mut ArrayFormatter) -> std::fmt::Result, + F: FnOnce(&mut ArrayFormatter) -> fmt::Result, { let original_total = self.total_size; self.total_size = total; @@ -69,10 +101,20 @@ impl<'a, 'b: 'a> ArrayFormatter<'a, 'b> { #[cfg(test)] mod test { use crate::array::ArrayRef; + use crate::formatter::display_tree; + + #[test] + fn display_primitive() { + let arr: ArrayRef = (0..100).collect::>().into(); + assert_eq!( + format!("{}", arr), + "vortex.primitive(signed_int(32), len=100)" + ); + } #[test] - fn primitive_array() { + fn tree_display_primitive() { let arr: ArrayRef = (0..100).collect::>().into(); - assert_eq!(format!("{}", arr), "vortex.primitive(signed_int(32)), len=100, nbytes=400 B (100.00%)\n[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]...\n") + assert_eq!(display_tree(arr.as_ref()), "root: vortex.primitive(signed_int(32), len=100) nbytes=400 B (100.00%)\n values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]...\n") } } diff --git a/vortex-dict/src/dict.rs b/vortex-dict/src/dict.rs index 8cb0cbcd54..e447d100ee 100644 --- a/vortex-dict/src/dict.rs +++ b/vortex-dict/src/dict.rs @@ -103,10 +103,8 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for DictArray { impl ArrayDisplay for DictArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("dict:")?; - f.indent(|indent| indent.array(self.dict()))?; - f.writeln("codes:")?; - f.indent(|indent| indent.array(self.codes())) + f.child("values", self.dict())?; + f.child("codes", self.codes()) } } diff --git a/vortex-fastlanes/src/bitpacking/mod.rs b/vortex-fastlanes/src/bitpacking/mod.rs index 785479a1af..a356dabddd 100644 --- a/vortex-fastlanes/src/bitpacking/mod.rs +++ b/vortex-fastlanes/src/bitpacking/mod.rs @@ -150,12 +150,10 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for BitPackedArray { impl ArrayDisplay for BitPackedArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln(format!("packed: u{}", self.bit_width()))?; - if let Some(p) = self.patches() { - f.writeln("patches:")?; - f.indent(|indent| indent.array(p.as_ref()))?; - } - f.array(self.encoded()) + f.property("packed", format!("u{}", self.bit_width()))?; + f.child("encoded", self.encoded())?; + f.maybe_child("patches", self.patches())?; + f.maybe_child("validity", self.validity()) } } diff --git a/vortex-fastlanes/src/for/mod.rs b/vortex-fastlanes/src/for/mod.rs index 53f3f7fed4..0ffc717f1f 100644 --- a/vortex-fastlanes/src/for/mod.rs +++ b/vortex-fastlanes/src/for/mod.rs @@ -116,8 +116,8 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for FoRArray { impl ArrayDisplay for FoRArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln(format!("reference: {}", self.reference))?; - f.indent(|indent| indent.array(self.child())) + f.property("reference", self.reference())?; + f.child("shifted", self.child()) } } diff --git a/vortex-ree/src/ree.rs b/vortex-ree/src/ree.rs index e3764f0c95..57b302c94f 100644 --- a/vortex-ree/src/ree.rs +++ b/vortex-ree/src/ree.rs @@ -247,10 +247,8 @@ impl Encoding for REEEncoding { impl ArrayDisplay for REEArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("values:")?; - f.indent(|indented| indented.array(self.values()))?; - f.writeln("ends:")?; - f.indent(|indented| indented.array(self.ends())) + f.child("values", self.values())?; + f.child("ends", self.ends()) } } diff --git a/vortex-roaring/src/boolean/mod.rs b/vortex-roaring/src/boolean/mod.rs index 4e74bd48ed..9c11cdd256 100644 --- a/vortex-roaring/src/boolean/mod.rs +++ b/vortex-roaring/src/boolean/mod.rs @@ -126,7 +126,7 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for RoaringBoolArray { impl ArrayDisplay for RoaringBoolArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.indent(|indent| indent.writeln(format!("{:?}", self.bitmap()))) + f.property("bitmap", format!("{:?}", self.bitmap())) } } diff --git a/vortex-roaring/src/integer/mod.rs b/vortex-roaring/src/integer/mod.rs index eab6dfdb08..b2be9ae685 100644 --- a/vortex-roaring/src/integer/mod.rs +++ b/vortex-roaring/src/integer/mod.rs @@ -128,7 +128,7 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for RoaringIntArray { impl ArrayDisplay for RoaringIntArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.indent(|indent| indent.writeln(format!("{:?}", self.bitmap()))) + f.property("bitmap", format!("{:?}", self.bitmap())) } } diff --git a/vortex-zigzag/src/zigzag.rs b/vortex-zigzag/src/zigzag.rs index 08ebace534..aed91579ee 100644 --- a/vortex-zigzag/src/zigzag.rs +++ b/vortex-zigzag/src/zigzag.rs @@ -116,8 +116,7 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for ZigZagArray { impl ArrayDisplay for ZigZagArray { fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.writeln("zigzag:")?; - f.indent(|indent| indent.array(self.encoded.as_ref())) + f.child("zigzag", self.encoded()) } }