diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 5f6868d8cb..07d471ed8c 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -35,7 +35,7 @@ Vortex array:
    >>> parquet = pq.read_table("_static/example.parquet")
    >>> vtx = vortex.array(parquet)
    >>> vtx.nbytes
-   141024
+   141070
 
 Compress
 ^^^^^^^^
@@ -46,9 +46,9 @@ Use :func:`~vortex.encoding.compress` to compress the Vortex array and check the
 
    >>> cvtx = vortex.compress(vtx)
    >>> cvtx.nbytes
-   15243
+   17780
    >>> cvtx.nbytes / vtx.nbytes
-   0.108...
+   0.126...
 
 Vortex uses nearly ten times fewer bytes than Arrow. Fewer bytes means more of your data fits in
 cache and RAM.
diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs
index 7c9fd8c6ff..012b3bcf30 100644
--- a/pyvortex/src/array.rs
+++ b/pyvortex/src/array.rs
@@ -520,10 +520,10 @@ impl PyArray {
     ///
     ///     >>> arr = vortex.array([1, 2, None, 3])
     ///     >>> print(arr.tree_display())
-    ///     root: vortex.primitive(0x03)(i64?, len=4) nbytes=33 B (100.00%)
+    ///     root: vortex.primitive(0x03)(i64?, len=4) nbytes=36 B (100.00%)
     ///       metadata: PrimitiveMetadata { validity: Array }
     ///       buffer: 32 B
-    ///       validity: vortex.bool(0x02)(bool, len=4) nbytes=1 B (3.03%)
+    ///       validity: vortex.bool(0x02)(bool, len=4) nbytes=3 B (8.33%)
     ///         metadata: BoolMetadata { validity: NonNullable, first_byte_bit_offset: 0 }
     ///         buffer: 1 B
     ///     <BLANKLINE>
diff --git a/uv.lock b/uv.lock
index 92847e54cc..e7f4ac4f72 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1003,7 +1003,7 @@ wheels = [
 
 [[package]]
 name = "vortex-array"
-version = "0.13.1"
+version = "0.21.0"
 source = { editable = "pyvortex" }
 dependencies = [
     { name = "pyarrow" },
diff --git a/vortex-array/src/metadata.rs b/vortex-array/src/metadata.rs
index d9b7ace135..4645526bcc 100644
--- a/vortex-array/src/metadata.rs
+++ b/vortex-array/src/metadata.rs
@@ -13,7 +13,6 @@ use crate::encoding::Encoding;
 /// Note that this allows us to restrict the ('static + Send + Sync) requirement to just the
 /// metadata trait, and not the entire array trait. We require 'static so that we can downcast
 /// use the Any trait.
-/// TODO(ngates): add Display
 pub trait ArrayMetadata:
     'static + Send + Sync + Debug + TrySerializeArrayMetadata + Display
 {
diff --git a/vortex-array/src/nbytes.rs b/vortex-array/src/nbytes.rs
index fd4f861061..da639c5294 100644
--- a/vortex-array/src/nbytes.rs
+++ b/vortex-array/src/nbytes.rs
@@ -11,7 +11,7 @@ impl ArrayData {
         self.encoding()
             .accept(self.as_ref(), &mut visitor)
             .vortex_expect("Failed to get nbytes from Array");
-        visitor.0
+        visitor.0 + size_of_val(self.array_metadata())
     }
 }
 
diff --git a/vortex-sampling-compressor/src/compressors/mod.rs b/vortex-sampling-compressor/src/compressors/mod.rs
index 4ed0e0fad3..6e349be801 100644
--- a/vortex-sampling-compressor/src/compressors/mod.rs
+++ b/vortex-sampling-compressor/src/compressors/mod.rs
@@ -183,13 +183,6 @@ impl<'a> CompressionTree<'a> {
         std::mem::take(&mut self.metadata)
     }
 
-    pub fn num_descendants(&self) -> usize {
-        self.children
-            .iter()
-            .filter_map(|child| child.as_ref().map(|c| c.num_descendants() + 1))
-            .sum::<usize>()
-    }
-
     #[allow(clippy::type_complexity)]
     pub fn into_parts(
         self,
diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs
index f6a595e187..058739239d 100644
--- a/vortex-sampling-compressor/src/lib.rs
+++ b/vortex-sampling-compressor/src/lib.rs
@@ -9,7 +9,7 @@ use compressors::roaring_bool::RoaringBoolCompressor;
 use compressors::roaring_int::RoaringIntCompressor;
 use compressors::struct_::StructCompressor;
 use compressors::varbin::VarBinCompressor;
-use compressors::{CompressedArray, CompressionTree, CompressorRef};
+use compressors::{CompressedArray, CompressorRef};
 use vortex_alp::{ALPEncoding, ALPRDEncoding};
 use vortex_array::array::{
     PrimitiveEncoding, SparseEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
@@ -126,16 +126,8 @@ impl Objective {
         base_size_bytes: usize,
         config: &CompressConfig,
     ) -> f64 {
-        let num_descendants = array
-            .path()
-            .as_ref()
-            .map(CompressionTree::num_descendants)
-            .unwrap_or(0) as u64;
-        let overhead_bytes = num_descendants * config.overhead_bytes_per_array;
-        let size_in_bytes = array.nbytes() as u64 + overhead_bytes;
-
         match &config.objective {
-            Objective::MinSize => (size_in_bytes as f64) / (base_size_bytes as f64),
+            Objective::MinSize => (array.nbytes() as f64) / (base_size_bytes as f64),
         }
     }
 }
@@ -153,8 +145,6 @@ pub struct CompressConfig {
     max_cost: u8,
     // Are we minimizing size or maximizing performance?
     objective: Objective,
-    /// Penalty in bytes per compression level
-    overhead_bytes_per_array: u64,
 
     // Target chunk size in bytes
     target_block_bytesize: usize,
@@ -172,7 +162,6 @@ impl Default for CompressConfig {
             sample_count: 16,
             max_cost: constants::DEFAULT_MAX_COST,
             objective: Objective::MinSize,
-            overhead_bytes_per_array: 64,
             target_block_bytesize: 16 * mib,
             target_block_size: 64 * kib,
             rng_seed: 0,
diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs
index db626ed5c6..c441f2b203 100644
--- a/vortex-sampling-compressor/tests/smoketest.rs
+++ b/vortex-sampling-compressor/tests/smoketest.rs
@@ -125,7 +125,7 @@ mod tests {
             assert_eq!(chunk.encoding().id(), FoREncoding::ID);
             assert_eq!(
                 chunk.statistics().get(Stat::UncompressedSizeInBytes),
-                Some(Scalar::from((chunk.len() * 8) as u64))
+                Some(Scalar::from((chunk.len() * 8) as u64 + 1))
             );
         }
 
@@ -138,7 +138,7 @@ mod tests {
             assert_eq!(chunk.encoding().id(), BoolEncoding::ID);
             assert_eq!(
                 chunk.statistics().get(Stat::UncompressedSizeInBytes),
-                Some(Scalar::from(chunk.len().div_ceil(8) as u64))
+                Some(Scalar::from(chunk.len().div_ceil(8) as u64 + 2))
             );
         }
 
@@ -154,7 +154,7 @@ mod tests {
             );
             assert_eq!(
                 chunk.statistics().get(Stat::UncompressedSizeInBytes),
-                Some(Scalar::from(1392640u64))
+                Some(Scalar::from(1392677u64))
             );
         }
 
@@ -167,7 +167,7 @@ mod tests {
             assert_eq!(chunk.encoding().id(), VarBinEncoding::ID);
             assert_eq!(
                 chunk.statistics().get(Stat::UncompressedSizeInBytes),
-                Some(Scalar::from(134357000u64))
+                Some(Scalar::from(134357018u64))
             );
         }
 
@@ -180,7 +180,7 @@ mod tests {
             assert_eq!(chunk.encoding().id(), DateTimePartsEncoding::ID);
             assert_eq!(
                 chunk.statistics().get(Stat::UncompressedSizeInBytes),
-                Some((chunk.len() * 8).into())
+                Some((chunk.len() * 8 + 4).into())
             )
         }
     }