From d029c431f70deee306043224d40cac5d9701a932 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 13:50:50 +0000
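Subject: [PATCH 01/10] ALP: replace codecz encoder with in-crate ALPFloat (WIP)

Replace the codecz-backed ALP encoder with an in-crate `ALPFloat` trait
implemented for f32 and f64, and introduce a local `Exponents` struct in
place of `codecz::alp::ALPExponents`.

Work in progress: `find_best_exponents` returns a fixed placeholder,
`ScalarAtFn::scalar_at` is stubbed with `todo!()`, and `alp_decode` is
commented out.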

---
 Cargo.lock                 |   1 +
 vortex-alp/Cargo.toml      |   1 +
 vortex-alp/src/alp.rs      |  20 ++-
 vortex-alp/src/compress.rs | 274 ++++++++++++++++++++++++++-----------
 vortex-alp/src/compute.rs  |  66 +++++----
 vortex-alp/src/serde.rs    |   5 +-
 6 files changed, 242 insertions(+), 125 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 13a0456f84..aa8dfa5aea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2833,6 +2833,7 @@ dependencies = [
  "itertools 0.12.1",
  "linkme",
  "log",
+ "num-traits",
  "vortex",
 ]
 
diff --git a/vortex-alp/Cargo.toml b/vortex-alp/Cargo.toml
index cb1a1cdd1d..8f37114d5f 100644
--- a/vortex-alp/Cargo.toml
+++ b/vortex-alp/Cargo.toml
@@ -16,6 +16,7 @@ arrow = { version = "50.0.0" }
 vortex = { "path" = "../vortex" }
 linkme = "0.3.22"
 itertools = "0.12.1"
+num-traits = "0.2.18"
 codecz = { version = "0.1.0", path = "../codecz" }
 log = { version = "0.4.20", features = [] }
 
diff --git a/vortex-alp/src/alp.rs b/vortex-alp/src/alp.rs
index 65d37418aa..ad150ddfe4 100644
--- a/vortex-alp/src/alp.rs
+++ b/vortex-alp/src/alp.rs
@@ -12,23 +12,29 @@ use vortex::stats::{Stats, StatsSet};
 
 use crate::compress::alp_encode;
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Exponents {
+    pub e: u8,
+    pub f: u8,
+}
+
 #[derive(Debug, Clone)]
 pub struct ALPArray {
     encoded: ArrayRef,
-    exponents: ALPExponents,
+    exponents: Exponents,
     patches: Option<ArrayRef>,
     dtype: DType,
     stats: Arc<RwLock<StatsSet>>,
 }
 
 impl ALPArray {
-    pub fn new(encoded: ArrayRef, exponents: ALPExponents, patches: Option<ArrayRef>) -> Self {
+    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<ArrayRef>) -> Self {
         Self::try_new(encoded, exponents, patches).unwrap()
     }
 
     pub fn try_new(
         encoded: ArrayRef,
-        exponents: ALPExponents,
+        exponents: Exponents,
         patches: Option<ArrayRef>,
     ) -> VortexResult<Self> {
         let dtype = match encoded.dtype() {
@@ -59,8 +65,8 @@ impl ALPArray {
         self.encoded.as_ref()
     }
 
-    pub fn exponents(&self) -> ALPExponents {
-        self.exponents
+    pub fn exponents(&self) -> &Exponents {
+        &self.exponents
     }
 
     pub fn patches(&self) -> Option<&dyn Array> {
@@ -111,7 +117,7 @@ impl Array for ALPArray {
     fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
         Ok(Self::try_new(
             self.encoded().slice(start, stop)?,
-            self.exponents(),
+            self.exponents().clone(),
             self.patches().map(|p| p.slice(start, stop)).transpose()?,
         )?
         .boxed())
@@ -140,7 +146,7 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for ALPArray {
 
 impl ArrayDisplay for ALPArray {
     fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result {
-        f.writeln(format!("exponents: {}", self.exponents()))?;
+        f.writeln(format!("exponents: {:?}", self.exponents()))?;
         if let Some(p) = self.patches() {
             f.writeln("patches:")?;
             f.indent(|indent| indent.array(p.as_ref()))?;
diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index a742b0924a..a6986ce74a 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -1,16 +1,17 @@
+use itertools::Itertools;
 use log::debug;
+use num_traits::{cast, Float, PrimInt};
 
-use codecz::alp;
-use codecz::alp::{ALPEncoded, ALPExponents, SupportsALP};
 use vortex::array::downcast::DowncastArrayBuiltin;
 use vortex::array::primitive::PrimitiveArray;
 use vortex::array::sparse::SparseArray;
-use vortex::array::{Array, ArrayRef, CloneOptionalArray};
+use vortex::array::{Array, ArrayRef};
 use vortex::compress::{CompressConfig, CompressCtx, Compressor, EncodingCompression};
 use vortex::ptype::{NativePType, PType};
 
 use crate::alp::{ALPArray, ALPEncoding};
 use crate::downcast::DowncastALP;
+use crate::Exponents;
 
 impl EncodingCompression for ALPEncoding {
     fn compressor(
@@ -38,9 +39,15 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
     let like_alp = like.map(|like_array| like_array.as_alp());
 
     let parray = array.as_primitive();
-    let (encoded, exponents, patches) = like_alp
-        .map(|alp_like| alp_encode_like_parts(parray, alp_like))
-        .unwrap_or_else(|| alp_encode_parts(parray));
+    let (exponents, encoded, patches) = match parray.ptype() {
+        PType::F32 => {
+            ALPFloat::encode_to_array(parray.typed_data::<f32>(), like_alp.map(|a| a.exponents()))
+        }
+        PType::F64 => {
+            ALPFloat::encode_to_array(parray.typed_data::<f64>(), like_alp.map(|a| a.exponents()))
+        }
+        _ => panic!("Unsupported ptype"),
+    };
 
     ALPArray::new(
         ctx.next_level()
@@ -55,91 +62,196 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
 }
 
 pub fn alp_encode(parray: &PrimitiveArray) -> ALPArray {
-    let (encoded, exponents, patches) = alp_encode_parts(parray);
+    let (exponents, encoded, patches) = match parray.ptype() {
+        PType::F32 => ALPFloat::encode_to_array(parray.typed_data::<f32>(), None),
+        PType::F64 => ALPFloat::encode_to_array(parray.typed_data::<f64>(), None),
+        _ => panic!("Unsupported ptype"),
+    };
     ALPArray::new(encoded, exponents, patches)
 }
 
-fn alp_encode_parts(parray: &PrimitiveArray) -> (ArrayRef, ALPExponents, Option<ArrayRef>) {
-    match parray.ptype() {
-        PType::F32 => {
-            alp_encode_primitive(parray.buffer().typed_data::<f32>(), parray.validity(), None)
-        }
-        PType::F64 => {
-            alp_encode_primitive(parray.buffer().typed_data::<f64>(), parray.validity(), None)
-        }
-        _ => panic!("Unsupported ptype"),
+trait ALPFloat: NativePType + Float {
+    type ALPInt: NativePType + PrimInt;
+    const FRACTIONAL_BITS: u8;
+    const SWEET: Self;
+    const F10: &'static [Self]; // TODO(ngates): const exprs for these to be arrays.
+    const IF10: &'static [Self];
+
+    /// Round to the nearest floating integer by shifting in and out of the low precision range.
+    fn fast_round(self) -> Self {
+        (self + Self::SWEET) - Self::SWEET
     }
-}
 
-fn alp_encode_like_parts(
-    parray: &PrimitiveArray,
-    sample: &ALPArray,
-) -> (ArrayRef, ALPExponents, Option<ArrayRef>) {
-    match parray.ptype() {
-        PType::F32 => alp_encode_primitive(
-            parray.buffer().typed_data::<f32>(),
-            parray.validity(),
-            Some(sample.exponents()),
-        ),
-        PType::F64 => alp_encode_primitive(
-            parray.buffer().typed_data::<f64>(),
-            parray.validity(),
-            Some(sample.exponents()),
-        ),
-        _ => panic!("Unsupported ptype"),
+    fn find_best_exponents(_values: &[Self]) -> Exponents {
+        Exponents { e: 16, f: 13 }
     }
-}
 
-fn alp_encode_primitive<T: SupportsALP + NativePType>(
-    values: &[T],
-    validity: Option<&dyn Array>,
-    exponents: Option<ALPExponents>,
-) -> (ArrayRef, ALPExponents, Option<ArrayRef>)
-where
-    T::EncInt: NativePType,
-{
-    // TODO: actually handle CodecErrors instead of blindly unwrapping
-    let ALPEncoded {
-        values,
-        exponents,
-        exceptions_idx,
-        num_exceptions,
-    } = exponents
-        .map(|exp| alp::encode_with(values, exp))
-        .unwrap_or_else(|| alp::encode(values))
-        .unwrap();
-    let values = PrimitiveArray::from_nullable_in(values, validity.clone_optional()); // move and re-alias
-
-    let patches = if num_exceptions == 0 {
-        None
-    } else {
-        let patch_indices = codecz::utils::into_u64_vec(&exceptions_idx, num_exceptions);
-        let patch_values = codecz::utils::gather_patches(
-            values.buffer().typed_data::<T>(),
-            patch_indices.as_slice(),
-        );
-        Some(
-            SparseArray::new(
-                PrimitiveArray::from_vec_in(patch_indices).boxed(),
-                PrimitiveArray::from_vec_in(patch_values).boxed(),
-                values.len(),
-            )
-            .boxed(),
+    fn encode_to_array(
+        values: &[Self],
+        exponents: Option<&Exponents>,
+    ) -> (Exponents, ArrayRef, Option<ArrayRef>) {
+        let best_exponents =
+            exponents.map_or_else(|| Self::find_best_exponents(values), Exponents::clone);
+        let (values, exc_pos, exc) = Self::encode(values, &best_exponents);
+        let len = values.len();
+        (
+            best_exponents,
+            PrimitiveArray::from_vec(values).boxed(),
+            (exc.len() > 0).then(|| {
+                SparseArray::new(
+                    PrimitiveArray::from_vec(exc_pos).boxed(),
+                    PrimitiveArray::from_vec(exc).boxed(),
+                    len,
+                )
+                .boxed()
+            }),
         )
-    };
+    }
 
-    (values.boxed(), exponents, patches)
+    fn encode(values: &[Self], exponents: &Exponents) -> (Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
+        let mut exc_pos = Vec::new();
+        let mut exc_value = Vec::new();
+        let encoded = values
+            .iter()
+            .enumerate()
+            .map(|(i, v)| {
+                let encoded =
+                    (*v * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
+                        .fast_round();
+                let decoded =
+                    encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
+
+                if encoded != decoded {
+                    exc_pos.push(i as u64);
+                    exc_value.push(*v);
+                    // TODO(ngates): we could find previous?
+                    Self::default()
+                } else {
+                    *v
+                }
+            })
+            .map(|v| cast(v).unwrap())
+            .collect_vec();
+
+        (encoded, exc_pos, exc_value)
+    }
 }
 
-#[allow(dead_code)]
-pub fn alp_decode(parray: &PrimitiveArray, exp: ALPExponents) -> PrimitiveArray {
-    match parray.ptype() {
-        PType::I32 => PrimitiveArray::from_vec_in(
-            alp::decode::<f32>(parray.buffer().typed_data::<i32>(), exp).unwrap(),
-        ),
-        PType::I64 => PrimitiveArray::from_vec_in(
-            alp::decode::<f64>(parray.buffer().typed_data::<i64>(), exp).unwrap(),
-        ),
-        _ => panic!("Unsupported ptype"),
+impl ALPFloat for f32 {
+    type ALPInt = i32;
+    const FRACTIONAL_BITS: u8 = 23;
+    const SWEET: Self =
+        (1 << Self::FRACTIONAL_BITS) as Self + (1 << Self::FRACTIONAL_BITS - 1) as Self;
+
+    const F10: &'static [Self] = &[
+        1.0,
+        10.0,
+        100.0,
+        1000.0,
+        10000.0,
+        100000.0,
+        1000000.0,
+        10000000.0,
+        100000000.0,
+        1000000000.0,
+        10000000000.0,
+    ];
+    const IF10: &'static [Self] = &[
+        1.0,
+        0.1,
+        0.01,
+        0.001,
+        0.0001,
+        0.00001,
+        0.000001,
+        0.0000001,
+        0.00000001,
+        0.000000001,
+        0.0000000001,
+    ];
+}
+
+impl ALPFloat for f64 {
+    type ALPInt = i64;
+    const FRACTIONAL_BITS: u8 = 52;
+    const SWEET: Self =
+        (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
+    const F10: &'static [Self] = &[
+        1.0,
+        10.0,
+        100.0,
+        1000.0,
+        10000.0,
+        100000.0,
+        1000000.0,
+        10000000.0,
+        100000000.0,
+        1000000000.0,
+        10000000000.0,
+        100000000000.0,
+        1000000000000.0,
+        10000000000000.0,
+        100000000000000.0,
+        1000000000000000.0,
+        10000000000000000.0,
+        100000000000000000.0,
+        1000000000000000000.0,
+        10000000000000000000.0,
+        100000000000000000000.0,
+        1000000000000000000000.0,
+        10000000000000000000000.0,
+        100000000000000000000000.0,
+    ];
+
+    const IF10: &'static [Self] = &[
+        1.0,
+        0.1,
+        0.01,
+        0.001,
+        0.0001,
+        0.00001,
+        0.000001,
+        0.0000001,
+        0.00000001,
+        0.000000001,
+        0.0000000001,
+        0.00000000001,
+        0.000000000001,
+        0.0000000000001,
+        0.00000000000001,
+        0.000000000000001,
+        0.0000000000000001,
+        0.00000000000000001,
+        0.000000000000000001,
+        0.0000000000000000001,
+        0.00000000000000000001,
+    ];
+}
+
+//
+// #[allow(dead_code)]
+// pub fn alp_decode(parray: &PrimitiveArray, exp: ALPExponents) -> PrimitiveArray {
+//     match parray.ptype() {
+//         PType::I32 => PrimitiveArray::from_vec_in(
+//             alp::decode::<f32>(parray.buffer().typed_data::<i32>(), exp).unwrap(),
+//         ),
+//         PType::I64 => PrimitiveArray::from_vec_in(
+//             alp::decode::<f64>(parray.buffer().typed_data::<i64>(), exp).unwrap(),
+//         ),
+//         _ => panic!("Unsupported ptype"),
+//     }
+// }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_compress() {
+        // Create a range offset by a million
+        let array = PrimitiveArray::from_vec(vec![1.234; 1024]);
+        let encoded = alp_encode(&array);
+        println!("Encoded {:?}", encoded);
+        assert_eq!(encoded.exponents(), &Exponents { e: 0, f: 0 });
     }
 }
diff --git a/vortex-alp/src/compute.rs b/vortex-alp/src/compute.rs
index d85d6e1072..41b01512bc 100644
--- a/vortex-alp/src/compute.rs
+++ b/vortex-alp/src/compute.rs
@@ -1,11 +1,8 @@
 use crate::ALPArray;
-use codecz::alp;
-use vortex::array::Array;
-use vortex::compute::scalar_at::{scalar_at, ScalarAtFn};
+use vortex::compute::scalar_at::ScalarAtFn;
 use vortex::compute::ArrayCompute;
-use vortex::dtype::{DType, FloatWidth};
 use vortex::error::VortexResult;
-use vortex::scalar::{NullableScalar, Scalar, ScalarRef};
+use vortex::scalar::ScalarRef;
 
 impl ArrayCompute for ALPArray {
     fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
@@ -14,34 +11,35 @@ impl ArrayCompute for ALPArray {
 }
 
 impl ScalarAtFn for ALPArray {
-    fn scalar_at(&self, index: usize) -> VortexResult<ScalarRef> {
-        if let Some(patch) = self
-            .patches()
-            .and_then(|p| scalar_at(p, index).ok())
-            .and_then(|p| p.into_nonnull())
-        {
-            return Ok(patch);
-        }
-
-        let Some(encoded_val) = scalar_at(self.encoded(), index)?.into_nonnull() else {
-            return Ok(NullableScalar::none(self.dtype().clone()).boxed());
-        };
-        match self.dtype() {
-            DType::Float(FloatWidth::_32, _) => {
-                let encoded_val: i32 = encoded_val.try_into().unwrap();
-                Ok(alp::decode_single::<f32>(encoded_val, self.exponents())
-                    .unwrap()
-                    .into())
-            }
-
-            DType::Float(FloatWidth::_64, _) => {
-                let encoded_val: i64 = encoded_val.try_into().unwrap();
-                Ok(alp::decode_single::<f64>(encoded_val, self.exponents())
-                    .unwrap()
-                    .into())
-            }
-
-            _ => unreachable!(),
-        }
+    fn scalar_at(&self, _index: usize) -> VortexResult<ScalarRef> {
+        todo!()
+        // if let Some(patch) = self
+        //     .patches()
+        //     .and_then(|p| scalar_at(p, index).ok())
+        //     .and_then(|p| p.into_nonnull())
+        // {
+        //     return Ok(patch);
+        // }
+        //
+        // let Some(encoded_val) = scalar_at(self.encoded(), index)?.into_nonnull() else {
+        //     return Ok(NullableScalar::none(self.dtype().clone()).boxed());
+        // };
+        // match self.dtype() {
+        //     DType::Float(FloatWidth::_32, _) => {
+        //         let encoded_val: i32 = encoded_val.try_into().unwrap();
+        //         Ok(alp::decode_single::<f32>(encoded_val, self.exponents())
+        //             .unwrap()
+        //             .into())
+        //     }
+        //
+        //     DType::Float(FloatWidth::_64, _) => {
+        //         let encoded_val: i64 = encoded_val.try_into().unwrap();
+        //         Ok(alp::decode_single::<f64>(encoded_val, self.exponents())
+        //             .unwrap()
+        //             .into())
+        //     }
+        //
+        //     _ => unreachable!(),
+        // }
     }
 }
diff --git a/vortex-alp/src/serde.rs b/vortex-alp/src/serde.rs
index 58ef3e090e..3d3e4215a2 100644
--- a/vortex-alp/src/serde.rs
+++ b/vortex-alp/src/serde.rs
@@ -1,12 +1,11 @@
 use std::io;
 use std::io::ErrorKind;
 
-use codecz::alp::ALPExponents;
 use vortex::array::{Array, ArrayRef};
 use vortex::dtype::{DType, FloatWidth, Signedness};
 use vortex::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx};
 
-use crate::{ALPArray, ALPEncoding};
+use crate::{ALPArray, ALPEncoding, Exponents};
 
 impl ArraySerde for ALPArray {
     fn write(&self, ctx: &mut WriteCtx) -> io::Result<()> {
@@ -39,7 +38,7 @@ impl EncodingSerde for ALPEncoding {
         let encoded = ctx.with_schema(&encoded_dtype).read()?;
         Ok(ALPArray::new(
             encoded,
-            ALPExponents {
+            Exponents {
                 e: exponents[0],
                 f: exponents[1],
             },

From 465ecad8488234c9d314a4718c4de7ebf2c666bc Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 14:04:13 +0000
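Subject: [PATCH 02/10] ALP: search for the best exponents

Implement `find_best_exponents` as an exhaustive search over (e, f)
pairs with f < e, keeping the pair whose encoded array plus patches is
smallest and preferring the smaller e - f on ties.

Flag a value as an exception when it does not round-trip
(`decoded != *v`), extend the f64 `IF10` table to the same length as
`F10`, and compress chunks sequentially instead of with rayon.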

---
 vortex-alp/src/compress.rs           | 40 +++++++++++++++++++++++++---
 vortex/src/array/chunked/compress.rs |  7 +++--
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index a6986ce74a..5c6961d36d 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -82,8 +82,38 @@ trait ALPFloat: NativePType + Float {
         (self + Self::SWEET) - Self::SWEET
     }
 
-    fn find_best_exponents(_values: &[Self]) -> Exponents {
-        Exponents { e: 16, f: 13 }
+    fn find_best_exponents(values: &[Self]) -> Exponents {
+        let mut best_e: usize = 0;
+        let mut best_f: usize = 0;
+        let mut best_nbytes: usize = usize::MAX;
+
+        // TODO(wmanning): idea, start with highest e, then find the best f
+        // after that, try e's in descending order, with a gap no larger than the original e - f
+        for e in 0..Self::F10.len() - 1 {
+            for f in 0..e {
+                let (_, encoded, patches) = Self::encode_to_array(
+                    values,
+                    Some(&Exponents {
+                        e: e as u8,
+                        f: f as u8,
+                    }),
+                );
+                let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
+                if size < best_nbytes {
+                    best_nbytes = size;
+                    best_e = e;
+                    best_f = f;
+                } else if size == best_nbytes && e - f < best_e - best_f {
+                    best_e = e;
+                    best_f = f;
+                }
+            }
+        }
+
+        Exponents {
+            e: best_e as u8,
+            f: best_f as u8,
+        }
     }
 
     fn encode_to_array(
@@ -121,7 +151,7 @@ trait ALPFloat: NativePType + Float {
                 let decoded =
                     encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
 
-                if encoded != decoded {
+                if decoded != *v {
                     exc_pos.push(i as u64);
                     exc_value.push(*v);
                     // TODO(ngates): we could find previous?
@@ -225,6 +255,9 @@ impl ALPFloat for f64 {
         0.000000000000000001,
         0.0000000000000000001,
         0.00000000000000000001,
+        0.000000000000000000001,
+        0.0000000000000000000001,
+        0.00000000000000000000001,
     ];
 }
 
@@ -252,6 +285,7 @@ mod test {
         let array = PrimitiveArray::from_vec(vec![1.234; 1024]);
         let encoded = alp_encode(&array);
         println!("Encoded {:?}", encoded);
+        assert_eq!(encoded.patches(), None);
         assert_eq!(encoded.exponents(), &Exponents { e: 0, f: 0 });
     }
 }
diff --git a/vortex/src/array/chunked/compress.rs b/vortex/src/array/chunked/compress.rs
index 1268ba6dfd..a5bf11aa54 100644
--- a/vortex/src/array/chunked/compress.rs
+++ b/vortex/src/array/chunked/compress.rs
@@ -1,9 +1,8 @@
-use rayon::prelude::*;
-
 use crate::array::chunked::{ChunkedArray, ChunkedEncoding};
 use crate::array::downcast::DowncastArrayBuiltin;
 use crate::array::{Array, ArrayRef};
 use crate::compress::{CompressConfig, CompressCtx, Compressor, EncodingCompression};
+use itertools::Itertools;
 
 impl EncodingCompression for ChunkedEncoding {
     fn compressor(
@@ -27,7 +26,7 @@ fn chunked_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: Compress
         .map(|c_like| {
             chunked_array
                 .chunks()
-                .par_iter()
+                .iter()
                 .zip_eq(c_like.chunks())
                 .map(|(chunk, chunk_like)| ctx.compress(chunk.as_ref(), Some(chunk_like.as_ref())))
                 .collect()
@@ -35,7 +34,7 @@ fn chunked_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: Compress
         .unwrap_or_else(|| {
             chunked_array
                 .chunks()
-                .par_iter()
+                .iter()
                 .map(|chunk| ctx.compress(chunk.as_ref(), None))
                 .collect()
         });

From 55d407b488855867e7c48a034771fbd741fa51a4 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 14:14:05 +0000
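Subject: [PATCH 03/10] ALP: do not pass `like` arrays to child compression

Compress the encoded values and the patches with `None` instead of the
corresponding children of the `like` array. The previous calls are left
in place as comments.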

---
 vortex-alp/src/compress.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 5c6961d36d..14d05ec188 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -51,11 +51,13 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
 
     ALPArray::new(
         ctx.next_level()
-            .compress(encoded.as_ref(), like_alp.map(|a| a.encoded())),
+            //.compress(encoded.as_ref(), like_alp.map(|a| a.encoded())),
+            .compress(encoded.as_ref(), None),
         exponents,
         patches.map(|p| {
             ctx.next_level()
-                .compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
+                //.compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
+                .compress(p.as_ref(), None)
         }),
     )
     .boxed()

From cecded6bb53d69aa1d2bca12c22cefcccb81601c Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 14:40:01 +0000
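Subject: [PATCH 04/10] ALP: encode to integers, fill exceptions with previous value

Restore passing the `like` children to child compression.

Add a per-type `MAX_EXPONENT` bound for the exponent search, cast
encoded values to `ALPInt` inside `encode` (a failed cast is treated as
an exception), and emit the last good value at exception positions.
Remove the commented-out `alp_decode` and update the test to expect
`Exponents { e: 4, f: 1 }`.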

---
 vortex-alp/src/compress.rs | 71 +++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 40 deletions(-)

diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 14d05ec188..935906afd4 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -1,6 +1,6 @@
 use itertools::Itertools;
 use log::debug;
-use num_traits::{cast, Float, PrimInt};
+use num_traits::{Float, NumCast, PrimInt};
 
 use vortex::array::downcast::DowncastArrayBuiltin;
 use vortex::array::primitive::PrimitiveArray;
@@ -51,13 +51,13 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
 
     ALPArray::new(
         ctx.next_level()
-            //.compress(encoded.as_ref(), like_alp.map(|a| a.encoded())),
-            .compress(encoded.as_ref(), None),
+            .compress(encoded.as_ref(), like_alp.map(|a| a.encoded())),
+        // .compress(encoded.as_ref(), None),
         exponents,
         patches.map(|p| {
             ctx.next_level()
-                //.compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
-                .compress(p.as_ref(), None)
+                .compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
+            // .compress(p.as_ref(), None)
         }),
     )
     .boxed()
@@ -75,6 +75,7 @@ pub fn alp_encode(parray: &PrimitiveArray) -> ALPArray {
 trait ALPFloat: NativePType + Float {
     type ALPInt: NativePType + PrimInt;
     const FRACTIONAL_BITS: u8;
+    const MAX_EXPONENT: u8;
     const SWEET: Self;
     const F10: &'static [Self]; // TODO(ngates): const exprs for these to be arrays.
     const IF10: &'static [Self];
@@ -85,21 +86,16 @@ trait ALPFloat: NativePType + Float {
     }
 
     fn find_best_exponents(values: &[Self]) -> Exponents {
-        let mut best_e: usize = 0;
-        let mut best_f: usize = 0;
+        let mut best_e: u8 = 0;
+        let mut best_f: u8 = 0;
         let mut best_nbytes: usize = usize::MAX;
 
         // TODO(wmanning): idea, start with highest e, then find the best f
         // after that, try e's in descending order, with a gap no larger than the original e - f
-        for e in 0..Self::F10.len() - 1 {
+        for e in 0..Self::MAX_EXPONENT {
             for f in 0..e {
-                let (_, encoded, patches) = Self::encode_to_array(
-                    values,
-                    Some(&Exponents {
-                        e: e as u8,
-                        f: f as u8,
-                    }),
-                );
+                let (_, encoded, patches) =
+                    Self::encode_to_array(values, Some(&Exponents { e, f }));
                 let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
                 if size < best_nbytes {
                     best_nbytes = size;
@@ -143,6 +139,7 @@ trait ALPFloat: NativePType + Float {
     fn encode(values: &[Self], exponents: &Exponents) -> (Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
         let mut exc_pos = Vec::new();
         let mut exc_value = Vec::new();
+        let mut prev = Self::ALPInt::default();
         let encoded = values
             .iter()
             .enumerate()
@@ -153,16 +150,18 @@ trait ALPFloat: NativePType + Float {
                 let decoded =
                     encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
 
-                if decoded != *v {
-                    exc_pos.push(i as u64);
-                    exc_value.push(*v);
-                    // TODO(ngates): we could find previous?
-                    Self::default()
-                } else {
-                    *v
+                if decoded == *v {
+                    if let Some(e) = <<Self as ALPFloat>::ALPInt as NumCast>::from(encoded) {
+                        prev = e;
+                        return e;
+                    }
                 }
+
+                exc_pos.push(i as u64);
+                exc_value.push(*v);
+                // Emit the last known good value. This helps with run-end encoding.
+                prev
             })
-            .map(|v| cast(v).unwrap())
             .collect_vec();
 
         (encoded, exc_pos, exc_value)
@@ -172,6 +171,7 @@ trait ALPFloat: NativePType + Float {
 impl ALPFloat for f32 {
     type ALPInt = i32;
     const FRACTIONAL_BITS: u8 = 23;
+    const MAX_EXPONENT: u8 = 10;
     const SWEET: Self =
         (1 << Self::FRACTIONAL_BITS) as Self + (1 << Self::FRACTIONAL_BITS - 1) as Self;
 
@@ -205,6 +205,7 @@ impl ALPFloat for f32 {
 
 impl ALPFloat for f64 {
     type ALPInt = i64;
+    const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
     const FRACTIONAL_BITS: u8 = 52;
     const SWEET: Self =
         (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
@@ -263,20 +264,6 @@ impl ALPFloat for f64 {
     ];
 }
 
-//
-// #[allow(dead_code)]
-// pub fn alp_decode(parray: &PrimitiveArray, exp: ALPExponents) -> PrimitiveArray {
-//     match parray.ptype() {
-//         PType::I32 => PrimitiveArray::from_vec_in(
-//             alp::decode::<f32>(parray.buffer().typed_data::<i32>(), exp).unwrap(),
-//         ),
-//         PType::I64 => PrimitiveArray::from_vec_in(
-//             alp::decode::<f64>(parray.buffer().typed_data::<i64>(), exp).unwrap(),
-//         ),
-//         _ => panic!("Unsupported ptype"),
-//     }
-// }
-
 #[cfg(test)]
 mod test {
     use super::*;
@@ -284,10 +271,14 @@ mod test {
     #[test]
     fn test_compress() {
         // Create a range offset by a million
-        let array = PrimitiveArray::from_vec(vec![1.234; 1024]);
+        let array = PrimitiveArray::from_vec(vec![1.234f32; 10]);
         let encoded = alp_encode(&array);
         println!("Encoded {:?}", encoded);
-        assert_eq!(encoded.patches(), None);
-        assert_eq!(encoded.exponents(), &Exponents { e: 0, f: 0 });
+        assert!(encoded.patches().is_none());
+        assert_eq!(
+            encoded.encoded().as_primitive().typed_data::<i32>(),
+            vec![1234; 10]
+        );
+        assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 });
     }
 }

From 5ae0e9b81cbb7ca4b17e5d026a7489df95363cb7 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 16:08:03 +0000
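Subject: [PATCH 05/10] ALP: sample values when searching for exponents

When the input has more than `SAMPLE_SIZE` (32) values, search for
exponents over a strided sample (step `len / SAMPLE_SIZE`) rather than
the full input.

Make `alp_encode` return `VortexResult`, reporting
`VortexError::InvalidPType` for non-float input instead of panicking.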

---
 vortex-alp/src/compress.rs | 71 +++++++++++++++++++++++++-------------
 vortex-alp/src/compute.rs  | 66 ++++++++++++++++++-----------------
 vortex-array/src/lib.rs    |  2 --
 3 files changed, 81 insertions(+), 58 deletions(-)

diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 935906afd4..2ce182f9f1 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -7,12 +7,15 @@ use vortex::array::primitive::PrimitiveArray;
 use vortex::array::sparse::SparseArray;
 use vortex::array::{Array, ArrayRef};
 use vortex::compress::{CompressConfig, CompressCtx, Compressor, EncodingCompression};
+use vortex::error::{VortexError, VortexResult};
 use vortex::ptype::{NativePType, PType};
 
 use crate::alp::{ALPArray, ALPEncoding};
 use crate::downcast::DowncastALP;
 use crate::Exponents;
 
+const SAMPLE_SIZE: usize = 32;
+
 impl EncodingCompression for ALPEncoding {
     fn compressor(
         &self,
@@ -49,27 +52,25 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
         _ => panic!("Unsupported ptype"),
     };
 
-    ALPArray::new(
+    let compressed_encoded = ctx
+        .next_level()
+        .compress(encoded.as_ref(), like_alp.map(|a| a.encoded()));
+
+    let compressed_patches = patches.map(|p| {
         ctx.next_level()
-            .compress(encoded.as_ref(), like_alp.map(|a| a.encoded())),
-        // .compress(encoded.as_ref(), None),
-        exponents,
-        patches.map(|p| {
-            ctx.next_level()
-                .compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
-            // .compress(p.as_ref(), None)
-        }),
-    )
-    .boxed()
+            .compress(p.as_ref(), like_alp.and_then(|a| a.patches()))
+    });
+
+    ALPArray::new(compressed_encoded, exponents, compressed_patches).boxed()
 }
 
-pub fn alp_encode(parray: &PrimitiveArray) -> ALPArray {
+pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
     let (exponents, encoded, patches) = match parray.ptype() {
         PType::F32 => ALPFloat::encode_to_array(parray.typed_data::<f32>(), None),
         PType::F64 => ALPFloat::encode_to_array(parray.typed_data::<f64>(), None),
-        _ => panic!("Unsupported ptype"),
+        _ => return Err(VortexError::InvalidPType(parray.ptype().clone())),
     };
-    ALPArray::new(encoded, exponents, patches)
+    Ok(ALPArray::new(encoded, exponents, patches))
 }
 
 trait ALPFloat: NativePType + Float {
@@ -77,7 +78,7 @@ trait ALPFloat: NativePType + Float {
     const FRACTIONAL_BITS: u8;
     const MAX_EXPONENT: u8;
     const SWEET: Self;
-    const F10: &'static [Self]; // TODO(ngates): const exprs for these to be arrays.
+    const F10: &'static [Self];
     const IF10: &'static [Self];
 
     /// Round to the nearest floating integer by shifting in and out of the low precision range.
@@ -90,12 +91,22 @@ trait ALPFloat: NativePType + Float {
         let mut best_f: u8 = 0;
         let mut best_nbytes: usize = usize::MAX;
 
+        let sample = (values.len() > SAMPLE_SIZE).then(|| {
+            values
+                .iter()
+                .step_by(values.len() / SAMPLE_SIZE)
+                .cloned()
+                .collect_vec()
+        });
+
         // TODO(wmanning): idea, start with highest e, then find the best f
         // after that, try e's in descending order, with a gap no larger than the original e - f
         for e in 0..Self::MAX_EXPONENT {
             for f in 0..e {
-                let (_, encoded, patches) =
-                    Self::encode_to_array(values, Some(&Exponents { e, f }));
+                let (_, encoded, patches) = Self::encode_to_array(
+                    sample.as_deref().unwrap_or(values),
+                    Some(&Exponents { e, f }),
+                );
                 let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
                 if size < best_nbytes {
                     best_nbytes = size;
@@ -109,8 +120,8 @@ trait ALPFloat: NativePType + Float {
         }
 
         Exponents {
-            e: best_e as u8,
-            f: best_f as u8,
+            e: best_e,
+            f: best_f,
         }
     }
 
@@ -205,8 +216,8 @@ impl ALPFloat for f32 {
 
 impl ALPFloat for f64 {
     type ALPInt = i64;
-    const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
     const FRACTIONAL_BITS: u8 = 52;
+    const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
     const SWEET: Self =
         (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
     const F10: &'static [Self] = &[
@@ -270,14 +281,26 @@ mod test {
 
     #[test]
     fn test_compress() {
-        // Create a range offset by a million
-        let array = PrimitiveArray::from_vec(vec![1.234f32; 10]);
-        let encoded = alp_encode(&array);
+        let array = PrimitiveArray::from_vec(vec![1.234f32; 1025]);
+        let encoded = alp_encode(&array).unwrap();
+        println!("Encoded {:?}", encoded);
+        assert!(encoded.patches().is_none());
+        assert_eq!(
+            encoded.encoded().as_primitive().typed_data::<i32>(),
+            vec![1234; 1025]
+        );
+        assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 });
+    }
+
+    #[test]
+    fn test_nullable_compress() {
+        let array = PrimitiveArray::from_iter(vec![1.234f32; 1025]);
+        let encoded = alp_encode(&array).unwrap();
         println!("Encoded {:?}", encoded);
         assert!(encoded.patches().is_none());
         assert_eq!(
             encoded.encoded().as_primitive().typed_data::<i32>(),
-            vec![1234; 10]
+            vec![1234; 1025]
         );
         assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 });
     }
diff --git a/vortex-alp/src/compute.rs b/vortex-alp/src/compute.rs
index 41b01512bc..12d888260e 100644
--- a/vortex-alp/src/compute.rs
+++ b/vortex-alp/src/compute.rs
@@ -1,8 +1,10 @@
 use crate::ALPArray;
-use vortex::compute::scalar_at::ScalarAtFn;
+use vortex::array::Array;
+use vortex::compute::scalar_at::{scalar_at, ScalarAtFn};
 use vortex::compute::ArrayCompute;
+use vortex::dtype::{DType, FloatWidth};
 use vortex::error::VortexResult;
-use vortex::scalar::ScalarRef;
+use vortex::scalar::{NullableScalar, ScalarRef};
 
 impl ArrayCompute for ALPArray {
     fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
@@ -11,35 +13,35 @@ impl ArrayCompute for ALPArray {
 }
 
 impl ScalarAtFn for ALPArray {
-    fn scalar_at(&self, _index: usize) -> VortexResult<ScalarRef> {
-        todo!()
-        // if let Some(patch) = self
-        //     .patches()
-        //     .and_then(|p| scalar_at(p, index).ok())
-        //     .and_then(|p| p.into_nonnull())
-        // {
-        //     return Ok(patch);
-        // }
-        //
-        // let Some(encoded_val) = scalar_at(self.encoded(), index)?.into_nonnull() else {
-        //     return Ok(NullableScalar::none(self.dtype().clone()).boxed());
-        // };
-        // match self.dtype() {
-        //     DType::Float(FloatWidth::_32, _) => {
-        //         let encoded_val: i32 = encoded_val.try_into().unwrap();
-        //         Ok(alp::decode_single::<f32>(encoded_val, self.exponents())
-        //             .unwrap()
-        //             .into())
-        //     }
-        //
-        //     DType::Float(FloatWidth::_64, _) => {
-        //         let encoded_val: i64 = encoded_val.try_into().unwrap();
-        //         Ok(alp::decode_single::<f64>(encoded_val, self.exponents())
-        //             .unwrap()
-        //             .into())
-        //     }
-        //
-        //     _ => unreachable!(),
-        // }
+    fn scalar_at(&self, index: usize) -> VortexResult<ScalarRef> {
+        if let Some(patch) = self
+            .patches()
+            .and_then(|p| scalar_at(p, index).ok())
+            .and_then(|p| p.into_nonnull())
+        {
+            return Ok(patch);
+        }
+
+        let Some(encoded_val) = scalar_at(self.encoded(), index)?.into_nonnull() else {
+            return Ok(NullableScalar::none(self.dtype().clone()).boxed());
+        };
+
+        match self.dtype() {
+            DType::Float(FloatWidth::_32, _) => {
+                let encoded_val: i32 = encoded_val.try_into().unwrap();
+                Ok(alp::decode_single::<f32>(encoded_val, self.exponents())
+                    .unwrap()
+                    .into())
+            }
+
+            DType::Float(FloatWidth::_64, _) => {
+                let encoded_val: i64 = encoded_val.try_into().unwrap();
+                Ok(alp::decode_single::<f64>(encoded_val, self.exponents())
+                    .unwrap()
+                    .into())
+            }
+
+            _ => unreachable!(),
+        }
     }
 }
diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs
index 0fbd8789b5..fad5ab605f 100644
--- a/vortex-array/src/lib.rs
+++ b/vortex-array/src/lib.rs
@@ -1,5 +1,3 @@
-#![feature(iterator_try_collect)]
-
 pub mod array;
 pub mod arrow;
 pub mod scalar;

From b7ef3a4440079b7a195403d44c29c31230e35199 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 16:31:29 +0000
Subject: [PATCH 06/10] ALP

---
 vortex-alp/src/alp.rs      | 365 +++++++++++++++++++++----------------
 vortex-alp/src/array.rs    | 172 +++++++++++++++++
 vortex-alp/src/compress.rs | 225 ++---------------------
 vortex-alp/src/compute.rs  |  24 +--
 vortex-alp/src/lib.rs      |   3 +-
 vortex-alp/src/serde.rs    |   7 +-
 6 files changed, 411 insertions(+), 385 deletions(-)
 create mode 100644 vortex-alp/src/array.rs

diff --git a/vortex-alp/src/alp.rs b/vortex-alp/src/alp.rs
index ad150ddfe4..ff1a8e9165 100644
--- a/vortex-alp/src/alp.rs
+++ b/vortex-alp/src/alp.rs
@@ -1,16 +1,12 @@
-use std::any::Any;
-use std::sync::{Arc, RwLock};
+use itertools::Itertools;
+use num_traits::{Float, NumCast, PrimInt};
+use vortex::array::primitive::PrimitiveArray;
+use vortex::array::sparse::SparseArray;
+use vortex::ptype::NativePType;
 
-pub use codecz::alp::ALPExponents;
-use vortex::array::{Array, ArrayKind, ArrayRef, ArrowIterator, Encoding, EncodingId, EncodingRef};
-use vortex::compress::EncodingCompression;
-use vortex::dtype::{DType, IntWidth, Signedness};
-use vortex::error::{VortexError, VortexResult};
-use vortex::formatter::{ArrayDisplay, ArrayFormatter};
-use vortex::serde::{ArraySerde, EncodingSerde};
-use vortex::stats::{Stats, StatsSet};
+use vortex::array::{Array, ArrayRef};
 
-use crate::compress::alp_encode;
+const SAMPLE_SIZE: usize = 32;
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Exponents {
@@ -18,160 +14,213 @@ pub struct Exponents {
     pub f: u8,
 }
 
-#[derive(Debug, Clone)]
-pub struct ALPArray {
-    encoded: ArrayRef,
-    exponents: Exponents,
-    patches: Option<ArrayRef>,
-    dtype: DType,
-    stats: Arc<RwLock<StatsSet>>,
-}
-
-impl ALPArray {
-    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<ArrayRef>) -> Self {
-        Self::try_new(encoded, exponents, patches).unwrap()
-    }
-
-    pub fn try_new(
-        encoded: ArrayRef,
-        exponents: Exponents,
-        patches: Option<ArrayRef>,
-    ) -> VortexResult<Self> {
-        let dtype = match encoded.dtype() {
-            d @ DType::Int(width, Signedness::Signed, nullability) => match width {
-                IntWidth::_32 => DType::Float(32.into(), *nullability),
-                IntWidth::_64 => DType::Float(64.into(), *nullability),
-                _ => return Err(VortexError::InvalidDType(d.clone())),
-            },
-            d => return Err(VortexError::InvalidDType(d.clone())),
-        };
-        Ok(Self {
-            encoded,
-            exponents,
-            patches,
-            dtype,
-            stats: Arc::new(RwLock::new(StatsSet::new())),
-        })
-    }
-
-    pub fn encode(array: &dyn Array) -> VortexResult<ArrayRef> {
-        match ArrayKind::from(array) {
-            ArrayKind::Primitive(p) => Ok(alp_encode(p).boxed()),
-            _ => Err(VortexError::InvalidEncoding(array.encoding().id().clone())),
+pub trait ALPFloat: NativePType + Float {
+    type ALPInt: NativePType + PrimInt;
+    const FRACTIONAL_BITS: u8;
+    const MAX_EXPONENT: u8;
+    const SWEET: Self;
+    const F10: &'static [Self];
+    const IF10: &'static [Self];
+
+    /// Round to the nearest floating integer by shifting in and out of the low precision range.
+    fn fast_round(self) -> Self {
+        (self + Self::SWEET) - Self::SWEET
+    }
+
+    fn as_int(self) -> Option<Self::ALPInt> {
+        <Self::ALPInt as NumCast>::from(self)
+    }
+
+    fn find_best_exponents(values: &[Self]) -> Exponents {
+        let mut best_e: u8 = 0;
+        let mut best_f: u8 = 0;
+        let mut best_nbytes: usize = usize::MAX;
+
+        let sample = (values.len() > SAMPLE_SIZE).then(|| {
+            values
+                .iter()
+                .step_by(values.len() / SAMPLE_SIZE)
+                .cloned()
+                .collect_vec()
+        });
+
+        // TODO(wmanning): idea, start with highest e, then find the best f
+        // after that, try e's in descending order, with a gap no larger than the original e - f
+        for e in 0..Self::MAX_EXPONENT {
+            for f in 0..e {
+                let (_, encoded, patches) = Self::encode_to_array(
+                    sample.as_deref().unwrap_or(values),
+                    Some(&Exponents { e, f }),
+                );
+                let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
+                if size < best_nbytes {
+                    best_nbytes = size;
+                    best_e = e;
+                    best_f = f;
+                } else if size == best_nbytes && e - f < best_e - best_f {
+                    best_e = e;
+                    best_f = f;
+                }
+            }
         }
-    }
-
-    pub fn encoded(&self) -> &dyn Array {
-        self.encoded.as_ref()
-    }
-
-    pub fn exponents(&self) -> &Exponents {
-        &self.exponents
-    }
-
-    pub fn patches(&self) -> Option<&dyn Array> {
-        self.patches.as_deref()
-    }
-}
-
-impl Array for ALPArray {
-    #[inline]
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    #[inline]
-    fn boxed(self) -> ArrayRef {
-        Box::new(self)
-    }
-
-    #[inline]
-    fn into_any(self: Box<Self>) -> Box<dyn Any> {
-        self
-    }
-
-    #[inline]
-    fn len(&self) -> usize {
-        self.encoded.len()
-    }
-
-    #[inline]
-    fn is_empty(&self) -> bool {
-        self.encoded.is_empty()
-    }
 
-    #[inline]
-    fn dtype(&self) -> &DType {
-        &self.dtype
-    }
-
-    #[inline]
-    fn stats(&self) -> Stats {
-        Stats::new(&self.stats, self)
-    }
-
-    fn iter_arrow(&self) -> Box<ArrowIterator> {
-        todo!()
-    }
-
-    fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
-        Ok(Self::try_new(
-            self.encoded().slice(start, stop)?,
-            self.exponents().clone(),
-            self.patches().map(|p| p.slice(start, stop)).transpose()?,
-        )?
-        .boxed())
-    }
-
-    #[inline]
-    fn encoding(&self) -> EncodingRef {
-        &ALPEncoding
-    }
-
-    #[inline]
-    fn nbytes(&self) -> usize {
-        self.encoded().nbytes() + self.patches().map(|p| p.nbytes()).unwrap_or(0)
-    }
-
-    fn serde(&self) -> &dyn ArraySerde {
-        self
-    }
-}
-
-impl<'arr> AsRef<(dyn Array + 'arr)> for ALPArray {
-    fn as_ref(&self) -> &(dyn Array + 'arr) {
-        self
+        Exponents {
+            e: best_e,
+            f: best_f,
+        }
     }
-}
 
-impl ArrayDisplay for ALPArray {
-    fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result {
-        f.writeln(format!("exponents: {:?}", self.exponents()))?;
-        if let Some(p) = self.patches() {
-            f.writeln("patches:")?;
-            f.indent(|indent| indent.array(p.as_ref()))?;
-        }
-        f.indent(|indent| indent.array(self.encoded()))
+    fn encode_to_array(
+        values: &[Self],
+        exponents: Option<&Exponents>,
+    ) -> (Exponents, ArrayRef, Option<ArrayRef>) {
+        let best_exponents =
+            exponents.map_or_else(|| Self::find_best_exponents(values), Exponents::clone);
+        let (values, exc_pos, exc) = Self::encode(values, &best_exponents);
+        let len = values.len();
+        (
+            best_exponents,
+            PrimitiveArray::from_vec(values).boxed(),
+            (exc.len() > 0).then(|| {
+                SparseArray::new(
+                    PrimitiveArray::from_vec(exc_pos).boxed(),
+                    PrimitiveArray::from_vec(exc).boxed(),
+                    len,
+                )
+                .boxed()
+            }),
+        )
+    }
+
+    fn encode(values: &[Self], exponents: &Exponents) -> (Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
+        let mut exc_pos = Vec::new();
+        let mut exc_value = Vec::new();
+        let mut prev = Self::ALPInt::default();
+        let encoded = values
+            .iter()
+            .enumerate()
+            .map(|(i, v)| {
+                let encoded =
+                    (*v * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
+                        .fast_round();
+                let decoded =
+                    encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
+
+                if decoded == *v {
+                    if let Some(e) = encoded.as_int() {
+                        prev = e;
+                        return e;
+                    }
+                }
+
+                exc_pos.push(i as u64);
+                exc_value.push(*v);
+                // Emit the last known good value. This helps with run-end encoding.
+                prev
+            })
+            .collect_vec();
+
+        (encoded, exc_pos, exc_value)
+    }
+
+    fn decode_single(encoded: Self::ALPInt, exponents: &Exponents) -> Self {
+        let encoded_float: Self = Self::from(encoded).unwrap();
+        encoded_float * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
     }
 }
 
-#[derive(Debug)]
-pub struct ALPEncoding;
-
-impl ALPEncoding {
-    pub const ID: EncodingId = EncodingId::new("vortex.alp");
+impl ALPFloat for f32 {
+    type ALPInt = i32;
+    const FRACTIONAL_BITS: u8 = 23;
+    const MAX_EXPONENT: u8 = 10;
+    const SWEET: Self =
+        (1 << Self::FRACTIONAL_BITS) as Self + (1 << Self::FRACTIONAL_BITS - 1) as Self;
+
+    const F10: &'static [Self] = &[
+        1.0,
+        10.0,
+        100.0,
+        1000.0,
+        10000.0,
+        100000.0,
+        1000000.0,
+        10000000.0,
+        100000000.0,
+        1000000000.0,
+        10000000000.0,
+    ];
+    const IF10: &'static [Self] = &[
+        1.0,
+        0.1,
+        0.01,
+        0.001,
+        0.0001,
+        0.00001,
+        0.000001,
+        0.0000001,
+        0.00000001,
+        0.000000001,
+        0.0000000001,
+    ];
 }
 
-impl Encoding for ALPEncoding {
-    fn id(&self) -> &EncodingId {
-        &Self::ID
-    }
-
-    fn compression(&self) -> Option<&dyn EncodingCompression> {
-        Some(self)
-    }
-
-    fn serde(&self) -> Option<&dyn EncodingSerde> {
-        Some(self)
-    }
+impl ALPFloat for f64 {
+    type ALPInt = i64;
+    const FRACTIONAL_BITS: u8 = 52;
+    const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
+    const SWEET: Self =
+        (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
+    const F10: &'static [Self] = &[
+        1.0,
+        10.0,
+        100.0,
+        1000.0,
+        10000.0,
+        100000.0,
+        1000000.0,
+        10000000.0,
+        100000000.0,
+        1000000000.0,
+        10000000000.0,
+        100000000000.0,
+        1000000000000.0,
+        10000000000000.0,
+        100000000000000.0,
+        1000000000000000.0,
+        10000000000000000.0,
+        100000000000000000.0,
+        1000000000000000000.0,
+        10000000000000000000.0,
+        100000000000000000000.0,
+        1000000000000000000000.0,
+        10000000000000000000000.0,
+        100000000000000000000000.0,
+    ];
+
+    const IF10: &'static [Self] = &[
+        1.0,
+        0.1,
+        0.01,
+        0.001,
+        0.0001,
+        0.00001,
+        0.000001,
+        0.0000001,
+        0.00000001,
+        0.000000001,
+        0.0000000001,
+        0.00000000001,
+        0.000000000001,
+        0.0000000000001,
+        0.00000000000001,
+        0.000000000000001,
+        0.0000000000000001,
+        0.00000000000000001,
+        0.000000000000000001,
+        0.0000000000000000001,
+        0.00000000000000000001,
+        0.000000000000000000001,
+        0.0000000000000000000001,
+        0.00000000000000000000001,
+    ];
 }
diff --git a/vortex-alp/src/array.rs b/vortex-alp/src/array.rs
new file mode 100644
index 0000000000..472a70dafe
--- /dev/null
+++ b/vortex-alp/src/array.rs
@@ -0,0 +1,172 @@
+use std::any::Any;
+use std::sync::{Arc, RwLock};
+
+use crate::alp::Exponents;
+pub use codecz::alp::ALPExponents;
+use vortex::array::{Array, ArrayKind, ArrayRef, ArrowIterator, Encoding, EncodingId, EncodingRef};
+use vortex::compress::EncodingCompression;
+use vortex::dtype::{DType, IntWidth, Signedness};
+use vortex::error::{VortexError, VortexResult};
+use vortex::formatter::{ArrayDisplay, ArrayFormatter};
+use vortex::serde::{ArraySerde, EncodingSerde};
+use vortex::stats::{Stats, StatsSet};
+
+use crate::compress::alp_encode;
+
+#[derive(Debug, Clone)]
+pub struct ALPArray {
+    encoded: ArrayRef,
+    exponents: Exponents,
+    patches: Option<ArrayRef>,
+    dtype: DType,
+    stats: Arc<RwLock<StatsSet>>,
+}
+
+impl ALPArray {
+    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<ArrayRef>) -> Self {
+        Self::try_new(encoded, exponents, patches).unwrap()
+    }
+
+    pub fn try_new(
+        encoded: ArrayRef,
+        exponents: Exponents,
+        patches: Option<ArrayRef>,
+    ) -> VortexResult<Self> {
+        let dtype = match encoded.dtype() {
+            d @ DType::Int(width, Signedness::Signed, nullability) => match width {
+                IntWidth::_32 => DType::Float(32.into(), *nullability),
+                IntWidth::_64 => DType::Float(64.into(), *nullability),
+                _ => return Err(VortexError::InvalidDType(d.clone())),
+            },
+            d => return Err(VortexError::InvalidDType(d.clone())),
+        };
+        Ok(Self {
+            encoded,
+            exponents,
+            patches,
+            dtype,
+            stats: Arc::new(RwLock::new(StatsSet::new())),
+        })
+    }
+
+    pub fn encode(array: &dyn Array) -> VortexResult<ArrayRef> {
+        match ArrayKind::from(array) {
+            ArrayKind::Primitive(p) => Ok(alp_encode(p)?.boxed()),
+            _ => Err(VortexError::InvalidEncoding(array.encoding().id().clone())),
+        }
+    }
+
+    pub fn encoded(&self) -> &dyn Array {
+        self.encoded.as_ref()
+    }
+
+    pub fn exponents(&self) -> &Exponents {
+        &self.exponents
+    }
+
+    pub fn patches(&self) -> Option<&dyn Array> {
+        self.patches.as_deref()
+    }
+}
+
+impl Array for ALPArray {
+    #[inline]
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    #[inline]
+    fn boxed(self) -> ArrayRef {
+        Box::new(self)
+    }
+
+    #[inline]
+    fn into_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+
+    #[inline]
+    fn len(&self) -> usize {
+        self.encoded.len()
+    }
+
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.encoded.is_empty()
+    }
+
+    #[inline]
+    fn dtype(&self) -> &DType {
+        &self.dtype
+    }
+
+    #[inline]
+    fn stats(&self) -> Stats {
+        Stats::new(&self.stats, self)
+    }
+
+    fn iter_arrow(&self) -> Box<ArrowIterator> {
+        todo!()
+    }
+
+    fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
+        Ok(Self::try_new(
+            self.encoded().slice(start, stop)?,
+            self.exponents().clone(),
+            self.patches().map(|p| p.slice(start, stop)).transpose()?,
+        )?
+        .boxed())
+    }
+
+    #[inline]
+    fn encoding(&self) -> EncodingRef {
+        &ALPEncoding
+    }
+
+    #[inline]
+    fn nbytes(&self) -> usize {
+        self.encoded().nbytes() + self.patches().map(|p| p.nbytes()).unwrap_or(0)
+    }
+
+    fn serde(&self) -> &dyn ArraySerde {
+        self
+    }
+}
+
+impl<'arr> AsRef<(dyn Array + 'arr)> for ALPArray {
+    fn as_ref(&self) -> &(dyn Array + 'arr) {
+        self
+    }
+}
+
+impl ArrayDisplay for ALPArray {
+    fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result {
+        f.writeln(format!("exponents: {:?}", self.exponents()))?;
+        if let Some(p) = self.patches() {
+            f.writeln("patches:")?;
+            f.indent(|indent| indent.array(p.as_ref()))?;
+        }
+        f.indent(|indent| indent.array(self.encoded()))
+    }
+}
+
+#[derive(Debug)]
+pub struct ALPEncoding;
+
+impl ALPEncoding {
+    pub const ID: EncodingId = EncodingId::new("vortex.alp");
+}
+
+impl Encoding for ALPEncoding {
+    fn id(&self) -> &EncodingId {
+        &Self::ID
+    }
+
+    fn compression(&self) -> Option<&dyn EncodingCompression> {
+        Some(self)
+    }
+
+    fn serde(&self) -> Option<&dyn EncodingSerde> {
+        Some(self)
+    }
+}
diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 2ce182f9f1..6115637a9f 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -1,20 +1,15 @@
-use itertools::Itertools;
 use log::debug;
-use num_traits::{Float, NumCast, PrimInt};
 
+use crate::alp::ALPFloat;
 use vortex::array::downcast::DowncastArrayBuiltin;
 use vortex::array::primitive::PrimitiveArray;
-use vortex::array::sparse::SparseArray;
 use vortex::array::{Array, ArrayRef};
 use vortex::compress::{CompressConfig, CompressCtx, Compressor, EncodingCompression};
 use vortex::error::{VortexError, VortexResult};
-use vortex::ptype::{NativePType, PType};
+use vortex::ptype::PType;
 
-use crate::alp::{ALPArray, ALPEncoding};
+use crate::array::{ALPArray, ALPEncoding};
 use crate::downcast::DowncastALP;
-use crate::Exponents;
-
-const SAMPLE_SIZE: usize = 32;
 
 impl EncodingCompression for ALPEncoding {
     fn compressor(
@@ -41,7 +36,12 @@ impl EncodingCompression for ALPEncoding {
 fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx) -> ArrayRef {
     let like_alp = like.map(|like_array| like_array.as_alp());
 
-    let parray = array.as_primitive();
+    let mut parray = array.as_primitive().clone();
+    if parray.validity().is_some() {
+        parray = compute::
+
+    }
+
     let (exponents, encoded, patches) = match parray.ptype() {
         PType::F32 => {
             ALPFloat::encode_to_array(parray.typed_data::<f32>(), like_alp.map(|a| a.exponents()))
@@ -73,211 +73,10 @@ pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
     Ok(ALPArray::new(encoded, exponents, patches))
 }
 
-trait ALPFloat: NativePType + Float {
-    type ALPInt: NativePType + PrimInt;
-    const FRACTIONAL_BITS: u8;
-    const MAX_EXPONENT: u8;
-    const SWEET: Self;
-    const F10: &'static [Self];
-    const IF10: &'static [Self];
-
-    /// Round to the nearest floating integer by shifting in and out of the low precision range.
-    fn fast_round(self) -> Self {
-        (self + Self::SWEET) - Self::SWEET
-    }
-
-    fn find_best_exponents(values: &[Self]) -> Exponents {
-        let mut best_e: u8 = 0;
-        let mut best_f: u8 = 0;
-        let mut best_nbytes: usize = usize::MAX;
-
-        let sample = (values.len() > SAMPLE_SIZE).then(|| {
-            values
-                .iter()
-                .step_by(values.len() / SAMPLE_SIZE)
-                .cloned()
-                .collect_vec()
-        });
-
-        // TODO(wmanning): idea, start with highest e, then find the best f
-        // after that, try e's in descending order, with a gap no larger than the original e - f
-        for e in 0..Self::MAX_EXPONENT {
-            for f in 0..e {
-                let (_, encoded, patches) = Self::encode_to_array(
-                    sample.as_deref().unwrap_or(values),
-                    Some(&Exponents { e, f }),
-                );
-                let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
-                if size < best_nbytes {
-                    best_nbytes = size;
-                    best_e = e;
-                    best_f = f;
-                } else if size == best_nbytes && e - f < best_e - best_f {
-                    best_e = e;
-                    best_f = f;
-                }
-            }
-        }
-
-        Exponents {
-            e: best_e,
-            f: best_f,
-        }
-    }
-
-    fn encode_to_array(
-        values: &[Self],
-        exponents: Option<&Exponents>,
-    ) -> (Exponents, ArrayRef, Option<ArrayRef>) {
-        let best_exponents =
-            exponents.map_or_else(|| Self::find_best_exponents(values), Exponents::clone);
-        let (values, exc_pos, exc) = Self::encode(values, &best_exponents);
-        let len = values.len();
-        (
-            best_exponents,
-            PrimitiveArray::from_vec(values).boxed(),
-            (exc.len() > 0).then(|| {
-                SparseArray::new(
-                    PrimitiveArray::from_vec(exc_pos).boxed(),
-                    PrimitiveArray::from_vec(exc).boxed(),
-                    len,
-                )
-                .boxed()
-            }),
-        )
-    }
-
-    fn encode(values: &[Self], exponents: &Exponents) -> (Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
-        let mut exc_pos = Vec::new();
-        let mut exc_value = Vec::new();
-        let mut prev = Self::ALPInt::default();
-        let encoded = values
-            .iter()
-            .enumerate()
-            .map(|(i, v)| {
-                let encoded =
-                    (*v * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
-                        .fast_round();
-                let decoded =
-                    encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
-
-                if decoded == *v {
-                    if let Some(e) = <<Self as ALPFloat>::ALPInt as NumCast>::from(encoded) {
-                        prev = e;
-                        return e;
-                    }
-                }
-
-                exc_pos.push(i as u64);
-                exc_value.push(*v);
-                // Emit the last known good value. This helps with run-end encoding.
-                prev
-            })
-            .collect_vec();
-
-        (encoded, exc_pos, exc_value)
-    }
-}
-
-impl ALPFloat for f32 {
-    type ALPInt = i32;
-    const FRACTIONAL_BITS: u8 = 23;
-    const MAX_EXPONENT: u8 = 10;
-    const SWEET: Self =
-        (1 << Self::FRACTIONAL_BITS) as Self + (1 << Self::FRACTIONAL_BITS - 1) as Self;
-
-    const F10: &'static [Self] = &[
-        1.0,
-        10.0,
-        100.0,
-        1000.0,
-        10000.0,
-        100000.0,
-        1000000.0,
-        10000000.0,
-        100000000.0,
-        1000000000.0,
-        10000000000.0,
-    ];
-    const IF10: &'static [Self] = &[
-        1.0,
-        0.1,
-        0.01,
-        0.001,
-        0.0001,
-        0.00001,
-        0.000001,
-        0.0000001,
-        0.00000001,
-        0.000000001,
-        0.0000000001,
-    ];
-}
-
-impl ALPFloat for f64 {
-    type ALPInt = i64;
-    const FRACTIONAL_BITS: u8 = 52;
-    const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
-    const SWEET: Self =
-        (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
-    const F10: &'static [Self] = &[
-        1.0,
-        10.0,
-        100.0,
-        1000.0,
-        10000.0,
-        100000.0,
-        1000000.0,
-        10000000.0,
-        100000000.0,
-        1000000000.0,
-        10000000000.0,
-        100000000000.0,
-        1000000000000.0,
-        10000000000000.0,
-        100000000000000.0,
-        1000000000000000.0,
-        10000000000000000.0,
-        100000000000000000.0,
-        1000000000000000000.0,
-        10000000000000000000.0,
-        100000000000000000000.0,
-        1000000000000000000000.0,
-        10000000000000000000000.0,
-        100000000000000000000000.0,
-    ];
-
-    const IF10: &'static [Self] = &[
-        1.0,
-        0.1,
-        0.01,
-        0.001,
-        0.0001,
-        0.00001,
-        0.000001,
-        0.0000001,
-        0.00000001,
-        0.000000001,
-        0.0000000001,
-        0.00000000001,
-        0.000000000001,
-        0.0000000000001,
-        0.00000000000001,
-        0.000000000000001,
-        0.0000000000000001,
-        0.00000000000000001,
-        0.000000000000000001,
-        0.0000000000000000001,
-        0.00000000000000000001,
-        0.000000000000000000001,
-        0.0000000000000000000001,
-        0.00000000000000000000001,
-    ];
-}
-
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::alp::Exponents;
 
     #[test]
     fn test_compress() {
@@ -294,13 +93,13 @@ mod test {
 
     #[test]
     fn test_nullable_compress() {
-        let array = PrimitiveArray::from_iter(vec![1.234f32; 1025]);
+        let array = PrimitiveArray::from_iter(vec![None, Some(1.234f32), None]);
         let encoded = alp_encode(&array).unwrap();
         println!("Encoded {:?}", encoded);
         assert!(encoded.patches().is_none());
         assert_eq!(
             encoded.encoded().as_primitive().typed_data::<i32>(),
-            vec![1234; 1025]
+            vec![0, 1234, 1234]
         );
         assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 });
     }
diff --git a/vortex-alp/src/compute.rs b/vortex-alp/src/compute.rs
index 12d888260e..86426ab162 100644
--- a/vortex-alp/src/compute.rs
+++ b/vortex-alp/src/compute.rs
@@ -1,10 +1,12 @@
+use crate::alp::ALPFloat;
 use crate::ALPArray;
+use std::f32;
 use vortex::array::Array;
 use vortex::compute::scalar_at::{scalar_at, ScalarAtFn};
 use vortex::compute::ArrayCompute;
 use vortex::dtype::{DType, FloatWidth};
-use vortex::error::VortexResult;
-use vortex::scalar::{NullableScalar, ScalarRef};
+use vortex::error::{VortexError, VortexResult};
+use vortex::scalar::{NullableScalar, Scalar, ScalarRef};
 
 impl ArrayCompute for ALPArray {
     fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
@@ -29,19 +31,19 @@ impl ScalarAtFn for ALPArray {
         match self.dtype() {
             DType::Float(FloatWidth::_32, _) => {
                 let encoded_val: i32 = encoded_val.try_into().unwrap();
-                Ok(alp::decode_single::<f32>(encoded_val, self.exponents())
-                    .unwrap()
-                    .into())
+                Ok(ScalarRef::from(<f32 as ALPFloat>::decode_single(
+                    encoded_val,
+                    self.exponents(),
+                )))
             }
-
             DType::Float(FloatWidth::_64, _) => {
                 let encoded_val: i64 = encoded_val.try_into().unwrap();
-                Ok(alp::decode_single::<f64>(encoded_val, self.exponents())
-                    .unwrap()
-                    .into())
+                Ok(ScalarRef::from(<f64 as ALPFloat>::decode_single(
+                    encoded_val,
+                    self.exponents(),
+                )))
             }
-
-            _ => unreachable!(),
+            _ => Err(VortexError::InvalidDType(self.dtype().clone())),
         }
     }
 }
diff --git a/vortex-alp/src/lib.rs b/vortex-alp/src/lib.rs
index 8ff1d432cc..6f6ec0efd8 100644
--- a/vortex-alp/src/lib.rs
+++ b/vortex-alp/src/lib.rs
@@ -1,8 +1,9 @@
-pub use alp::*;
+pub use array::*;
 use linkme::distributed_slice;
 use vortex::array::{EncodingRef, ENCODINGS};
 
 mod alp;
+mod array;
 mod compress;
 mod compute;
 mod downcast;
diff --git a/vortex-alp/src/serde.rs b/vortex-alp/src/serde.rs
index 3d3e4215a2..9419fc13cc 100644
--- a/vortex-alp/src/serde.rs
+++ b/vortex-alp/src/serde.rs
@@ -1,11 +1,13 @@
 use std::io;
 use std::io::ErrorKind;
 
+use crate::alp::Exponents;
 use vortex::array::{Array, ArrayRef};
 use vortex::dtype::{DType, FloatWidth, Signedness};
 use vortex::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx};
 
-use crate::{ALPArray, ALPEncoding, Exponents};
+use crate::ALPArray;
+use crate::ALPEncoding;
 
 impl ArraySerde for ALPArray {
     fn write(&self, ctx: &mut WriteCtx) -> io::Result<()> {
@@ -76,7 +78,8 @@ mod test {
             0.0004f64,
             1000000.0f64,
             0.33f64,
-        ]));
+        ]))
+        .unwrap();
         let read_arr = roundtrip_array(arr.as_ref()).unwrap();
 
         let read_alp = read_arr.as_alp();

From 6fdf93c229b68d99bc6932c0ecc4872620ea03f7 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 17:11:39 +0000
Subject: [PATCH 07/10] ALP

---
 Cargo.lock                         | 51 +++++++++++++++++++++-
 vortex-alp/Cargo.toml              | 13 ++++--
 vortex-alp/benches/alp_compress.rs | 11 +++++
 vortex-alp/src/alp.rs              | 69 +++++++++---------------------
 vortex-alp/src/array.rs            |  1 -
 vortex-alp/src/compress.rs         | 51 +++++++++++++++-------
 vortex-alp/src/lib.rs              |  2 +
 7 files changed, 127 insertions(+), 71 deletions(-)
 create mode 100644 vortex-alp/benches/alp_compress.rs

diff --git a/Cargo.lock b/Cargo.lock
index e7b474ef82..f481cfd052 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -550,6 +550,7 @@ checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb"
 dependencies = [
  "anstyle",
  "clap_lex",
+ "terminal_size",
 ]
 
 [[package]]
@@ -587,6 +588,12 @@ dependencies = [
  "walkdir",
 ]
 
+[[package]]
+name = "condtype"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
+
 [[package]]
 name = "const-random"
 version = "0.1.18"
@@ -757,6 +764,31 @@ dependencies = [
  "powerfmt",
 ]
 
+[[package]]
+name = "divan"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0d567df2c9c2870a43f3f2bd65aaeb18dbce1c18f217c3e564b4fbaeb3ee56c"
+dependencies = [
+ "cfg-if",
+ "clap",
+ "condtype",
+ "divan-macros",
+ "libc",
+ "regex-lite",
+]
+
+[[package]]
+name = "divan-macros"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27540baf49be0d484d8f0130d7d8da3011c32a44d4fc873368154f1510e574a2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.52",
+]
+
 [[package]]
 name = "dyn-clone"
 version = "1.0.17"
@@ -2164,6 +2196,12 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "regex-lite"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
+
 [[package]]
 name = "regex-syntax"
 version = "0.8.2"
@@ -2543,6 +2581,16 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "terminal_size"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
+dependencies = [
+ "rustix",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.57"
@@ -2812,8 +2860,7 @@ dependencies = [
 name = "vortex-alp"
 version = "0.1.0"
 dependencies = [
- "arrow",
- "codecz",
+ "divan",
  "itertools 0.12.1",
  "linkme",
  "log",
diff --git a/vortex-alp/Cargo.toml b/vortex-alp/Cargo.toml
index 1e1fc4cf51..64911557b3 100644
--- a/vortex-alp/Cargo.toml
+++ b/vortex-alp/Cargo.toml
@@ -11,14 +11,19 @@ include = { workspace = true }
 edition = { workspace = true }
 rust-version = { workspace = true }
 
+[lints]
+workspace = true
+
 [dependencies]
-arrow = { version = "50.0.0" }
 vortex-array = { path = "../vortex-array" }
 linkme = "0.3.22"
 itertools = "0.12.1"
 num-traits = "0.2.18"
-codecz = { path = "../codecz" }
 log = { version = "0.4.20", features = [] }
 
-[lints]
-workspace = true
+[dev-dependencies]
+divan = "0.1.14"
+
+[[bench]]
+name = "alp_compress"
+harness = false
\ No newline at end of file
diff --git a/vortex-alp/benches/alp_compress.rs b/vortex-alp/benches/alp_compress.rs
new file mode 100644
index 0000000000..d26137a409
--- /dev/null
+++ b/vortex-alp/benches/alp_compress.rs
@@ -0,0 +1,11 @@
+use vortex_alp::{ALPFloat, Exponents};
+
+fn main() {
+    divan::main();
+}
+
+#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
+fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>, Vec<T>) {
+    let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
+    T::encode(values.as_slice(), None)
+}
diff --git a/vortex-alp/src/alp.rs b/vortex-alp/src/alp.rs
index ff1a8e9165..f2113489cc 100644
--- a/vortex-alp/src/alp.rs
+++ b/vortex-alp/src/alp.rs
@@ -1,10 +1,6 @@
 use itertools::Itertools;
-use num_traits::{Float, NumCast, PrimInt};
-use vortex::array::primitive::PrimitiveArray;
-use vortex::array::sparse::SparseArray;
-use vortex::ptype::NativePType;
-
-use vortex::array::{Array, ArrayRef};
+use num_traits::{Float, NumCast, PrimInt, Zero};
+use std::mem::size_of;
 
 const SAMPLE_SIZE: usize = 32;
 
@@ -14,8 +10,9 @@ pub struct Exponents {
     pub f: u8,
 }
 
-pub trait ALPFloat: NativePType + Float {
-    type ALPInt: NativePType + PrimInt;
+pub trait ALPFloat: Float + 'static {
+    type ALPInt: PrimInt;
+
     const FRACTIONAL_BITS: u8;
     const MAX_EXPONENT: u8;
     const SWEET: Self;
@@ -32,8 +29,7 @@ pub trait ALPFloat: NativePType + Float {
     }
 
     fn find_best_exponents(values: &[Self]) -> Exponents {
-        let mut best_e: u8 = 0;
-        let mut best_f: u8 = 0;
+        let mut best_exp = Exponents { e: 0, f: 0 };
         let mut best_nbytes: usize = usize::MAX;
 
         let sample = (values.len() > SAMPLE_SIZE).then(|| {
@@ -45,66 +41,43 @@ pub trait ALPFloat: NativePType + Float {
         });
 
         // TODO(wmanning): idea, start with highest e, then find the best f
-        // after that, try e's in descending order, with a gap no larger than the original e - f
+        //  after that, try e's in descending order, with a gap no larger than the original e - f
         for e in 0..Self::MAX_EXPONENT {
             for f in 0..e {
-                let (_, encoded, patches) = Self::encode_to_array(
+                let (_, encoded, exc_pos, exc_patches) = Self::encode(
                     sample.as_deref().unwrap_or(values),
                     Some(&Exponents { e, f }),
                 );
-                let size = encoded.nbytes() + patches.map_or(0, |p| p.nbytes());
+                let size =
+                    (encoded.len() + exc_patches.len()) * size_of::<Self>() + (exc_pos.len() * 4);
                 if size < best_nbytes {
                     best_nbytes = size;
-                    best_e = e;
-                    best_f = f;
-                } else if size == best_nbytes && e - f < best_e - best_f {
-                    best_e = e;
-                    best_f = f;
+                    best_exp = Exponents { e, f };
+                } else if size == best_nbytes && e - f < best_exp.e - best_exp.f {
+                    best_exp = Exponents { e, f };
                 }
             }
         }
 
-        Exponents {
-            e: best_e,
-            f: best_f,
-        }
+        best_exp
     }
 
-    fn encode_to_array(
+    fn encode(
         values: &[Self],
         exponents: Option<&Exponents>,
-    ) -> (Exponents, ArrayRef, Option<ArrayRef>) {
-        let best_exponents =
-            exponents.map_or_else(|| Self::find_best_exponents(values), Exponents::clone);
-        let (values, exc_pos, exc) = Self::encode(values, &best_exponents);
-        let len = values.len();
-        (
-            best_exponents,
-            PrimitiveArray::from_vec(values).boxed(),
-            (exc.len() > 0).then(|| {
-                SparseArray::new(
-                    PrimitiveArray::from_vec(exc_pos).boxed(),
-                    PrimitiveArray::from_vec(exc).boxed(),
-                    len,
-                )
-                .boxed()
-            }),
-        )
-    }
+    ) -> (Exponents, Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
+        let exp = exponents.map_or_else(|| Self::find_best_exponents(values), Exponents::clone);
 
-    fn encode(values: &[Self], exponents: &Exponents) -> (Vec<Self::ALPInt>, Vec<u64>, Vec<Self>) {
         let mut exc_pos = Vec::new();
         let mut exc_value = Vec::new();
-        let mut prev = Self::ALPInt::default();
+        let mut prev = Self::ALPInt::zero();
         let encoded = values
             .iter()
             .enumerate()
             .map(|(i, v)| {
                 let encoded =
-                    (*v * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
-                        .fast_round();
-                let decoded =
-                    encoded * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize];
+                    (*v * Self::F10[exp.e as usize] * Self::IF10[exp.f as usize]).fast_round();
+                let decoded = encoded * Self::F10[exp.f as usize] * Self::IF10[exp.e as usize];
 
                 if decoded == *v {
                     if let Some(e) = encoded.as_int() {
@@ -120,7 +93,7 @@ pub trait ALPFloat: NativePType + Float {
             })
             .collect_vec();
 
-        (encoded, exc_pos, exc_value)
+        (exp, encoded, exc_pos, exc_value)
     }
 
     fn decode_single(encoded: Self::ALPInt, exponents: &Exponents) -> Self {
diff --git a/vortex-alp/src/array.rs b/vortex-alp/src/array.rs
index 472a70dafe..978af89680 100644
--- a/vortex-alp/src/array.rs
+++ b/vortex-alp/src/array.rs
@@ -2,7 +2,6 @@ use std::any::Any;
 use std::sync::{Arc, RwLock};
 
 use crate::alp::Exponents;
-pub use codecz::alp::ALPExponents;
 use vortex::array::{Array, ArrayKind, ArrayRef, ArrowIterator, Encoding, EncodingId, EncodingRef};
 use vortex::compress::EncodingCompression;
 use vortex::dtype::{DType, IntWidth, Signedness};
diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 6115637a9f..c59bc93c2c 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -3,13 +3,15 @@ use log::debug;
 use crate::alp::ALPFloat;
 use vortex::array::downcast::DowncastArrayBuiltin;
 use vortex::array::primitive::PrimitiveArray;
+use vortex::array::sparse::SparseArray;
 use vortex::array::{Array, ArrayRef};
 use vortex::compress::{CompressConfig, CompressCtx, Compressor, EncodingCompression};
 use vortex::error::{VortexError, VortexResult};
-use vortex::ptype::PType;
+use vortex::ptype::{NativePType, PType};
 
 use crate::array::{ALPArray, ALPEncoding};
 use crate::downcast::DowncastALP;
+use crate::Exponents;
 
 impl EncodingCompression for ALPEncoding {
     fn compressor(
@@ -36,19 +38,12 @@ impl EncodingCompression for ALPEncoding {
 fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx) -> ArrayRef {
     let like_alp = like.map(|like_array| like_array.as_alp());
 
-    let mut parray = array.as_primitive().clone();
-    if parray.validity().is_some() {
-        parray = compute::
-
-    }
+    // TODO(ngates): fill forward nulls
+    let parray = array.as_primitive();
 
     let (exponents, encoded, patches) = match parray.ptype() {
-        PType::F32 => {
-            ALPFloat::encode_to_array(parray.typed_data::<f32>(), like_alp.map(|a| a.exponents()))
-        }
-        PType::F64 => {
-            ALPFloat::encode_to_array(parray.typed_data::<f64>(), like_alp.map(|a| a.exponents()))
-        }
+        PType::F32 => encode_to_array(parray.typed_data::<f32>(), like_alp.map(|a| a.exponents())),
+        PType::F64 => encode_to_array(parray.typed_data::<f64>(), like_alp.map(|a| a.exponents())),
         _ => panic!("Unsupported ptype"),
     };
 
@@ -64,17 +59,41 @@ fn alp_compressor(array: &dyn Array, like: Option<&dyn Array>, ctx: CompressCtx)
     ALPArray::new(compressed_encoded, exponents, compressed_patches).boxed()
 }
 
+fn encode_to_array<T>(
+    values: &[T],
+    exponents: Option<&Exponents>,
+) -> (Exponents, ArrayRef, Option<ArrayRef>)
+where
+    T: ALPFloat + NativePType,
+    T::ALPInt: NativePType,
+{
+    let (exponents, values, exc_pos, exc) = T::encode(values, exponents);
+    let len = values.len();
+    (
+        exponents,
+        PrimitiveArray::from_vec(values).boxed(),
+        (exc.len() > 0).then(|| {
+            SparseArray::new(
+                PrimitiveArray::from_vec(exc_pos).boxed(),
+                PrimitiveArray::from_vec(exc).boxed(),
+                len,
+            )
+            .boxed()
+        }),
+    )
+}
+
 pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
     let (exponents, encoded, patches) = match parray.ptype() {
-        PType::F32 => ALPFloat::encode_to_array(parray.typed_data::<f32>(), None),
-        PType::F64 => ALPFloat::encode_to_array(parray.typed_data::<f64>(), None),
+        PType::F32 => encode_to_array(parray.typed_data::<f32>(), None),
+        PType::F64 => encode_to_array(parray.typed_data::<f64>(), None),
         _ => return Err(VortexError::InvalidPType(parray.ptype().clone())),
     };
     Ok(ALPArray::new(encoded, exponents, patches))
 }
 
 #[cfg(test)]
-mod test {
+mod tests {
     use super::*;
     use crate::alp::Exponents;
 
@@ -99,7 +118,7 @@ mod test {
         assert!(encoded.patches().is_none());
         assert_eq!(
             encoded.encoded().as_primitive().typed_data::<i32>(),
-            vec![0, 1234, 1234]
+            vec![0, 1234, 0]
         );
         assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 });
     }
diff --git a/vortex-alp/src/lib.rs b/vortex-alp/src/lib.rs
index 6f6ec0efd8..f104396cd5 100644
--- a/vortex-alp/src/lib.rs
+++ b/vortex-alp/src/lib.rs
@@ -1,4 +1,6 @@
+pub use alp::*;
 pub use array::*;
+
 use linkme::distributed_slice;
 use vortex::array::{EncodingRef, ENCODINGS};
 

From 2824169fe0a99a1d896043b824d90ff6ededd8a1 Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Tue, 5 Mar 2024 17:28:42 +0000
Subject: [PATCH 08/10] Bench

---
 vortex-alp/benches/alp_compress.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/vortex-alp/benches/alp_compress.rs b/vortex-alp/benches/alp_compress.rs
index d26137a409..d0cb986d91 100644
--- a/vortex-alp/benches/alp_compress.rs
+++ b/vortex-alp/benches/alp_compress.rs
@@ -1,4 +1,6 @@
-use vortex_alp::{ALPFloat, Exponents};
+use vortex::array::primitive::PrimitiveArray;
+use vortex::array::ArrayRef;
+use vortex_alp::{ALPArray, ALPFloat, Exponents};
 
 fn main() {
     divan::main();
@@ -9,3 +11,10 @@ fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>,
     let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
     T::encode(values.as_slice(), None)
 }
+
+// TODO(ngates): remove this
+#[divan::bench(args = [100_000, 10_000_000])]
+fn alp_compress_array(n: usize) -> ArrayRef {
+    let array = PrimitiveArray::from_vec(vec![1.234f64; n]);
+    ALPArray::encode(&array).unwrap()
+}

From b11a043c0fc765475046b1aa31f43041c0fc1d0e Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Wed, 6 Mar 2024 09:07:35 +0000
Subject: [PATCH 09/10] Compressors

---
 vortex-alp/src/alp.rs      | 4 ++--
 vortex-alp/src/compress.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vortex-alp/src/alp.rs b/vortex-alp/src/alp.rs
index f2113489cc..75e410fb71 100644
--- a/vortex-alp/src/alp.rs
+++ b/vortex-alp/src/alp.rs
@@ -107,7 +107,7 @@ impl ALPFloat for f32 {
     const FRACTIONAL_BITS: u8 = 23;
     const MAX_EXPONENT: u8 = 10;
     const SWEET: Self =
-        (1 << Self::FRACTIONAL_BITS) as Self + (1 << Self::FRACTIONAL_BITS - 1) as Self;
+        (1 << Self::FRACTIONAL_BITS) as Self + (1 << (Self::FRACTIONAL_BITS - 1)) as Self;
 
     const F10: &'static [Self] = &[
         1.0,
@@ -142,7 +142,7 @@ impl ALPFloat for f64 {
     const FRACTIONAL_BITS: u8 = 52;
     const MAX_EXPONENT: u8 = 18; // 10^18 is the maximum i64
     const SWEET: Self =
-        (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << Self::FRACTIONAL_BITS - 1) as Self;
+        (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << (Self::FRACTIONAL_BITS - 1)) as Self;
     const F10: &'static [Self] = &[
         1.0,
         10.0,
diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index c59bc93c2c..5ccbc6d3df 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -72,7 +72,7 @@ where
     (
         exponents,
         PrimitiveArray::from_vec(values).boxed(),
-        (exc.len() > 0).then(|| {
+        (!exc.is_empty()).then(|| {
             SparseArray::new(
                 PrimitiveArray::from_vec(exc_pos).boxed(),
                 PrimitiveArray::from_vec(exc).boxed(),
@@ -87,7 +87,7 @@ pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
     let (exponents, encoded, patches) = match parray.ptype() {
         PType::F32 => encode_to_array(parray.typed_data::<f32>(), None),
         PType::F64 => encode_to_array(parray.typed_data::<f64>(), None),
-        _ => return Err(VortexError::InvalidPType(parray.ptype().clone())),
+        _ => return Err(VortexError::InvalidPType(*parray.ptype())),
     };
     Ok(ALPArray::new(encoded, exponents, patches))
 }

From 43a47fea0b4d320e1e0817bc1946f554fe93457e Mon Sep 17 00:00:00 2001
From: Nicholas Gates <nick@nickgates.com>
Date: Wed, 6 Mar 2024 09:24:46 +0000
Subject: [PATCH 10/10] REE

---
 vortex-alp/src/compress.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs
index 5ccbc6d3df..83018a53c6 100644
--- a/vortex-alp/src/compress.rs
+++ b/vortex-alp/src/compress.rs
@@ -95,7 +95,6 @@ pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::alp::Exponents;
 
     #[test]
     fn test_compress() {