Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make EncodingID Copy #131

Merged
merged 6 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 16 additions & 39 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,10 @@ use log::{info, warn};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ProjectionMask;

use vortex::array::bool::BoolEncoding;
use vortex::array::chunked::{ChunkedArray, ChunkedEncoding};
use vortex::array::composite::CompositeEncoding;
use vortex::array::constant::ConstantEncoding;
use vortex::array::chunked::ChunkedArray;
use vortex::array::downcast::DowncastArrayBuiltin;
use vortex::array::primitive::PrimitiveEncoding;
use vortex::array::sparse::SparseEncoding;
use vortex::array::struct_::StructEncoding;
use vortex::array::varbin::VarBinEncoding;
use vortex::array::varbinview::VarBinViewEncoding;
use vortex::array::IntoArray;
use vortex::array::{Array, ArrayRef, Encoding};
use vortex::array::{Array, ArrayRef};
use vortex::array::{EncodingId, IntoArray};
use vortex::arrow::FromArrowType;
use vortex::compress::{CompressConfig, CompressCtx};
use vortex::formatter::display_tree;
Expand All @@ -32,40 +24,25 @@ use vortex_ree::REEEncoding;
use vortex_roaring::RoaringBoolEncoding;
use vortex_schema::DType;

pub fn enumerate_arrays() -> Vec<&'static dyn Encoding> {
pub fn enumerate_arrays() -> Vec<EncodingId> {
vec![
// TODO(ngates): fix https://github.com/fulcrum-so/vortex/issues/35
// Builtins
&BoolEncoding,
&ChunkedEncoding,
&CompositeEncoding,
&ConstantEncoding,
&PrimitiveEncoding,
&SparseEncoding,
&StructEncoding,
&VarBinEncoding,
&VarBinViewEncoding,
// Encodings
&ALPEncoding,
&DictEncoding,
&BitPackedEncoding,
&FoREncoding,
&DateTimeEncoding,
// &DeltaEncoding,
// &FFoREncoding,
&REEEncoding,
&RoaringBoolEncoding,
// &RoaringIntEncoding,
ALPEncoding::ID,
DictEncoding::ID,
BitPackedEncoding::ID,
FoREncoding::ID,
DateTimeEncoding::ID,
// DeltaEncoding::ID,
// FFoREncoding::ID,
REEEncoding::ID,
RoaringBoolEncoding::ID,
// RoaringIntEncoding::ID,
// Doesn't offer anything more than FoR really
// &ZigZagEncoding,
// ZigZagEncoding::ID,
]
}

pub fn compress_ctx() -> CompressCtx {
let cfg = CompressConfig::new(
HashSet::from_iter(enumerate_arrays().iter().map(|e| (*e).id())),
HashSet::default(),
);
let cfg = CompressConfig::new(HashSet::from_iter(enumerate_arrays()), HashSet::default());
info!("Compression config {cfg:?}");
CompressCtx::new(Arc::new(cfg))
}
Expand Down
2 changes: 1 addition & 1 deletion pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ impl PyArray {
inner.into_any().downcast::<VarBinViewArray>().unwrap(),
)?
.extract(py),
ArrayKind::Other(other) => match *other.encoding().id() {
ArrayKind::Other(other) => match other.encoding().id() {
// PyEnc chooses to expose certain encodings as first-class objects.
// For the remainder, we should have a generic EncArray implementation that supports basic functions.
ALPEncoding::ID => {
Expand Down
6 changes: 3 additions & 3 deletions vortex-alp/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl ALPArray {
pub fn encode(array: &dyn Array) -> VortexResult<ArrayRef> {
match ArrayKind::from(array) {
ArrayKind::Primitive(p) => Ok(alp_encode(p)?.into_array()),
_ => Err(VortexError::InvalidEncoding(array.encoding().id().clone())),
_ => Err(VortexError::InvalidEncoding(array.encoding().id())),
}
}

Expand Down Expand Up @@ -131,8 +131,8 @@ impl ALPEncoding {
}

impl Encoding for ALPEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ impl BoolEncoding {
static ENCODINGS_BOOL: EncodingRef = &BoolEncoding;

impl Encoding for BoolEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn serde(&self) -> Option<&dyn EncodingSerde> {
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/chunked/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ impl ChunkedEncoding {
static ENCODINGS_CHUNKED: EncodingRef = &ChunkedEncoding;

impl Encoding for ChunkedEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn serde(&self) -> Option<&dyn EncodingSerde> {
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/composite/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ impl CompositeEncoding {
static ENCODINGS_COMPOSITE: EncodingRef = &CompositeEncoding;

impl Encoding for CompositeEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/composite/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl EncodingCompression for CompositeEncoding {
array: &dyn Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
(array.encoding().id() == &Self::ID).then_some(self)
(array.encoding().id() == Self::ID).then_some(self)
}

fn compress(
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/constant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ impl ConstantEncoding {
static ENCODINGS_CONSTANT: EncodingRef = &ConstantEncoding;

impl Encoding for ConstantEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn serde(&self) -> Option<&dyn EncodingSerde> {
Expand Down
6 changes: 3 additions & 3 deletions vortex-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ pub fn check_validity_buffer(validity: Option<&ArrayRef>, expected_len: usize) -
Ok(())
}

#[derive(Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub struct EncodingId(&'static str);

impl EncodingId {
Expand All @@ -268,7 +268,7 @@ impl Display for EncodingId {
}

pub trait Encoding: Debug + Send + Sync + 'static {
fn id(&self) -> &EncodingId;
fn id(&self) -> EncodingId;

/// Whether this encoding provides a compressor.
fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down Expand Up @@ -308,7 +308,7 @@ pub enum ArrayKind<'a> {

impl<'a> From<&'a dyn Array> for ArrayKind<'a> {
fn from(value: &'a dyn Array) -> Self {
match *value.encoding().id() {
match value.encoding().id() {
BoolEncoding::ID => ArrayKind::Bool(value.as_bool()),
ChunkedEncoding::ID => ArrayKind::Chunked(value.as_chunked()),
CompositeEncoding::ID => ArrayKind::Composite(value.as_composite()),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/primitive/compute/patch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{compute, match_each_native_ptype};
impl PatchFn for PrimitiveArray {
fn patch(&self, patch: &dyn Array) -> VortexResult<ArrayRef> {
match patch.encoding().id() {
&SparseEncoding::ID => patch_with_sparse(self, patch.as_sparse()),
SparseEncoding::ID => patch_with_sparse(self, patch.as_sparse()),
// TODO(ngates): support a default implementation based on iter_arrow?
_ => Err(VortexError::MissingKernel(
"patch",
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ impl PrimitiveEncoding {
static ENCODINGS_PRIMITIVE: EncodingRef = &PrimitiveEncoding;

impl Encoding for PrimitiveEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn serde(&self) -> Option<&dyn EncodingSerde> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/sparse/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl EncodingCompression for SparseEncoding {
array: &dyn Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
(array.encoding().id() == &Self::ID).then_some(self)
(array.encoding().id() == Self::ID).then_some(self)
}

fn compress(
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/sparse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ impl SparseEncoding {
static ENCODINGS_SPARSE: EncodingRef = &SparseEncoding;

impl Encoding for SparseEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/struct_/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ impl EncodingCompression for StructEncoding {
array: &dyn Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
(array.encoding().id() == &Self::ID).then_some(self)
(array.encoding().id() == Self::ID).then_some(self)
}

fn compress(
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ impl StructEncoding {
static ENCODINGS_STRUCT: EncodingRef = &StructEncoding;

impl Encoding for StructEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl EncodingCompression for VarBinEncoding {
array: &dyn Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
(array.encoding().id() == &Self::ID).then_some(self)
(array.encoding().id() == Self::ID).then_some(self)
}

fn compress(
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/varbin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,8 @@ impl VarBinEncoding {
static ENCODINGS_VARBIN: EncodingRef = &VarBinEncoding;

impl Encoding for VarBinEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn compression(&self) -> Option<&dyn EncodingCompression> {
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/varbinview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ impl VarBinViewEncoding {
static ENCODINGS_VARBINVIEW: EncodingRef = &VarBinViewEncoding;

impl Encoding for VarBinViewEncoding {
fn id(&self) -> &EncodingId {
&Self::ID
fn id(&self) -> EncodingId {
Self::ID
}

fn serde(&self) -> Option<&dyn EncodingSerde> {
Expand Down
36 changes: 25 additions & 11 deletions vortex-array/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ use std::sync::Arc;

use log::{debug, info, warn};

use crate::array::chunked::ChunkedArray;
use crate::array::chunked::{ChunkedArray, ChunkedEncoding};
use crate::array::composite::CompositeEncoding;
use crate::array::constant::{ConstantArray, ConstantEncoding};
use crate::array::struct_::StructArray;
use crate::array::sparse::SparseEncoding;
use crate::array::struct_::{StructArray, StructEncoding};
use crate::array::varbin::VarBinEncoding;
use crate::array::{Array, ArrayKind, ArrayRef, Encoding, EncodingId, ENCODINGS};
use crate::compute;
use crate::compute::scalar_at::scalar_at;
Expand Down Expand Up @@ -47,8 +50,8 @@ pub struct CompressConfig {
max_depth: u8,
// TODO(ngates): can each encoding define their own configs?
pub ree_average_run_threshold: f32,
encodings: HashSet<&'static EncodingId>,
disabled_encodings: HashSet<&'static EncodingId>,
encodings: HashSet<EncodingId>,
disabled_encodings: HashSet<EncodingId>,
}

impl Default for CompressConfig {
Expand All @@ -68,12 +71,23 @@ impl Default for CompressConfig {
}

impl CompressConfig {
const DEFAULT_ENCODINGS: [EncodingId; 5] = [
ChunkedEncoding::ID,
CompositeEncoding::ID,
SparseEncoding::ID,
StructEncoding::ID,
VarBinEncoding::ID,
];

pub fn new(
encodings: HashSet<&'static EncodingId>,
mut disabled_encodings: HashSet<&'static EncodingId>,
mut encodings: HashSet<EncodingId>,
mut disabled_encodings: HashSet<EncodingId>,
) -> Self {
Self::DEFAULT_ENCODINGS.iter().for_each(|e| {
encodings.insert(*e);
});
// Always disable constant encoding, it's handled separately
disabled_encodings.insert(&ConstantEncoding::ID);
disabled_encodings.insert(ConstantEncoding::ID);
Self {
encodings,
disabled_encodings,
Expand All @@ -91,9 +105,9 @@ impl CompressConfig {
)
}

pub fn is_enabled(&self, kind: &EncodingId) -> bool {
(self.encodings.is_empty() || self.encodings.contains(kind))
&& !self.disabled_encodings.contains(kind)
pub fn is_enabled(&self, kind: EncodingId) -> bool {
(self.encodings.is_empty() || self.encodings.contains(&kind))
&& !self.disabled_encodings.contains(&kind)
}
}

Expand Down Expand Up @@ -236,7 +250,7 @@ pub fn sampled_compression(array: &dyn Array, ctx: &CompressCtx) -> VortexResult
let mut candidates: Vec<&dyn EncodingCompression> = ENCODINGS
.iter()
.filter(|encoding| ctx.options().is_enabled(encoding.id()))
.filter(|encoding| !ctx.disabled_encodings.contains(encoding.id()))
.filter(|encoding| !ctx.disabled_encodings.contains(&encoding.id()))
.filter_map(|encoding| encoding.compression())
.filter(|compression| {
if compression
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ pub enum VortexError {
InvalidArgument(ErrString),
// Used when a function is not implemented for a given array type.
#[error("function {0} not implemented for {1}")]
NotImplemented(&'static str, &'static EncodingId),
NotImplemented(&'static str, EncodingId),
// Used when a function is implemented for an array type, but the RHS is not supported.
#[error("missing kernel {0} for {1} and {2:?}")]
MissingKernel(&'static str, &'static EncodingId, Vec<&'static EncodingId>),
MissingKernel(&'static str, EncodingId, Vec<EncodingId>),
#[error("invalid data type: {0}")]
InvalidDType(DType),
#[error("invalid physical type: {0:?}")]
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ where

pub struct ReadCtx<'a> {
schema: &'a DType,
encodings: Vec<&'static EncodingId>,
encodings: Vec<EncodingId>,
r: &'a mut dyn Read,
}

Expand Down Expand Up @@ -167,7 +167,7 @@ impl<'a> ReadCtx<'a> {

pub struct WriteCtx<'a> {
w: &'a mut dyn Write,
available_encodings: Vec<&'static EncodingId>,
available_encodings: Vec<EncodingId>,
}

impl<'a> WriteCtx<'a> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-datetime/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl EncodingCompression for DateTimeEncoding {
array: &dyn Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
if array.encoding().id() != &CompositeEncoding::ID {
if array.encoding().id() != CompositeEncoding::ID {
return None;
}

Expand Down
Loading