diff --git a/.cargo/config b/.cargo/config new file mode 120000 index 0000000000..e1fd6d8b13 --- /dev/null +++ b/.cargo/config @@ -0,0 +1 @@ +../config.toml \ No newline at end of file diff --git a/.github/actions/setup-python/action.yml b/.github/actions/setup-python/action.yml new file mode 100644 index 0000000000..2d1998905c --- /dev/null +++ b/.github/actions/setup-python/action.yml @@ -0,0 +1,47 @@ +name: 'Setup Rust' +description: 'Toolchain setup and Initial compilation' +inputs: + rye-version: + description: 'Rye version to use' + required: true + default: '0.16.0' +runs: + using: "composite" + steps: + - name: Rye Cache + id: rye-cache + uses: actions/cache@v4 + with: + path: ~/.rye + key: "rye-${{ runner.os }}-${{ inputs.rye-version }}" + + - name: Rye Install + shell: bash + run: curl -sSf https://rye-up.com/get | bash + if: steps.rye-cache.outputs.cache-hit != 'true' + env: + RYE_VERSION: "${{ inputs.rye-version }}" + RYE_INSTALL_OPTION: "--yes" + + - name: Rye Shims + shell: bash + run: echo "~/.rye/shims" >> $GITHUB_PATH + + - name: Venv Cache + id: venv-cache + uses: actions/cache@v4 + with: + path: .venv + key: "venv-${{ runner.os }}-${{ hashFiles('requirements**.lock') }}" + + - name: Rye Sync + shell: bash + # --no-lock prevents resolution of the lock file. The locks are still respected. + # We always run `rye sync` even if the cache fetch was successful since it builds our Rust extensions for us. + run: rye sync --no-lock + env: + MATURIN_PEP517_ARGS: "--profile dev" + + - name: Export Path + shell: bash + run: echo "PATH=$PATH" >> $GITHUB_ENV \ No newline at end of file diff --git a/.github/actions/setup-rust/action.yml b/.github/actions/setup-rust/action.yml new file mode 100644 index 0000000000..100b184cba --- /dev/null +++ b/.github/actions/setup-rust/action.yml @@ -0,0 +1,44 @@ +name: 'Setup Rust' +description: 'Toolchain setup and Initial compilation' +inputs: + rust-toolchain: # id of input + description: 'Rust toolchain version to use' + required: true + default: stable +runs: + using: "composite" + steps: + - name: Rust Toolchain Cache + id: rustup-cache + uses: actions/cache@v4 + with: + path: ~/.rustup + key: "rustup-${{ runner.os }}-${{ inputs.rust-toolchain }}" + + - name: Rust Toolchain + uses: dtolnay/rust-toolchain@stable + if: steps.rustup-cache.outputs.cache-hit != 'true' + with: + toolchain: "${{ inputs.rust-toolchain }}" + components: clippy, rustfmt + - name: Rust Dependency Cache + uses: Swatinem/rust-cache@v2 + with: + shared-key: "shared" # To allow reuse across jobs + + - name: Rust Compile Cache + uses: mozilla-actions/sccache-action@v0.0.4 + - name: Rust Compile Cache Config + shell: bash + run: | + echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV + echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV + echo "CARGO_INCREMENTAL=0" >> $GITHUB_ENV + + - name: Rust Build + shell: bash + run: cargo build --all-targets + + - name: Export Path + shell: bash + run: echo "PATH=$PATH" >> $GITHUB_ENV \ No newline at end of file diff --git a/.github/actions/setup-zig/action.yml b/.github/actions/setup-zig/action.yml new file mode 100644 index 0000000000..33486f0f81 --- /dev/null +++ b/.github/actions/setup-zig/action.yml @@ -0,0 +1,23 @@ +name: 'Setup Zig' +description: 'Toolchain setup and Initial compilation' +runs: + using: "composite" + steps: + - name: Zig Version + id: zig-version + shell: bash + run: echo "version=$(cat .zig-version | tr -d '\n ')" >> $GITHUB_OUTPUT + + - name: Zig Setup + uses: goto-bus-stop/setup-zig@v2 + with: + version: "${{ steps.zig-version.outputs.version }}" + + # TODO(ngates): should we cache zig-cache? Or zig-out? + - name: Zig Build + shell: bash + run: zig build + + - name: Export Path + shell: bash + run: echo "PATH=$PATH" >> $GITHUB_ENV \ No newline at end of file diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml new file mode 100644 index 0000000000..d52ccfffe8 --- /dev/null +++ b/.github/workflows/bench-pr.yml @@ -0,0 +1,42 @@ +name: PR Benchmarks + +on: + pull_request: + types: [ labeled, synchronize ] + branches: [ "develop" ] + workflow_dispatch: { } + +permissions: + actions: write + contents: read + pull-requests: write + +jobs: + bench: + runs-on: ubuntu-latest-large + if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }} + steps: + # We remove the benchmark label first so that the workflow can be re-triggered. + - uses: actions-ecosystem/action-remove-labels@v1 + with: + labels: benchmark + + - uses: actions/checkout@v4 + + - uses: ./.github/actions/setup-zig + - uses: ./.github/actions/setup-rust + + - name: Bench - Vortex + run: cargo bench | tee bench.txt + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1.19.3 + with: + name: Vortex Benchmarks + tool: cargo + github-token: ${{ secrets.GITHUB_TOKEN }} + output-file-path: bench.txt + summary-always: true + auto-push: true + fail-on-alert: false + diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000000..5180858bef --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,35 @@ +name: Benchmarks + +on: + push: + branches: [ "develop" ] + workflow_dispatch: { } + +permissions: + actions: read + contents: write + deployments: write + +jobs: + bench: + runs-on: ubuntu-latest-large + if: ${{ github.event_name == 'workflow_dispatch' || (contains(github.event.head_commit.message, '[benchmark]') && github.ref_name == 'develop') }} + steps: + - uses: actions/checkout@v4 + + - uses: ./.github/actions/setup-zig + - uses: ./.github/actions/setup-rust + + - name: Bench - Vortex + run: cargo bench | tee bench.txt + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1.19.3 + with: + name: Vortex Benchmarks + tool: cargo + github-token: ${{ secrets.GITHUB_TOKEN }} + output-file-path: bench.txt + summary-always: true + auto-push: true + fail-on-alert: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..b3eab6a16a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: + push: + branches: [ "develop" ] + pull_request: + branches: [ "develop" ] + workflow_dispatch: { } + +permissions: + actions: read + contents: read + +jobs: + build: + name: 'build' + runs-on: ubuntu-latest-medium + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-zig + - uses: ./.github/actions/setup-rust + - uses: ./.github/actions/setup-python + + - name: Python Lint - Format + run: rye run ruff format --check . + - name: Python Lint - Ruff + run: rye run ruff . + + - name: Rust Lint - Format + run: cargo fmt --all --check + - name: Rust Lint - Clippy + run: cargo clippy --all-targets + + - name: Zig Lint - Fmt + run: zig fmt --check zig/ + + - name: Rust Test + run: cargo test --all + + - name: Zig Test + run: zig build test + + - name: Pytest - PyEnc + run: rye run pytest --benchmark-disable test/ + working-directory: pyenc/ + diff --git a/codecz-sys/build.rs b/codecz-sys/build.rs index c007b2bd8d..38fbbdc5ca 100644 --- a/codecz-sys/build.rs +++ b/codecz-sys/build.rs @@ -26,11 +26,7 @@ fn main() { for entry in WalkDir::new(root_dir.join("zig")) .into_iter() .filter_map(|e| e.ok()) - .filter(|e| { - !e.path() - .components() - .any(|c| c.as_os_str() == "zig-cache") - }) + .filter(|e| !e.path().components().any(|c| c.as_os_str() == "zig-cache")) .filter(|e| { e.path() .extension() diff --git a/codecz/src/math.rs b/codecz/src/math.rs index 95dd6a6084..6525d5772f 100644 --- a/codecz/src/math.rs +++ b/codecz/src/math.rs @@ -17,7 +17,6 @@ use codecz_sys::{ RunLengthStats_t, }; use num_traits::Num; -use paste; #[allow(dead_code)] pub struct RunLengthStats { diff --git a/enc-ree/src/compress.rs b/enc-ree/src/compress.rs index 1ccf927538..5b1f4ff5d1 100644 --- a/enc-ree/src/compress.rs +++ b/enc-ree/src/compress.rs @@ -95,7 +95,7 @@ mod test { use arrow::buffer::BooleanBuffer; use enc::array::bool::BoolArray; - use enc::array::primitive::PrimitiveArray; + use enc::array::downcast::DowncastArrayBuiltin; use enc::array::Array; use crate::compress::ree_decode; @@ -117,14 +117,8 @@ mod test { ); let decoded = ree_decode( - arr.ends() - .as_any() - .downcast_ref::() - .unwrap(), - arr.values() - .as_any() - .downcast_ref::() - .unwrap(), + arr.ends().as_primitive(), + arr.values().as_primitive(), arr.validity().cloned(), ); @@ -133,13 +127,7 @@ mod test { vec![1i32, 1, 2, 2, 2, 3, 3, 3, 3, 3].as_slice() ); assert_eq!( - decoded - .validity() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap() - .buffer(), + decoded.validity().unwrap().as_bool().buffer(), &BooleanBuffer::from(vec![ true, true, false, true, true, true, true, false, true, true, ]) diff --git a/enc-ree/src/ree.rs b/enc-ree/src/ree.rs index 3977b560d6..7da307b94f 100644 --- a/enc-ree/src/ree.rs +++ b/enc-ree/src/ree.rs @@ -312,15 +312,13 @@ fn run_ends_logical_length>(ends: &T) -> usize { #[cfg(test)] mod test { - use std::ops::Deref; - use arrow::array::cast::AsArray; use arrow::array::types::Int32Type; + use enc::array::Array; use itertools::Itertools; - use enc::dtype::{IntWidth, Nullability, Signedness}; - - use super::*; + use crate::REEArray; + use enc::dtype::{DType, IntWidth, Nullability, Signedness}; #[test] fn new() { @@ -354,7 +352,7 @@ mod test { arr.iter_arrow() .zip_eq([vec![2, 2, 3, 3, 3]]) .for_each(|(from_iter, orig)| { - assert_eq!(from_iter.as_primitive::().values().deref(), orig); + assert_eq!(*from_iter.as_primitive::().values(), orig); }); } @@ -364,7 +362,7 @@ mod test { arr.iter_arrow() .zip_eq([vec![1, 1, 2, 2, 2, 3, 3, 3, 3, 3]]) .for_each(|(from_iter, orig)| { - assert_eq!(from_iter.as_primitive::().values().deref(), orig); + assert_eq!(*from_iter.as_primitive::().values(), orig); }); } } diff --git a/enc-roaring/src/boolean/serde.rs b/enc-roaring/src/boolean/serde.rs index edb2c2c11d..3faeeb4f22 100644 --- a/enc-roaring/src/boolean/serde.rs +++ b/enc-roaring/src/boolean/serde.rs @@ -32,6 +32,7 @@ impl EncodingSerde for RoaringBoolEncoding { #[cfg(test)] mod test { + use crate::downcast::DowncastRoaring; use croaring::Bitmap; use crate::serde_tests::test::roundtrip_array; @@ -42,10 +43,7 @@ mod test { let arr = RoaringBoolArray::new(Bitmap::from_range(245..63000), 65536); let read_arr = roundtrip_array(arr.as_ref()).unwrap(); - let read_roaring = read_arr - .as_any() - .downcast_ref::() - .unwrap(); + let read_roaring = read_arr.as_roaring_bool(); assert_eq!(arr.bitmap(), read_roaring.bitmap()); } } diff --git a/enc-roaring/src/downcast.rs b/enc-roaring/src/downcast.rs index 6be924773f..a0501a80d7 100644 --- a/enc-roaring/src/downcast.rs +++ b/enc-roaring/src/downcast.rs @@ -6,6 +6,7 @@ mod private { pub trait Sealed {} } +#[allow(dead_code)] pub trait DowncastRoaring: private::Sealed { fn maybe_roaring_int(&self) -> Option<&RoaringIntArray>; diff --git a/enc-zigzag/src/compress.rs b/enc-zigzag/src/compress.rs index 336594ef8e..7a60ffe83a 100644 --- a/enc-zigzag/src/compress.rs +++ b/enc-zigzag/src/compress.rs @@ -19,9 +19,7 @@ impl EncodingCompression for ZigZagEncoding { _config: &CompressConfig, ) -> Option<&'static Compressor> { // Only support primitive arrays - let Some(parray) = array.maybe_primitive() else { - return None; - }; + let parray = array.maybe_primitive()?; // Only supports signed integers if !parray.ptype().is_signed_int() { diff --git a/enc/src/array/chunked/mod.rs b/enc/src/array/chunked/mod.rs index 15b2ee7452..1100dcf54f 100644 --- a/enc/src/array/chunked/mod.rs +++ b/enc/src/array/chunked/mod.rs @@ -266,8 +266,6 @@ impl Iterator for ChunkedArrowIterator { #[cfg(test)] mod test { - use std::ops::Deref; - use arrow::array::cast::AsArray; use arrow::array::types::UInt64Type; use arrow::array::ArrayRef as ArrowArrayRef; @@ -294,7 +292,7 @@ mod test { } fn assert_equal_slices(arr: ArrowArrayRef, slice: &[T::Native]) { - assert_eq!(arr.as_primitive::().values().deref(), slice); + assert_eq!(*arr.as_primitive::().values(), slice); } #[test] diff --git a/enc/src/array/primitive/stats.rs b/enc/src/array/primitive/stats.rs index 297d561a4e..ef066abcce 100644 --- a/enc/src/array/primitive/stats.rs +++ b/enc/src/array/primitive/stats.rs @@ -147,9 +147,10 @@ where #[cfg(test)] mod test { - use super::*; + use crate::array::primitive::PrimitiveArray; use crate::array::Array; use crate::scalar::ListScalarVec; + use crate::stats::Stat; #[test] fn stats() { diff --git a/enc/src/array/sparse/mod.rs b/enc/src/array/sparse/mod.rs index 9a7c111db4..8ba14c74d8 100644 --- a/enc/src/array/sparse/mod.rs +++ b/enc/src/array/sparse/mod.rs @@ -267,13 +267,12 @@ impl Encoding for SparseEncoding { #[cfg(test)] mod test { - use std::ops::Deref; - + use crate::array::sparse::SparseArray; + use crate::array::Array; + use crate::error::EncError; use arrow::array::AsArray; use arrow::datatypes::{Int32Type, UInt32Type}; - use super::*; - fn sparse_array() -> SparseArray { // merged array: [null, null, 100, null, null, 200, null, null, 300, null] SparseArray::new(vec![2u32, 5, 8].into(), vec![100, 200, 300].into(), 10) @@ -282,23 +281,21 @@ mod test { fn assert_sparse_array(sparse: &dyn Array, values: (&[u32], &[i32])) { let sparse_arrow = sparse.as_ref().iter_arrow().next().unwrap(); assert_eq!( - sparse_arrow + *sparse_arrow .as_struct() .column_by_name("indices") .unwrap() .as_primitive::() - .values() - .deref(), + .values(), values.0 ); assert_eq!( - sparse_arrow + *sparse_arrow .as_struct() .column_by_name("values") .unwrap() .as_primitive::() - .values() - .deref(), + .values(), values.1 ); } diff --git a/enc/src/array/typed/mod.rs b/enc/src/array/typed/mod.rs index 8c0c0d600a..c339679585 100644 --- a/enc/src/array/typed/mod.rs +++ b/enc/src/array/typed/mod.rs @@ -156,7 +156,6 @@ impl ArrayDisplay for TypedArray { #[cfg(test)] mod test { use std::iter; - use std::ops::Deref; use arrow::array::cast::AsArray; use arrow::array::types::Time64MicrosecondType; @@ -197,8 +196,8 @@ mod test { ])))) .for_each(|(enc, arrow)| { assert_eq!( - enc.as_primitive::().values().deref(), - arrow.values().deref() + *enc.as_primitive::().values(), + *arrow.values() ) }); } diff --git a/enc/src/array/varbin/mod.rs b/enc/src/array/varbin/mod.rs index 22255ba1c2..ba1d917201 100644 --- a/enc/src/array/varbin/mod.rs +++ b/enc/src/array/varbin/mod.rs @@ -386,11 +386,13 @@ impl<'a> FromIterator> for VarBinArray { #[cfg(test)] mod test { - use arrow::array::GenericStringArray as ArrowStringArray; + use crate::array::Array; + use arrow::array::{AsArray, GenericStringArray as ArrowStringArray}; use crate::array::primitive::PrimitiveArray; - - use super::*; + use crate::array::varbin::VarBinArray; + use crate::arrow::CombineChunks; + use crate::dtype::{DType, Nullability}; fn binary_array() -> VarBinArray { let values = PrimitiveArray::from_vec( diff --git a/enc/src/arrow/compute/repeat.rs b/enc/src/arrow/compute/repeat.rs index 5beb02159c..662775e721 100644 --- a/enc/src/arrow/compute/repeat.rs +++ b/enc/src/arrow/compute/repeat.rs @@ -64,12 +64,11 @@ fn repeat_primitive( #[cfg(test)] mod test { + use crate::arrow::compute::repeat; use arrow::array::cast::AsArray; use arrow::array::types::UInt64Type; use arrow::array::{Scalar, UInt64Array}; - use super::*; - #[test] fn test_repeat() { let scalar = Scalar::new(UInt64Array::from(vec![47])); diff --git a/enc/src/error.rs b/enc/src/error.rs index 78981505e8..c767dbf17d 100644 --- a/enc/src/error.rs +++ b/enc/src/error.rs @@ -96,6 +96,7 @@ impl From for EncError { } #[derive(Debug)] +#[allow(dead_code)] pub struct PolarsError(polars_core::error::PolarsError); impl PartialEq for PolarsError { diff --git a/enc/src/scalar/primitive.rs b/enc/src/scalar/primitive.rs index dff6913df8..9d61969f67 100644 --- a/enc/src/scalar/primitive.rs +++ b/enc/src/scalar/primitive.rs @@ -285,9 +285,10 @@ impl Display for PScalar { #[cfg(test)] mod test { - use crate::dtype::{IntWidth, Nullability, Signedness}; - - use super::*; + use crate::dtype::{DType, IntWidth, Nullability, Signedness}; + use crate::error::EncError; + use crate::ptype::PType; + use crate::scalar::Scalar; #[test] fn into_from() { diff --git a/pyenc/src/enc_arrow.rs b/pyenc/src/enc_arrow.rs index f9aac347bc..1fb3a50c4c 100644 --- a/pyenc/src/enc_arrow.rs +++ b/pyenc/src/enc_arrow.rs @@ -5,7 +5,6 @@ use arrow::pyarrow::ToPyArrow; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyList}; -use pyo3::{PyAny, PyResult}; use enc::array::Array; diff --git a/pyenc/test/test_array.py b/pyenc/test/test_array.py index 889e2452e7..5686b4ac0c 100644 --- a/pyenc/test/test_array.py +++ b/pyenc/test/test_array.py @@ -1,7 +1,8 @@ -import enc import pyarrow as pa import pytest +import enc + def test_primitive_array_round_trip(): a = pa.array([0, 1, 2, 3]) diff --git a/pyenc/test/test_compress.py b/pyenc/test/test_compress.py index 2ad4b96c73..fb94d95a6b 100644 --- a/pyenc/test/test_compress.py +++ b/pyenc/test/test_compress.py @@ -1,7 +1,8 @@ -import enc import numpy as np import pyarrow as pa +import enc + def test_primitive_compress(): a = pa.array([0, 0, 0, 0, 9, 9, 9, 9, 1, 5]) diff --git a/pyenc/test/test_serde.py b/pyenc/test/test_serde.py index b6e0d78399..c30a5dd208 100644 --- a/pyenc/test/test_serde.py +++ b/pyenc/test/test_serde.py @@ -1,7 +1,8 @@ -import enc import pyarrow as pa from pyarrow import fs +import enc + local = fs.LocalFileSystem() diff --git a/pyproject.toml b/pyproject.toml index a6515e6a65..cffda950cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,10 +40,12 @@ members = [ [tool.ruff] line-length = 120 +extend-exclude = [".venv"] + +[tool.ruff.lint] select = ["F", "E", "W", "UP", "I"] # Do not auto-fix unused variables. This is really annoying when IntelliJ runs autofix while editing. unfixable = ["F841"] -extend-exclude = [".venv"] [tool.pytest.ini_options] log_cli = true diff --git a/spiral-alloc/src/alloc.rs b/spiral-alloc/src/alloc.rs index ca2191846c..7d018c926f 100644 --- a/spiral-alloc/src/alloc.rs +++ b/spiral-alloc/src/alloc.rs @@ -1,5 +1,3 @@ -use core::alloc::Layout; -use core::alloc::LayoutError; use core::ptr::NonNull; use super::SPIRAL_ALIGNMENT;