Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FastLanes #36

Merged
merged 25 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/bench-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ jobs:
labels: benchmark

- uses: actions/checkout@v4
with:
submodules: recursive

- uses: ./.github/actions/setup-zig
- uses: ./.github/actions/setup-rust
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ jobs:
if: ${{ github.event_name == 'workflow_dispatch' || (contains(github.event.head_commit.message, '[benchmark]') && github.ref_name == 'develop') }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- uses: ./.github/actions/setup-zig
- uses: ./.github/actions/setup-rust
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ jobs:
runs-on: ubuntu-latest-medium
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: ./.github/actions/setup-zig
- uses: ./.github/actions/setup-rust
- uses: ./.github/actions/setup-python
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "deps/fastlanez"]
path = deps/fastlanez
url = https://github.com/fulcrum-so/fastlanez.git
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ members = [
"bench-vortex",
"codecz",
"codecz-sys",
"fastlanez-sys",
"pyvortex",
"vortex",
"vortex-alloc",
"vortex-alp",
"vortex-dict",
"vortex-fastlanes",
"vortex-ffor",
"vortex-ree",
"vortex-roaring",
Expand Down
1 change: 1 addition & 0 deletions bench-vortex/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data
1 change: 1 addition & 0 deletions bench-vortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ arrow-array = "50.0.0"
vortex = { path = "../vortex" }
vortex-alp = { path = "../vortex-alp" }
vortex-dict = { path = "../vortex-dict" }
vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ffor = { path = "../vortex-ffor" }
vortex-ree = { path = "../vortex-ree" }
vortex-roaring = { path = "../vortex-roaring" }
Expand Down
3 changes: 1 addition & 2 deletions bench-vortex/benches/compress_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ use vortex::array::{Array, ArrayRef};
use vortex::compress::CompressCtx;
use vortex::dtype::DType;
use vortex::error::{VortexError, VortexResult};

use vortex_bench::enumerate_arrays;

fn download_taxi_data() -> &'static Path {
let download_path = Path::new("../../pyspiral/bench/.data/https-d37ci6vzurychx-cloudfront-net-trip-data-yellow-tripdata-2023-11.parquet");
let download_path = Path::new("data/yellow-tripdata-2023-11.parquet");
if download_path.exists() {
return download_path;
}
Expand Down
11 changes: 7 additions & 4 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use itertools::Itertools;
use vortex::array::Encoding;
use vortex_alp::ALPEncoding;
use vortex_dict::DictEncoding;
use vortex_ffor::FFoREncoding;
use vortex_fastlanes::{BitPackedEncoding, FoREncoding};
use vortex_ree::REEEncoding;
use vortex_roaring::{RoaringBoolEncoding, RoaringIntEncoding};
use vortex_zigzag::ZigZagEncoding;
Expand All @@ -26,7 +26,9 @@ pub fn enumerate_arrays() {
let encodings: Vec<&dyn Encoding> = vec![
&ALPEncoding,
&DictEncoding,
&FFoREncoding,
&BitPackedEncoding,
&FoREncoding,
//&FFoREncoding,
&REEEncoding,
&RoaringBoolEncoding,
&RoaringIntEncoding,
Expand All @@ -46,15 +48,16 @@ mod test {
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use simplelog::{ColorChoice, Config, TermLogger, TerminalMode};

use crate::enumerate_arrays;
use vortex::array::chunked::ChunkedArray;
use vortex::array::{Array, ArrayRef};
use vortex::compress::CompressCtx;
use vortex::dtype::DType;
use vortex::error::{VortexError, VortexResult};

use crate::enumerate_arrays;

pub fn download_taxi_data() -> &'static Path {
let download_path = Path::new("../../pyspiral/bench/.data/https-d37ci6vzurychx-cloudfront-net-trip-data-yellow-tripdata-2023-11.parquet");
let download_path = Path::new("data/yellow-tripdata-2023-11.parquet");
if download_path.exists() {
return download_path;
}
Expand Down
14 changes: 8 additions & 6 deletions codecz/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,24 @@

extern crate alloc;

use crate::{AlignedVec, ALIGNED_ALLOCATOR};
use alloc::sync::Arc;
use arrow_buffer::{BooleanBuffer, Buffer};
use core::ptr::NonNull;

pub fn into_u32_vec(bb: &BooleanBuffer, cardinality: usize) -> AlignedVec<u32> {
let mut vec: AlignedVec<u32> = AlignedVec::with_capacity_in(cardinality, ALIGNED_ALLOCATOR);
use arrow_buffer::{BooleanBuffer, Buffer};

use crate::{AlignedVec, ALIGNED_ALLOCATOR};

pub fn into_u64_vec(bb: &BooleanBuffer, cardinality: usize) -> AlignedVec<u64> {
let mut vec: AlignedVec<u64> = AlignedVec::with_capacity_in(cardinality, ALIGNED_ALLOCATOR);
if cardinality > 0 {
for idx in bb.set_indices() {
vec.push(idx as u32);
vec.push(idx as u64);
}
}
vec
}

pub fn gather_patches<T: Copy + Sized>(data: &[T], indices: &[u32]) -> AlignedVec<T> {
pub fn gather_patches<T: Copy + Sized>(data: &[T], indices: &[u64]) -> AlignedVec<T> {
let mut vec: AlignedVec<T> = AlignedVec::with_capacity_in(indices.len(), ALIGNED_ALLOCATOR);
for idx in indices {
vec.push(data[*idx as usize]);
Expand Down
1 change: 1 addition & 0 deletions deps/fastlanez
Submodule fastlanez added at d4ed21
18 changes: 18 additions & 0 deletions fastlanez-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "fastlanez-sys"
version = "0.1.0"
edition = "2021"
links = "fastlanez"

[lints]
workspace = true

[dependencies]
arrayref = "0.3.7"
paste = "1.0.14"
seq-macro = "0.3.5"
uninit = "0.6.2"

[build-dependencies]
bindgen = "0.69.1"
walkdir = "2.4.0"
121 changes: 121 additions & 0 deletions fastlanez-sys/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use std::env;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};

use walkdir::WalkDir;

/// Build-script entry point for `fastlanez-sys`.
///
/// In order, this:
/// 1. resolves the `deps/fastlanez` directory relative to the parent of this
///    crate's manifest directory,
/// 2. emits the cargo directives that add `zig-out/lib` to the link search path
///    and link the `fastlanez` library,
/// 3. registers `build.rs` and every walkable entry under `deps/fastlanez/src`
///    as rebuild triggers,
/// 4. runs `zig build lib` inside the fastlanez directory with the optimization
///    flag chosen by `get_zig_opt`,
/// 5. generates bindings for the `fl_*` items of `zig-out/include/fastlanez.h`
///    and writes them to `$OUT_DIR/bindings.rs`.
///
/// # Panics
///
/// Panics if a required environment variable is missing, a path cannot be
/// canonicalized or is not valid UTF-8, `zig` cannot be started or exits
/// unsuccessfully, or binding generation / writing fails.
fn main() {
    let buildrs_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap())
        .canonicalize()
        .expect("Failed to canonicalize CARGO_MANIFEST_DIR");
    let root_dir = buildrs_dir
        .join("../")
        .canonicalize()
        .expect("Failed to canonicalize root dir");
    let fastlanez_dir = root_dir.join("deps/fastlanez");

    // Tell cargo to tell rustc to link fastlanez
    println!(
        "cargo:rustc-link-search={}",
        fastlanez_dir.join("zig-out/lib").to_str().unwrap()
    );
    println!("cargo:rustc-link-lib=fastlanez");

    rerun_if_changed(&buildrs_dir.join("build.rs"));
    // Directory entries that cannot be read are skipped rather than failing the build.
    WalkDir::new(fastlanez_dir.join("src"))
        .into_iter()
        .filter_map(|e| e.ok())
        .for_each(|e| rerun_if_changed(e.path()));

    let zig_opt = get_zig_opt();
    println!("cargo:info=invoking `zig build` with {}", zig_opt);
    let zig_status = Command::new("zig")
        .args(["build", "lib"])
        .arg(zig_opt)
        .args(["--summary", "all"])
        .current_dir(&fastlanez_dir)
        .status()
        .expect("Could not invoke `zig build`");
    if !zig_status.success() {
        // Report the directory the command actually ran in, so the failure can be
        // reproduced by hand from the right place.
        panic!(
            "failed to successfully invoke `zig build` in {}",
            fastlanez_dir.to_str().unwrap()
        );
    }

    let bindings = bindgen::Builder::default()
        .header(
            fastlanez_dir
                .join("zig-out/include/fastlanez.h")
                .to_str()
                .unwrap(),
        )
        .clang_args(&[
            get_zig_include().as_ref(),
            "-DZIG_TARGET_MAX_INT_ALIGNMENT=16",
        ])
        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
        // Only expose the `fl_`-prefixed items from the header.
        .allowlist_item("fl_.*")
        .generate()
        .expect("Unable to generate bindings");

    // Write the bindings to the $OUT_DIR/bindings.rs file.
    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
    bindings
        .write_to_file(out_path.join("bindings.rs"))
        .expect("Couldn't write bindings!");
}

/// Emits a `cargo:rerun-if-changed` directive for the canonicalized form of `path`.
///
/// # Panics
///
/// Panics if `path` cannot be canonicalized, or if either the original or the
/// canonicalized path is not valid UTF-8.
fn rerun_if_changed(path: &Path) {
    let resolved = match path.canonicalize() {
        Ok(absolute) => absolute,
        Err(_) => panic!("failed to canonicalize {}", path.to_str().unwrap()),
    };
    println!("cargo:rerun-if-changed={}", resolved.to_str().unwrap());
}

/// Picks the `-Doptimize=...` flag to pass to `zig build`.
///
/// Reads the `PROFILE` and `OPT_LEVEL` environment variables (see
/// https://doc.rust-lang.org/cargo/reference/environment-variables.html).
/// `PROFILE` is "debug" or "release" depending on which built-in profile the
/// active cargo profile derives from, so `cargo build` / `cargo test` report
/// "debug" and `cargo build --release` / `cargo bench` report "release".
///
/// Selection:
/// - `PROFILE` is "debug", or debug assertions are enabled -> Debug
/// - otherwise `PROFILE` is "release", or `OPT_LEVEL` is not "0" -> ReleaseSmall
/// - otherwise (custom profile, opt-level 0, no debug assertions) -> Debug,
///   after emitting a cargo warning
///
/// # Panics
///
/// Panics if `PROFILE` or `OPT_LEVEL` is not set.
fn get_zig_opt() -> &'static str {
    const ZIG_DEBUG: &str = "-Doptimize=Debug";
    const ZIG_RELEASE_SMALL: &str = "-Doptimize=ReleaseSmall";

    let cargo_profile = env::var("PROFILE").unwrap();
    let optimizing = env::var("OPT_LEVEL").unwrap() != "0";

    // Debug assertions are consulted as well so custom profiles get a sane value.
    if cargo_profile == "debug" || cfg!(debug_assertions) {
        return ZIG_DEBUG;
    }
    if cargo_profile == "release" || optimizing {
        return ZIG_RELEASE_SMALL;
    }

    // Custom profile with opt-level 0 and no debug assertions: an odd
    // combination, so fall back to a debug build and say so.
    println!(
        "cargo:warning=unrecognized cargo profile {}, defaulting to `zig build -Doptimize=Debug`", cargo_profile
    );
    ZIG_DEBUG
}

/// Builds the `-I<dir>` include flag from the `lib_dir` reported by `zig env`.
///
/// Runs a `bash -c` pipeline that greps the `lib_dir` line(s) out of `zig env`
/// and has `awk` print `-I` followed by the fourth double-quote-delimited field
/// of each such line. The captured stdout, with trailing whitespace removed, is
/// returned.
///
/// NOTE(review): the pipeline's exit status is not inspected, so if `zig` is
/// unavailable the result is presumably an empty string rather than an error —
/// confirm this is acceptable for callers.
///
/// # Panics
///
/// Panics if `bash` cannot be executed or its output is not valid UTF-8.
fn get_zig_include() -> String {
    let pipeline = "zig env | grep lib_dir | awk -F'\"' '{print \"-I\"$4}'";
    let captured = Command::new("bash")
        .arg("-c")
        .arg(pipeline)
        .stdout(Stdio::piped())
        .output()
        .expect("Failed to execute command");
    let include_flag = String::from_utf8(captured.stdout)
        .expect("Failed to convert command output to string");
    include_flag.trim_end().to_string()
}
Loading