From 4bd931c23002eedc74483d583778cc9a8a132478 Mon Sep 17 00:00:00 2001 From: Amey Chaugule Date: Wed, 14 Aug 2024 17:43:34 -0700 Subject: [PATCH 1/8] Adding GroupedWindowAggStream --- Cargo.lock | 2 + crates/core/Cargo.toml | 2 + ...lesce_before_streaming_window_aggregate.rs | 2 +- .../continuous/group_values/bytes.rs | 126 +++++ .../continuous/group_values/bytes_view.rs | 129 +++++ .../continuous/group_values/mod.rs | 97 ++++ .../continuous/group_values/primitive.rs | 224 ++++++++ .../continuous/group_values/row.rs | 254 +++++++++ .../continuous/grouped_window_agg_stream.rs | 535 ++++++++++++++++++ .../core/src/physical_plan/continuous/mod.rs | 106 ++++ .../physical_plan/continuous/order/full.rs | 144 +++++ .../src/physical_plan/continuous/order/mod.rs | 124 ++++ .../physical_plan/continuous/order/partial.rs | 252 +++++++++ .../{ => continuous}/streaming_window.rs | 117 ++-- crates/core/src/physical_plan/mod.rs | 2 +- crates/core/src/planner/streaming_window.rs | 4 +- examples/examples/kafka_rideshare.rs | 4 +- 17 files changed, 2034 insertions(+), 90 deletions(-) create mode 100644 crates/core/src/physical_plan/continuous/group_values/bytes.rs create mode 100644 crates/core/src/physical_plan/continuous/group_values/bytes_view.rs create mode 100644 crates/core/src/physical_plan/continuous/group_values/mod.rs create mode 100644 crates/core/src/physical_plan/continuous/group_values/primitive.rs create mode 100644 crates/core/src/physical_plan/continuous/group_values/row.rs create mode 100644 crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs create mode 100644 crates/core/src/physical_plan/continuous/mod.rs create mode 100644 crates/core/src/physical_plan/continuous/order/full.rs create mode 100644 crates/core/src/physical_plan/continuous/order/mod.rs create mode 100644 crates/core/src/physical_plan/continuous/order/partial.rs rename crates/core/src/physical_plan/{ => continuous}/streaming_window.rs (91%) diff --git a/Cargo.lock b/Cargo.lock index 9720ae0..4babca3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1134,6 +1134,7 @@ dependencies = [ name = "df-streams-core" version = "0.1.0" dependencies = [ + "ahash", "apache-avro", "arrow", "arrow-array", @@ -1148,6 +1149,7 @@ dependencies = [ "delegate", "futures", "half", + "hashbrown", "itertools 0.13.0", "log", "rdkafka", diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 44d23e9..7d68c0d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -28,3 +28,5 @@ rocksdb = "0.22.0" bincode = "1.3.3" half = "2.4.1" delegate = "0.12.0" +ahash = "0.8.11" +hashbrown = "0.14.5" diff --git a/crates/core/src/physical_optimizer/coalesce_before_streaming_window_aggregate.rs b/crates/core/src/physical_optimizer/coalesce_before_streaming_window_aggregate.rs index 97809bf..4e68422 100644 --- a/crates/core/src/physical_optimizer/coalesce_before_streaming_window_aggregate.rs +++ b/crates/core/src/physical_optimizer/coalesce_before_streaming_window_aggregate.rs @@ -8,7 +8,7 @@ use datafusion::physical_plan::ExecutionPlanProperties; use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion::error::Result; -use crate::physical_plan::streaming_window::FranzStreamingWindowExec; +use crate::physical_plan::continuous::streaming_window::FranzStreamingWindowExec; pub struct CoaslesceBeforeStreamingAggregate {} diff --git a/crates/core/src/physical_plan/continuous/group_values/bytes.rs b/crates/core/src/physical_plan/continuous/group_values/bytes.rs new file mode 100644 index 
0000000..f04e516 --- /dev/null +++ b/crates/core/src/physical_plan/continuous/group_values/bytes.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, ArrayRef, OffsetSizeTrait, RecordBatch}; +use datafusion::common::Result; +use datafusion::{ + logical_expr::EmitTo, physical_expr::binary_map::OutputType, + physical_expr_common::binary_map::ArrowBytesMap, +}; + +use super::GroupValues; +/// A [`GroupValues`] storing single column of Utf8/LargeUtf8/Binary/LargeBinary values +/// +/// This specialization is significantly faster than using the more general +/// purpose `Row`s format +pub struct GroupValuesByes { + /// Map string/binary values to group index + map: ArrowBytesMap, + /// The total number of groups so far (used to assign group_index) + num_groups: usize, +} + +impl GroupValuesByes { + pub fn new(output_type: OutputType) -> Self { + Self { + map: ArrowBytesMap::new(output_type), + num_groups: 0, + } + } +} + +impl GroupValues for GroupValuesByes { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { + assert_eq!(cols.len(), 1); + + // look up / add entries in the table + let arr = &cols[0]; + + groups.clear(); + self.map.insert_if_new( + arr, + // called for each new group + |_value| { + // assign new group index on each insert + let group_idx = self.num_groups; + self.num_groups += 1; + group_idx + }, + // called for each group + |group_idx| { + groups.push(group_idx); + }, + ); + + // ensure we assigned a group to for each row + assert_eq!(groups.len(), arr.len()); + Ok(()) + } + + fn size(&self) -> usize { + self.map.size() + std::mem::size_of::() + } + + fn is_empty(&self) -> bool { + self.num_groups == 0 + } + + fn len(&self) -> usize { + self.num_groups + } + + fn emit(&mut self, emit_to: EmitTo) -> Result> { + // Reset the map to default, and convert it into a single array + let map_contents = self.map.take().into_state(); + + let group_values = match emit_to { + EmitTo::All => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) if n == self.len() => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) => { + // if we only wanted to take the first n, insert the rest back + // into the map we could potentially avoid this reallocation, at + // the expense of much more complex code. 
+ // see https://github.com/apache/datafusion/issues/9195 + let emit_group_values = map_contents.slice(0, n); + let remaining_group_values = map_contents.slice(n, map_contents.len() - n); + + self.num_groups = 0; + let mut group_indexes = vec![]; + self.intern(&[remaining_group_values], &mut group_indexes)?; + + // Verify that the group indexes were assigned in the correct order + assert_eq!(0, group_indexes[0]); + + emit_group_values + } + }; + + Ok(vec![group_values]) + } + + fn clear_shrink(&mut self, _batch: &RecordBatch) { + // in theory we could potentially avoid this reallocation and clear the + // contents of the maps, but for now we just reset the map from the beginning + self.map.take(); + } +} diff --git a/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs b/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs new file mode 100644 index 0000000..6b57a3d --- /dev/null +++ b/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
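The interning contract that `GroupValuesByes` implements above is worth pinning down: `intern` maps every input row to a dense group index assigned in first-appearance order, and `emit` drains those groups back out as a single array of distinct key values. A minimal sketch of that contract, assuming a `#[cfg(test)]` child module of `group_values` where the trait and the store are visible; the column contents and asserted indices are illustrative only:

```rust
use std::sync::Arc;

use arrow_array::{Array, ArrayRef, StringArray};
use datafusion::logical_expr::EmitTo;
use datafusion::physical_expr::binary_map::OutputType;

use super::{bytes::GroupValuesByes, GroupValues};

#[test]
fn intern_assigns_dense_indices() -> datafusion::common::Result<()> {
    // i32 offsets correspond to Utf8 (i64 would be LargeUtf8).
    let mut gv = GroupValuesByes::<i32>::new(OutputType::Utf8);

    let col: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a", "c"]));
    let mut groups: Vec<usize> = vec![];
    gv.intern(&[col], &mut groups)?;

    // One index per input row, in first-appearance order:
    // "a" -> 0, "b" -> 1, "c" -> 2.
    assert_eq!(groups, vec![0, 1, 0, 2]);
    assert_eq!(gv.len(), 3);

    // EmitTo::All drains the store into one array of distinct keys,
    // positioned by group index.
    let emitted = gv.emit(EmitTo::All)?;
    assert_eq!(emitted[0].len(), 3);
    assert!(gv.is_empty());
    Ok(())
}
```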
+ +use arrow_array::{Array, ArrayRef, RecordBatch}; +use datafusion::{ + common::Result, logical_expr::EmitTo, physical_expr::binary_map::OutputType, + physical_expr_common::binary_view_map::ArrowBytesViewMap, +}; + +use super::GroupValues; +//use datafusion_expr::EmitTo; +//use datafusion_physical_expr::binary_map::OutputType; +//use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewMap; + +/// A [`GroupValues`] storing single column of Utf8View/BinaryView values +/// +/// This specialization is significantly faster than using the more general +/// purpose `Row`s format +pub struct GroupValuesBytesView { + /// Map string/binary values to group index + map: ArrowBytesViewMap, + /// The total number of groups so far (used to assign group_index) + num_groups: usize, +} + +impl GroupValuesBytesView { + pub fn new(output_type: OutputType) -> Self { + Self { + map: ArrowBytesViewMap::new(output_type), + num_groups: 0, + } + } +} + +impl GroupValues for GroupValuesBytesView { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { + assert_eq!(cols.len(), 1); + + // look up / add entries in the table + let arr = &cols[0]; + + groups.clear(); + self.map.insert_if_new( + arr, + // called for each new group + |_value| { + // assign new group index on each insert + let group_idx = self.num_groups; + self.num_groups += 1; + group_idx + }, + // called for each group + |group_idx| { + groups.push(group_idx); + }, + ); + + // ensure we assigned a group to for each row + assert_eq!(groups.len(), arr.len()); + Ok(()) + } + + fn size(&self) -> usize { + self.map.size() + std::mem::size_of::() + } + + fn is_empty(&self) -> bool { + self.num_groups == 0 + } + + fn len(&self) -> usize { + self.num_groups + } + + fn emit(&mut self, emit_to: EmitTo) -> Result> { + // Reset the map to default, and convert it into a single array + let map_contents = self.map.take().into_state(); + + let group_values = match emit_to { + EmitTo::All => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) if n == self.len() => { + self.num_groups -= map_contents.len(); + map_contents + } + EmitTo::First(n) => { + // if we only wanted to take the first n, insert the rest back + // into the map we could potentially avoid this reallocation, at + // the expense of much more complex code. + // see https://github.com/apache/datafusion/issues/9195 + let emit_group_values = map_contents.slice(0, n); + let remaining_group_values = map_contents.slice(n, map_contents.len() - n); + + self.num_groups = 0; + let mut group_indexes = vec![]; + self.intern(&[remaining_group_values], &mut group_indexes)?; + + // Verify that the group indexes were assigned in the correct order + assert_eq!(0, group_indexes[0]); + + emit_group_values + } + }; + + Ok(vec![group_values]) + } + + fn clear_shrink(&mut self, _batch: &RecordBatch) { + // in theory we could potentially avoid this reallocation and clear the + // contents of the maps, but for now we just reset the map from the beginning + self.map.take(); + } +} diff --git a/crates/core/src/physical_plan/continuous/group_values/mod.rs b/crates/core/src/physical_plan/continuous/group_values/mod.rs new file mode 100644 index 0000000..111bf5f --- /dev/null +++ b/crates/core/src/physical_plan/continuous/group_values/mod.rs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
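Both byte-map stores handle `EmitTo::First(n)` the same way: take the whole map, slice off the first `n` distinct values for output, and re-intern the remainder so the surviving groups come back with indices starting at zero, matching the index shift the caller applies to its accumulator state. A sketch under the same test-module assumption as above; the values are illustrative:

```rust
use std::sync::Arc;

use arrow_array::{Array, ArrayRef, StringArray};
use datafusion::logical_expr::EmitTo;
use datafusion::physical_expr::binary_map::OutputType;

use super::{bytes::GroupValuesByes, GroupValues};

#[test]
fn emit_first_n_reinterns_remainder() -> datafusion::common::Result<()> {
    let mut gv = GroupValuesByes::<i32>::new(OutputType::Utf8);
    let col: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
    let mut groups: Vec<usize> = vec![];
    gv.intern(&[col], &mut groups)?; // "a" -> 0, "b" -> 1, "c" -> 2

    // Emit the first two groups; "c" is re-interned into a fresh map.
    let out = gv.emit(EmitTo::First(2))?;
    assert_eq!(out[0].len(), 2);
    assert_eq!(gv.len(), 1);

    // A later batch containing "c" lands on the shifted index 0.
    let next_col: ArrayRef = Arc::new(StringArray::from(vec!["c"]));
    let mut next: Vec<usize> = vec![];
    gv.intern(&[next_col], &mut next)?;
    assert_eq!(next, vec![0]);
    Ok(())
}
```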
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::record_batch::RecordBatch; +use arrow_array::{downcast_primitive, ArrayRef}; +use arrow_schema::{DataType, SchemaRef}; +use bytes_view::GroupValuesBytesView; +use datafusion::common::Result; + +pub(crate) mod primitive; +use datafusion::logical_expr::EmitTo; +use datafusion::physical_expr::binary_map::OutputType; +use primitive::GroupValuesPrimitive; + +mod row; +use row::GroupValuesRows; + +mod bytes; +mod bytes_view; +use bytes::GroupValuesByes; +//use datafusion_physical_expr::binary_map::OutputType; + +/// An interning store for group keys +pub trait GroupValues: Send { + /// Calculates the `groups` for each input row of `cols` + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()>; + + /// Returns the number of bytes used by this [`GroupValues`] + fn size(&self) -> usize; + + /// Returns true if this [`GroupValues`] is empty + fn is_empty(&self) -> bool; + + /// The number of values stored in this [`GroupValues`] + fn len(&self) -> usize; + + /// Emits the group values + fn emit(&mut self, emit_to: EmitTo) -> Result>; + + /// Clear the contents and shrink the capacity to the size of the batch (free up memory usage) + fn clear_shrink(&mut self, batch: &RecordBatch); +} + +pub fn new_group_values(schema: SchemaRef) -> Result> { + if schema.fields.len() == 1 { + let d = schema.fields[0].data_type(); + + macro_rules! downcast_helper { + ($t:ty, $d:ident) => { + return Ok(Box::new(GroupValuesPrimitive::<$t>::new($d.clone()))) + }; + } + + downcast_primitive! { + d => (downcast_helper, d), + _ => {} + } + + match d { + DataType::Utf8 => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); + } + DataType::LargeUtf8 => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); + } + DataType::Utf8View => { + return Ok(Box::new(GroupValuesBytesView::new(OutputType::Utf8View))); + } + DataType::Binary => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); + } + DataType::LargeBinary => { + return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); + } + DataType::BinaryView => { + return Ok(Box::new(GroupValuesBytesView::new(OutputType::BinaryView))); + } + _ => {} + } + } + + Ok(Box::new(GroupValuesRows::try_new(schema)?)) +} diff --git a/crates/core/src/physical_plan/continuous/group_values/primitive.rs b/crates/core/src/physical_plan/continuous/group_values/primitive.rs new file mode 100644 index 0000000..f9dc68f --- /dev/null +++ b/crates/core/src/physical_plan/continuous/group_values/primitive.rs @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
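`new_group_values` is a pure schema dispatch: a single primitive column is routed to `GroupValuesPrimitive` through the `downcast_primitive!` table, a single string/binary column to the byte-map stores, and everything else, including every multi-column key, to the row-format fallback. The concrete type is hidden behind the trait object, so a sketch can only confirm construction succeeds; same test-module assumption as before:

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema};

use super::{new_group_values, GroupValues};

#[test]
fn dispatch_by_schema() -> datafusion::common::Result<()> {
    // Single Int64 column: served by GroupValuesPrimitive<Int64Type>.
    let single = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, true)]));
    let gv = new_group_values(single)?;
    assert!(gv.is_empty());

    // Two-column key: no specialization applies, so the Row-format
    // GroupValuesRows store is used.
    let multi = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int64, true),
        Field::new("state", DataType::Utf8, true),
    ]));
    let gv = new_group_values(multi)?;
    assert_eq!(gv.len(), 0);
    Ok(())
}
```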
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use ahash::RandomState; +use arrow::array::BooleanBufferBuilder; +use arrow::buffer::NullBuffer; +use arrow::datatypes::{i256, IntervalDayTime, IntervalMonthDayNano}; +use arrow::record_batch::RecordBatch; +use arrow_array::cast::AsArray; +use arrow_array::{ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, PrimitiveArray}; +//use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; +use arrow_schema::DataType; +use datafusion::common::utils::proxy::VecAllocExt; +use datafusion::common::Result; +use datafusion::logical_expr::EmitTo; +//use datafusion_common::Result; +//use datafusion_execution::memory_pool::proxy::VecAllocExt; +//use datafusion_expr::EmitTo; +use half::f16; +use hashbrown::raw::RawTable; +use std::sync::Arc; + +use super::GroupValues; + +/// A trait to allow hashing of floating point numbers +pub(crate) trait HashValue { + fn hash(&self, state: &RandomState) -> u64; +} + +macro_rules! hash_integer { + ($($t:ty),+) => { + $(impl HashValue for $t { + #[cfg(not(feature = "force_hash_collisions"))] + fn hash(&self, state: &RandomState) -> u64 { + state.hash_one(self) + } + + #[cfg(feature = "force_hash_collisions")] + fn hash(&self, _state: &RandomState) -> u64 { + 0 + } + })+ + }; +} +hash_integer!(i8, i16, i32, i64, i128, i256); +hash_integer!(u8, u16, u32, u64); +hash_integer!(IntervalDayTime, IntervalMonthDayNano); + +macro_rules! 
hash_float { + ($($t:ty),+) => { + $(impl HashValue for $t { + #[cfg(not(feature = "force_hash_collisions"))] + fn hash(&self, state: &RandomState) -> u64 { + state.hash_one(self.to_bits()) + } + + #[cfg(feature = "force_hash_collisions")] + fn hash(&self, _state: &RandomState) -> u64 { + 0 + } + })+ + }; +} + +hash_float!(f16, f32, f64); + +/// A [`GroupValues`] storing a single column of primitive values +/// +/// This specialization is significantly faster than using the more general +/// purpose `Row`s format +pub struct GroupValuesPrimitive { + /// The data type of the output array + data_type: DataType, + /// Stores the group index based on the hash of its value + /// + /// We don't store the hashes as hashing fixed width primitives + /// is fast enough for this not to benefit performance + map: RawTable, + /// The group index of the null value if any + null_group: Option, + /// The values for each group index + values: Vec, + /// The random state used to generate hashes + random_state: RandomState, +} + +impl GroupValuesPrimitive { + pub fn new(data_type: DataType) -> Self { + assert!(PrimitiveArray::::is_compatible(&data_type)); + Self { + data_type, + map: RawTable::with_capacity(128), + values: Vec::with_capacity(128), + null_group: None, + random_state: Default::default(), + } + } +} + +impl GroupValues for GroupValuesPrimitive +where + T::Native: HashValue, +{ + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { + assert_eq!(cols.len(), 1); + groups.clear(); + + for v in cols[0].as_primitive::() { + let group_id = match v { + None => *self.null_group.get_or_insert_with(|| { + let group_id = self.values.len(); + self.values.push(Default::default()); + group_id + }), + Some(key) => { + let state = &self.random_state; + let hash = key.hash(state); + let insert = self.map.find_or_find_insert_slot( + hash, + |g| unsafe { self.values.get_unchecked(*g).is_eq(key) }, + |g| unsafe { self.values.get_unchecked(*g).hash(state) }, + ); + + // SAFETY: No mutation occurred since find_or_find_insert_slot + unsafe { + match insert { + Ok(v) => *v.as_ref(), + Err(slot) => { + let g = self.values.len(); + self.map.insert_in_slot(hash, slot, g); + self.values.push(key); + g + } + } + } + } + }; + groups.push(group_id) + } + Ok(()) + } + + fn size(&self) -> usize { + self.map.capacity() * std::mem::size_of::() + self.values.allocated_size() + } + + fn is_empty(&self) -> bool { + self.values.is_empty() + } + + fn len(&self) -> usize { + self.values.len() + } + + fn emit(&mut self, emit_to: EmitTo) -> Result> { + fn build_primitive( + values: Vec, + null_idx: Option, + ) -> PrimitiveArray { + let nulls = null_idx.map(|null_idx| { + let mut buffer = BooleanBufferBuilder::new(values.len()); + buffer.append_n(values.len(), true); + buffer.set_bit(null_idx, false); + unsafe { NullBuffer::new_unchecked(buffer.finish(), 1) } + }); + PrimitiveArray::::new(values.into(), nulls) + } + + let array: PrimitiveArray = match emit_to { + EmitTo::All => { + self.map.clear(); + build_primitive(std::mem::take(&mut self.values), self.null_group.take()) + } + EmitTo::First(n) => { + // SAFETY: self.map outlives iterator and is not modified concurrently + unsafe { + for bucket in self.map.iter() { + // Decrement group index by n + match bucket.as_ref().checked_sub(n) { + // Group index was >= n, shift value down + Some(sub) => *bucket.as_mut() = sub, + // Group index was < n, so remove from table + None => self.map.erase(bucket), + } + } + } + let null_group = match &mut self.null_group { + Some(v) 
if *v >= n => { + *v -= n; + None + } + Some(_) => self.null_group.take(), + None => None, + }; + let mut split = self.values.split_off(n); + std::mem::swap(&mut self.values, &mut split); + build_primitive(split, null_group) + } + }; + Ok(vec![Arc::new(array.with_data_type(self.data_type.clone()))]) + } + + fn clear_shrink(&mut self, batch: &RecordBatch) { + let count = batch.num_rows(); + self.values.clear(); + self.values.shrink_to(count); + self.map.clear(); + self.map.shrink_to(count, |_| 0); // hasher does not matter since the map is cleared + } +} diff --git a/crates/core/src/physical_plan/continuous/group_values/row.rs b/crates/core/src/physical_plan/continuous/group_values/row.rs new file mode 100644 index 0000000..9a3167c --- /dev/null +++ b/crates/core/src/physical_plan/continuous/group_values/row.rs @@ -0,0 +1,254 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use ahash::RandomState; +use arrow::compute::cast; +use arrow::record_batch::RecordBatch; +use arrow::row::{RowConverter, Rows, SortField}; +use arrow_array::{Array, ArrayRef}; +use arrow_schema::{DataType, SchemaRef}; +use datafusion::common::Result; +use datafusion::common::{ + hash_utils::create_hashes, + utils::proxy::{RawTableAllocExt, VecAllocExt}, +}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::EmitTo; +use hashbrown::raw::RawTable; + +use super::GroupValues; + +/// A [`GroupValues`] making use of [`Rows`] +pub struct GroupValuesRows { + /// The output schema + schema: SchemaRef, + + /// Converter for the group values + row_converter: RowConverter, + + /// Logically maps group values to a group_index in + /// [`Self::group_values`] and in each accumulator + /// + /// Uses the raw API of hashbrown to avoid actually storing the + /// keys (group values) in the table + /// + /// keys: u64 hashes of the GroupValue + /// values: (hash, group_index) + map: RawTable<(u64, usize)>, + + /// The size of `map` in bytes + map_size: usize, + + /// The actual group by values, stored in arrow [`Row`] format. + /// `group_values[i]` holds the group value for group_index `i`. + /// + /// The row format is used to compare group keys quickly and store + /// them efficiently in memory. Quick comparison is especially + /// important for multi-column group keys. 
+ /// + /// [`Row`]: arrow::row::Row + group_values: Option, + + /// reused buffer to store hashes + hashes_buffer: Vec, + + /// reused buffer to store rows + rows_buffer: Rows, + + /// Random state for creating hashes + random_state: RandomState, +} + +impl GroupValuesRows { + pub fn try_new(schema: SchemaRef) -> Result { + let row_converter = RowConverter::new( + schema + .fields() + .iter() + .map(|f| SortField::new(f.data_type().clone())) + .collect(), + )?; + + let map = RawTable::with_capacity(0); + + let starting_rows_capacity = 1000; + let starting_data_capacity = 64 * starting_rows_capacity; + let rows_buffer = row_converter.empty_rows(starting_rows_capacity, starting_data_capacity); + Ok(Self { + schema, + row_converter, + map, + map_size: 0, + group_values: None, + hashes_buffer: Default::default(), + rows_buffer, + random_state: Default::default(), + }) + } +} + +impl GroupValues for GroupValuesRows { + fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { + // Convert the group keys into the row format + let group_rows = &mut self.rows_buffer; + group_rows.clear(); + self.row_converter.append(group_rows, cols)?; + let n_rows = group_rows.num_rows(); + + let mut group_values = match self.group_values.take() { + Some(group_values) => group_values, + None => self.row_converter.empty_rows(0, 0), + }; + + // tracks to which group each of the input rows belongs + groups.clear(); + + // 1.1 Calculate the group keys for the group values + let batch_hashes = &mut self.hashes_buffer; + batch_hashes.clear(); + batch_hashes.resize(n_rows, 0); + create_hashes(cols, &self.random_state, batch_hashes)?; + + for (row, &target_hash) in batch_hashes.iter().enumerate() { + let entry = self.map.get_mut(target_hash, |(exist_hash, group_idx)| { + // Somewhat surprisingly, this closure can be called even if the + // hash doesn't match, so check the hash first with an integer + // comparison first avoid the more expensive comparison with + // group value. 
https://github.com/apache/datafusion/pull/11718 + target_hash == *exist_hash + // verify that the group that we are inserting with hash is + // actually the same key value as the group in + // existing_idx (aka group_values @ row) + && group_rows.row(row) == group_values.row(*group_idx) + }); + + let group_idx = match entry { + // Existing group_index for this group value + Some((_hash, group_idx)) => *group_idx, + // 1.2 Need to create new entry for the group + None => { + // Add new entry to aggr_state and save newly created index + let group_idx = group_values.num_rows(); + group_values.push(group_rows.row(row)); + + // for hasher function, use precomputed hash value + self.map.insert_accounted( + (target_hash, group_idx), + |(hash, _group_index)| *hash, + &mut self.map_size, + ); + group_idx + } + }; + groups.push(group_idx); + } + + self.group_values = Some(group_values); + + Ok(()) + } + + fn size(&self) -> usize { + let group_values_size = self.group_values.as_ref().map(|v| v.size()).unwrap_or(0); + self.row_converter.size() + + group_values_size + + self.map_size + + self.rows_buffer.size() + + self.hashes_buffer.allocated_size() + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn len(&self) -> usize { + self.group_values + .as_ref() + .map(|group_values| group_values.num_rows()) + .unwrap_or(0) + } + + fn emit(&mut self, emit_to: EmitTo) -> Result> { + let mut group_values = self + .group_values + .take() + .expect("Can not emit from empty rows"); + + let mut output = match emit_to { + EmitTo::All => { + let output = self.row_converter.convert_rows(&group_values)?; + group_values.clear(); + output + } + EmitTo::First(n) => { + let groups_rows = group_values.iter().take(n); + let output = self.row_converter.convert_rows(groups_rows)?; + // Clear out first n group keys by copying them to a new Rows. + // TODO file some ticket in arrow-rs to make this more efficient? 
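+ // The copy below moves rows [n..) into a fresh `Rows` buffer; the
+ // unsafe walk that follows then shifts every surviving group index
+ // down by `n` and erases entries whose index underflows, since those
+ // groups were just emitted. GroupValuesPrimitive::emit applies the
+ // same bucket-shift technique.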
+ let mut new_group_values = self.row_converter.empty_rows(0, 0); + for row in group_values.iter().skip(n) { + new_group_values.push(row); + } + std::mem::swap(&mut new_group_values, &mut group_values); + + // SAFETY: self.map outlives iterator and is not modified concurrently + unsafe { + for bucket in self.map.iter() { + // Decrement group index by n + match bucket.as_ref().1.checked_sub(n) { + // Group index was >= n, shift value down + Some(sub) => bucket.as_mut().1 = sub, + // Group index was < n, so remove from table + None => self.map.erase(bucket), + } + } + } + output + } + }; + + // TODO: Materialize dictionaries in group keys (#7647) + for (field, array) in self.schema.fields.iter().zip(&mut output) { + let expected = field.data_type(); + if let DataType::Dictionary(_, v) = expected { + let actual = array.data_type(); + if v.as_ref() != actual { + return Err(DataFusionError::Internal(format!( + "Converted group rows expected dictionary of {v} got {actual}" + ))); + } + *array = cast(array.as_ref(), expected)?; + } + } + + self.group_values = Some(group_values); + Ok(output) + } + + fn clear_shrink(&mut self, batch: &RecordBatch) { + let count = batch.num_rows(); + self.group_values = self.group_values.take().map(|mut rows| { + rows.clear(); + rows + }); + self.map.clear(); + self.map.shrink_to(count, |_| 0); // hasher does not matter since the map is cleared + self.map_size = self.map.capacity() * std::mem::size_of::<(u64, usize)>(); + self.hashes_buffer.clear(); + self.hashes_buffer.shrink_to(count); + } +} diff --git a/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs new file mode 100644 index 0000000..c87a3e3 --- /dev/null +++ b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs @@ -0,0 +1,535 @@ +use std::{ + collections::BTreeMap, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use arrow::array::*; +use arrow::{ + compute::{concat_batches, filter_record_batch}, + datatypes::TimestampMillisecondType, +}; + +use arrow_array::{ArrayRef, PrimitiveArray, RecordBatch, StructArray, TimestampMillisecondArray}; +use arrow_ord::cmp; +use arrow_schema::{Schema, SchemaRef}; +use datafusion::{ + common::{utils::proxy::VecAllocExt, DataFusionError, Result}, + execution::{ + memory_pool::{MemoryConsumer, MemoryReservation}, + runtime_env::RuntimeEnv, + }, + logical_expr::EmitTo, + physical_plan::{aggregates::PhysicalGroupBy, PhysicalExpr}, +}; +use datafusion::{ + execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}, + physical_plan::{ + aggregates::{aggregate_expressions, AggregateMode}, + metrics::BaselineMetrics, + AggregateExpr, + }, +}; +use futures::{Stream, StreamExt}; + +use crate::physical_plan::utils::{ + accumulators::{create_accumulators, AccumulatorItem}, + time::RecordBatchWatermark, +}; + +use super::{ + add_window_columns_to_record_batch, add_window_columns_to_schema, create_group_accumulator, + group_values::{new_group_values, GroupValues}, + order::GroupOrdering, + streaming_window::{ + get_windows_for_watermark, FranzStreamingWindowExec, FranzStreamingWindowType, + FranzWindowFrame, + }, + GroupsAccumulatorItem, +}; + +pub struct GroupedWindowAggStream { + pub schema: SchemaRef, + input: SendableRecordBatchStream, + baseline_metrics: BaselineMetrics, + exec_aggregate_expressions: Vec>, + aggregate_expressions: Vec>>, + filter_expressions: Vec>>, + latest_watermark: Arc>>, + 
window_frames: BTreeMap, + window_type: FranzStreamingWindowType, + aggregation_mode: AggregateMode, + group_by: PhysicalGroupBy, + group_schema: Arc, + context: Arc, +} + +fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { + let group_fields = schema.fields()[0..group_count].to_vec(); + Arc::new(Schema::new(group_fields)) +} + +#[allow(dead_code)] +impl GroupedWindowAggStream { + pub fn new( + exec_operator: &FranzStreamingWindowExec, + context: Arc, + partition: usize, + watermark: Arc>>, + window_type: FranzStreamingWindowType, + aggregation_mode: AggregateMode, + ) -> Result { + let agg_schema = Arc::clone(&exec_operator.schema); + let agg_filter_expr = exec_operator.filter_expressions.clone(); + + let baseline_metrics = BaselineMetrics::new(&exec_operator.metrics, partition); + let input = exec_operator + .input + .execute(partition, Arc::clone(&context))?; + + let aggregate_expressions = + aggregate_expressions(&exec_operator.aggregate_expressions, &exec_operator.mode, 0)?; + let filter_expressions = match exec_operator.mode { + AggregateMode::Partial | AggregateMode::Single | AggregateMode::SinglePartitioned => { + agg_filter_expr + } + AggregateMode::Final | AggregateMode::FinalPartitioned => { + vec![None; exec_operator.aggregate_expressions.len()] + } + }; + + let group_by = exec_operator.group_by.clone(); + let group_schema = group_schema(&agg_schema, group_by.expr.len()); + Ok(Self { + schema: agg_schema, + input, + baseline_metrics, + exec_aggregate_expressions: exec_operator.aggregate_expressions.clone(), + aggregate_expressions, + filter_expressions, + latest_watermark: watermark, + window_frames: BTreeMap::new(), + window_type, + aggregation_mode, + group_by, + group_schema, + context, + }) + } + + pub fn output_schema_with_window(&self) -> SchemaRef { + Arc::new(add_window_columns_to_schema(self.schema.clone())) + } + + fn trigger_windows(&mut self) -> Result { + let mut results: Vec = Vec::new(); + let watermark_lock: std::sync::MutexGuard<'_, Option> = + self.latest_watermark.lock().unwrap(); + + if let Some(watermark) = *watermark_lock { + let mut window_frames_to_remove: Vec = Vec::new(); + + for (timestamp, frame) in self.window_frames.iter_mut() { + if watermark >= frame.window_end_time { + let rb = frame.evaluate()?; + let result = add_window_columns_to_record_batch( + rb, + frame.window_start_time, + frame.window_end_time, + ); + results.push(result); + window_frames_to_remove.push(*timestamp); + } + } + + for timestamp in window_frames_to_remove { + self.window_frames.remove(×tamp); + } + } + concat_batches(&self.output_schema_with_window(), &results) + .map_err(|err| DataFusionError::ArrowError(err, None)) + } + + fn process_watermark(&mut self, watermark: RecordBatchWatermark) { + // should this be within a mutex? 
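+ // It should: `latest_watermark` is an Arc<Mutex<_>> shared with the
+ // exec operator (and any other stream handed the same clone), so the
+ // read-compare-write below must happen under a single guard. Holding
+ // the lock across both steps also keeps the watermark monotonically
+ // non-decreasing.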
+ let mut watermark_lock: std::sync::MutexGuard> = + self.latest_watermark.lock().unwrap(); + + if let Some(current_watermark) = *watermark_lock { + if current_watermark <= watermark.min_timestamp { + *watermark_lock = Some(watermark.min_timestamp) + } + } else { + *watermark_lock = Some(watermark.min_timestamp) + } + } + + fn get_window_length(&mut self) -> Duration { + match self.window_type { + FranzStreamingWindowType::Session(duration) => duration, + FranzStreamingWindowType::Sliding(duration, _) => duration, + FranzStreamingWindowType::Tumbling(duration) => duration, + } + } + + fn ensure_window_frames_for_ranges( + &mut self, + ranges: &Vec<(SystemTime, SystemTime)>, + ) -> Result<(), DataFusionError> { + for (start_time, end_time) in ranges { + self.window_frames.entry(*start_time).or_insert({ + let accumulators: Vec<_> = self + .exec_aggregate_expressions + .iter() + .map(create_group_accumulator) + .collect::>()?; + let elapsed = start_time.elapsed().unwrap().as_millis(); + let name = format!("GroupedHashAggregateStream WindowStart[{elapsed}]"); + // Threading in Memory Reservation for now. We are currently not supporting spilling to disk. + let reservation = MemoryConsumer::new(name) + .with_can_spill(false) + .register(self.context.memory_pool()); + let group_values = new_group_values(self.group_schema.clone())?; + + GroupedAggWindowFrame::new( + *start_time, + *end_time, + "canonical_timestamp".to_string(), + accumulators, + self.aggregate_expressions.clone(), + self.filter_expressions.clone(), + self.group_by.clone(), + self.schema.clone(), + self.baseline_metrics.clone(), + group_values, + Default::default(), + GroupOrdering::None, + reservation, + ) + }); + } + Ok(()) + } + + #[inline] + fn poll_next_inner(&mut self, cx: &mut Context<'_>) -> Poll>> { + let result: std::prelude::v1::Result = match self + .input + .poll_next_unpin(cx) + { + Poll::Ready(rdy) => match rdy { + Some(Ok(batch)) => { + if batch.num_rows() > 0 { + let watermark: RecordBatchWatermark = + RecordBatchWatermark::try_from(&batch, "_streaming_internal_metadata")?; + let ranges = get_windows_for_watermark(&watermark, self.window_type); + let _ = self.ensure_window_frames_for_ranges(&ranges); + for range in ranges { + let frame = self.window_frames.get_mut(&range.0).unwrap(); + let _ = frame.push(&batch); + } + self.process_watermark(watermark); + + self.trigger_windows() + } else { + Ok(RecordBatch::new_empty(self.output_schema_with_window())) + } + } + Some(Err(e)) => Err(e), + None => Ok(RecordBatch::new_empty(self.output_schema_with_window())), + }, + Poll::Pending => { + return Poll::Pending; + } + }; + Poll::Ready(Some(result)) + } +} + +impl RecordBatchStream for GroupedWindowAggStream { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +impl Stream for GroupedWindowAggStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let poll: Poll>> = + self.poll_next_inner(cx); + self.baseline_metrics.record_poll(poll) + } + + fn size_hint(&self) -> (usize, Option) { + (0, None) + } +} + +pub struct GroupedAggWindowFrame { + pub window_start_time: SystemTime, + pub window_end_time: SystemTime, + pub timestamp_column: String, + pub accumulators: Vec, + pub aggregate_expressions: Vec>>, + pub filter_expressions: Vec>>, + pub group_by: PhysicalGroupBy, + /// GROUP BY expressions + /// An interning store of group keys + group_values: Box, + /// scratch space for the current input [`RecordBatch`] being + /// processed. 
Reused across batches here to avoid reallocations + current_group_indices: Vec, + + /// Optional ordering information, that might allow groups to be + /// emitted from the hash table prior to seeing the end of the + /// input + group_ordering: GroupOrdering, + reservation: MemoryReservation, + + pub schema: SchemaRef, + pub baseline_metrics: BaselineMetrics, +} + +impl GroupedAggWindowFrame { + pub(crate) fn new( + window_start_time: SystemTime, + window_end_time: SystemTime, + timestamp_column: String, + accumulators: Vec, + aggregate_expressions: Vec>>, + filter_expressions: Vec>>, + group_by: PhysicalGroupBy, + schema: SchemaRef, + baseline_metrics: BaselineMetrics, + group_values: Box, + current_group_indices: Vec, + group_ordering: GroupOrdering, + reservation: MemoryReservation, + ) -> Self { + Self { + window_start_time, + window_end_time, + timestamp_column, + accumulators, + aggregate_expressions, + filter_expressions, + group_by, + group_values, + current_group_indices, + group_ordering, + reservation, + schema, + baseline_metrics, + } + } + + fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<()> { + // Evaluate the grouping expressions + let group_by_values = evaluate_group_by(&self.group_by, &batch)?; + // Evaluate the aggregation expressions. + let input_values = evaluate_many(&self.aggregate_expressions, &batch)?; + + // Evaluate the filter expressions, if any, against the inputs + let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; + for group_values in &group_by_values { + // calculate the group indices for each input row + let starting_num_groups = self.group_values.len(); + self.group_values + .intern(group_values, &mut self.current_group_indices)?; + let group_indices = &self.current_group_indices; + + // Update ordering information if necessary + let total_num_groups = self.group_values.len(); + if total_num_groups > starting_num_groups { + self.group_ordering + .new_groups(group_values, group_indices, total_num_groups)?; + } + + // Gather the inputs to call the actual accumulator + let t = self + .accumulators + .iter_mut() + .zip(input_values.iter()) + .zip(filter_values.iter()); + + for ((acc, values), opt_filter) in t { + let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean()); + + acc.update_batch(values, group_indices, opt_filter, total_num_groups)?; + } + } + self.update_memory_reservation() + } + + fn update_memory_reservation(&mut self) -> Result<()> { + let acc = self.accumulators.iter().map(|x| x.size()).sum::(); + self.reservation.try_resize( + acc + self.group_values.size() + + self.group_ordering.size() + + self.current_group_indices.allocated_size(), + ) + } + + pub fn push(&mut self, batch: &RecordBatch) -> Result<(), DataFusionError> { + let metadata = batch + .column_by_name("_streaming_internal_metadata") + .unwrap(); + let metadata_struct = metadata.as_any().downcast_ref::().unwrap(); + + let ts_column = metadata_struct + .column_by_name("canonical_timestamp") + .unwrap(); + + let ts_array = ts_column + .as_any() + .downcast_ref::>() + .unwrap() + .to_owned(); + + let start_time_duration = self + .window_start_time + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis() as i64; + + let gte_cmp_filter = cmp::gt_eq( + &ts_array, + &TimestampMillisecondArray::new_scalar(start_time_duration), + )?; + + let filtered_batch: RecordBatch = filter_record_batch(batch, >e_cmp_filter)?; + + let end_time_duration = self + .window_end_time + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis() as i64; + + let 
metadata = filtered_batch + .column_by_name("_streaming_internal_metadata") + .unwrap(); + let metadata_struct = metadata.as_any().downcast_ref::().unwrap(); + + let ts_column = metadata_struct + .column_by_name("canonical_timestamp") + .unwrap(); + + let ts_array = ts_column + .as_any() + .downcast_ref::>() + .unwrap() + .to_owned(); + + let lt_cmp_filter = cmp::lt( + &ts_array, + &TimestampMillisecondArray::new_scalar(end_time_duration), + )?; + let final_batch = filter_record_batch(&filtered_batch, <_cmp_filter)?; + + let _ = self.group_aggregate_batch(final_batch); + + Ok(()) + } + + /// Create an output RecordBatch with the group keys and + /// accumulator states/values specified in emit_to + fn evaluate(&mut self) -> Result { + //let timer = self.baseline_metrics.elapsed_compute().timer(); + + let schema = self.schema.clone(); + if self.group_values.is_empty() { + return Ok(RecordBatch::new_empty(schema)); + } + + let mut output = self.group_values.emit(EmitTo::All)?; + + // Next output each aggregate value + for acc in self.accumulators.iter_mut() { + output.push(acc.evaluate(EmitTo::All)?) + } + + // emit reduces the memory usage. Ignore Err from update_memory_reservation. Even if it is + // over the target memory size after emission, we can emit again rather than returning Err. + let _ = self.update_memory_reservation(); + let batch = RecordBatch::try_new(schema, output)?; + Ok(batch) + } +} + +pub(crate) fn evaluate_group_by( + group_by: &PhysicalGroupBy, + batch: &RecordBatch, +) -> Result>> { + let exprs: Vec = group_by + .expr + .iter() + .map(|(expr, _)| { + let value = expr.evaluate(batch)?; + value.into_array(batch.num_rows()) + }) + .collect::>>()?; + + let null_exprs: Vec = group_by + .null_expr + .iter() + .map(|(expr, _)| { + let value = expr.evaluate(batch)?; + value.into_array(batch.num_rows()) + }) + .collect::>>()?; + + Ok(group_by + .groups + .iter() + .map(|group| { + group + .iter() + .enumerate() + .map(|(idx, is_null)| { + if *is_null { + Arc::clone(&null_exprs[idx]) + } else { + Arc::clone(&exprs[idx]) + } + }) + .collect() + }) + .collect()) +} + +/// Evaluates expressions against a record batch. +fn evaluate(expr: &[Arc], batch: &RecordBatch) -> Result> { + expr.iter() + .map(|expr| { + expr.evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + }) + .collect() +} + +/// Evaluates expressions against a record batch. 
+pub(crate) fn evaluate_many( + expr: &[Vec>], + batch: &RecordBatch, +) -> Result>> { + expr.iter().map(|expr| evaluate(expr, batch)).collect() +} + +fn evaluate_optional( + expr: &[Option>], + batch: &RecordBatch, +) -> Result>> { + expr.iter() + .map(|expr| { + expr.as_ref() + .map(|expr| { + expr.evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + }) + .transpose() + }) + .collect() +} diff --git a/crates/core/src/physical_plan/continuous/mod.rs b/crates/core/src/physical_plan/continuous/mod.rs new file mode 100644 index 0000000..e980eb3 --- /dev/null +++ b/crates/core/src/physical_plan/continuous/mod.rs @@ -0,0 +1,106 @@ +use std::{ + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; + +use arrow::{ + array::PrimitiveBuilder, compute::filter_record_batch, datatypes::TimestampMillisecondType, +}; +use arrow_array::{Array, BooleanArray, RecordBatch}; +use arrow_schema::{DataType, Field, Schema, SchemaBuilder, SchemaRef, TimeUnit}; +use datafusion::{ + common::{downcast_value, DataFusionError, Result}, + logical_expr::GroupsAccumulator, + physical_expr::GroupsAccumulatorAdapter, + physical_plan::PhysicalExpr, +}; +pub mod grouped_window_agg_stream; +pub mod streaming_window; + +mod group_values; +mod order; +use datafusion::physical_expr::AggregateExpr; +use log::debug; + +pub(crate) type GroupsAccumulatorItem = Box; + +pub(crate) fn create_group_accumulator( + agg_expr: &Arc, +) -> Result> { + if agg_expr.groups_accumulator_supported() { + agg_expr.create_groups_accumulator() + } else { + // Note in the log when the slow path is used + debug!( + "Creating GroupsAccumulatorAdapter for {}: {agg_expr:?}", + agg_expr.name() + ); + let agg_expr_captured = Arc::clone(agg_expr); + let factory = move || agg_expr_captured.create_accumulator(); + Ok(Box::new(GroupsAccumulatorAdapter::new(factory))) + } +} + +fn add_window_columns_to_schema(schema: SchemaRef) -> Schema { + let fields = schema.flattened_fields().to_owned(); + + let mut builder = SchemaBuilder::new(); + + for field in fields { + builder.push(field.clone()); + } + builder.push(Field::new( + "window_start_time", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + )); + builder.push(Field::new( + "window_end_time", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + )); + + builder.finish() +} + +fn add_window_columns_to_record_batch( + record_batch: RecordBatch, + start_time: SystemTime, + end_time: SystemTime, +) -> RecordBatch { + let start_time_duration = start_time.duration_since(UNIX_EPOCH).unwrap().as_millis() as i64; + let end_time_duration = end_time.duration_since(UNIX_EPOCH).unwrap().as_millis() as i64; + + let mut start_builder = PrimitiveBuilder::::new(); + let mut end_builder = PrimitiveBuilder::::new(); + + for _ in 0..record_batch.num_rows() { + start_builder.append_value(start_time_duration); + end_builder.append_value(end_time_duration); + } + + let start_array = start_builder.finish(); + let end_array = end_builder.finish(); + + let new_schema = add_window_columns_to_schema(record_batch.schema()); + let mut new_columns = record_batch.columns().to_vec(); + new_columns.push(Arc::new(start_array)); + new_columns.push(Arc::new(end_array)); + + RecordBatch::try_new(Arc::new(new_schema), new_columns).unwrap() +} + +pub fn as_boolean_array(array: &dyn Array) -> Result<&BooleanArray> { + Ok(downcast_value!(array, BooleanArray)) +} + +fn batch_filter(batch: &RecordBatch, predicate: &Arc) -> Result { + predicate + .evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + 
.and_then(|array| { + Ok(as_boolean_array(&array)?) + // apply filter array to record batch + .and_then(|filter_array| Ok(filter_record_batch(batch, filter_array)?)) + }) +} diff --git a/crates/core/src/physical_plan/continuous/order/full.rs b/crates/core/src/physical_plan/continuous/order/full.rs new file mode 100644 index 0000000..65af814 --- /dev/null +++ b/crates/core/src/physical_plan/continuous/order/full.rs @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::logical_expr::EmitTo; + +/// Tracks grouping state when the data is ordered entirely by its +/// group keys +/// +/// When the group values are sorted, as soon as we see group `n+1` we +/// know we will never see any rows for group `n again and thus they +/// can be emitted. +/// +/// For example, given `SUM(amt) GROUP BY id` if the input is sorted +/// by `id` as soon as a new `id` value is seen all previous values +/// can be emitted. +/// +/// The state is tracked like this: +/// +/// ```text +/// ┌─────┐ ┌──────────────────┐ +/// │┌───┐│ │ ┌──────────────┐ │ ┏━━━━━━━━━━━━━━┓ +/// ││ 0 ││ │ │ 123 │ │ ┌─────┃ 13 ┃ +/// │└───┘│ │ └──────────────┘ │ │ ┗━━━━━━━━━━━━━━┛ +/// │ ... │ │ ... │ │ +/// │┌───┐│ │ ┌──────────────┐ │ │ current +/// ││12 ││ │ │ 234 │ │ │ +/// │├───┤│ │ ├──────────────┤ │ │ +/// ││12 ││ │ │ 234 │ │ │ +/// │├───┤│ │ ├──────────────┤ │ │ +/// ││13 ││ │ │ 456 │◀┼───┘ +/// │└───┘│ │ └──────────────┘ │ +/// └─────┘ └──────────────────┘ +/// +/// group indices group_values current tracks the most +/// (in group value recent group index +/// order) +/// ``` +/// +/// In this diagram, the current group is `13`, and thus groups +/// `0..12` can be emitted. Note that `13` can not yet be emitted as +/// there may be more values in the next batch with the same group_id. +#[derive(Debug)] +pub(crate) struct GroupOrderingFull { + state: State, +} + +#[derive(Debug)] +enum State { + /// Seen no input yet + Start, + + /// Data is in progress. `current is the current group for which + /// values are being generated. Can emit `current` - 1 + InProgress { current: usize }, + + /// Seen end of input: all groups can be emitted + Complete, +} + +impl GroupOrderingFull { + pub fn new() -> Self { + Self { + state: State::Start, + } + } + + // How many groups be emitted, or None if no data can be emitted + pub fn emit_to(&self) -> Option { + match &self.state { + State::Start => None, + State::InProgress { current, .. } => { + if *current == 0 { + // Can not emit if still on the first row + None + } else { + // otherwise emit all rows prior to the current group + Some(EmitTo::First(*current)) + } + } + State::Complete { .. 
} => Some(EmitTo::All), + } + } + + /// remove the first n groups from the internal state, shifting + /// all existing indexes down by `n` + pub fn remove_groups(&mut self, n: usize) { + match &mut self.state { + State::Start => panic!("invalid state: start"), + State::InProgress { current } => { + // shift down by n + assert!(*current >= n); + *current -= n; + } + State::Complete { .. } => panic!("invalid state: complete"), + } + } + + /// Note that the input is complete so any outstanding groups are done as well + pub fn input_done(&mut self) { + self.state = State::Complete; + } + + /// Called when new groups are added in a batch. See documentation + /// on [`super::GroupOrdering::new_groups`] + pub fn new_groups(&mut self, total_num_groups: usize) { + assert_ne!(total_num_groups, 0); + + // Update state + let max_group_index = total_num_groups - 1; + self.state = match self.state { + State::Start => State::InProgress { + current: max_group_index, + }, + State::InProgress { current } => { + // expect to see new group indexes when called again + assert!(current <= max_group_index, "{current} <= {max_group_index}"); + State::InProgress { + current: max_group_index, + } + } + State::Complete { .. } => { + panic!("Saw new group after input was complete"); + } + }; + } + + pub(crate) fn size(&self) -> usize { + std::mem::size_of::() + } +} diff --git a/crates/core/src/physical_plan/continuous/order/mod.rs b/crates/core/src/physical_plan/continuous/order/mod.rs new file mode 100644 index 0000000..921452a --- /dev/null +++ b/crates/core/src/physical_plan/continuous/order/mod.rs @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
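The fully-ordered state machine above is small enough to trace end to end. A sketch, assuming a test module next to `order` with access to the re-exported `GroupOrderingFull`; the group counts are illustrative:

```rust
use datafusion::logical_expr::EmitTo;

use super::GroupOrderingFull;

#[test]
fn full_ordering_state_machine() {
    let mut ordering = GroupOrderingFull::new();

    // Nothing seen yet: nothing can be emitted.
    assert!(ordering.emit_to().is_none());

    // A batch created groups 0..=13. Group 13 stays open because the
    // next batch may still hold rows for it, so only the first 13 go.
    ordering.new_groups(14);
    assert!(matches!(ordering.emit_to(), Some(EmitTo::First(13))));

    // Once the caller emits those 13 groups, indexes shift down by 13;
    // the survivor becomes group 0, which cannot be emitted yet.
    ordering.remove_groups(13);
    assert!(ordering.emit_to().is_none());

    // End of input: everything outstanding is flushed.
    ordering.input_done();
    assert!(matches!(ordering.emit_to(), Some(EmitTo::All)));
}
```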
+ +use arrow_array::ArrayRef; +use arrow_schema::Schema; +use datafusion::{ + common::Result, logical_expr::EmitTo, physical_expr::PhysicalSortExpr, + physical_plan::InputOrderMode, +}; + +mod full; +mod partial; + +pub(crate) use full::GroupOrderingFull; +pub(crate) use partial::GroupOrderingPartial; + +/// Ordering information for each group in the hash table +#[derive(Debug)] +pub(crate) enum GroupOrdering { + /// Groups are not ordered + None, + /// Groups are ordered by some pre-set of the group keys + Partial(GroupOrderingPartial), + /// Groups are entirely contiguous, + Full(GroupOrderingFull), +} + +impl GroupOrdering { + /// Create a `GroupOrdering` for the specified ordering + pub fn try_new( + input_schema: &Schema, + mode: &InputOrderMode, + ordering: &[PhysicalSortExpr], + ) -> Result { + match mode { + InputOrderMode::Linear => Ok(GroupOrdering::None), + InputOrderMode::PartiallySorted(order_indices) => { + GroupOrderingPartial::try_new(input_schema, order_indices, ordering) + .map(GroupOrdering::Partial) + } + InputOrderMode::Sorted => Ok(GroupOrdering::Full(GroupOrderingFull::new())), + } + } + + // How many groups be emitted, or None if no data can be emitted + pub fn emit_to(&self) -> Option { + match self { + GroupOrdering::None => None, + GroupOrdering::Partial(partial) => partial.emit_to(), + GroupOrdering::Full(full) => full.emit_to(), + } + } + + /// Updates the state the input is done + pub fn input_done(&mut self) { + match self { + GroupOrdering::None => {} + GroupOrdering::Partial(partial) => partial.input_done(), + GroupOrdering::Full(full) => full.input_done(), + } + } + + /// remove the first n groups from the internal state, shifting + /// all existing indexes down by `n` + pub fn remove_groups(&mut self, n: usize) { + match self { + GroupOrdering::None => {} + GroupOrdering::Partial(partial) => partial.remove_groups(n), + GroupOrdering::Full(full) => full.remove_groups(n), + } + } + + /// Called when new groups are added in a batch + /// + /// * `total_num_groups`: total number of groups (so max + /// group_index is total_num_groups - 1). + /// + /// * `group_values`: group key values for *each row* in the batch + /// + /// * `group_indices`: indices for each row in the batch + /// + /// * `hashes`: hash values for each row in the batch + pub fn new_groups( + &mut self, + batch_group_values: &[ArrayRef], + group_indices: &[usize], + total_num_groups: usize, + ) -> Result<()> { + match self { + GroupOrdering::None => {} + GroupOrdering::Partial(partial) => { + partial.new_groups(batch_group_values, group_indices, total_num_groups)?; + } + GroupOrdering::Full(full) => { + full.new_groups(total_num_groups); + } + }; + Ok(()) + } + + /// Return the size of memory used by the ordering state, in bytes + pub(crate) fn size(&self) -> usize { + std::mem::size_of::() + + match self { + GroupOrdering::None => 0, + GroupOrdering::Partial(partial) => partial.size(), + GroupOrdering::Full(full) => full.size(), + } + } +} diff --git a/crates/core/src/physical_plan/continuous/order/partial.rs b/crates/core/src/physical_plan/continuous/order/partial.rs new file mode 100644 index 0000000..e941c10 --- /dev/null +++ b/crates/core/src/physical_plan/continuous/order/partial.rs @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
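`GroupOrdering::try_new` maps the planner's `InputOrderMode` onto these variants one-for-one. A sketch of the three cases, assuming a test module inside `order`; the schema, the `state` sort column, and the prefix index are illustrative:

```rust
use arrow_schema::{DataType, Field, Schema, SortOptions};
use datafusion::physical_expr::{expressions::col, PhysicalSortExpr};
use datafusion::physical_plan::InputOrderMode;

use super::GroupOrdering;

#[test]
fn ordering_from_input_order_mode() -> datafusion::common::Result<()> {
    let schema = Schema::new(vec![
        Field::new("id", DataType::Int64, false),
        Field::new("state", DataType::Utf8, false),
    ]);
    let ordering = [PhysicalSortExpr {
        expr: col("state", &schema)?,
        options: SortOptions::default(),
    }];

    // No usable input order: groups are emitted only at end of input.
    let o = GroupOrdering::try_new(&schema, &InputOrderMode::Linear, &ordering)?;
    assert!(matches!(o, GroupOrdering::None));

    // Sorted on a prefix of the group keys (group column index 1 here).
    let o = GroupOrdering::try_new(
        &schema,
        &InputOrderMode::PartiallySorted(vec![1]),
        &ordering,
    )?;
    assert!(matches!(o, GroupOrdering::Partial(_)));

    // Fully sorted by the group keys.
    let o = GroupOrdering::try_new(&schema, &InputOrderMode::Sorted, &ordering)?;
    assert!(matches!(o, GroupOrdering::Full(_)));
    Ok(())
}
```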
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::row::{OwnedRow, RowConverter, Rows, SortField}; +use arrow_array::ArrayRef; +use arrow_schema::Schema; +use datafusion::{ + common::utils::proxy::VecAllocExt, common::Result, logical_expr::EmitTo, + physical_expr::PhysicalSortExpr, +}; + +use std::sync::Arc; + +/// Tracks grouping state when the data is ordered by some subset of +/// the group keys. +/// +/// Once the next *sort key* value is seen, never see groups with that +/// sort key again, so we can emit all groups with the previous sort +/// key and earlier. +/// +/// For example, given `SUM(amt) GROUP BY id, state` if the input is +/// sorted by `state, when a new value of `state` is seen, all groups +/// with prior values of `state` can be emitted. +/// +/// The state is tracked like this: +/// +/// ```text +/// ┏━━━━━━━━━━━━━━━━━┓ ┏━━━━━━━┓ +/// ┌─────┐ ┌───────────────────┐ ┌─────┃ 9 ┃ ┃ "MD" ┃ +/// │┌───┐│ │ ┌──────────────┐ │ │ ┗━━━━━━━━━━━━━━━━━┛ ┗━━━━━━━┛ +/// ││ 0 ││ │ │ 123, "MA" │ │ │ current_sort sort_key +/// │└───┘│ │ └──────────────┘ │ │ +/// │ ... │ │ ... │ │ current_sort tracks the +/// │┌───┐│ │ ┌──────────────┐ │ │ smallest group index that had +/// ││ 8 ││ │ │ 765, "MA" │ │ │ the same sort_key as current +/// │├───┤│ │ ├──────────────┤ │ │ +/// ││ 9 ││ │ │ 923, "MD" │◀─┼─┘ +/// │├───┤│ │ ├──────────────┤ │ ┏━━━━━━━━━━━━━━┓ +/// ││10 ││ │ │ 345, "MD" │ │ ┌─────┃ 11 ┃ +/// │├───┤│ │ ├──────────────┤ │ │ ┗━━━━━━━━━━━━━━┛ +/// ││11 ││ │ │ 124, "MD" │◀─┼──┘ current +/// │└───┘│ │ └──────────────┘ │ +/// └─────┘ └───────────────────┘ +/// +/// group indices +/// (in group value group_values current tracks the most +/// order) recent group index +///``` +#[derive(Debug)] +pub(crate) struct GroupOrderingPartial { + /// State machine + state: State, + + /// The indexes of the group by columns that form the sort key. + /// For example if grouping by `id, state` and ordered by `state` + /// this would be `[1]`. + order_indices: Vec, + + /// Converter for the sort key (used on the group columns + /// specified in `order_indexes`) + row_converter: RowConverter, +} + +#[derive(Debug, Default)] +enum State { + /// The ordering was temporarily taken. `Self::Taken` is left + /// when state must be temporarily taken to satisfy the borrow + /// checker. If an error happens before the state can be restored, + /// the ordering information is lost and execution can not + /// proceed, but there is no undefined behavior. + #[default] + Taken, + + /// Seen no input yet + Start, + + /// Data is in progress. 
+ +impl GroupOrderingPartial { + pub fn try_new( + input_schema: &Schema, + order_indices: &[usize], + ordering: &[PhysicalSortExpr], + ) -> Result<Self> { + assert!(!order_indices.is_empty()); + assert!(order_indices.len() <= ordering.len()); + + // Get only the prefix of the ordering that consists of group by expressions. + let fields = ordering[0..order_indices.len()] + .iter() + .map(|sort_expr| { + Ok(SortField::new_with_options( + sort_expr.expr.data_type(input_schema)?, + sort_expr.options, + )) + }) + .collect::<Result<Vec<_>>>()?; + + Ok(Self { + state: State::Start, + order_indices: order_indices.to_vec(), + row_converter: RowConverter::new(fields)?, + }) + } + + /// Creates sort keys from the group values + /// + /// For example, if group_values had `A, B, C` but the input was + /// only sorted on `B` and `C` this should return rows for (`B`, + /// `C`) + fn compute_sort_keys(&mut self, group_values: &[ArrayRef]) -> Result<Rows> { + // Take only the columns that are in the sort key + let sort_values: Vec<_> = self + .order_indices + .iter() + .map(|&idx| Arc::clone(&group_values[idx])) + .collect(); + + Ok(self.row_converter.convert_columns(&sort_values)?) + } + + /// How many groups can be emitted, or `None` if no data can be emitted + pub fn emit_to(&self) -> Option<EmitTo> { + match &self.state { + State::Taken => unreachable!("State previously taken"), + State::Start => None, + State::InProgress { current_sort, .. } => { + // Cannot emit while we are still on the first sort row; + // otherwise we can emit all groups that had earlier sort keys + if *current_sort == 0 { + None + } else { + Some(EmitTo::First(*current_sort)) + } + } + State::Complete => Some(EmitTo::All), + } + } + + /// Remove the first n groups from the internal state, shifting + /// all existing indexes down by `n` + pub fn remove_groups(&mut self, n: usize) { + match &mut self.state { + State::Taken => unreachable!("State previously taken"), + State::Start => panic!("invalid state: start"), + State::InProgress { + current_sort, + current, + sort_key: _, + } => { + // shift indexes down by n + assert!(*current >= n); + *current -= n; + assert!(*current_sort >= n); + *current_sort -= n; + } + State::Complete { .. } => panic!("invalid state: complete"), + } + } + + /// Note that the input is complete so any outstanding groups are done as well + pub fn input_done(&mut self) { + self.state = match self.state { + State::Taken => unreachable!("State previously taken"), + _ => State::Complete, + }; + } + + /// Called when new groups are added in a batch. See the documentation + /// on [`super::GroupOrdering::new_groups`] + pub fn new_groups( + &mut self, + batch_group_values: &[ArrayRef], + group_indices: &[usize], + total_num_groups: usize, + ) -> Result<()> { + assert!(total_num_groups > 0); + assert!(!batch_group_values.is_empty()); + + let max_group_index = total_num_groups - 1; + + // Compute the sort key values for each group + let sort_keys = self.compute_sort_keys(batch_group_values)?; + + let old_state = std::mem::take(&mut self.state); + let (mut current_sort, mut sort_key) = match &old_state { + State::Taken => unreachable!("State previously taken"), + State::Start => (0, sort_keys.row(0)), + State::InProgress { + current_sort, + sort_key, + .. + } => (*current_sort, sort_key.row()), + State::Complete => { + panic!("Saw new group after the end of input"); + } + }; + + // Find the latest sort key + let iter = group_indices.iter().zip(sort_keys.iter()); + for (&group_index, group_sort_key) in iter { + // Has this group seen a new sort_key? + if sort_key != group_sort_key { + current_sort = group_index; + sort_key = group_sort_key; + } + } + + self.state = State::InProgress { + current_sort, + sort_key: sort_key.owned(), + current: max_group_index, + }; + + Ok(()) + } + + /// Return the size of memory allocated by this structure + pub(crate) fn size(&self) -> usize { + std::mem::size_of::<Self>() + + self.order_indices.allocated_size() + + self.row_converter.size() + } +}
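To make the emit rule concrete, here is a self-contained replay of the diagram's example: groups 0..=8 arrive with sort key "MA" and groups 9..=11 with "MD", so the moment "MD" is seen, groups 0..9 are final. Plain tuples stand in for the arrow `Rows` sort keys the real `new_groups` uses:

```rust
// Toy replay of the partial-ordering rule over the diagram's data.
fn main() {
    // (group_index, sort_key) in group-index order; input sorted by sort_key.
    let groups = [(0, "MA"), (8, "MA"), (9, "MD"), (10, "MD"), (11, "MD")];

    let mut current_sort = 0; // smallest group index sharing the latest sort key
    let mut sort_key = groups[0].1;

    for &(group_index, key) in &groups {
        if key != sort_key {
            // New sort key seen: every group with index < group_index is
            // final and could be emitted as EmitTo::First(group_index).
            println!("saw {key:?}: groups 0..{group_index} can be emitted");
            current_sort = group_index;
            sort_key = key;
        }
    }
    println!("state: current_sort = {current_sort}, sort_key = {sort_key:?}");
}
```

Running it prints `saw "MD": groups 0..9 can be emitted` and ends with `current_sort = 9`, matching the `current_sort`/`sort_key` boxes in the diagram.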
diff --git a/crates/core/src/physical_plan/streaming_window.rs b/crates/core/src/physical_plan/continuous/streaming_window.rs similarity index 91% rename from crates/core/src/physical_plan/streaming_window.rs rename to crates/core/src/physical_plan/continuous/streaming_window.rs index 0eeb0b2..93f0e57 100644 --- a/crates/core/src/physical_plan/streaming_window.rs +++ b/crates/core/src/physical_plan/continuous/streaming_window.rs @@ -41,9 +41,12 @@ use datafusion::physical_plan::{ use futures::{Stream, StreamExt}; use tracing::debug; -use super::utils::{ - accumulators::{create_accumulators, AccumulatorItem}, - time::RecordBatchWatermark, +use crate::physical_plan::{ + continuous::grouped_window_agg_stream::GroupedWindowAggStream, + utils::{ + accumulators::{create_accumulators, AccumulatorItem}, + time::RecordBatchWatermark, + }, }; pub struct FranzWindowFrame { @@ -78,6 +81,8 @@ impl DisplayAs for FranzWindowFrame { use datafusion::common::Result; +use super::{add_window_columns_to_record_batch, add_window_columns_to_schema, batch_filter}; + impl FranzWindowFrame { pub fn new( window_start_time: SystemTime, @@ -194,11 +199,11 @@ pub struct FranzStreamingWindowExec { pub filter_expressions: Vec<Option<Arc<dyn PhysicalExpr>>>, /// Schema after the window is run pub group_by: PhysicalGroupBy, - schema: SchemaRef, + pub schema: SchemaRef, pub input_schema: SchemaRef, pub watermark: Arc>>, - metrics: ExecutionPlanMetricsSet, + pub(crate) metrics: ExecutionPlanMetricsSet, cache: PlanProperties, pub mode: AggregateMode, pub window_type: FranzStreamingWindowType, @@ -407,15 +412,27 @@ impl ExecutionPlan for FranzStreamingWindowExec { partition: usize, context: Arc<TaskContext>, ) -> Result<SendableRecordBatchStream> { - let stream: Pin<Box<dyn RecordBatchStream + Send>> = Box::pin(WindowAggStream::new( - self, - context, - partition, - self.watermark.clone(), - self.window_type, - self.mode, - )?); - Ok(stream) + if self.group_by.is_empty() { + debug!("GROUP BY expression is empty; creating a WindowAggStream"); + Ok(Box::pin(WindowAggStream::new( + self, + context, + partition, + self.watermark.clone(), + self.window_type, + self.mode, + )?)) + } else { + debug!("Creating a GroupedWindowAggStream"); + Ok(Box::pin(GroupedWindowAggStream::new( + self, + context, + partition, + self.watermark.clone(), + self.window_type, + self.mode, + )?)) + } } fn metrics(&self) -> Option<MetricsSet> {
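The branch above is the only visible difference between the two query shapes: an empty `GROUP BY` gets one global accumulator set per open window, anything else gets per-group state keyed by the group values. A sketch of both shapes using the `window()` API from the examples; the `DataStream` import path and the `driver_id`/`speed` columns are assumptions borrowed from kafka_rideshare.rs, and the function is meant to be called with a stream built as in those examples:

```rust
use std::time::Duration;

use datafusion::error::Result;
use datafusion::functions_aggregate::expr_fn::max;
use datafusion::logical_expr::col;
use denormalized::datastream::DataStream; // path assumed from crates/core/src/datastream.rs

fn window_shapes(ds: DataStream) -> Result<(DataStream, DataStream)> {
    // Empty GROUP BY: execute() builds a plain WindowAggStream.
    let global = ds.clone().window(
        vec![],
        vec![max(col("speed"))],
        Duration::from_millis(4_000),
        None,
    )?;

    // Non-empty GROUP BY: execute() builds a GroupedWindowAggStream that
    // keeps one accumulator set per distinct group value.
    let per_driver = ds.window(
        vec![col("driver_id")],
        vec![max(col("speed"))],
        Duration::from_millis(4_000),
        None,
    )?;

    Ok((global, per_driver))
}
```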
@@ -573,12 +590,6 @@ impl WindowAggStream { window_type: FranzStreamingWindowType, aggregation_mode: AggregateMode, ) -> Result<Self> { - // TODO: 6/12/2024 Why was this commented out? - - // In WindowAggExec all partition by columns should be ordered. - //if window_expr[0].partition_by().len() != ordered_partition_by_indices.len() { - // return internal_err!("All partition by columns should have an ordering"); - //} let agg_schema = Arc::clone(&exec_operator.schema); let agg_filter_expr = exec_operator.filter_expressions.clone(); @@ -746,7 +757,7 @@ impl Stream for WindowAggStream { } } -fn get_windows_for_watermark( +pub fn get_windows_for_watermark( watermark: &RecordBatchWatermark, window_type: FranzStreamingWindowType, ) -> Vec<(SystemTime, SystemTime)> { @@ -878,67 +889,3 @@ pub fn aggregate_batch( Ok(allocated) } - -fn add_window_columns_to_schema(schema: SchemaRef) -> Schema { - let fields = schema.flattened_fields().to_owned(); - - let mut builder = SchemaBuilder::new(); - - for field in fields { - builder.push(field.clone()); - } - builder.push(Field::new( - "window_start_time", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - )); - builder.push(Field::new( - "window_end_time", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - )); - - builder.finish() -} - -fn add_window_columns_to_record_batch( - record_batch: RecordBatch, - start_time: SystemTime, - end_time: SystemTime, -) -> RecordBatch { - let start_time_duration = start_time.duration_since(UNIX_EPOCH).unwrap().as_millis() as i64; - let end_time_duration = end_time.duration_since(UNIX_EPOCH).unwrap().as_millis() as i64; - - let mut start_builder = PrimitiveBuilder::<TimestampMillisecondType>::new(); - let mut end_builder = PrimitiveBuilder::<TimestampMillisecondType>::new(); - - for _ in 0..record_batch.num_rows() { - start_builder.append_value(start_time_duration); - end_builder.append_value(end_time_duration); - } - - let start_array = start_builder.finish(); - let end_array = end_builder.finish(); - - let new_schema = add_window_columns_to_schema(record_batch.schema()); - let mut new_columns = record_batch.columns().to_vec(); - new_columns.push(Arc::new(start_array)); - new_columns.push(Arc::new(end_array)); - - RecordBatch::try_new(Arc::new(new_schema), new_columns).unwrap() -} - -pub fn as_boolean_array(array: &dyn Array) -> Result<&BooleanArray> { - Ok(downcast_value!(array, BooleanArray)) -} - -fn batch_filter(batch: &RecordBatch, predicate: &Arc<dyn PhysicalExpr>) -> Result<RecordBatch> { - predicate - .evaluate(batch) - .and_then(|v| v.into_array(batch.num_rows())) - .and_then(|array| { - Ok(as_boolean_array(&array)?)
- // apply filter array to record batch - .and_then(|filter_array| Ok(filter_record_batch(batch, filter_array)?)) - }) -} diff --git a/crates/core/src/physical_plan/mod.rs b/crates/core/src/physical_plan/mod.rs index f4a9642..44831a7 100644 --- a/crates/core/src/physical_plan/mod.rs +++ b/crates/core/src/physical_plan/mod.rs @@ -1,2 +1,2 @@ -pub mod streaming_window; +pub mod continuous; pub mod utils; diff --git a/crates/core/src/planner/streaming_window.rs b/crates/core/src/planner/streaming_window.rs index 34ec2d2..a054e41 100644 --- a/crates/core/src/planner/streaming_window.rs +++ b/crates/core/src/planner/streaming_window.rs @@ -17,7 +17,9 @@ use datafusion::physical_planner::{ }; use crate::logical_plan::streaming_window::{StreamingWindowPlanNode, StreamingWindowType}; -use crate::physical_plan::streaming_window::{FranzStreamingWindowExec, FranzStreamingWindowType}; +use crate::physical_plan::continuous::streaming_window::{ + FranzStreamingWindowExec, FranzStreamingWindowType, +}; /// Physical planner for TopK nodes pub struct StreamingWindowPlanner {} diff --git a/examples/examples/kafka_rideshare.rs b/examples/examples/kafka_rideshare.rs index 90902bd..47b5bed 100644 --- a/examples/examples/kafka_rideshare.rs +++ b/examples/examples/kafka_rideshare.rs @@ -17,7 +17,7 @@ async fn main() -> Result<()> { tracing_log::LogTracer::init().expect("Failed to set up log tracer"); let subscriber = FmtSubscriber::builder() - .with_max_level(tracing::Level::INFO) + .with_max_level(tracing::Level::DEBUG) .with_span_events(FmtSpan::CLOSE | FmtSpan::ENTER) .finish(); tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed"); @@ -69,7 +69,7 @@ async fn main() -> Result<()> { .await?; let ds = ctx.from_topic(source_topic).await?.window( - vec![], + vec![col("driver_id")], vec![ max(col("imu_measurement").field("gps").field("speed")), min(col("imu_measurement").field("gps").field("altitude")), From bc23ccdc760b9f21be37749106c4ad354687573a Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 13 Aug 2024 14:54:57 -0700 Subject: [PATCH 2/8] upgrade to datafusion 41.0.0 --- Cargo.lock | 60 ++++++++++++------------- Cargo.toml | 9 +++- examples/examples/csv_streaming.rs | 3 +- examples/examples/kafka_rideshare.rs | 3 +- examples/examples/simple_aggregation.rs | 4 +- 5 files changed, 43 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4babca3..5c8668f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -796,8 +796,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -851,8 +851,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow-schema", "async-trait", @@ -864,8 +864,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = 
"git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -890,16 +890,16 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow", "chrono", @@ -918,8 +918,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -936,8 +936,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow", "arrow-buffer", @@ -962,8 +962,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -979,8 +979,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow", "arrow-array", @@ -1000,8 +1000,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow", "async-trait", @@ -1019,8 +1019,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -1048,8 +1048,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = 
"git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -1061,8 +1061,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "datafusion-common", "datafusion-execution", @@ -1072,8 +1072,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "ahash", "arrow", @@ -1105,8 +1105,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "40.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#7dc9d395099b4c3830852dbbf32f1882173332bb" +version = "41.0.0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" dependencies = [ "arrow", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 38ca59c..8df5af0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ description = "Embeddable stream processing engine" [workspace.dependencies] df-streams-core = { path = "crates/core" } -datafusion = "40.0" +datafusion = "41.0.0" arrow = { version = "52.0.0", features = ["prettyprint"] } arrow-array = { version = "52.0.0", default-features = false, features = [ @@ -39,7 +39,12 @@ futures = "0.3" tracing = "0.1.40" tracing-log = "0.2.0" tracing-subscriber = "0.3.18" -tokio = { version = "1.36", features = ["macros", "rt", "sync", "rt-multi-thread"] } +tokio = { version = "1.36", features = [ + "macros", + "rt", + "sync", + "rt-multi-thread", +] } async-trait = "0.1.81" rdkafka = "0.36.2" log = "^0.4" diff --git a/examples/examples/csv_streaming.rs b/examples/examples/csv_streaming.rs index aee299f..b51930f 100644 --- a/examples/examples/csv_streaming.rs +++ b/examples/examples/csv_streaming.rs @@ -2,7 +2,8 @@ use datafusion::common::test_util::datafusion_test_data; use datafusion::dataframe::DataFrameWriteOptions; use datafusion::datasource::MemTable; use datafusion::error::Result; -use datafusion::logical_expr::{col, max, min}; +use datafusion::functions_aggregate::expr_fn::{max, min}; +use datafusion::logical_expr::col; use datafusion::prelude::*; /// This example demonstrates executing a simple query against an Arrow data source (CSV) and diff --git a/examples/examples/kafka_rideshare.rs b/examples/examples/kafka_rideshare.rs index 47b5bed..423b799 100644 --- a/examples/examples/kafka_rideshare.rs +++ b/examples/examples/kafka_rideshare.rs @@ -1,7 +1,8 @@ use datafusion::error::Result; use datafusion::functions::core::expr_ext::FieldAccessor; use datafusion::functions_aggregate::count::count; -use datafusion::logical_expr::{col, max, min}; +use datafusion::functions_aggregate::expr_fn::{max, min}; +use datafusion::logical_expr::col; use df_streams_core::context::Context; use df_streams_core::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; diff --git a/examples/examples/simple_aggregation.rs b/examples/examples/simple_aggregation.rs index 556192e..4097ea5 100644 --- a/examples/examples/simple_aggregation.rs +++ 
b/examples/examples/simple_aggregation.rs @@ -3,8 +3,8 @@ use std::time::Duration; use datafusion::error::Result; use datafusion::functions_aggregate::average::avg; use datafusion::functions_aggregate::count::count; -use datafusion::logical_expr::lit; -use datafusion::logical_expr::{col, max, min}; +use datafusion::functions_aggregate::expr_fn::{max, min}; +use datafusion::logical_expr::{col, lit}; use df_streams_core::context::Context; use df_streams_core::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; From e20fc66c5f859d3d0db32c5bb69b34b26fc018bf Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 13 Aug 2024 22:29:02 -0700 Subject: [PATCH 3/8] disable coalesce_batches --- crates/core/src/context.rs | 18 +++++++++++++----- crates/core/src/datastream.rs | 18 +++++++++++++++++- examples/examples/emit_measurements.rs | 2 +- examples/examples/simple_aggregation.rs | 3 +-- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index a261ea7..d48df98 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -16,15 +16,23 @@ use crate::utils::get_default_optimizer_rules; #[derive(Clone)] pub struct Context { - session_conext: Arc>, + pub session_conext: Arc>, } impl Context { pub fn new() -> Result<Self> { - let config = SessionConfig::new().set( - "datafusion.execution.batch_size", - datafusion::common::ScalarValue::UInt64(Some(32)), - ); + let config = SessionConfig::new() + .set( + "datafusion.execution.batch_size", + datafusion::common::ScalarValue::UInt64(Some(32)), + ) + // coalesce_batches slows down the pipeline and increases latency as it tries to + // concatenate small batches together, so we disable it. + .set( + "datafusion.execution.coalesce_batches", + datafusion::common::ScalarValue::Boolean(Some(false)), + ); + let runtime = Arc::new(RuntimeEnv::default()); let state = SessionStateBuilder::new()
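For reference, the same two settings can be written with `SessionConfig`'s typed setters in stock DataFusion; a self-contained sketch, assuming DataFusion 41's `SessionConfig` API (`with_batch_size`/`with_coalesce_batches`):

```rust
use datafusion::prelude::{SessionConfig, SessionContext};

fn main() {
    // Small batches keep per-event latency low; disabling coalescing stops
    // DataFusion from buffering them back together into larger batches.
    let config = SessionConfig::new()
        .with_batch_size(32)
        .with_coalesce_batches(false);

    let ctx = SessionContext::new_with_config(config);
    let opts = ctx.copied_config();
    println!(
        "batch_size={} coalesce_batches={}",
        opts.batch_size(),
        opts.options().execution.coalesce_batches
    );
}
```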
diff --git a/crates/core/src/datastream.rs b/crates/core/src/datastream.rs index 5e2159b..af877ea 100644 --- a/crates/core/src/datastream.rs +++ b/crates/core/src/datastream.rs @@ -9,6 +9,7 @@ use datafusion::execution::SendableRecordBatchStream; use datafusion::logical_expr::{ logical_plan::LogicalPlanBuilder, utils::find_window_exprs, Expr, JoinType, }; +use datafusion::physical_plan::display::DisplayableExecutionPlan; use crate::context::Context; use crate::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; @@ -163,13 +164,28 @@ impl DataStream { Ok(self) } - /// Prints the underlying logical_plan. + /// Prints the underlying logical plan. /// Useful for debugging chained method calls. pub fn print_plan(self) -> Result<Self> { println!("{}", self.df.logical_plan().display_indent()); Ok(self) } + /// Prints the underlying physical plan. + /// Useful for debugging and development. + pub async fn print_physical_plan(self) -> Result<Self> { + let (session_state, plan) = self.df.as_ref().clone().into_parts(); + let physical_plan = self.df.as_ref().clone().create_physical_plan().await?; + let displayable_plan = DisplayableExecutionPlan::new(physical_plan.as_ref()); + + println!("{}", displayable_plan.indent(true)); + + Ok(Self { + df: Arc::new(DataFrame::new(session_state, plan)), + context: self.context.clone(), + }) + } + /// Execute the stream and write the results to a given Kafka topic pub async fn sink_kafka( self, diff --git a/examples/examples/emit_measurements.rs index 65a7198..736b81e 100644 --- a/examples/examples/emit_measurements.rs +++ b/examples/examples/emit_measurements.rs @@ -60,7 +60,7 @@ async fn main() -> Result<()> { .await .unwrap(); - tokio::time::sleep(tokio::time::Duration::from_millis(1)).await; + tokio::time::sleep(tokio::time::Duration::from_micros(1)).await; } } diff --git a/examples/examples/simple_aggregation.rs index 4097ea5..4682819 100644 --- a/examples/examples/simple_aggregation.rs +++ b/examples/examples/simple_aggregation.rs @@ -41,7 +41,6 @@ async fn main() -> Result<()> { let ds = ctx .from_topic(source_topic) .await? - // .filter(col("reading").gt(lit(70)))? .window( vec![], vec![ @@ -53,7 +52,7 @@ async fn main() -> Result<()> { Duration::from_millis(1_000), None, )? - .filter(col("max").lt(lit(113)))?; + .filter(col("max").gt(lit(113)))?; ds.clone().print_stream().await?; From 0e282d6c4152f5c2ea515ceada3562117e369338 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Wed, 14 Aug 2024 16:59:32 -0700 Subject: [PATCH 4/8] change package version and update name --- Cargo.lock | 10 +++++----- Cargo.toml | 6 +++--- crates/core/Cargo.toml | 2 +- examples/Cargo.toml | 4 ++-- examples/examples/emit_measurements.rs | 2 +- examples/examples/kafka_rideshare.rs | 6 +++--- examples/examples/simple_aggregation.rs | 8 ++++---- examples/examples/stream_join.rs | 8 ++++---- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c8668f..c7785aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1131,8 +1131,8 @@ dependencies = [ ] [[package]] -name = "df-streams-core" -version = "0.1.0" +name = "denormalized" +version = "0.0.1" dependencies = [ "ahash", "apache-avro", "arrow", @@ -1161,13 +1161,13 @@ dependencies = [ [[package]] -name = "df-streams-examples" -version = "0.1.0" +name = "denormalized-examples" +version = "0.0.1" dependencies = [ "arrow", "arrow-schema", "datafusion", - "df-streams-core", + "denormalized", "env_logger", "futures", "log", diff --git a/Cargo.toml b/Cargo.toml index 8df5af0..6d3cc8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,12 +11,12 @@ edition = "2021" homepage = "https://github.com/probably-nothing-labs/denormalized" license = "Apache-2.0" readme = "README.md" -repository = "https://github.com/probably-nothing-labs/denormalized" -version = "0.1.0" +repository = "https://github.com/probably-nothing-labs/denormalized.git" +version = "0.0.1" description = "Embeddable stream processing engine" [workspace.dependencies] -df-streams-core = { path = "crates/core" } +denormalized = { path = "crates/core" } datafusion = "41.0.0" arrow = { version = "52.0.0", features = ["prettyprint"] } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 7d68c0d..1d429e2 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = 
"df-streams-core" +name = "denormalized" version = { workspace = true } edition = { workspace = true } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 551a0c2..dfc991b 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "df-streams-examples" +name = "denormalized-examples" version = { workspace = true } edition = { workspace = true } publish = false @@ -7,7 +7,7 @@ publish = false [dependencies] datafusion = { workspace = true } -df-streams-core = { workspace = true } +denormalized = { workspace = true } arrow = { workspace = true } arrow-schema = { workspace = true } diff --git a/examples/examples/emit_measurements.rs b/examples/examples/emit_measurements.rs index 736b81e..a0925b5 100644 --- a/examples/examples/emit_measurements.rs +++ b/examples/examples/emit_measurements.rs @@ -7,7 +7,7 @@ use rdkafka::config::ClientConfig; use rdkafka::producer::FutureRecord; use rdkafka::util::Timeout; -use df_streams_examples::Measurment; +use denormalized_examples::Measurment; /// This script emits test data to a kafka cluster /// diff --git a/examples/examples/kafka_rideshare.rs b/examples/examples/kafka_rideshare.rs index 423b799..ecf06d9 100644 --- a/examples/examples/kafka_rideshare.rs +++ b/examples/examples/kafka_rideshare.rs @@ -4,9 +4,9 @@ use datafusion::functions_aggregate::count::count; use datafusion::functions_aggregate::expr_fn::{max, min}; use datafusion::logical_expr::col; -use df_streams_core::context::Context; -use df_streams_core::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; -use df_streams_core::physical_plan::utils::time::TimestampUnit; +use denormalized::context::Context; +use denormalized::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; +use denormalized::physical_plan::utils::time::TimestampUnit; use std::time::Duration; use tracing_subscriber::{fmt::format::FmtSpan, FmtSubscriber}; diff --git a/examples/examples/simple_aggregation.rs b/examples/examples/simple_aggregation.rs index 4682819..c3cb337 100644 --- a/examples/examples/simple_aggregation.rs +++ b/examples/examples/simple_aggregation.rs @@ -6,11 +6,11 @@ use datafusion::functions_aggregate::count::count; use datafusion::functions_aggregate::expr_fn::{max, min}; use datafusion::logical_expr::{col, lit}; -use df_streams_core::context::Context; -use df_streams_core::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; -use df_streams_core::physical_plan::utils::time::TimestampUnit; +use denormalized::context::Context; +use denormalized::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; +use denormalized::physical_plan::utils::time::TimestampUnit; -use df_streams_examples::get_sample_json; +use denormalized_examples::get_sample_json; /// Demonstrates a simple stream aggregate job on data generated via the `emit_measurements.rs` /// example script. 
diff --git a/examples/examples/stream_join.rs b/examples/examples/stream_join.rs index a9184fa..9b84f70 100644 --- a/examples/examples/stream_join.rs +++ b/examples/examples/stream_join.rs @@ -5,11 +5,11 @@ use datafusion::error::Result; use datafusion::functions_aggregate::average::avg; use datafusion::logical_expr::col; -use df_streams_core::context::Context; -use df_streams_core::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; -use df_streams_core::physical_plan::utils::time::TimestampUnit; +use denormalized::context::Context; +use denormalized::datasource::kafka::{ConnectionOpts, KafkaTopicBuilder}; +use denormalized::physical_plan::utils::time::TimestampUnit; -use df_streams_examples::get_sample_json; +use denormalized_examples::get_sample_json; /// Demonstrates a simple stream join on data generated via the `emit_measurements.rs` /// example script. From b10ac784fc2fe30094fe29495323874ffe94b6e6 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Thu, 15 Aug 2024 04:15:46 -0700 Subject: [PATCH 5/8] add aggregate to simple aggregation --- examples/examples/simple_aggregation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/examples/simple_aggregation.rs b/examples/examples/simple_aggregation.rs index c3cb337..fdadedd 100644 --- a/examples/examples/simple_aggregation.rs +++ b/examples/examples/simple_aggregation.rs @@ -42,7 +42,7 @@ async fn main() -> Result<()> { .from_topic(source_topic) .await? .window( - vec![], + vec![col("sensor_name")], vec![ count(col("reading")).alias("count"), min(col("reading")).alias("min"), From 81d141d2e341775995626fe565d361532dd7a046 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Thu, 15 Aug 2024 04:17:32 -0700 Subject: [PATCH 6/8] add aggregate to simple aggregation --- crates/core/src/datastream.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/core/src/datastream.rs b/crates/core/src/datastream.rs index af877ea..7ec6b59 100644 --- a/crates/core/src/datastream.rs +++ b/crates/core/src/datastream.rs @@ -132,14 +132,11 @@ impl DataStream { loop { match stream.next().await.transpose() { Ok(Some(batch)) => { - for i in 0..batch.num_rows() { - let row = batch.slice(i, 1); - println!( - "{}", - datafusion::common::arrow::util::pretty::pretty_format_batches(&[row]) - .unwrap() - ); - } + println!( + "{}", + datafusion::common::arrow::util::pretty::pretty_format_batches(&[batch]) + .unwrap() + ); } Ok(None) => { log::warn!("No RecordBatch in stream"); From a9cc231a03cada89201b3dba802768dcee72112d Mon Sep 17 00:00:00 2001 From: Matt Green Date: Thu, 15 Aug 2024 04:51:18 -0700 Subject: [PATCH 7/8] update datafusion fork; remove duplicate modules --- Cargo.lock | 30 +-- .../continuous/group_values/bytes.rs | 126 --------- .../continuous/group_values/bytes_view.rs | 129 --------- .../continuous/group_values/mod.rs | 97 ------- .../continuous/group_values/primitive.rs | 224 --------------- .../continuous/group_values/row.rs | 254 ------------------ .../continuous/grouped_window_agg_stream.rs | 21 +- .../core/src/physical_plan/continuous/mod.rs | 2 - .../physical_plan/continuous/order/full.rs | 144 ---------- .../src/physical_plan/continuous/order/mod.rs | 124 --------- .../physical_plan/continuous/order/partial.rs | 252 ----------------- .../continuous/streaming_window.rs | 11 +- 12 files changed, 27 insertions(+), 1387 deletions(-) delete mode 100644 crates/core/src/physical_plan/continuous/group_values/bytes.rs delete mode 100644 
crates/core/src/physical_plan/continuous/group_values/bytes_view.rs delete mode 100644 crates/core/src/physical_plan/continuous/group_values/mod.rs delete mode 100644 crates/core/src/physical_plan/continuous/group_values/primitive.rs delete mode 100644 crates/core/src/physical_plan/continuous/group_values/row.rs delete mode 100644 crates/core/src/physical_plan/continuous/order/full.rs delete mode 100644 crates/core/src/physical_plan/continuous/order/mod.rs delete mode 100644 crates/core/src/physical_plan/continuous/order/partial.rs diff --git a/Cargo.lock b/Cargo.lock index c7785aa..6d37b34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -797,7 +797,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -852,7 +852,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow-schema", "async-trait", @@ -865,7 +865,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -891,7 +891,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "tokio", ] @@ -899,7 +899,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "chrono", @@ -919,7 +919,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -937,7 +937,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-buffer", @@ -963,7 +963,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ 
-980,7 +980,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-array", @@ -1001,7 +1001,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "async-trait", @@ -1020,7 +1020,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1049,7 +1049,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1062,7 +1062,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "datafusion-common", "datafusion-execution", @@ -1073,7 +1073,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#f5ef67c91bf1f17166fa63a5005acd628501e5b0" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-array", diff --git a/crates/core/src/physical_plan/continuous/group_values/bytes.rs b/crates/core/src/physical_plan/continuous/group_values/bytes.rs deleted file mode 100644 index f04e516..0000000 --- a/crates/core/src/physical_plan/continuous/group_values/bytes.rs +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow_array::{Array, ArrayRef, OffsetSizeTrait, RecordBatch}; -use datafusion::common::Result; -use datafusion::{ - logical_expr::EmitTo, physical_expr::binary_map::OutputType, - physical_expr_common::binary_map::ArrowBytesMap, -}; - -use super::GroupValues; -/// A [`GroupValues`] storing single column of Utf8/LargeUtf8/Binary/LargeBinary values -/// -/// This specialization is significantly faster than using the more general -/// purpose `Row`s format -pub struct GroupValuesByes { - /// Map string/binary values to group index - map: ArrowBytesMap, - /// The total number of groups so far (used to assign group_index) - num_groups: usize, -} - -impl GroupValuesByes { - pub fn new(output_type: OutputType) -> Self { - Self { - map: ArrowBytesMap::new(output_type), - num_groups: 0, - } - } -} - -impl GroupValues for GroupValuesByes { - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { - assert_eq!(cols.len(), 1); - - // look up / add entries in the table - let arr = &cols[0]; - - groups.clear(); - self.map.insert_if_new( - arr, - // called for each new group - |_value| { - // assign new group index on each insert - let group_idx = self.num_groups; - self.num_groups += 1; - group_idx - }, - // called for each group - |group_idx| { - groups.push(group_idx); - }, - ); - - // ensure we assigned a group to for each row - assert_eq!(groups.len(), arr.len()); - Ok(()) - } - - fn size(&self) -> usize { - self.map.size() + std::mem::size_of::() - } - - fn is_empty(&self) -> bool { - self.num_groups == 0 - } - - fn len(&self) -> usize { - self.num_groups - } - - fn emit(&mut self, emit_to: EmitTo) -> Result> { - // Reset the map to default, and convert it into a single array - let map_contents = self.map.take().into_state(); - - let group_values = match emit_to { - EmitTo::All => { - self.num_groups -= map_contents.len(); - map_contents - } - EmitTo::First(n) if n == self.len() => { - self.num_groups -= map_contents.len(); - map_contents - } - EmitTo::First(n) => { - // if we only wanted to take the first n, insert the rest back - // into the map we could potentially avoid this reallocation, at - // the expense of much more complex code. 
- // see https://github.com/apache/datafusion/issues/9195 - let emit_group_values = map_contents.slice(0, n); - let remaining_group_values = map_contents.slice(n, map_contents.len() - n); - - self.num_groups = 0; - let mut group_indexes = vec![]; - self.intern(&[remaining_group_values], &mut group_indexes)?; - - // Verify that the group indexes were assigned in the correct order - assert_eq!(0, group_indexes[0]); - - emit_group_values - } - }; - - Ok(vec![group_values]) - } - - fn clear_shrink(&mut self, _batch: &RecordBatch) { - // in theory we could potentially avoid this reallocation and clear the - // contents of the maps, but for now we just reset the map from the beginning - self.map.take(); - } -} diff --git a/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs b/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs deleted file mode 100644 index 6b57a3d..0000000 --- a/crates/core/src/physical_plan/continuous/group_values/bytes_view.rs +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use arrow_array::{Array, ArrayRef, RecordBatch}; -use datafusion::{ - common::Result, logical_expr::EmitTo, physical_expr::binary_map::OutputType, - physical_expr_common::binary_view_map::ArrowBytesViewMap, -}; - -use super::GroupValues; -//use datafusion_expr::EmitTo; -//use datafusion_physical_expr::binary_map::OutputType; -//use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewMap; - -/// A [`GroupValues`] storing single column of Utf8View/BinaryView values -/// -/// This specialization is significantly faster than using the more general -/// purpose `Row`s format -pub struct GroupValuesBytesView { - /// Map string/binary values to group index - map: ArrowBytesViewMap, - /// The total number of groups so far (used to assign group_index) - num_groups: usize, -} - -impl GroupValuesBytesView { - pub fn new(output_type: OutputType) -> Self { - Self { - map: ArrowBytesViewMap::new(output_type), - num_groups: 0, - } - } -} - -impl GroupValues for GroupValuesBytesView { - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { - assert_eq!(cols.len(), 1); - - // look up / add entries in the table - let arr = &cols[0]; - - groups.clear(); - self.map.insert_if_new( - arr, - // called for each new group - |_value| { - // assign new group index on each insert - let group_idx = self.num_groups; - self.num_groups += 1; - group_idx - }, - // called for each group - |group_idx| { - groups.push(group_idx); - }, - ); - - // ensure we assigned a group to for each row - assert_eq!(groups.len(), arr.len()); - Ok(()) - } - - fn size(&self) -> usize { - self.map.size() + std::mem::size_of::() - } - - fn is_empty(&self) -> bool { - self.num_groups == 0 - } - - fn len(&self) -> usize { - self.num_groups - } - - fn emit(&mut self, emit_to: EmitTo) -> Result> { - // Reset the map to default, and convert it into a single array - let map_contents = self.map.take().into_state(); - - let group_values = match emit_to { - EmitTo::All => { - self.num_groups -= map_contents.len(); - map_contents - } - EmitTo::First(n) if n == self.len() => { - self.num_groups -= map_contents.len(); - map_contents - } - EmitTo::First(n) => { - // if we only wanted to take the first n, insert the rest back - // into the map we could potentially avoid this reallocation, at - // the expense of much more complex code. - // see https://github.com/apache/datafusion/issues/9195 - let emit_group_values = map_contents.slice(0, n); - let remaining_group_values = map_contents.slice(n, map_contents.len() - n); - - self.num_groups = 0; - let mut group_indexes = vec![]; - self.intern(&[remaining_group_values], &mut group_indexes)?; - - // Verify that the group indexes were assigned in the correct order - assert_eq!(0, group_indexes[0]); - - emit_group_values - } - }; - - Ok(vec![group_values]) - } - - fn clear_shrink(&mut self, _batch: &RecordBatch) { - // in theory we could potentially avoid this reallocation and clear the - // contents of the maps, but for now we just reset the map from the beginning - self.map.take(); - } -} diff --git a/crates/core/src/physical_plan/continuous/group_values/mod.rs b/crates/core/src/physical_plan/continuous/group_values/mod.rs deleted file mode 100644 index 111bf5f..0000000 --- a/crates/core/src/physical_plan/continuous/group_values/mod.rs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::record_batch::RecordBatch; -use arrow_array::{downcast_primitive, ArrayRef}; -use arrow_schema::{DataType, SchemaRef}; -use bytes_view::GroupValuesBytesView; -use datafusion::common::Result; - -pub(crate) mod primitive; -use datafusion::logical_expr::EmitTo; -use datafusion::physical_expr::binary_map::OutputType; -use primitive::GroupValuesPrimitive; - -mod row; -use row::GroupValuesRows; - -mod bytes; -mod bytes_view; -use bytes::GroupValuesByes; -//use datafusion_physical_expr::binary_map::OutputType; - -/// An interning store for group keys -pub trait GroupValues: Send { - /// Calculates the `groups` for each input row of `cols` - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()>; - - /// Returns the number of bytes used by this [`GroupValues`] - fn size(&self) -> usize; - - /// Returns true if this [`GroupValues`] is empty - fn is_empty(&self) -> bool; - - /// The number of values stored in this [`GroupValues`] - fn len(&self) -> usize; - - /// Emits the group values - fn emit(&mut self, emit_to: EmitTo) -> Result>; - - /// Clear the contents and shrink the capacity to the size of the batch (free up memory usage) - fn clear_shrink(&mut self, batch: &RecordBatch); -} - -pub fn new_group_values(schema: SchemaRef) -> Result> { - if schema.fields.len() == 1 { - let d = schema.fields[0].data_type(); - - macro_rules! downcast_helper { - ($t:ty, $d:ident) => { - return Ok(Box::new(GroupValuesPrimitive::<$t>::new($d.clone()))) - }; - } - - downcast_primitive! { - d => (downcast_helper, d), - _ => {} - } - - match d { - DataType::Utf8 => { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); - } - DataType::LargeUtf8 => { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Utf8))); - } - DataType::Utf8View => { - return Ok(Box::new(GroupValuesBytesView::new(OutputType::Utf8View))); - } - DataType::Binary => { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); - } - DataType::LargeBinary => { - return Ok(Box::new(GroupValuesByes::::new(OutputType::Binary))); - } - DataType::BinaryView => { - return Ok(Box::new(GroupValuesBytesView::new(OutputType::BinaryView))); - } - _ => {} - } - } - - Ok(Box::new(GroupValuesRows::try_new(schema)?)) -} diff --git a/crates/core/src/physical_plan/continuous/group_values/primitive.rs b/crates/core/src/physical_plan/continuous/group_values/primitive.rs deleted file mode 100644 index f9dc68f..0000000 --- a/crates/core/src/physical_plan/continuous/group_values/primitive.rs +++ /dev/null @@ -1,224 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use ahash::RandomState; -use arrow::array::BooleanBufferBuilder; -use arrow::buffer::NullBuffer; -use arrow::datatypes::{i256, IntervalDayTime, IntervalMonthDayNano}; -use arrow::record_batch::RecordBatch; -use arrow_array::cast::AsArray; -use arrow_array::{ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, PrimitiveArray}; -//use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; -use arrow_schema::DataType; -use datafusion::common::utils::proxy::VecAllocExt; -use datafusion::common::Result; -use datafusion::logical_expr::EmitTo; -//use datafusion_common::Result; -//use datafusion_execution::memory_pool::proxy::VecAllocExt; -//use datafusion_expr::EmitTo; -use half::f16; -use hashbrown::raw::RawTable; -use std::sync::Arc; - -use super::GroupValues; - -/// A trait to allow hashing of floating point numbers -pub(crate) trait HashValue { - fn hash(&self, state: &RandomState) -> u64; -} - -macro_rules! hash_integer { - ($($t:ty),+) => { - $(impl HashValue for $t { - #[cfg(not(feature = "force_hash_collisions"))] - fn hash(&self, state: &RandomState) -> u64 { - state.hash_one(self) - } - - #[cfg(feature = "force_hash_collisions")] - fn hash(&self, _state: &RandomState) -> u64 { - 0 - } - })+ - }; -} -hash_integer!(i8, i16, i32, i64, i128, i256); -hash_integer!(u8, u16, u32, u64); -hash_integer!(IntervalDayTime, IntervalMonthDayNano); - -macro_rules! 
hash_float { - ($($t:ty),+) => { - $(impl HashValue for $t { - #[cfg(not(feature = "force_hash_collisions"))] - fn hash(&self, state: &RandomState) -> u64 { - state.hash_one(self.to_bits()) - } - - #[cfg(feature = "force_hash_collisions")] - fn hash(&self, _state: &RandomState) -> u64 { - 0 - } - })+ - }; -} - -hash_float!(f16, f32, f64); - -/// A [`GroupValues`] storing a single column of primitive values -/// -/// This specialization is significantly faster than using the more general -/// purpose `Row`s format -pub struct GroupValuesPrimitive { - /// The data type of the output array - data_type: DataType, - /// Stores the group index based on the hash of its value - /// - /// We don't store the hashes as hashing fixed width primitives - /// is fast enough for this not to benefit performance - map: RawTable, - /// The group index of the null value if any - null_group: Option, - /// The values for each group index - values: Vec, - /// The random state used to generate hashes - random_state: RandomState, -} - -impl GroupValuesPrimitive { - pub fn new(data_type: DataType) -> Self { - assert!(PrimitiveArray::::is_compatible(&data_type)); - Self { - data_type, - map: RawTable::with_capacity(128), - values: Vec::with_capacity(128), - null_group: None, - random_state: Default::default(), - } - } -} - -impl GroupValues for GroupValuesPrimitive -where - T::Native: HashValue, -{ - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { - assert_eq!(cols.len(), 1); - groups.clear(); - - for v in cols[0].as_primitive::() { - let group_id = match v { - None => *self.null_group.get_or_insert_with(|| { - let group_id = self.values.len(); - self.values.push(Default::default()); - group_id - }), - Some(key) => { - let state = &self.random_state; - let hash = key.hash(state); - let insert = self.map.find_or_find_insert_slot( - hash, - |g| unsafe { self.values.get_unchecked(*g).is_eq(key) }, - |g| unsafe { self.values.get_unchecked(*g).hash(state) }, - ); - - // SAFETY: No mutation occurred since find_or_find_insert_slot - unsafe { - match insert { - Ok(v) => *v.as_ref(), - Err(slot) => { - let g = self.values.len(); - self.map.insert_in_slot(hash, slot, g); - self.values.push(key); - g - } - } - } - } - }; - groups.push(group_id) - } - Ok(()) - } - - fn size(&self) -> usize { - self.map.capacity() * std::mem::size_of::() + self.values.allocated_size() - } - - fn is_empty(&self) -> bool { - self.values.is_empty() - } - - fn len(&self) -> usize { - self.values.len() - } - - fn emit(&mut self, emit_to: EmitTo) -> Result> { - fn build_primitive( - values: Vec, - null_idx: Option, - ) -> PrimitiveArray { - let nulls = null_idx.map(|null_idx| { - let mut buffer = BooleanBufferBuilder::new(values.len()); - buffer.append_n(values.len(), true); - buffer.set_bit(null_idx, false); - unsafe { NullBuffer::new_unchecked(buffer.finish(), 1) } - }); - PrimitiveArray::::new(values.into(), nulls) - } - - let array: PrimitiveArray = match emit_to { - EmitTo::All => { - self.map.clear(); - build_primitive(std::mem::take(&mut self.values), self.null_group.take()) - } - EmitTo::First(n) => { - // SAFETY: self.map outlives iterator and is not modified concurrently - unsafe { - for bucket in self.map.iter() { - // Decrement group index by n - match bucket.as_ref().checked_sub(n) { - // Group index was >= n, shift value down - Some(sub) => *bucket.as_mut() = sub, - // Group index was < n, so remove from table - None => self.map.erase(bucket), - } - } - } - let null_group = match &mut self.null_group { - Some(v) 
if *v >= n => { - *v -= n; - None - } - Some(_) => self.null_group.take(), - None => None, - }; - let mut split = self.values.split_off(n); - std::mem::swap(&mut self.values, &mut split); - build_primitive(split, null_group) - } - }; - Ok(vec![Arc::new(array.with_data_type(self.data_type.clone()))]) - } - - fn clear_shrink(&mut self, batch: &RecordBatch) { - let count = batch.num_rows(); - self.values.clear(); - self.values.shrink_to(count); - self.map.clear(); - self.map.shrink_to(count, |_| 0); // hasher does not matter since the map is cleared - } -} diff --git a/crates/core/src/physical_plan/continuous/group_values/row.rs b/crates/core/src/physical_plan/continuous/group_values/row.rs deleted file mode 100644 index 9a3167c..0000000 --- a/crates/core/src/physical_plan/continuous/group_values/row.rs +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use ahash::RandomState; -use arrow::compute::cast; -use arrow::record_batch::RecordBatch; -use arrow::row::{RowConverter, Rows, SortField}; -use arrow_array::{Array, ArrayRef}; -use arrow_schema::{DataType, SchemaRef}; -use datafusion::common::Result; -use datafusion::common::{ - hash_utils::create_hashes, - utils::proxy::{RawTableAllocExt, VecAllocExt}, -}; -use datafusion::error::DataFusionError; -use datafusion::logical_expr::EmitTo; -use hashbrown::raw::RawTable; - -use super::GroupValues; - -/// A [`GroupValues`] making use of [`Rows`] -pub struct GroupValuesRows { - /// The output schema - schema: SchemaRef, - - /// Converter for the group values - row_converter: RowConverter, - - /// Logically maps group values to a group_index in - /// [`Self::group_values`] and in each accumulator - /// - /// Uses the raw API of hashbrown to avoid actually storing the - /// keys (group values) in the table - /// - /// keys: u64 hashes of the GroupValue - /// values: (hash, group_index) - map: RawTable<(u64, usize)>, - - /// The size of `map` in bytes - map_size: usize, - - /// The actual group by values, stored in arrow [`Row`] format. - /// `group_values[i]` holds the group value for group_index `i`. - /// - /// The row format is used to compare group keys quickly and store - /// them efficiently in memory. Quick comparison is especially - /// important for multi-column group keys. 
- /// - /// [`Row`]: arrow::row::Row - group_values: Option<Rows>, - - /// reused buffer to store hashes - hashes_buffer: Vec<u64>, - - /// reused buffer to store rows - rows_buffer: Rows, - - /// Random state for creating hashes - random_state: RandomState, -} - -impl GroupValuesRows { - pub fn try_new(schema: SchemaRef) -> Result<Self> { - let row_converter = RowConverter::new( - schema - .fields() - .iter() - .map(|f| SortField::new(f.data_type().clone())) - .collect(), - )?; - - let map = RawTable::with_capacity(0); - - let starting_rows_capacity = 1000; - let starting_data_capacity = 64 * starting_rows_capacity; - let rows_buffer = row_converter.empty_rows(starting_rows_capacity, starting_data_capacity); - Ok(Self { - schema, - row_converter, - map, - map_size: 0, - group_values: None, - hashes_buffer: Default::default(), - rows_buffer, - random_state: Default::default(), - }) - } -} - -impl GroupValues for GroupValuesRows { - fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> { - // Convert the group keys into the row format - let group_rows = &mut self.rows_buffer; - group_rows.clear(); - self.row_converter.append(group_rows, cols)?; - let n_rows = group_rows.num_rows(); - - let mut group_values = match self.group_values.take() { - Some(group_values) => group_values, - None => self.row_converter.empty_rows(0, 0), - }; - - // tracks to which group each of the input rows belongs - groups.clear(); - - // 1.1 Calculate the group keys for the group values - let batch_hashes = &mut self.hashes_buffer; - batch_hashes.clear(); - batch_hashes.resize(n_rows, 0); - create_hashes(cols, &self.random_state, batch_hashes)?; - - for (row, &target_hash) in batch_hashes.iter().enumerate() { - let entry = self.map.get_mut(target_hash, |(exist_hash, group_idx)| { - // Somewhat surprisingly, this closure can be called even if the - // hash doesn't match, so check the hash first with an integer - // comparison, to avoid the more expensive comparison with the - // group value.
https://github.com/apache/datafusion/pull/11718 - target_hash == *exist_hash - // verify that the group that we are inserting with hash is - // actually the same key value as the group in - // existing_idx (aka group_values @ row) - && group_rows.row(row) == group_values.row(*group_idx) - }); - - let group_idx = match entry { - // Existing group_index for this group value - Some((_hash, group_idx)) => *group_idx, - // 1.2 Need to create new entry for the group - None => { - // Add new entry to aggr_state and save newly created index - let group_idx = group_values.num_rows(); - group_values.push(group_rows.row(row)); - - // for hasher function, use precomputed hash value - self.map.insert_accounted( - (target_hash, group_idx), - |(hash, _group_index)| *hash, - &mut self.map_size, - ); - group_idx - } - }; - groups.push(group_idx); - } - - self.group_values = Some(group_values); - - Ok(()) - } - - fn size(&self) -> usize { - let group_values_size = self.group_values.as_ref().map(|v| v.size()).unwrap_or(0); - self.row_converter.size() - + group_values_size - + self.map_size - + self.rows_buffer.size() - + self.hashes_buffer.allocated_size() - } - - fn is_empty(&self) -> bool { - self.len() == 0 - } - - fn len(&self) -> usize { - self.group_values - .as_ref() - .map(|group_values| group_values.num_rows()) - .unwrap_or(0) - } - - fn emit(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> { - let mut group_values = self - .group_values - .take() - .expect("Can not emit from empty rows"); - - let mut output = match emit_to { - EmitTo::All => { - let output = self.row_converter.convert_rows(&group_values)?; - group_values.clear(); - output - } - EmitTo::First(n) => { - let groups_rows = group_values.iter().take(n); - let output = self.row_converter.convert_rows(groups_rows)?; - // Clear out first n group keys by copying them to a new Rows. - // TODO file some ticket in arrow-rs to make this more efficient?
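(Reviewer note before the rest of this `EmitTo::First(n)` arm: after the first n groups are emitted, every surviving `(hash, group_index)` entry must be shifted down by n, and entries that pointed into the emitted prefix must be dropped, or the map would reference rows that no longer exist. A self-contained sketch of just that remapping, using a plain Vec where the real code walks hashbrown's RawTable buckets; the names here are illustrative.)

```rust
/// Shift group indices down by `n` after emitting the first `n` groups,
/// dropping entries that referred to the emitted groups.
fn remap_after_emit(entries: &mut Vec<(u64, usize)>, n: usize) {
    entries.retain_mut(|(_hash, group_idx)| match group_idx.checked_sub(n) {
        // Group index was >= n: the group survives, shifted down.
        Some(shifted) => {
            *group_idx = shifted;
            true
        }
        // Group index was < n: the group was emitted, so remove it.
        None => false,
    });
}

fn main() {
    let mut entries = vec![(0xaa, 0), (0xbb, 1), (0xcc, 2), (0xdd, 3)];
    remap_after_emit(&mut entries, 2);
    assert_eq!(entries, vec![(0xcc, 0), (0xdd, 1)]);
}
```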
- let mut new_group_values = self.row_converter.empty_rows(0, 0); - for row in group_values.iter().skip(n) { - new_group_values.push(row); - } - std::mem::swap(&mut new_group_values, &mut group_values); - - // SAFETY: self.map outlives iterator and is not modified concurrently - unsafe { - for bucket in self.map.iter() { - // Decrement group index by n - match bucket.as_ref().1.checked_sub(n) { - // Group index was >= n, shift value down - Some(sub) => bucket.as_mut().1 = sub, - // Group index was < n, so remove from table - None => self.map.erase(bucket), - } - } - } - output - } - }; - - // TODO: Materialize dictionaries in group keys (#7647) - for (field, array) in self.schema.fields.iter().zip(&mut output) { - let expected = field.data_type(); - if let DataType::Dictionary(_, v) = expected { - let actual = array.data_type(); - if v.as_ref() != actual { - return Err(DataFusionError::Internal(format!( - "Converted group rows expected dictionary of {v} got {actual}" - ))); - } - *array = cast(array.as_ref(), expected)?; - } - } - - self.group_values = Some(group_values); - Ok(output) - } - - fn clear_shrink(&mut self, batch: &RecordBatch) { - let count = batch.num_rows(); - self.group_values = self.group_values.take().map(|mut rows| { - rows.clear(); - rows - }); - self.map.clear(); - self.map.shrink_to(count, |_| 0); // hasher does not matter since the map is cleared - self.map_size = self.map.capacity() * std::mem::size_of::<(u64, usize)>(); - self.hashes_buffer.clear(); - self.hashes_buffer.shrink_to(count); - } -} diff --git a/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs index c87a3e3..e16eaf7 100644 --- a/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs +++ b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs @@ -17,39 +17,36 @@ use arrow_ord::cmp; use arrow_schema::{Schema, SchemaRef}; use datafusion::{ common::{utils::proxy::VecAllocExt, DataFusionError, Result}, - execution::{ - memory_pool::{MemoryConsumer, MemoryReservation}, - runtime_env::RuntimeEnv, - }, + execution::memory_pool::{MemoryConsumer, MemoryReservation}, logical_expr::EmitTo, physical_plan::{aggregates::PhysicalGroupBy, PhysicalExpr}, }; use datafusion::{ execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}, physical_plan::{ - aggregates::{aggregate_expressions, AggregateMode}, + aggregates::{ + aggregate_expressions, + group_values::{new_group_values, GroupValues}, + order::GroupOrdering, + AggregateMode, + }, metrics::BaselineMetrics, AggregateExpr, }, }; use futures::{Stream, StreamExt}; -use crate::physical_plan::utils::{ - accumulators::{create_accumulators, AccumulatorItem}, - time::RecordBatchWatermark, -}; +use crate::physical_plan::utils::time::RecordBatchWatermark; use super::{ add_window_columns_to_record_batch, add_window_columns_to_schema, create_group_accumulator, - group_values::{new_group_values, GroupValues}, - order::GroupOrdering, streaming_window::{ get_windows_for_watermark, FranzStreamingWindowExec, FranzStreamingWindowType, - FranzWindowFrame, }, GroupsAccumulatorItem, }; +#[allow(dead_code)] pub struct GroupedWindowAggStream { pub schema: SchemaRef, input: SendableRecordBatchStream, diff --git a/crates/core/src/physical_plan/continuous/mod.rs b/crates/core/src/physical_plan/continuous/mod.rs index e980eb3..8edcf05 100644 --- a/crates/core/src/physical_plan/continuous/mod.rs +++ b/crates/core/src/physical_plan/continuous/mod.rs @@ -17,8 
+17,6 @@ use datafusion::{ pub mod grouped_window_agg_stream; pub mod streaming_window; -mod group_values; -mod order; use datafusion::physical_expr::AggregateExpr; use log::debug; diff --git a/crates/core/src/physical_plan/continuous/order/full.rs b/crates/core/src/physical_plan/continuous/order/full.rs deleted file mode 100644 index 65af814..0000000 --- a/crates/core/src/physical_plan/continuous/order/full.rs +++ /dev/null @@ -1,144 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::EmitTo; - -/// Tracks grouping state when the data is ordered entirely by its -/// group keys -/// -/// When the group values are sorted, as soon as we see group `n+1` we -/// know we will never see any rows for group `n` again and thus they -/// can be emitted. -/// -/// For example, given `SUM(amt) GROUP BY id`, if the input is sorted -/// by `id`, as soon as a new `id` value is seen all previous values -/// can be emitted. -/// -/// The state is tracked like this: -/// -/// ```text -/// ┌─────┐ ┌──────────────────┐ -/// │┌───┐│ │ ┌──────────────┐ │ ┏━━━━━━━━━━━━━━┓ -/// ││ 0 ││ │ │ 123 │ │ ┌─────┃ 13 ┃ -/// │└───┘│ │ └──────────────┘ │ │ ┗━━━━━━━━━━━━━━┛ -/// │ ... │ │ ... │ │ -/// │┌───┐│ │ ┌──────────────┐ │ │ current -/// ││12 ││ │ │ 234 │ │ │ -/// │├───┤│ │ ├──────────────┤ │ │ -/// ││12 ││ │ │ 234 │ │ │ -/// │├───┤│ │ ├──────────────┤ │ │ -/// ││13 ││ │ │ 456 │◀┼───┘ -/// │└───┘│ │ └──────────────┘ │ -/// └─────┘ └──────────────────┘ -/// -/// group indices group_values current tracks the most -/// (in group value recent group index -/// order) -/// ``` -/// -/// In this diagram, the current group is `13`, and thus groups -/// `0..12` can be emitted. Note that `13` can not yet be emitted as -/// there may be more values in the next batch with the same group_id. -#[derive(Debug)] -pub(crate) struct GroupOrderingFull { - state: State, -} - -#[derive(Debug)] -enum State { - /// Seen no input yet - Start, - - /// Data is in progress. `current` is the current group for which - /// values are being generated. Can emit `current` - 1 - InProgress { current: usize }, - - /// Seen end of input: all groups can be emitted - Complete, -} - -impl GroupOrderingFull { - pub fn new() -> Self { - Self { - state: State::Start, - } - } - - // How many groups can be emitted, or None if no data can be emitted - pub fn emit_to(&self) -> Option<EmitTo> { - match &self.state { - State::Start => None, - State::InProgress { current, .. } => { - if *current == 0 { - // Can not emit if still on the first row - None - } else { - // otherwise emit all rows prior to the current group - Some(EmitTo::First(*current)) - } - } - State::Complete { ..
} => Some(EmitTo::All), - } - } - - /// remove the first n groups from the internal state, shifting - /// all existing indexes down by `n` - pub fn remove_groups(&mut self, n: usize) { - match &mut self.state { - State::Start => panic!("invalid state: start"), - State::InProgress { current } => { - // shift down by n - assert!(*current >= n); - *current -= n; - } - State::Complete { .. } => panic!("invalid state: complete"), - } - } - - /// Note that the input is complete so any outstanding groups are done as well - pub fn input_done(&mut self) { - self.state = State::Complete; - } - - /// Called when new groups are added in a batch. See documentation - /// on [`super::GroupOrdering::new_groups`] - pub fn new_groups(&mut self, total_num_groups: usize) { - assert_ne!(total_num_groups, 0); - - // Update state - let max_group_index = total_num_groups - 1; - self.state = match self.state { - State::Start => State::InProgress { - current: max_group_index, - }, - State::InProgress { current } => { - // expect to see new group indexes when called again - assert!(current <= max_group_index, "{current} <= {max_group_index}"); - State::InProgress { - current: max_group_index, - } - } - State::Complete { .. } => { - panic!("Saw new group after input was complete"); - } - }; - } - - pub(crate) fn size(&self) -> usize { - std::mem::size_of::<Self>() - } -} diff --git a/crates/core/src/physical_plan/continuous/order/mod.rs b/crates/core/src/physical_plan/continuous/order/mod.rs deleted file mode 100644 index 921452a..0000000 --- a/crates/core/src/physical_plan/continuous/order/mod.rs +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
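(Note on the ordering trackers being dropped in this hunk: GroupOrderingFull is a small three-state machine, and the partial variant below is the same idea keyed on a sort-key prefix. A toy, self-contained restatement of the full-ordering protocol may help future readers; the type names and the Emit enum are illustrative, not this crate's API.)

```rust
#[derive(Debug, PartialEq)]
enum Emit {
    First(usize),
    All,
}

enum FullOrdering {
    Start,
    InProgress { current: usize },
    Complete,
}

impl FullOrdering {
    /// Called after interning a batch; `total_num_groups` is the new total.
    fn new_groups(&mut self, total_num_groups: usize) {
        assert_ne!(total_num_groups, 0);
        *self = FullOrdering::InProgress { current: total_num_groups - 1 };
    }

    /// Called at end of input: every group, including `current`, is final.
    fn input_done(&mut self) {
        *self = FullOrdering::Complete;
    }

    /// Groups before `current` are final because the input is sorted on
    /// the group keys; `current` itself may still receive rows.
    fn emit_to(&self) -> Option<Emit> {
        match self {
            FullOrdering::Start => None,
            FullOrdering::InProgress { current: 0 } => None,
            FullOrdering::InProgress { current } => Some(Emit::First(*current)),
            FullOrdering::Complete => Some(Emit::All),
        }
    }
}

fn main() {
    let mut ordering = FullOrdering::Start;
    ordering.new_groups(13); // the first batch ended while filling group 12
    assert_eq!(ordering.emit_to(), Some(Emit::First(12)));
    ordering.input_done(); // end of input: group 12 is final too
    assert_eq!(ordering.emit_to(), Some(Emit::All));
}
```

The per-batch driver sequence is: intern the keys, call new_groups, drain via emit_to, then renumber with remove_groups(n); remove_groups is omitted above for brevity.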
- -use arrow_array::ArrayRef; -use arrow_schema::Schema; -use datafusion::{ - common::Result, logical_expr::EmitTo, physical_expr::PhysicalSortExpr, - physical_plan::InputOrderMode, -}; - -mod full; -mod partial; - -pub(crate) use full::GroupOrderingFull; -pub(crate) use partial::GroupOrderingPartial; - -/// Ordering information for each group in the hash table -#[derive(Debug)] -pub(crate) enum GroupOrdering { - /// Groups are not ordered - None, - /// Groups are ordered by some subset of the group keys - Partial(GroupOrderingPartial), - /// Groups are entirely contiguous - Full(GroupOrderingFull), -} - -impl GroupOrdering { - /// Create a `GroupOrdering` for the specified ordering - pub fn try_new( - input_schema: &Schema, - mode: &InputOrderMode, - ordering: &[PhysicalSortExpr], - ) -> Result<Self> { - match mode { - InputOrderMode::Linear => Ok(GroupOrdering::None), - InputOrderMode::PartiallySorted(order_indices) => { - GroupOrderingPartial::try_new(input_schema, order_indices, ordering) - .map(GroupOrdering::Partial) - } - InputOrderMode::Sorted => Ok(GroupOrdering::Full(GroupOrderingFull::new())), - } - } - - // How many groups can be emitted, or None if no data can be emitted - pub fn emit_to(&self) -> Option<EmitTo> { - match self { - GroupOrdering::None => None, - GroupOrdering::Partial(partial) => partial.emit_to(), - GroupOrdering::Full(full) => full.emit_to(), - } - } - - /// Updates the state when the input is done - pub fn input_done(&mut self) { - match self { - GroupOrdering::None => {} - GroupOrdering::Partial(partial) => partial.input_done(), - GroupOrdering::Full(full) => full.input_done(), - } - } - - /// remove the first n groups from the internal state, shifting - /// all existing indexes down by `n` - pub fn remove_groups(&mut self, n: usize) { - match self { - GroupOrdering::None => {} - GroupOrdering::Partial(partial) => partial.remove_groups(n), - GroupOrdering::Full(full) => full.remove_groups(n), - } - } - - /// Called when new groups are added in a batch - /// - /// * `total_num_groups`: total number of groups (so max - /// group_index is total_num_groups - 1). - /// - /// * `batch_group_values`: group key values for *each row* in the batch - /// - /// * `group_indices`: indices for each row in the batch - pub fn new_groups( - &mut self, - batch_group_values: &[ArrayRef], - group_indices: &[usize], - total_num_groups: usize, - ) -> Result<()> { - match self { - GroupOrdering::None => {} - GroupOrdering::Partial(partial) => { - partial.new_groups(batch_group_values, group_indices, total_num_groups)?; - } - GroupOrdering::Full(full) => { - full.new_groups(total_num_groups); - } - }; - Ok(()) - } - - /// Return the size of memory used by the ordering state, in bytes - pub(crate) fn size(&self) -> usize { - std::mem::size_of::<Self>() - + match self { - GroupOrdering::None => 0, - GroupOrdering::Partial(partial) => partial.size(), - GroupOrdering::Full(full) => full.size(), - } - } -} diff --git a/crates/core/src/physical_plan/continuous/order/partial.rs b/crates/core/src/physical_plan/continuous/order/partial.rs deleted file mode 100644 index e941c10..0000000 --- a/crates/core/src/physical_plan/continuous/order/partial.rs +++ /dev/null @@ -1,252 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership.
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::row::{OwnedRow, RowConverter, Rows, SortField}; -use arrow_array::ArrayRef; -use arrow_schema::Schema; -use datafusion::{ - common::utils::proxy::VecAllocExt, common::Result, logical_expr::EmitTo, - physical_expr::PhysicalSortExpr, -}; - -use std::sync::Arc; - -/// Tracks grouping state when the data is ordered by some subset of -/// the group keys. -/// -/// Once the next *sort key* value is seen, we will never see groups -/// with that sort key again, so we can emit all groups with the -/// previous sort key and earlier. -/// -/// For example, given `SUM(amt) GROUP BY id, state` if the input is -/// sorted by `state`, when a new value of `state` is seen, all groups -/// with prior values of `state` can be emitted. -/// -/// The state is tracked like this: -/// -/// ```text -/// ┏━━━━━━━━━━━━━━━━━┓ ┏━━━━━━━┓ -/// ┌─────┐ ┌───────────────────┐ ┌─────┃ 9 ┃ ┃ "MD" ┃ -/// │┌───┐│ │ ┌──────────────┐ │ │ ┗━━━━━━━━━━━━━━━━━┛ ┗━━━━━━━┛ -/// ││ 0 ││ │ │ 123, "MA" │ │ │ current_sort sort_key -/// │└───┘│ │ └──────────────┘ │ │ -/// │ ... │ │ ... │ │ current_sort tracks the -/// │┌───┐│ │ ┌──────────────┐ │ │ smallest group index that had -/// ││ 8 ││ │ │ 765, "MA" │ │ │ the same sort_key as current -/// │├───┤│ │ ├──────────────┤ │ │ -/// ││ 9 ││ │ │ 923, "MD" │◀─┼─┘ -/// │├───┤│ │ ├──────────────┤ │ ┏━━━━━━━━━━━━━━┓ -/// ││10 ││ │ │ 345, "MD" │ │ ┌─────┃ 11 ┃ -/// │├───┤│ │ ├──────────────┤ │ │ ┗━━━━━━━━━━━━━━┛ -/// ││11 ││ │ │ 124, "MD" │◀─┼──┘ current -/// │└───┘│ │ └──────────────┘ │ -/// └─────┘ └───────────────────┘ -/// -/// group indices -/// (in group value group_values current tracks the most -/// order) recent group index -///``` -#[derive(Debug)] -pub(crate) struct GroupOrderingPartial { - /// State machine - state: State, - - /// The indexes of the group by columns that form the sort key. - /// For example if grouping by `id, state` and ordered by `state` - /// this would be `[1]`. - order_indices: Vec<usize>, - - /// Converter for the sort key (used on the group columns - /// specified in `order_indices`) - row_converter: RowConverter, -} - -#[derive(Debug, Default)] -enum State { - /// The ordering was temporarily taken. `Self::Taken` is left - /// when state must be temporarily taken to satisfy the borrow - /// checker. If an error happens before the state can be restored, - /// the ordering information is lost and execution can not - /// proceed, but there is no undefined behavior. - #[default] - Taken, - - /// Seen no input yet - Start, - - /// Data is in progress.
- InProgress { - /// Smallest group index with the sort_key - current_sort: usize, - /// The sort key of group_index `current_sort` - sort_key: OwnedRow, - /// index of the current group for which values are being - /// generated - current: usize, - }, - - /// Seen end of input, all groups can be emitted - Complete, -} - -impl GroupOrderingPartial { - pub fn try_new( - input_schema: &Schema, - order_indices: &[usize], - ordering: &[PhysicalSortExpr], - ) -> Result<Self> { - assert!(!order_indices.is_empty()); - assert!(order_indices.len() <= ordering.len()); - - // Get only the section of the ordering that consists of the group by expressions. - let fields = ordering[0..order_indices.len()] - .iter() - .map(|sort_expr| { - Ok(SortField::new_with_options( - sort_expr.expr.data_type(input_schema)?, - sort_expr.options, - )) - }) - .collect::<Result<Vec<_>>>()?; - - Ok(Self { - state: State::Start, - order_indices: order_indices.to_vec(), - row_converter: RowConverter::new(fields)?, - }) - } - - /// Creates sort keys from the group values - /// - /// For example, if group_values had `A, B, C` but the input was - /// only sorted on `B` and `C` this should return rows for (`B`, - /// `C`) - fn compute_sort_keys(&mut self, group_values: &[ArrayRef]) -> Result<Rows> { - // Take only the columns that are in the sort key - let sort_values: Vec<_> = self - .order_indices - .iter() - .map(|&idx| Arc::clone(&group_values[idx])) - .collect(); - - Ok(self.row_converter.convert_columns(&sort_values)?) - } - - /// How many groups can be emitted, or None if no data can be emitted - pub fn emit_to(&self) -> Option<EmitTo> { - match &self.state { - State::Taken => unreachable!("State previously taken"), - State::Start => None, - State::InProgress { current_sort, .. } => { - // Can not emit if we are still on the first sort - // row; otherwise we can emit all groups that had earlier sort keys - // - if *current_sort == 0 { - None - } else { - Some(EmitTo::First(*current_sort)) - } - } - State::Complete => Some(EmitTo::All), - } - } - - /// remove the first n groups from the internal state, shifting - /// all existing indexes down by `n` - pub fn remove_groups(&mut self, n: usize) { - match &mut self.state { - State::Taken => unreachable!("State previously taken"), - State::Start => panic!("invalid state: start"), - State::InProgress { - current_sort, - current, - sort_key: _, - } => { - // shift indexes down by n - assert!(*current >= n); - *current -= n; - assert!(*current_sort >= n); - *current_sort -= n; - } - State::Complete { .. } => panic!("invalid state: complete"), - } - } - - /// Note that the input is complete so any outstanding groups are done as well - pub fn input_done(&mut self) { - self.state = match self.state { - State::Taken => unreachable!("State previously taken"), - _ => State::Complete, - }; - } - - /// Called when new groups are added in a batch.
See documentation - /// on [`super::GroupOrdering::new_groups`] - pub fn new_groups( - &mut self, - batch_group_values: &[ArrayRef], - group_indices: &[usize], - total_num_groups: usize, - ) -> Result<()> { - assert!(total_num_groups > 0); - assert!(!batch_group_values.is_empty()); - - let max_group_index = total_num_groups - 1; - - // compute the sort key values for each group - let sort_keys = self.compute_sort_keys(batch_group_values)?; - - let old_state = std::mem::take(&mut self.state); - let (mut current_sort, mut sort_key) = match &old_state { - State::Taken => unreachable!("State previously taken"), - State::Start => (0, sort_keys.row(0)), - State::InProgress { - current_sort, - sort_key, - .. - } => (*current_sort, sort_key.row()), - State::Complete => { - panic!("Saw new group after the end of input"); - } - }; - - // Find latest sort key - let iter = group_indices.iter().zip(sort_keys.iter()); - for (&group_index, group_sort_key) in iter { - // Has this group seen a new sort_key? - if sort_key != group_sort_key { - current_sort = group_index; - sort_key = group_sort_key; - } - } - - self.state = State::InProgress { - current_sort, - sort_key: sort_key.owned(), - current: max_group_index, - }; - - Ok(()) - } - - /// Return the size of memory allocated by this structure - pub(crate) fn size(&self) -> usize { - std::mem::size_of::<Self>() - + self.order_indices.allocated_size() - + self.row_converter.size() - } -} diff --git a/crates/core/src/physical_plan/continuous/streaming_window.rs b/crates/core/src/physical_plan/continuous/streaming_window.rs index 93f0e57..e058aa0 100644 --- a/crates/core/src/physical_plan/continuous/streaming_window.rs +++ b/crates/core/src/physical_plan/continuous/streaming_window.rs @@ -9,19 +9,14 @@ use std::{ }; use arrow::{ - array::PrimitiveBuilder, compute::{concat_batches, filter_record_batch}, datatypes::TimestampMillisecondType, }; -use arrow_array::{ - Array, BooleanArray, PrimitiveArray, RecordBatch, StructArray, TimestampMillisecondArray, -}; +use arrow_array::{Array, PrimitiveArray, RecordBatch, StructArray, TimestampMillisecondArray}; use arrow_ord::cmp; -use arrow_schema::{DataType, Field, Schema, SchemaBuilder, SchemaRef, TimeUnit}; +use arrow_schema::{Field, Schema, SchemaRef}; -use datafusion::common::{ - downcast_value, internal_err, stats::Precision, DataFusionError, Statistics, -}; +use datafusion::common::{internal_err, stats::Precision, DataFusionError, Statistics}; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; use datafusion::physical_expr::{ equivalence::{collapse_lex_req, ProjectionMapping}, From 5fec723731eccc9c8e7973ce0539c5dce2866201 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Thu, 15 Aug 2024 04:56:59 -0700 Subject: [PATCH 8/8] update clippy rules --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 2 +- clippy.toml | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d37b34..5a9ae2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -797,7 +797,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -852,7 +852,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source =
"git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow-schema", "async-trait", @@ -865,7 +865,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -891,7 +891,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "tokio", ] @@ -899,7 +899,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "chrono", @@ -919,7 +919,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -937,7 +937,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-buffer", @@ -963,7 +963,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -980,7 +980,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-array", @@ -1001,7 +1001,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "async-trait", @@ -1020,7 +1020,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = 
"git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1049,7 +1049,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1062,7 +1062,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "datafusion-common", "datafusion-execution", @@ -1073,7 +1073,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "ahash", "arrow", @@ -1106,7 +1106,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion#fc67038398c05f760b1bf910809444446d7348ba" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?rev=fc67038#fc67038398c05f760b1bf910809444446d7348ba" dependencies = [ "arrow", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 6d3cc8c..cbdf84f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,4 +55,4 @@ chrono = { version = "0.4.38", default-features = false } itertools = "0.13" [patch.crates-io] -datafusion = { git = "https://github.com/probably-nothing-labs/arrow-datafusion" } +datafusion = { git = "https://github.com/probably-nothing-labs/arrow-datafusion", rev="fc67038" } diff --git a/clippy.toml b/clippy.toml index c329b7a..4abee59 100644 --- a/clippy.toml +++ b/clippy.toml @@ -10,4 +10,4 @@ disallowed-types = [ # Lowering the threshold to help prevent stack overflows (default is 16384) # See: https://rust-lang.github.io/rust-clippy/master/index.html#/large_futures future-size-threshold = 10000 -too-many-arguments-threshold = 10 +too-many-arguments-threshold = 15