Add pagination to query Execution #141
base: main
@@ -21,22 +21,46 @@ use crate::app::state::tabs::sql::Query;
use crate::app::AppEvent;
use crate::execution::ExecutionContext;
use color_eyre::eyre::Result;
use datafusion::arrow::array::RecordBatch;
use datafusion::execution::context::SessionContext;
use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream};
use futures::StreamExt;
use log::{error, info};
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc::UnboundedSender;
use tokio::sync::Mutex;

/// Handles executing queries for the TUI application, formatting results
/// and sending them to the UI.
#[derive(Debug)]
pub(crate) struct AppExecution {
    inner: Arc<ExecutionContext>,
    results: Arc<Mutex<Option<PaginatingRecordBatchStream>>>,
}

impl AppExecution {
    /// Create a new instance of [`AppExecution`].
    pub fn new(inner: Arc<ExecutionContext>) -> Self {
-       Self { inner }
+       Self {
+           inner,
+           results: Arc::new(Mutex::new(None)),
+       }
    }

    pub fn session_ctx(&self) -> &SessionContext {
        self.inner.session_ctx()
    }

    pub fn results(&self) -> Arc<Mutex<Option<PaginatingRecordBatchStream>>> {
        Arc::clone(&self.results)
    }

    async fn set_results(&self, results: PaginatingRecordBatchStream) {
        let mut r = self.results.lock().await;
        *r = Some(results);
    }

    /// Run the sequence of SQL queries, sending the results as [`AppEvent::QueryResult`] via the sender.

@@ -61,26 +85,30 @@ impl AppExecution {
            if i == statement_count - 1 {
                info!("Executing last query and display results");
                match self.inner.execute_sql(sql).await {
-                   Ok(mut stream) => {
-                       let mut batches = Vec::new();
-                       while let Some(maybe_batch) = stream.next().await {
-                           match maybe_batch {
-                               Ok(batch) => {
-                                   batches.push(batch);
-                               }
-                               Err(e) => {
-                                   let elapsed = start.elapsed();
-                                   query.set_error(Some(e.to_string()));
-                                   query.set_execution_time(elapsed);
-                                   break;
-                               }
-                           }
-                       }
-                       let elapsed = start.elapsed();
-                       let rows: usize = batches.iter().map(|r| r.num_rows()).sum();
-                       query.set_results(Some(batches));
-                       query.set_num_rows(Some(rows));
-                       query.set_execution_time(elapsed);
+                   Ok(stream) => {
+                       let mut paginating_stream = PaginatingRecordBatchStream::new(stream);
+                       paginating_stream.next_batch().await?;
+                       self.set_results(paginating_stream).await;
+
+                       // let mut batches = Vec::new();
+                       // while let Some(maybe_batch) = stream.next().await {
+                       //     match maybe_batch {
+                       //         Ok(batch) => {
+                       //             batches.push(batch);
+                       //         }
+                       //         Err(e) => {
+                       //             let elapsed = start.elapsed();
+                       //             query.set_error(Some(e.to_string()));
+                       //             query.set_execution_time(elapsed);
+                       //             break;
+                       //         }
+                       //     }
+                       // }
+                       // let elapsed = start.elapsed();
+                       // let rows: usize = batches.iter().map(|r| r.num_rows()).sum();
+                       // query.set_results(Some(batches));
+                       // query.set_num_rows(Some(rows));
+                       // query.set_execution_time(elapsed);
                    }
                    Err(e) => {
                        error!("Error creating dataframe: {:?}", e);

@@ -108,3 +136,188 @@ impl AppExecution {
        Ok(())
    }
}

/// A stream of [`RecordBatch`]es that can be paginated for display in the TUI.
pub struct PaginatingRecordBatchStream {
    // currently executing stream
    inner: SendableRecordBatchStream,
    // any batches that have been buffered so far
    batches: Vec<RecordBatch>,
    // current batch being shown
    current_batch: Option<usize>,
}

impl PaginatingRecordBatchStream {
[Review comment] I'm not sure yet if these are the final API signatures - I will know once I plug this into the display code.
    pub fn new(inner: Pin<Box<dyn RecordBatchStream + Send>>) -> Self {
        Self {
            inner,
            batches: Vec::new(),
            current_batch: None,
        }
    }

    /// Return the batch at the current index
    pub fn current_batch(&self) -> Option<&RecordBatch> {
        if let Some(idx) = self.current_batch {
            self.batches.get(idx)
        } else {
            None
        }
    }

    /// Return the next batch
    /// TBD on logic for handling the end
    pub async fn next_batch(&mut self) -> Result<Option<&RecordBatch>> {
        if let Some(b) = self.inner.next().await {
            match b {
                Ok(batch) => {
                    self.batches.push(batch);
                    self.current_batch = Some(self.batches.len() - 1);
                    Ok(self.current_batch())
                }
                Err(e) => Err(e.into()),
            }
        } else {
            Ok(None)
        }
    }

    /// Return the previous batch
    /// TBD on logic for handling the beginning
    pub fn previous_batch(&mut self) -> Option<&RecordBatch> {
        if let Some(idx) = self.current_batch {
            if idx > 0 {
                self.current_batch = Some(idx - 1);
            }
        }
        self.current_batch()
    }
}

impl Debug for PaginatingRecordBatchStream {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PaginatingRecordBatchStream")
            .field("batches", &self.batches)
            .field("current_batch", &self.current_batch)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use super::PaginatingRecordBatchStream;
    use datafusion::{
        arrow::array::{ArrayRef, Int32Array, RecordBatch},
        common::Result,
        physical_plan::stream::RecordBatchStreamAdapter,
    };
    use std::sync::Arc;

    #[tokio::test]
    async fn test_paginating_record_batch_stream() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
        let b: ArrayRef = Arc::new(Int32Array::from(vec![1, 1]));

        let record_batch1 = RecordBatch::try_from_iter(vec![("a", a)]).unwrap();
        let record_batch2 = RecordBatch::try_from_iter(vec![("b", b)]).unwrap();

        let schema = record_batch1.schema();
        let batches: Vec<Result<RecordBatch>> =
            vec![Ok(record_batch1.clone()), Ok(record_batch2.clone())];
        let stream = futures::stream::iter(batches);
        let sendable_stream = Box::pin(RecordBatchStreamAdapter::new(schema, stream));

        let mut paginating_stream = PaginatingRecordBatchStream::new(sendable_stream);

        assert_eq!(paginating_stream.current_batch(), None);
        assert_eq!(
            paginating_stream.next_batch().await.unwrap(),
            Some(&record_batch1)
        );
        assert_eq!(
            paginating_stream.next_batch().await.unwrap(),
            Some(&record_batch2)
        );
        assert_eq!(paginating_stream.next_batch().await.unwrap(), None);
    }

    #[tokio::test]
    async fn test_paginating_record_batch_stream_previous() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
        let b: ArrayRef = Arc::new(Int32Array::from(vec![1, 1]));

        let record_batch1 = RecordBatch::try_from_iter(vec![("a", a)]).unwrap();
        let record_batch2 = RecordBatch::try_from_iter(vec![("b", b)]).unwrap();

        let schema = record_batch1.schema();
        let batches: Vec<Result<RecordBatch>> =
            vec![Ok(record_batch1.clone()), Ok(record_batch2.clone())];
        let stream = futures::stream::iter(batches);
        let sendable_stream = Box::pin(RecordBatchStreamAdapter::new(schema, stream));

        let mut paginating_stream = PaginatingRecordBatchStream::new(sendable_stream);

        assert_eq!(paginating_stream.current_batch(), None);
        assert_eq!(
            paginating_stream.next_batch().await.unwrap(),
            Some(&record_batch1)
        );
        assert_eq!(
            paginating_stream.next_batch().await.unwrap(),
            Some(&record_batch2)
        );
        assert_eq!(paginating_stream.next_batch().await.unwrap(), None);
        assert_eq!(paginating_stream.current_batch(), Some(&record_batch2));
        assert_eq!(paginating_stream.previous_batch(), Some(&record_batch1));
        assert_eq!(paginating_stream.previous_batch(), Some(&record_batch1));
    }

    #[tokio::test]
    async fn test_paginating_record_batch_stream_one_error() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
        let record_batch1 = RecordBatch::try_from_iter(vec![("a", a)]).unwrap();

        let schema = record_batch1.schema();
        let batches: Vec<Result<RecordBatch>> = vec![Err(
            datafusion::error::DataFusionError::Execution("Error creating dataframe".to_string()),
        )];
        let stream = futures::stream::iter(batches);
        let sendable_stream = Box::pin(RecordBatchStreamAdapter::new(schema, stream));

        let mut paginating_stream = PaginatingRecordBatchStream::new(sendable_stream);

        assert_eq!(paginating_stream.current_batch(), None);

        let res = paginating_stream.next_batch().await;
        assert!(res.is_err());
    }

    #[tokio::test]
    async fn test_paginating_record_batch_stream_successful_then_error() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));

        let record_batch1 = RecordBatch::try_from_iter(vec![("a", a)]).unwrap();

        let schema = record_batch1.schema();
        let batches: Vec<Result<RecordBatch>> = vec![
            Ok(record_batch1.clone()),
            Err(datafusion::error::DataFusionError::Execution(
                "Error creating dataframe".to_string(),
            )),
        ];
        let stream = futures::stream::iter(batches);
        let sendable_stream = Box::pin(RecordBatchStreamAdapter::new(schema, stream));

        let mut paginating_stream = PaginatingRecordBatchStream::new(sendable_stream);

        assert_eq!(paginating_stream.current_batch(), None);
        assert_eq!(
            paginating_stream.next_batch().await.unwrap(),
            Some(&record_batch1)
        );
        let res = paginating_stream.next_batch().await;
        assert!(res.is_err());
        assert_eq!(paginating_stream.next_batch().await.unwrap(), None);
        assert_eq!(paginating_stream.current_batch(), Some(&record_batch1));
    }
}
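
For context on how these APIs might be consumed, here is a hypothetical sketch (not part of this diff) of a TUI key handler driving PaginatingRecordBatchStream. The on_results_key function, the render_batch helper, and the crossterm key wiring are all illustrative assumptions, not code from the PR:

    use color_eyre::eyre::Result;
    use crossterm::event::KeyCode;
    use datafusion::arrow::array::RecordBatch;

    // Placeholder for the real UI code that redraws the results Table.
    fn render_batch(_batch: &RecordBatch) {}

    // Hypothetical key handler showing the intended call pattern.
    async fn on_results_key(app_execution: &AppExecution, key: KeyCode) -> Result<()> {
        let results = app_execution.results();
        let mut guard = results.lock().await;
        if let Some(stream) = guard.as_mut() {
            match key {
                // Advance: as written, next_batch always polls the inner
                // stream for a new batch (end-of-stream handling is TBD above).
                KeyCode::Right => {
                    if let Some(batch) = stream.next_batch().await? {
                        render_batch(batch);
                    }
                }
                // Go back: previous_batch only moves within buffered batches.
                KeyCode::Left => {
                    if let Some(batch) = stream.previous_batch() {
                        render_batch(batch);
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }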
[Review comment] One thing that isn't entirely clear to me is whether an entire RecordBatch is shown at a time or just a slice (like only rows 100-200). This might be an important distinction in terms of what "current batch" means and whether you want this structure to paginate based on batch index or logical row number. I think either could work -- but if you paginate on batch, you'll have to implement logic somewhere else to translate that into a logical row number for display.
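
If pagination stays batch-based, the translation this comment describes could look like the following minimal sketch. It assumes it operates over the buffered batches vec; logical_row_range is a hypothetical helper, not part of the PR:

    use datafusion::arrow::array::RecordBatch;

    /// Hypothetical helper: the logical row range covered by buffered batch
    /// `idx`, computed by summing the row counts of the preceding batches.
    fn logical_row_range(batches: &[RecordBatch], idx: usize) -> Option<(usize, usize)> {
        let batch = batches.get(idx)?;
        let start: usize = batches[..idx].iter().map(|b| b.num_rows()).sum();
        Some((start, start + batch.num_rows()))
    }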
[Review comment] I think that could depend on the UI that we expose. I have two options in mind. One thing to note is that the Table widget we use automatically provides some scrolling capabilities (it tracks the selected row automatically), so if we continue to use that, which I think is beneficial at our stage, we at least don't have to do everything from scratch. One potentially terrible idea I had, that would at least enable a very simple v1 (at the cost of worse query performance), is to default the batch size for the TUI to some relatively small amount (say 200 rows); all rows for that batch would then be added to the Table, and the next page just gets the next batch and replaces the Table records (i.e. option 1). We wouldn't need to track rows or figure out how to stitch records together across record batch boundaries. I believe this is the simplest approach and would at least make the app usable for larger queries, and then we could add a todo for something more user friendly that doesn't impact query performance.
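
As a sketch of that small-batch idea: DataFusion's session-level batch size can be lowered when the context is built. with_batch_size is a real SessionConfig method, while the 200-row value is the comment's example and the constructor wiring here is illustrative (the exact constructor name varies by DataFusion version):

    use datafusion::execution::config::SessionConfig;
    use datafusion::execution::context::SessionContext;

    fn main() {
        // Ask DataFusion operators to emit ~200-row batches so that one
        // RecordBatch corresponds roughly to one TUI page.
        let config = SessionConfig::new().with_batch_size(200);
        let _ctx = SessionContext::new_with_config(config);
    }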
[Review comment] This is how I (naively) as a user would expect things to behave.
[Review comment] This seems reasonable to me. I also think all the APIs to stitch rows together are in arrow-rs (like concat_batches and slice), so we could also make some sort of adapter stream (another wrapper!) that takes the incoming stream and reformats it into smaller record batches.
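
A minimal sketch of that adapter's core transformation, assuming collected batches rather than a full stream wrapper. concat_batches and RecordBatch::slice are the arrow-rs APIs named above; rechunk itself is a hypothetical helper:

    use datafusion::arrow::array::RecordBatch;
    use datafusion::arrow::compute::concat_batches;
    use datafusion::arrow::error::ArrowError;

    /// Re-chunk batches into fixed-size pages by concatenating them and
    /// slicing the combined batch every `rows_per_page` rows.
    fn rechunk(batches: &[RecordBatch], rows_per_page: usize) -> Result<Vec<RecordBatch>, ArrowError> {
        if batches.is_empty() || rows_per_page == 0 {
            return Ok(Vec::new());
        }
        let schema = batches[0].schema();
        let combined = concat_batches(&schema, batches)?;
        let mut pages = Vec::new();
        let mut offset = 0;
        while offset < combined.num_rows() {
            let len = rows_per_page.min(combined.num_rows() - offset);
            pages.push(combined.slice(offset, len));
            offset += len;
        }
        Ok(pages)
    }

A stream-based version would apply the same slicing incrementally as batches arrive, buffering at most one partial page, rather than concatenating everything up front.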