diff --git a/shinkai-bin/shinkai-node/src/managers/sheet_manager.rs b/shinkai-bin/shinkai-node/src/managers/sheet_manager.rs index c6cc995ec..3e56e6e12 100644 --- a/shinkai-bin/shinkai-node/src/managers/sheet_manager.rs +++ b/shinkai-bin/shinkai-node/src/managers/sheet_manager.rs @@ -8,7 +8,7 @@ use shinkai_message_primitives::schemas::ws_types::{WSMessageType, WSUpdateHandl use shinkai_message_primitives::shinkai_message::shinkai_message_schemas::{ CallbackAction, JobCreationInfo, JobMessage, SheetJobAction, SheetManagerAction, WSTopic, }; -use shinkai_message_primitives::shinkai_utils::job_scope::JobScope; +use shinkai_message_primitives::shinkai_utils::job_scope::MinimalJobScope; use shinkai_sheet::cell_name_converter::CellNameConverter; use shinkai_sheet::sheet::{Sheet, SheetUpdate}; use shinkai_sqlite::errors::SqliteManagerError; @@ -209,7 +209,7 @@ impl SheetManager { for job_data in jobs { let job_creation_info = JobCreationInfo { - scope: JobScope::new_default(), + scope: MinimalJobScope::default(), is_hidden: Some(true), associated_ui: None, }; diff --git a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands.rs b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands.rs index 64aefceb7..76002d08e 100644 --- a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands.rs +++ b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands.rs @@ -10,8 +10,7 @@ use shinkai_http_api::{ node_api_router::{APIError, GetPublicKeysResponse}, }; use shinkai_message_primitives::{ - schemas::ws_types::WSUpdateHandler, - shinkai_message::shinkai_message_schemas::{CallbackAction, JobCreationInfo}, + schemas::ws_types::WSUpdateHandler, shinkai_message::shinkai_message_schemas::JobCreationInfo, shinkai_utils::job_scope::MinimalJobScope, }; use shinkai_message_primitives::{ @@ -34,10 +33,6 @@ use shinkai_message_primitives::{ }, }; use shinkai_sqlite::SqliteManager; -use shinkai_vector_fs::vector_fs::vector_fs::VectorFS; -use shinkai_vector_resources::{ - 
embedding_generator::RemoteEmbeddingGenerator, model_type::EmbeddingModelType, shinkai_time::ShinkaiStringTime, -}; use tokio::sync::Mutex; use x25519_dalek::PublicKey as EncryptionPublicKey; @@ -1450,7 +1445,7 @@ impl Node { match tool_generation::v2_create_and_send_job_message( bearer.clone(), JobCreationInfo { - scope: JobScope::new_default(), + scope: MinimalJobScope::default(), is_hidden: Some(true), associated_ui: None, }, diff --git a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_tools.rs b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_tools.rs index 4567861f5..b964360fa 100644 --- a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_tools.rs +++ b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_tools.rs @@ -1206,7 +1206,7 @@ impl Node { // We auto create a new job with the same configuration as the one from job_id let job_creation_info = JobCreationInfo { - scope: job.scope_with_files().cloned().unwrap_or(JobScope::new_default()), + scope: job.scope().clone(), is_hidden: Some(job.is_hidden()), associated_ui: None, }; @@ -1687,8 +1687,7 @@ impl Node { }; // Save the tool to the database - let mut db_write = db; - match db_write.add_tool(tool).await { + match db.add_tool(tool).await { Ok(tool) => { let archive_clone = archive.clone(); let files = archive_clone.file_names(); diff --git a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_vecfs.rs b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_vecfs.rs index 22d9408e5..7566b2231 100644 --- a/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_vecfs.rs +++ b/shinkai-bin/shinkai-node/src/network/v2_api/api_v2_commands_vecfs.rs @@ -17,7 +17,7 @@ use shinkai_message_primitives::{ shinkai_utils::shinkai_path::ShinkaiPath, }; use shinkai_sqlite::SqliteManager; -use tokio::sync::{Mutex, RwLock}; +use tokio::sync::Mutex; use crate::{ managers::IdentityManager, @@ -684,7 +684,7 @@ impl Node { } }; - let search_path_str = 
input_payload.path.as_deref().unwrap_or("/").clone(); + let search_path_str = input_payload.path.as_deref().unwrap_or("/").to_string(); let search_path = match ShinkaiPath::from_string(search_path_str) { Ok(path) => path, Err(e) => { diff --git a/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/llm_prompt_processor.rs b/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/llm_prompt_processor.rs index 110c7eabc..e6a6354be 100644 --- a/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/llm_prompt_processor.rs +++ b/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/llm_prompt_processor.rs @@ -89,7 +89,7 @@ impl ToolExecutor for LmPromptProcessorTool { let response = v2_create_and_send_job_message( bearer.clone(), JobCreationInfo { - scope: JobScope::new_default(), + scope: MinimalJobScope::default(), is_hidden: Some(true), associated_ui: None, }, diff --git a/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/tool_knowledge.rs b/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/tool_knowledge.rs index e091f07ff..452ee2692 100644 --- a/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/tool_knowledge.rs +++ b/shinkai-bin/shinkai-node/src/tools/tool_implementation/native_tools/tool_knowledge.rs @@ -89,7 +89,7 @@ impl ToolExecutor for KnowledgeTool { // TODO: how do we use app_id here? is it linked to a job somehow? 
// TODO: create e2e test using this fn so we can test it with some real data - let mut scope = JobScope::new_default(); + let mut scope = MinimalJobScope::default(); // Checks if job_id is provided in the parameters if let Some(job_id_value) = parameters.get("job_id") { diff --git a/shinkai-bin/shinkai-node/tests/it/db_job_tests.rs b/shinkai-bin/shinkai-node/tests/it/db_job_tests.rs index da15c9236..0923e8cef 100644 --- a/shinkai-bin/shinkai-node/tests/it/db_job_tests.rs +++ b/shinkai-bin/shinkai-node/tests/it/db_job_tests.rs @@ -94,7 +94,7 @@ mod tests { async fn test_create_new_job() { let job_id = "job1".to_string(); let agent_id = "agent1".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -125,7 +125,7 @@ mod tests { for i in 1..=5 { let job_id = format!("job{}", i); eprintln!("job_id: {}", job_id.clone()); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let _ = create_new_job(&shinkai_db, job_id, agent_id.clone(), scope).await; } @@ -148,7 +148,7 @@ mod tests { let job_id = "job_to_change_agent".to_string(); let initial_agent_id = "initial_agent".to_string(); let new_agent_id = "new_agent".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -180,7 +180,7 @@ mod tests { // let inbox_name = // InboxName::new("inbox::@@node1.shinkai/subidentity::@@node2.shinkai/subidentity2::true".to_string()) // .unwrap(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -207,7 +207,7 @@ mod tests { let (node1_encryption_sk, node1_encryption_pk) = unsafe_deterministic_encryption_keypair(0); let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); // Create a new job let _ = 
create_new_job(&shinkai_db, job_id.clone(), agent_id.clone(), scope).await; @@ -318,7 +318,7 @@ mod tests { // let inbox_names = vec![inbox_name]; // let documents = vec!["document1".to_string(), "document2".to_string()]; - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let _ = create_new_job(&shinkai_db, job_id, agent_id.clone(), scope).await; } @@ -337,7 +337,7 @@ mod tests { async fn test_job_inbox_empty() { let job_id = "job_test".to_string(); let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -372,7 +372,7 @@ mod tests { async fn test_job_inbox_tree_structure() { let job_id = "job_test".to_string(); let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -468,7 +468,7 @@ mod tests { async fn test_job_inbox_tree_structure_with_step_history_and_execution_context() { let job_id = "job_test".to_string(); let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -609,7 +609,7 @@ mod tests { let job_id = "test_job"; let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -760,7 +760,7 @@ mod tests { async fn test_job_inbox_tree_structure_with_invalid_date() { let job_id = "job_test".to_string(); let agent_id = "agent_test".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -843,7 +843,7 @@ mod tests { async fn test_add_forked_job() { let job_id = "job1".to_string(); let agent_id = "agent1".to_string(); - let scope = 
JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); @@ -953,7 +953,7 @@ mod tests { let job1_id = "job1".to_string(); let job2_id = "job2".to_string(); let agent_id = "agent1".to_string(); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); let db = setup_test_db(); let shinkai_db = Arc::new(db); diff --git a/shinkai-bin/shinkai-node/tests/it/utils/node_test_api.rs b/shinkai-bin/shinkai-node/tests/it/utils/node_test_api.rs index ace998448..7f13c4d8f 100644 --- a/shinkai-bin/shinkai-node/tests/it/utils/node_test_api.rs +++ b/shinkai-bin/shinkai-node/tests/it/utils/node_test_api.rs @@ -454,7 +454,7 @@ pub async fn api_create_job( sender_subidentity: &str, recipient_subidentity: &str, ) -> String { - let job_scope = JobScope::new_default(); + let job_scope = MinimalJobScope::default(); api_create_job_with_scope( node_commands_sender, subidentity_encryption_sk, diff --git a/shinkai-bin/shinkai-node/tests/it/websocket_tests.rs b/shinkai-bin/shinkai-node/tests/it/websocket_tests.rs index 7ef9a66a3..ec4869d2f 100644 --- a/shinkai-bin/shinkai-node/tests/it/websocket_tests.rs +++ b/shinkai-bin/shinkai-node/tests/it/websocket_tests.rs @@ -261,7 +261,7 @@ async fn test_websocket() { }; let _ = shinkai_db.insert_profile(sender_subidentity.clone()); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); match shinkai_db.create_new_job(job_id1, agent_id.clone(), scope.clone(), false, None, None) { Ok(_) => (), Err(e) => panic!("Failed to create a new job: {}", e), @@ -562,7 +562,7 @@ async fn test_websocket_smart_inbox() { }; let _ = shinkai_db.insert_profile(sender_subidentity.clone()); - let scope = JobScope::new_default(); + let scope = MinimalJobScope::default(); match shinkai_db.create_new_job(job_id1, agent_id.clone(), scope.clone(), false, None, None) { Ok(_) => (), Err(e) => panic!("Failed to create a new job: {}", e), diff --git 
a/shinkai-libs/shinkai-fs/src/lib.rs b/shinkai-libs/shinkai-fs/src/lib.rs index 55cf9012d..fd1cce43e 100644 --- a/shinkai-libs/shinkai-fs/src/lib.rs +++ b/shinkai-libs/shinkai-fs/src/lib.rs @@ -1,3 +1,4 @@ // pub mod file_parser; pub mod shinkai_fs_error; -pub mod shinkai_file_manager; \ No newline at end of file +pub mod shinkai_file_manager; +pub mod shinkai_file_manager_ops; diff --git a/shinkai-libs/shinkai-fs/src/shinkai_file_manager.rs b/shinkai-libs/shinkai-fs/src/shinkai_file_manager.rs index d2eed5e34..99f1ae63c 100644 --- a/shinkai-libs/shinkai-fs/src/shinkai_file_manager.rs +++ b/shinkai-libs/shinkai-fs/src/shinkai_file_manager.rs @@ -1,226 +1,43 @@ use std::fs; -use std::io; -use std::path::{Path, PathBuf}; -use thiserror::Error; +use std::path::Path; +use std::time::SystemTime; +use std::collections::HashMap; use shinkai_message_primitives::shinkai_utils::shinkai_path::ShinkaiPath; -use shinkai_sqlite::{errors::SqliteManagerError, SqliteManager}; - -use shinkai_message_primitives::schemas::shinkai_fs::{ParsedFile, ShinkaiFileChunk}; - -#[derive(Debug, Error)] -pub enum FileManagerError { - #[error("IO error occurred: {0}")] - Io(#[from] io::Error), - #[error("Database error: {0}")] - Database(#[from] SqliteManagerError), - #[error("File not found in database")] - FileNotFoundInDatabase, - #[error("File not found on filesystem")] - FileNotFoundOnFilesystem, - #[error("Folder not found on filesystem")] - FolderNotFoundOnFilesystem, - #[error("Cannot move folder into itself")] - InvalidFolderMove, -} - -pub struct ShinkaiFileManager; - -impl ShinkaiFileManager { - /// Add a file: writes a file from `data` to a relative path under `base_dir`. 
- pub fn add_file( - dest_path: ShinkaiPath, - data: Vec - ) -> Result<(), FileManagerError> { - // Ensure the parent directory exists - fs::create_dir_all(dest_path.as_path().parent().unwrap())?; - - // Write the data to the destination path - fs::write(dest_path.as_path(), data)?; - - Ok(()) - } - - /// Remove file: deletes file from filesystem and DB. - pub fn remove_file(path: ShinkaiPath, base_dir: &Path, sqlite_manager: &SqliteManager) -> Result<(), FileManagerError> { - // Check if file exists on filesystem - if !path.exists() { - return Err(FileManagerError::FileNotFoundOnFilesystem); - } +use shinkai_sqlite::SqliteManager; - // Remove from filesystem - fs::remove_file(path.as_path())?; +use shinkai_message_primitives::schemas::shinkai_fs::ParsedFile; - // Update DB - let rel_path = Self::compute_relative_path(&path, base_dir)?; - if let Some(parsed_file) = sqlite_manager.get_parsed_file_by_rel_path(&rel_path)? { - sqlite_manager.remove_parsed_file(parsed_file.id)?; - } else { - return Err(FileManagerError::FileNotFoundInDatabase); - } - - Ok(()) - } - - /// Create folder: just create a directory on the filesystem. - /// No DB changes since we don't store directories in DB. - pub fn create_folder(path: ShinkaiPath) -> Result<(), FileManagerError> { - fs::create_dir_all(path.as_path())?; - Ok(()) - } +use crate::shinkai_fs_error::ShinkaiFsError; - /// Remove folder: remove a directory from the filesystem. - /// This does not directly affect the DB, but any files in that folder - /// should have been removed first. If not, scanning the DB for files - /// might be necessary. 
- pub fn remove_folder(path: ShinkaiPath) -> Result<(), FileManagerError> { - if !path.exists() { - return Err(FileManagerError::FolderNotFoundOnFilesystem); - } - - // Check if the folder is empty - if fs::read_dir(path.as_path())?.next().is_some() { - return Err(FileManagerError::FolderNotFoundOnFilesystem); - } - - fs::remove_dir(path.as_path())?; - Ok(()) - } - - /// Rename file: rename a file in the filesystem and update `ParsedFile.relative_path` in DB. - pub fn rename_file( - old_path: ShinkaiPath, - new_relative_path: &str, - base_dir: &Path, - sqlite_manager: &SqliteManager - ) -> Result<(), FileManagerError> { - // Debugging: Check if the old file exists - if !old_path.exists() { - println!("Old file does not exist: {:?}", old_path); - return Err(FileManagerError::FileNotFoundOnFilesystem); - } - - let new_path = base_dir.join(new_relative_path); - // Debugging: Print the new path - println!("Renaming to new path: {:?}", new_path); - - fs::create_dir_all(new_path.parent().unwrap())?; - fs::rename(old_path.as_path(), &new_path)?; - - // Update DB - let old_rel_path = Self::compute_relative_path(&old_path, base_dir)?; - if let Some(mut parsed_file) = sqlite_manager.get_parsed_file_by_rel_path(&old_rel_path)? { - parsed_file.relative_path = new_relative_path.to_string(); - sqlite_manager.update_parsed_file(&parsed_file)?; - } else { - // File not found in DB is not necessarily an error if we just discovered it, - // but let's return an error for consistency. - return Err(FileManagerError::FileNotFoundInDatabase); - } - - Ok(()) - } +pub struct ShinkaiFileManager; - // /// Rename folder: rename a directory in the filesystem and update all `ParsedFile.relative_path` - // /// entries that are inside this folder. 
- // pub fn rename_folder( - // old_path: ShinkaiPath, - // new_relative_path: &str, - // base_dir: &Path, - // sqlite_manager: &SqliteManager - // ) -> Result<(), FileManagerError> { - // if !old_path.exists() { - // return Err(FileManagerError::FolderNotFoundOnFilesystem); - // } - - // let new_path = base_dir.join(new_relative_path); - // fs::create_dir_all(new_path.parent().unwrap())?; - // fs::rename(old_path.as_path(), &new_path)?; - - // // Update DB for all parsed_files under old_path - // let old_rel_path = Self::compute_relative_path(&old_path, base_dir)?; - // // Ensure old_rel_path always ends with a slash to match prefixes correctly - // let old_prefix = if !old_rel_path.ends_with('/') { - // format!("{}/", old_rel_path) - // } else { - // old_rel_path - // }; - - // let new_prefix = if !new_relative_path.ends_with('/') { - // format!("{}/", new_relative_path) - // } else { - // new_relative_path.to_string() - // }; - - // let all_files = sqlite_manager.get_all_parsed_files()?; - // for mut pf in all_files { - // if pf.relative_path.starts_with(&old_prefix) { - // let remainder = &pf.relative_path[old_prefix.len()..]; - // pf.relative_path = format!("{}{}", new_prefix, remainder); - // sqlite_manager.update_parsed_file(&pf)?; - // } - // } - - // Ok(()) - // } - - /// Move file: effectively the same as renaming a file to a new directory. - pub fn move_file( - old_path: ShinkaiPath, - new_relative_path: &str, - base_dir: &Path, - sqlite_manager: &SqliteManager - ) -> Result<(), FileManagerError> { - Self::rename_file(old_path, new_relative_path, base_dir, sqlite_manager) - } +#[derive(Debug)] +pub struct FileInfo { + pub name: String, + pub is_directory: bool, + pub created_time: Option, + pub modified_time: Option, + pub has_embeddings: bool, +} - // /// Move folder: like rename_folder, but the new folder can be somewhere else entirely in the directory tree. 
- // pub fn move_folder( - // old_path: ShinkaiPath, - // new_relative_path: &str, - // base_dir: &Path, - // sqlite_manager: &SqliteManager - // ) -> Result<(), FileManagerError> { - // // This is essentially the same operation as rename_folder if the only difference is the path. - // Self::rename_folder(old_path, new_relative_path, base_dir, sqlite_manager) - // } - - // /// Scan a folder: recursively discover all files in a directory, and `process_file` them. - // /// Files that have not been seen before are added, changed files are re-processed, and - // /// removed files should be cleaned up (if desired). - // pub fn scan_folder( - // directory: ShinkaiPath, - // base_dir: &Path, - // sqlite_manager: &SqliteManager - // ) -> Result<(), FileManagerError> { - // if !directory.exists() { - // return Err(FileManagerError::FolderNotFoundOnFilesystem); - // } - - // let files = Self::get_files_in_directory(directory)?; - // for file_path in files { - // Self::process_file(file_path, base_dir, sqlite_manager)?; - // } - - // // Optionally, remove entries from DB that no longer exist on filesystem by comparing DB entries with filesystem. - // // This step is optional and depends on your desired behavior. - - // Ok(()) - // } - - /// Check if file is supported for embedding (placeholder). - pub fn is_supported_for_embedding(parsed_file: &ParsedFile) -> bool { - match parsed_file.original_extension.as_deref() { - Some("txt") | Some("pdf") | Some("doc") => true, - _ => false, - } - } +pub enum FileProcessingMode { + Auto, + NoParsing, + MustParse, +} - /// Process file: If not in DB, add it. If supported, generate chunks. +impl ShinkaiFileManager { + /// Process file: If not in DB, add it. If supported, generate chunks. /// If already processed, consider checking if file changed (not implemented here). 
- pub fn process_file(path: ShinkaiPath, base_dir: &Path, sqlite_manager: &SqliteManager) -> Result<(), FileManagerError> { + pub fn process_file( + path: ShinkaiPath, + base_dir: &Path, + sqlite_manager: &SqliteManager, + mode: FileProcessingMode, + ) -> Result<(), ShinkaiFsError> { let rel_path = Self::compute_relative_path(&path, base_dir)?; - let mut parsed_file = if let Some(pf) = sqlite_manager.get_parsed_file_by_rel_path(&rel_path)? { + let parsed_file = if let Some(pf) = sqlite_manager.get_parsed_file_by_rel_path(&rel_path)? { pf } else { let original_extension = path @@ -247,68 +64,77 @@ impl ShinkaiFileManager { sqlite_manager.get_parsed_file_by_rel_path(&rel_path)?.unwrap() }; - // If supported for embedding, generate chunks. - if Self::is_supported_for_embedding(&parsed_file) { - let content = fs::read_to_string(path.as_path())?; - let chunks = Self::chunk_text(&content, 1000); - - // Remove old chunks if any - sqlite_manager.remove_chunks_for_parsed_file(parsed_file.id)?; - - for (i, chunk_text) in chunks.into_iter().enumerate() { - let chunk = ShinkaiFileChunk { - chunk_id: 0, - parsed_file_id: parsed_file.id, - position: i as i64, - content: chunk_text, - }; - sqlite_manager.add_chunk(&chunk)?; + match mode { + FileProcessingMode::Auto => { + // Implement logic for Auto mode + } + FileProcessingMode::NoParsing => { + // Implement logic for NoParsing mode + } + FileProcessingMode::MustParse => { + // Implement logic for MustParse mode } - - // Embeddings can be generated and stored if needed (not implemented here). } + // TODO: Implement embedding checking with sqlite_manager + Ok(()) } - /// Compute a relative path given a file path and a base directory. - fn compute_relative_path(file_path: &ShinkaiPath, base_dir: &Path) -> Result { - let abs_file_path = file_path.as_path().canonicalize()?; - let abs_base_dir = base_dir.canonicalize()?; + /// List all files and folders in a directory with additional metadata. 
+ pub fn list_directory_contents( + path: ShinkaiPath, + sqlite_manager: &SqliteManager, + ) -> Result, ShinkaiFsError> { + let mut contents = Vec::new(); + let mut file_map = HashMap::new(); + + // Read directory contents and store in a hash map + for entry in fs::read_dir(path.as_path())? { + let entry = entry?; + let metadata = entry.metadata()?; + let file_name = entry.file_name().into_string().unwrap_or_default(); + file_map.insert(file_name.clone(), metadata.is_dir()); + + let file_info = FileInfo { + name: file_name, + is_directory: metadata.is_dir(), + created_time: metadata.created().ok(), + modified_time: metadata.modified().ok(), + has_embeddings: false, // Default to false, will update if found in DB + }; + contents.push(file_info); + } - let rel = abs_file_path - .strip_prefix(&abs_base_dir) - .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "File is not under the base directory"))?; + // Use the relative path for querying the database + let rel_path = Self::compute_relative_path(&path, path.as_path())?; + let files_with_embeddings = sqlite_manager.get_processed_files_in_directory(&rel_path)?; - Ok(rel.to_string_lossy().to_string()) - } + // Create a hash map for files with embeddings + let embeddings_map: std::collections::HashSet<_> = files_with_embeddings + .into_iter() + .map(|file| file.relative_path) + .collect(); - /// Returns the current UNIX timestamp (in seconds). - fn current_timestamp() -> i64 { - use std::time::{SystemTime, UNIX_EPOCH}; - let start = SystemTime::now(); - let since_epoch = start.duration_since(UNIX_EPOCH).unwrap(); - since_epoch.as_secs() as i64 - } + // Update the contents with embedding information + for file_info in &mut contents { + if embeddings_map.contains(&file_info.name) { + file_info.has_embeddings = true; + } + } - /// Splits text into chunks of approximately `chunk_size` characters. 
- fn chunk_text(text: &str, chunk_size: usize) -> Vec { - text.chars() - .collect::>() - .chunks(chunk_size) - .map(|c| c.iter().collect()) - .collect() + Ok(contents) } } #[cfg(test)] mod tests { use super::*; - use tempfile::{tempdir, NamedTempFile}; + use shinkai_embedding::model_type::{EmbeddingModelType, OllamaTextEmbeddingsInference}; use std::fs::{self, File}; - use std::io::Read; + use std::io::Write; use std::path::PathBuf; - use shinkai_embedding::model_type::{EmbeddingModelType, OllamaTextEmbeddingsInference}; + use tempfile::{tempdir, NamedTempFile}; fn setup_test_db() -> SqliteManager { let temp_file = NamedTempFile::new().unwrap(); @@ -320,110 +146,125 @@ mod tests { SqliteManager::new(db_path, api_url, model_type).unwrap() } + fn create_test_parsed_file(id: i64, relative_path: &str) -> ParsedFile { + ParsedFile { + id, + relative_path: relative_path.to_string(), + original_extension: None, + description: None, + source: None, + embedding_model_used: None, + keywords: None, + distribution_info: None, + created_time: None, + tags: None, + total_tokens: None, + total_characters: None, + } + } + #[test] - fn test_remove_empty_folder() { + fn test_list_directory_contents() { + let db = setup_test_db(); + + // Create a temporary directory let dir = tempdir().unwrap(); - let path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string()); + let dir_path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string()); - // Create an empty folder - fs::create_dir_all(path.as_path()).unwrap(); + // Create a subdirectory and a file inside the temporary directory + let subdir_path = dir.path().join("subdir"); + fs::create_dir(&subdir_path).unwrap(); - // Attempt to remove the empty folder - assert!(ShinkaiFileManager::remove_folder(path.clone()).is_ok()); + let file_path = dir.path().join("test_file.txt"); + let mut file = File::create(&file_path).unwrap(); + writeln!(file, "Hello, Shinkai!").unwrap(); - // Ensure the folder is removed - 
assert!(!path.exists()); - } + // Call the function to list directory contents + let contents = ShinkaiFileManager::list_directory_contents(dir_path, &db).unwrap(); - #[test] - fn test_remove_non_empty_folder() { - let dir = tempdir().unwrap(); - let path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string()); + // Check that the directory contents are correct + assert_eq!(contents.len(), 2); - // Create a folder and add a file inside it - fs::create_dir_all(path.as_path()).unwrap(); - let file_path = path.as_path().join("test_file.txt"); - File::create(&file_path).unwrap(); + let mut found_subdir = false; + let mut found_file = false; - // Attempt to remove the non-empty folder - assert!(ShinkaiFileManager::remove_folder(path.clone()).is_err()); + for entry in contents { + if entry.name == "subdir" && entry.is_directory { + found_subdir = true; + } else if entry.name == "test_file.txt" && !entry.is_directory { + found_file = true; + } + } - // Ensure the folder still exists - assert!(path.exists()); + assert!(found_subdir, "Subdirectory 'subdir' should be found."); + assert!(found_file, "File 'test_file.txt' should be found."); } #[test] - fn test_add_file() { - let dir = tempdir().unwrap(); - let path = ShinkaiPath::from_string(dir.path().join("test_file.txt").to_string_lossy().to_string()); - let data = b"Hello, Shinkai!".to_vec(); + fn test_list_directory_contents_with_db_entries() { + let db = setup_test_db(); - // Add the file - assert!(ShinkaiFileManager::add_file(path.clone(), data.clone()).is_ok()); + // Initialize the database tables + let conn = db.get_connection().unwrap(); + SqliteManager::initialize_filesystem_tables(&conn).unwrap(); - // Verify the file exists and contains the correct data - let mut file = File::open(path.as_path()).unwrap(); - let mut contents = Vec::new(); - file.read_to_end(&mut contents).unwrap(); - assert_eq!(contents, data); - } + // Add parsed files with different relative paths + let pf1 = 
create_test_parsed_file(1, "january.txt"); + let pf2 = create_test_parsed_file(2, "february.txt"); + db.add_parsed_file(&pf1).unwrap(); + db.add_parsed_file(&pf2).unwrap(); - // #[test] - // fn test_rename_file() { - // let dir = tempdir().unwrap(); - // let old_path = ShinkaiPath::from_string(dir.path().join("old_file.txt").to_string_lossy().to_string()); - // let new_relative_path = "new_file.txt"; - // let base_dir = dir.path(); - // let data = b"Hello, Shinkai!".to_vec(); - - // // Create the original file - // ShinkaiFileManager::add_file(old_path.clone(), data.clone()).unwrap(); - - // // Debugging: Verify the file was created - // assert!(old_path.exists(), "The file should exist on the filesystem after creation."); - - // // Setup the test database - // let sqlite_manager = setup_test_db(); - - // // Add the file to the database - // let rel_path = ShinkaiFileManager::compute_relative_path(&old_path, base_dir).unwrap(); - // let parsed_file = ParsedFile { - // id: 0, - // relative_path: rel_path.clone(), - // original_extension: Some("txt".to_string()), - // description: None, - // source: None, - // embedding_model_used: None, - // keywords: None, - // distribution_info: None, - // created_time: Some(ShinkaiFileManager::current_timestamp()), - // tags: None, - // total_tokens: None, - // total_characters: None, - // }; - // sqlite_manager.add_parsed_file(&parsed_file).unwrap(); - - // // Debugging: Check if the file is in the database - // let db_file = sqlite_manager.get_parsed_file_by_rel_path(&rel_path).unwrap(); - // assert!(db_file.is_some(), "The file should exist in the database before renaming."); - - // // Debugging: Print paths for verification - // println!("Old path: {:?}", old_path); - // println!("New path: {:?}", base_dir.join(new_relative_path)); - - // // Rename the file - // let rename_result = ShinkaiFileManager::rename_file(old_path.clone(), new_relative_path, base_dir, &sqlite_manager); - // assert!(rename_result.is_ok(), "Renaming the 
file should succeed: {:?}", rename_result); - - // // Verify the old file does not exist and the new file does - // let new_path = base_dir.join(new_relative_path); - // assert!(!old_path.exists(), "The old file should not exist after renaming."); - // assert!(new_path.exists(), "The new file should exist after renaming."); - - // // Verify the new file contains the correct data - // let mut file = File::open(new_path).unwrap(); - // let mut contents = Vec::new(); - // file.read_to_end(&mut contents).unwrap(); - // assert_eq!(contents, data); - // } + // Create a temporary directory + let dir = tempdir().unwrap(); + let dir_path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string()); + + // Create files in the temporary directory to match the database entries + let file_path1 = dir.path().join("january.txt"); + let mut file1 = File::create(&file_path1).unwrap(); + writeln!(file1, "January report").unwrap(); + + let file_path2 = dir.path().join("february.txt"); + let mut file2 = File::create(&file_path2).unwrap(); + writeln!(file2, "February report").unwrap(); + + // Create a file that is not in the database + let file_path3 = dir.path().join("march.txt"); + let mut file3 = File::create(&file_path3).unwrap(); + writeln!(file3, "March report").unwrap(); + + // Create a subdirectory + let subdir_path = dir.path().join("subdir"); + fs::create_dir(&subdir_path).unwrap(); + + // Call the function to list directory contents + let contents = ShinkaiFileManager::list_directory_contents(dir_path, &db).unwrap(); + + // Check that the directory contents are correct + assert_eq!(contents.len(), 4); + + let mut found_january = false; + let mut found_february = false; + let mut found_march = false; + let mut found_subdir = false; + + for entry in contents { + if entry.name == "january.txt" && !entry.is_directory { + found_january = true; + assert!(entry.has_embeddings, "File 'january.txt' should have embeddings."); + } else if entry.name == "february.txt" && 
!entry.is_directory { + found_february = true; + assert!(entry.has_embeddings, "File 'february.txt' should have embeddings."); + } else if entry.name == "march.txt" && !entry.is_directory { + found_march = true; + assert!(!entry.has_embeddings, "File 'march.txt' should not have embeddings."); + } else if entry.name == "subdir" && entry.is_directory { + found_subdir = true; + } + } + + assert!(found_january, "File 'january.txt' should be found."); + assert!(found_february, "File 'february.txt' should be found."); + assert!(found_march, "File 'march.txt' should be found."); + assert!(found_subdir, "Directory 'subdir' should be found."); + } } diff --git a/shinkai-libs/shinkai-fs/src/shinkai_file_manager_ops.rs b/shinkai-libs/shinkai-fs/src/shinkai_file_manager_ops.rs new file mode 100644 index 000000000..4ec4a1419 --- /dev/null +++ b/shinkai-libs/shinkai-fs/src/shinkai_file_manager_ops.rs @@ -0,0 +1,349 @@ +use std::fs; +use std::io; +use std::path::Path; + +use shinkai_message_primitives::shinkai_utils::shinkai_path::ShinkaiPath; +use shinkai_sqlite::SqliteManager; + +use shinkai_message_primitives::schemas::shinkai_fs::ParsedFile; + +use crate::shinkai_file_manager::ShinkaiFileManager; +use crate::shinkai_fs_error::ShinkaiFsError; + +impl ShinkaiFileManager { + /// Add a file: writes a file from `data` to a relative path under `base_dir`. + pub fn add_file(dest_path: ShinkaiPath, data: Vec) -> Result<(), ShinkaiFsError> { + // Ensure the parent directory exists + fs::create_dir_all(dest_path.as_path().parent().unwrap())?; + + // Write the data to the destination path + fs::write(dest_path.as_path(), data)?; + + Ok(()) + } + + /// Remove file: deletes file from filesystem and DB. 
+    ///
+    /// The path of the file relative to `base_dir` is the key used to look up its DB row, so it
+    /// is resolved *before* the file is deleted.
+    ///
+    /// # Errors
+    /// - `FileNotFoundOnFilesystem` if `path` does not exist.
+    /// - `Io` if `path` cannot be resolved under `base_dir` or the deletion itself fails.
+    /// - `FileNotFoundInDatabase` if no row matches the relative path; the file has already
+    ///   been deleted from disk at that point.
+    /// - Errors returned by the DB calls are propagated.
+    pub fn remove_file(
+        path: ShinkaiPath,
+        base_dir: &Path,
+        sqlite_manager: &SqliteManager,
+    ) -> Result<(), ShinkaiFsError> {
+        // Check if file exists on filesystem
+        if !path.exists() {
+            return Err(ShinkaiFsError::FileNotFoundOnFilesystem);
+        }
+
+        // Resolve the DB key while the file still exists: `compute_relative_path`
+        // canonicalizes the path, which fails once the file is gone.
+        let rel_path = Self::compute_relative_path(&path, base_dir)?;
+
+        // Remove from filesystem
+        fs::remove_file(path.as_path())?;
+
+        // Update DB
+        if let Some(parsed_file) = sqlite_manager.get_parsed_file_by_rel_path(&rel_path)? {
+            sqlite_manager.remove_parsed_file(parsed_file.id)?;
+        } else {
+            return Err(ShinkaiFsError::FileNotFoundInDatabase);
+        }
+
+        Ok(())
+    }
+
+    /// Create folder: just create a directory on the filesystem.
+    /// No DB changes since we don't store directories in DB.
+    ///
+    /// Missing parent directories are created as well (`fs::create_dir_all`).
+    pub fn create_folder(path: ShinkaiPath) -> Result<(), ShinkaiFsError> {
+        fs::create_dir_all(path.as_path())?;
+        Ok(())
+    }
+
+    /// Remove folder: remove an *empty* directory from the filesystem.
+    /// This does not directly affect the DB, but any files in that folder
+    /// should have been removed first. If not, scanning the DB for files
+    /// might be necessary.
+    ///
+    /// # Errors
+    /// - `FolderNotFoundOnFilesystem` if `path` does not exist.
+    /// - `Io` if the folder still contains entries, or if reading/removing it fails.
+    pub fn remove_folder(path: ShinkaiPath) -> Result<(), ShinkaiFsError> {
+        if !path.exists() {
+            return Err(ShinkaiFsError::FolderNotFoundOnFilesystem);
+        }
+
+        // Refuse to delete a folder that still has entries, and say so explicitly
+        // instead of claiming the folder is missing.
+        if fs::read_dir(path.as_path())?.next().is_some() {
+            return Err(ShinkaiFsError::Io(io::Error::new(
+                io::ErrorKind::Other,
+                "Folder is not empty",
+            )));
+        }
+
+        fs::remove_dir(path.as_path())?;
+        Ok(())
+    }
+
+    /// Rename file: rename a file in the filesystem and update `ParsedFile.relative_path` in DB.
+    ///
+    /// `new_relative_path` is interpreted relative to `base_dir`; missing parent directories of
+    /// the destination are created.
+    ///
+    /// # Errors
+    /// - `FileNotFoundOnFilesystem` if `old_path` does not exist.
+    /// - `Io` if `old_path` cannot be resolved under `base_dir`, or creating the destination
+    ///   directories / renaming fails.
+    /// - `FileNotFoundInDatabase` if the old relative path has no DB row; the file has already
+    ///   been renamed on disk at that point.
+    /// - Errors returned by the DB calls are propagated.
+    pub fn rename_file(
+        old_path: ShinkaiPath,
+        new_relative_path: &str,
+        base_dir: &Path,
+        sqlite_manager: &SqliteManager,
+    ) -> Result<(), ShinkaiFsError> {
+        if !old_path.exists() {
+            return Err(ShinkaiFsError::FileNotFoundOnFilesystem);
+        }
+
+        // Resolve the DB key while the old file still exists: `compute_relative_path`
+        // canonicalizes the path, which fails once the file has been moved away.
+        let old_rel_path = Self::compute_relative_path(&old_path, base_dir)?;
+
+        let new_path = base_dir.join(new_relative_path);
+        // A destination without a parent component needs no directory to be created.
+        if let Some(parent) = new_path.parent() {
+            fs::create_dir_all(parent)?;
+        }
+        fs::rename(old_path.as_path(), &new_path)?;
+
+        // Update DB
+        if let Some(mut parsed_file) = sqlite_manager.get_parsed_file_by_rel_path(&old_rel_path)? {
+            parsed_file.relative_path = new_relative_path.to_string();
+            sqlite_manager.update_parsed_file(&parsed_file)?;
+        } else {
+            // File not found in DB is not necessarily an error if we just discovered it,
+            // but let's return an error for consistency.
+            return Err(ShinkaiFsError::FileNotFoundInDatabase);
+        }
+
+        Ok(())
+    }
+
+    // /// Rename folder: rename a directory in the filesystem and update all `ParsedFile.relative_path`
+    // /// entries that are inside this folder.
+    // pub fn rename_folder(
+    //     old_path: ShinkaiPath,
+    //     new_relative_path: &str,
+    //     base_dir: &Path,
+    //     sqlite_manager: &SqliteManager
+    // ) -> Result<(), FileManagerError> {
+    //     if !old_path.exists() {
+    //         return Err(FileManagerError::FolderNotFoundOnFilesystem);
+    //     }
+
+    //     let new_path = base_dir.join(new_relative_path);
+    //     fs::create_dir_all(new_path.parent().unwrap())?;
+    //     fs::rename(old_path.as_path(), &new_path)?;
+
+    //     // Update DB for all parsed_files under old_path
+    //     let old_rel_path = Self::compute_relative_path(&old_path, base_dir)?;
+    //     // Ensure old_rel_path always ends with a slash to match prefixes correctly
+    //     let old_prefix = if !old_rel_path.ends_with('/') {
+    //         format!("{}/", old_rel_path)
+    //     } else {
+    //         old_rel_path
+    //     };
+
+    //     let new_prefix = if !new_relative_path.ends_with('/') {
+    //         format!("{}/", new_relative_path)
+    //     } else {
+    //         new_relative_path.to_string()
+    //     };
+
+    //     let all_files = sqlite_manager.get_all_parsed_files()?;
+    //     for mut pf in all_files {
+    //         if pf.relative_path.starts_with(&old_prefix) {
+    //             let remainder = &pf.relative_path[old_prefix.len()..];
+    //             pf.relative_path = format!("{}{}", new_prefix, remainder);
+    //             sqlite_manager.update_parsed_file(&pf)?;
+    //         }
+    //     }
+
+    //     Ok(())
+    // }
+
+    /// Move file: effectively the same as renaming a file to a new directory.
+    ///
+    /// Thin wrapper that forwards all four arguments unchanged to `rename_file`, so the
+    /// filesystem and DB effects, and every error it can return, are exactly those of
+    /// `rename_file`.
+    pub fn move_file(
+        old_path: ShinkaiPath,
+        new_relative_path: &str,
+        base_dir: &Path,
+        sqlite_manager: &SqliteManager,
+    ) -> Result<(), ShinkaiFsError> {
+        Self::rename_file(old_path, new_relative_path, base_dir, sqlite_manager)
+    }
+
+    // /// Move folder: like rename_folder, but the new folder can be somewhere else entirely in the directory tree.
+    // pub fn move_folder(
+    //     old_path: ShinkaiPath,
+    //     new_relative_path: &str,
+    //     base_dir: &Path,
+    //     sqlite_manager: &SqliteManager
+    // ) -> Result<(), FileManagerError> {
+    //     // This is essentially the same operation as rename_folder if the only difference is the path.
+    //     Self::rename_folder(old_path, new_relative_path, base_dir, sqlite_manager)
+    // }
+
+    // /// Scan a folder: recursively discover all files in a directory, and `process_file` them.
+    // /// Files that have not been seen before are added, changed files are re-processed, and
+    // /// removed files should be cleaned up (if desired).
+    // pub fn scan_folder(
+    //     directory: ShinkaiPath,
+    //     base_dir: &Path,
+    //     sqlite_manager: &SqliteManager
+    // ) -> Result<(), FileManagerError> {
+    //     if !directory.exists() {
+    //         return Err(FileManagerError::FolderNotFoundOnFilesystem);
+    //     }
+
+    //     let files = Self::get_files_in_directory(directory)?;
+    //     for file_path in files {
+    //         Self::process_file(file_path, base_dir, sqlite_manager)?;
+    //     }
+
+    //     // Optionally, remove entries from DB that no longer exist on filesystem by comparing DB entries with filesystem.
+    //     // This step is optional and depends on your desired behavior.
+
+    //     Ok(())
+    // }
+
+    /// Check if file is supported for embedding (placeholder).
+    ///
+    /// Returns `true` only when `original_extension` is exactly `txt`, `pdf` or `doc`.
+    /// The comparison is case-sensitive and expects no leading dot; a missing extension
+    /// yields `false`.
+    pub fn is_supported_for_embedding(parsed_file: &ParsedFile) -> bool {
+        matches!(
+            parsed_file.original_extension.as_deref(),
+            Some("txt" | "pdf" | "doc")
+        )
+    }
+
+    /// Compute a relative path given a file path and a base directory.
+    ///
+    /// Both paths are canonicalized first, so each must exist on the filesystem when this is
+    /// called. The result is the lossy UTF-8 rendering of `file_path` with the `base_dir`
+    /// prefix stripped.
+    ///
+    /// # Errors
+    /// Returns an `Io` error if either path cannot be canonicalized, or an `InvalidInput`
+    /// `Io` error if `file_path` is not located under `base_dir`.
+    pub fn compute_relative_path(file_path: &ShinkaiPath, base_dir: &Path) -> Result<String, ShinkaiFsError> {
+        let abs_file_path = file_path.as_path().canonicalize()?;
+        let abs_base_dir = base_dir.canonicalize()?;
+
+        let rel = abs_file_path
+            .strip_prefix(&abs_base_dir)
+            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "File is not under the base directory"))?;
+
+        Ok(rel.to_string_lossy().into_owned())
+    }
+
+    /// Returns the current UNIX timestamp (in seconds).
+    ///
+    /// # Panics
+    /// Panics if the system clock reports a time before the UNIX epoch.
+    pub fn current_timestamp() -> i64 {
+        use std::time::{SystemTime, UNIX_EPOCH};
+        let since_epoch = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .expect("system clock is set before the UNIX epoch");
+        since_epoch.as_secs() as i64
+    }
+
+    /// Splits text into consecutive chunks of `chunk_size` characters (`char`s, not bytes).
+    ///
+    /// Every chunk holds exactly `chunk_size` characters except possibly the last one.
+    /// Empty input yields an empty vector. A `chunk_size` of zero cannot drive the split
+    /// (`slice::chunks` panics on it), so the text is returned unsplit as a single chunk.
+    pub fn chunk_text(text: &str, chunk_size: usize) -> Vec<String> {
+        if chunk_size == 0 {
+            return if text.is_empty() {
+                Vec::new()
+            } else {
+                vec![text.to_string()]
+            };
+        }
+
+        text.chars()
+            .collect::<Vec<char>>()
+            .chunks(chunk_size)
+            .map(|c| c.iter().collect())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use shinkai_embedding::model_type::{EmbeddingModelType, OllamaTextEmbeddingsInference};
+    use std::fs::{self, File};
+    use std::io::Read;
+    use std::path::PathBuf;
+    use tempfile::{tempdir, NamedTempFile};
+
+    #[test]
+    fn test_remove_empty_folder() {
+        let dir = tempdir().unwrap();
+        let path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string());
+
+        // Create an empty folder
+        fs::create_dir_all(path.as_path()).unwrap();
+
+        // Attempt to remove the empty folder
+        assert!(ShinkaiFileManager::remove_folder(path.clone()).is_ok());
+
+        // Ensure the folder is removed
+        assert!(!path.exists());
+    }
+
+    #[test]
+    fn test_remove_non_empty_folder() {
+        let dir = tempdir().unwrap();
+        let path = ShinkaiPath::from_string(dir.path().to_string_lossy().to_string());
+
+        // Create a folder and add a file inside it
+        fs::create_dir_all(path.as_path()).unwrap();
+        let file_path = path.as_path().join("test_file.txt");
+        File::create(&file_path).unwrap();
+
+        // Attempt to remove the non-empty folder
+        assert!(ShinkaiFileManager::remove_folder(path.clone()).is_err());
+
+        // Ensure the folder still exists
+        assert!(path.exists());
+    }
+
+    #[test]
+    fn test_add_file() {
+        let dir = tempdir().unwrap();
+        let path = ShinkaiPath::from_string(dir.path().join("test_file.txt").to_string_lossy().to_string());
+        let data = b"Hello, Shinkai!".to_vec();
+
+        // Add the file
+        assert!(ShinkaiFileManager::add_file(path.clone(), data.clone()).is_ok());
+
+        // Verify the file exists and contains the correct data
+        let mut file = File::open(path.as_path()).unwrap();
+        let mut contents = Vec::new();
+        file.read_to_end(&mut contents).unwrap();
+        assert_eq!(contents, data);
+    }
+
+    #[test]
+    fn test_chunk_text() {
+        assert_eq!(ShinkaiFileManager::chunk_text("abcde", 2), vec!["ab", "cd", "e"]);
+        assert!(ShinkaiFileManager::chunk_text("", 3).is_empty());
+
+        // A zero chunk size must not panic; the text comes back unsplit.
+        assert_eq!(ShinkaiFileManager::chunk_text("abc", 0), vec!["abc"]);
+    }
+
+    // #[test]
+    // fn test_rename_file() {
+    //     let dir = tempdir().unwrap();
+    //     let old_path = ShinkaiPath::from_string(dir.path().join("old_file.txt").to_string_lossy().to_string());
+    //     let new_relative_path = "new_file.txt";
+    //     let base_dir = dir.path();
+    //     let data = b"Hello, Shinkai!".to_vec();
+
+    //     // Create the original file
+    //     ShinkaiFileManager::add_file(old_path.clone(), data.clone()).unwrap();
+
+    //     // Debugging: Verify the file was created
+    //     assert!(old_path.exists(), "The file should exist on the filesystem after creation.");
+
+    //     // Setup the test database
+    //     let sqlite_manager = setup_test_db();
+
+    //     // Add the file to the database
+    //     let rel_path = ShinkaiFileManager::compute_relative_path(&old_path, base_dir).unwrap();
+    //     let parsed_file = ParsedFile {
+    //         id: 0,
+    //         relative_path: rel_path.clone(),
+    //         original_extension: Some("txt".to_string()),
+    //         description: None,
+    //         source: None,
+    //         embedding_model_used: None,
+    //         keywords: None,
+    //         distribution_info: None,
+    //         created_time: Some(ShinkaiFileManager::current_timestamp()),
+    //         tags: None,
+    //         total_tokens: None,
+    //         total_characters: None,
+    //     };
+    //     sqlite_manager.add_parsed_file(&parsed_file).unwrap();
+
+    //     // Debugging: Check if the file is in the database
+    //     let db_file =
sqlite_manager.get_parsed_file_by_rel_path(&rel_path).unwrap(); + // assert!(db_file.is_some(), "The file should exist in the database before renaming."); + + // // Debugging: Print paths for verification + // println!("Old path: {:?}", old_path); + // println!("New path: {:?}", base_dir.join(new_relative_path)); + + // // Rename the file + // let rename_result = ShinkaiFileManager::rename_file(old_path.clone(), new_relative_path, base_dir, &sqlite_manager); + // assert!(rename_result.is_ok(), "Renaming the file should succeed: {:?}", rename_result); + + // // Verify the old file does not exist and the new file does + // let new_path = base_dir.join(new_relative_path); + // assert!(!old_path.exists(), "The old file should not exist after renaming."); + // assert!(new_path.exists(), "The new file should exist after renaming."); + + // // Verify the new file contains the correct data + // let mut file = File::open(new_path).unwrap(); + // let mut contents = Vec::new(); + // file.read_to_end(&mut contents).unwrap(); + // assert_eq!(contents, data); + // } +} diff --git a/shinkai-libs/shinkai-fs/src/shinkai_fs_error.rs b/shinkai-libs/shinkai-fs/src/shinkai_fs_error.rs index 4841bed63..87abcb1d7 100644 --- a/shinkai-libs/shinkai-fs/src/shinkai_fs_error.rs +++ b/shinkai-libs/shinkai-fs/src/shinkai_fs_error.rs @@ -1,6 +1,8 @@ +use shinkai_sqlite::errors::SqliteManagerError; +use std::io; use thiserror::Error; -#[derive(Error, Debug, PartialEq)] +#[derive(Error, Debug)] pub enum ShinkaiFsError { #[error("File not found")] FileNotFound, @@ -12,6 +14,18 @@ pub enum ShinkaiFsError { RequestFailed(String), #[error("Failed to generate embeddings")] FailedEmbeddingGeneration(String), + #[error("IO error occurred: {0}")] + Io(#[from] io::Error), + #[error("Database error: {0}")] + Database(#[from] SqliteManagerError), + #[error("File not found in database")] + FileNotFoundInDatabase, + #[error("File not found on filesystem")] + FileNotFoundOnFilesystem, + #[error("Folder not 
found on filesystem")]
+    FolderNotFoundOnFilesystem,
+    #[error("Cannot move folder into itself")]
+    InvalidFolderMove,
 }
 
 impl From for ShinkaiFsError {
diff --git a/shinkai-libs/shinkai-sqlite/src/file_system.rs b/shinkai-libs/shinkai-sqlite/src/file_system.rs
index 5627afcde..1a14b9a86 100644
--- a/shinkai-libs/shinkai-sqlite/src/file_system.rs
+++ b/shinkai-libs/shinkai-sqlite/src/file_system.rs
@@ -305,6 +305,50 @@ impl SqliteManager {
         tx.commit()?;
         Ok(())
     }
+
+    /// Returns the parsed files located directly inside `directory_path`.
+    ///
+    /// `directory_path` is matched as a literal prefix of `relative_path` and should end with `/`;
+    /// rows with a further `/` after the prefix (files in nested subdirectories) are excluded.
+    pub fn get_processed_files_in_directory(&self, directory_path: &str) -> Result<Vec<ParsedFile>, SqliteManagerError> {
+        let conn = self.get_connection()?;
+        let mut stmt = conn.prepare(
+            "SELECT id, relative_path, original_extension, description, source, embedding_model_used, keywords,
+                    distribution_info, created_time, tags, total_tokens, total_characters
+             FROM parsed_files
+             WHERE relative_path LIKE ? ESCAPE '\\' AND relative_path NOT LIKE ? ESCAPE '\\'",
+        )?;
+
+        // Escape the LIKE metacharacters so a directory name containing `%` or `_`
+        // (e.g. `old_stuff/`) only matches itself instead of acting as a wildcard.
+        let escaped_dir = directory_path
+            .replace('\\', "\\\\")
+            .replace('%', "\\%")
+            .replace('_', "\\_");
+        let like_pattern = format!("{}%", escaped_dir);
+        let not_like_pattern = format!("{}%/%", escaped_dir);
+
+        let rows = stmt.query_map(params![like_pattern, not_like_pattern], |row| {
+            Ok(ParsedFile {
+                id: row.get(0)?,
+                relative_path: row.get(1)?,
+                original_extension: row.get(2)?,
+                description: row.get(3)?,
+                source: row.get(4)?,
+                embedding_model_used: row.get(5)?,
+                keywords: row.get(6)?,
+                distribution_info: row.get(7)?,
+                created_time: row.get(8)?,
+                tags: row.get(9)?,
+                total_tokens: row.get(10)?,
+                total_characters: row.get(11)?,
+            })
+        })?;
+
+        // Stop at the first row that fails to decode and propagate that error.
+        let files = rows.collect::<Result<Vec<_>, _>>()?;
+        Ok(files)
+    }
 }
 
 #[cfg(test)]
@@ -427,4 +471,29 @@ mod tests {
         let unchanged_pf3 = db.get_parsed_file_by_rel_path("docs/reports/old_stuff/misc.txt").unwrap().unwrap();
         assert_eq!(unchanged_pf3.relative_path, "docs/reports/old_stuff/misc.txt");
     }
+
+    #[test]
+    fn test_get_files_in_directory() {
+        let db = setup_test_db();
+
+        // Add parsed files with different relative paths
+        let pf1 = create_test_parsed_file(1, "docs/reports/2024/january.txt");
+        let pf2 = create_test_parsed_file(2, "docs/reports/2024/february.txt");
+        let pf3 = create_test_parsed_file(3, "docs/reports/2024/march/summary.txt");
+        let pf4 = create_test_parsed_file(4, "docs/reports/old_stuff/misc.txt");
+        db.add_parsed_file(&pf1).unwrap();
+        db.add_parsed_file(&pf2).unwrap();
+        db.add_parsed_file(&pf3).unwrap();
+        db.add_parsed_file(&pf4).unwrap();
+
+        // Retrieve files directly under "docs/reports/2024/"
+        let files_in_directory = db.get_processed_files_in_directory("docs/reports/2024/").unwrap();
+
+        // Check that only pf1 and pf2 are returned
+        assert_eq!(files_in_directory.len(), 2);
+        assert!(files_in_directory.iter().any(|pf| pf.relative_path == "docs/reports/2024/january.txt"));
+        assert!(files_in_directory.iter().any(|pf| pf.relative_path == "docs/reports/2024/february.txt"));
+        assert!(!files_in_directory.iter().any(|pf| pf.relative_path == "docs/reports/2024/march/summary.txt"));
+        assert!(!files_in_directory.iter().any(|pf| pf.relative_path == "docs/reports/old_stuff/misc.txt"));
+    }
 }