From c2eafd072175d19275481f4dc5fed3bf32b9f88f Mon Sep 17 00:00:00 2001 From: Robert Kornacki <11645932+robkorn@users.noreply.github.com> Date: Fri, 8 Sep 2023 18:52:20 +0200 Subject: [PATCH] Added retrieval depth to retdatachunk --- .../shinkai-vector-resources/src/document_resource.rs | 5 ++++- .../shinkai-vector-resources/src/map_resource.rs | 2 ++ .../shinkai-vector-resources/src/vector_resource.rs | 10 +++++++--- .../src/vector_resource_types.rs | 1 + .../tests/vector_resource_tests.rs | 8 ++++++++ src/db/db_resources.rs | 2 +- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/shinkai-libs/shinkai-vector-resources/src/document_resource.rs b/shinkai-libs/shinkai-vector-resources/src/document_resource.rs index bd9f21165..93122ee0c 100644 --- a/shinkai-libs/shinkai-vector-resources/src/document_resource.rs +++ b/shinkai-libs/shinkai-vector-resources/src/document_resource.rs @@ -177,6 +177,7 @@ impl DocumentVectorResource { chunk: chunk.clone(), score: 0.00, resource_pointer: self.get_resource_pointer(), + retrieval_depth: 0, }); } } @@ -184,7 +185,8 @@ impl DocumentVectorResource { Ok(chunks) } - /// Returns all DataChunks with a matching key/value pair in the metadata hashmap + /// Returns all DataChunks with a matching key/value pair in the metadata hashmap. + /// Does not perform any traversal. pub fn metadata_search( &self, metadata_key: &str, @@ -199,6 +201,7 @@ impl DocumentVectorResource { chunk: chunk.clone(), score: 0.00, resource_pointer: self.get_resource_pointer(), + retrieval_depth: 0, }), _ => (), } diff --git a/shinkai-libs/shinkai-vector-resources/src/map_resource.rs b/shinkai-libs/shinkai-vector-resources/src/map_resource.rs index e0131f929..d45172cd7 100644 --- a/shinkai-libs/shinkai-vector-resources/src/map_resource.rs +++ b/shinkai-libs/shinkai-vector-resources/src/map_resource.rs @@ -135,6 +135,7 @@ impl MapVectorResource { } /// Returns all DataChunks with a matching key/value pair in the metadata hashmap + /// Does not perform any traversal. pub fn metadata_search( &self, metadata_key: &str, @@ -149,6 +150,7 @@ impl MapVectorResource { chunk: chunk.clone(), score: 0.00, resource_pointer: self.get_resource_pointer(), + retrieval_depth: 0, }), _ => (), } diff --git a/shinkai-libs/shinkai-vector-resources/src/vector_resource.rs b/shinkai-libs/shinkai-vector-resources/src/vector_resource.rs index 766fea34b..e322360a7 100644 --- a/shinkai-libs/shinkai-vector-resources/src/vector_resource.rs +++ b/shinkai-libs/shinkai-vector-resources/src/vector_resource.rs @@ -13,7 +13,7 @@ pub use crate::vector_resource_types::*; #[derive(Debug, Clone, PartialEq)] pub enum TraversalMethod { /// Efficiently only goes deeper into Vector Resources if they are the highest scored DataChunks at their level. - /// Will go infinitely deep until hitting a level where no BaseVectorResources are parrt of the highest scored. + /// Will go infinitely deep until hitting a level where no BaseVectorResources are part of the highest scored. Efficient, /// Efficiently traverses until (and including) the specified depth is hit (or until there are no more levels to go). /// Will return BaseVectorResource DataChunks if they are the highest scored at the specified depth. @@ -223,6 +223,7 @@ pub trait VectorResource { chunk: data_chunk, score: 0.0, resource_pointer, + retrieval_depth: 0, }; matching_data_chunks.push(retrieved_data_chunk); } @@ -273,6 +274,7 @@ pub trait VectorResource { chunk: chunk.clone(), score, resource_pointer: self.get_resource_pointer(), + retrieval_depth: depth, }; current_level_results.push(ret_chunk); continue; @@ -341,6 +343,7 @@ pub trait VectorResource { chunk: chunk.clone(), score, resource_pointer: self.get_resource_pointer(), + retrieval_depth: depth, }); } } @@ -349,6 +352,7 @@ pub trait VectorResource { /// Performs a vector search using a query embedding and returns /// the most similar data chunks within a specific range. + /// Automatically uses Efficient Traversal. /// /// * `tolerance_range` - A float between 0 and 1, inclusive, that /// determines the range of acceptable similarity scores as a percentage @@ -361,14 +365,14 @@ pub trait VectorResource { let top_similarity_score = results.first().map_or(0.0, |ret_chunk| ret_chunk.score); // Find the range of acceptable similarity scores - self.vector_search_tolerance_ranged_score(query, tolerance_range, top_similarity_score) + self._vector_search_tolerance_ranged_score(query, tolerance_range, top_similarity_score) } /// Performs a vector search using a query embedding and returns /// the most similar data chunks within a specific range of the provided top similarity score. /// /// * `top_similarity_score` - A float that represents the top similarity score. - fn vector_search_tolerance_ranged_score( + fn _vector_search_tolerance_ranged_score( &self, query: Embedding, tolerance_range: f32, diff --git a/shinkai-libs/shinkai-vector-resources/src/vector_resource_types.rs b/shinkai-libs/shinkai-vector-resources/src/vector_resource_types.rs index 7e574e110..0ea45bf3c 100644 --- a/shinkai-libs/shinkai-vector-resources/src/vector_resource_types.rs +++ b/shinkai-libs/shinkai-vector-resources/src/vector_resource_types.rs @@ -23,6 +23,7 @@ pub struct RetrievedDataChunk { pub chunk: DataChunk, pub score: f32, pub resource_pointer: VectorResourcePointer, + pub retrieval_depth: u64, } impl RetrievedDataChunk { diff --git a/shinkai-libs/shinkai-vector-resources/tests/vector_resource_tests.rs b/shinkai-libs/shinkai-vector-resources/tests/vector_resource_tests.rs index 994091658..6ad08d07a 100644 --- a/shinkai-libs/shinkai-vector-resources/tests/vector_resource_tests.rs +++ b/shinkai-libs/shinkai-vector-resources/tests/vector_resource_tests.rs @@ -201,6 +201,9 @@ fn test_manual_resource_vector_search() { assert_eq!(fact6, res[0].chunk.get_data_string().unwrap()); + // + // Traversal Tests + // // Perform UntilDepth(0) traversal to ensure it is working properly, assert the dog fact1 cant be found let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 5, &TraversalMethod::UntilDepth(0)); assert_ne!(fact1, res[0].chunk.get_data_string().unwrap()); @@ -218,6 +221,11 @@ fn test_manual_resource_vector_search() { // Perform UntilDepth(2) traversal to ensure it is working properly, assert dog fact1 is found at the correct depth let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 5, &TraversalMethod::UntilDepth(2)); assert_eq!(DataContent::Data(fact1.to_string()), res[0].chunk.data); + + // Perform Exhaustive traversal to ensure it is working properly, assert dog fact1 is found at the correct depth + // By requesting only 1 result, Efficient traversal does not go deeper, while Exhaustive makes it all the way to the bottom + let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 1, &TraversalMethod::Exhaustive); + assert_eq!(DataContent::Data(fact1.to_string()), res[0].chunk.data); } #[test] diff --git a/src/db/db_resources.rs b/src/db/db_resources.rs index 884d2e501..406bc29e6 100644 --- a/src/db/db_resources.rs +++ b/src/db/db_resources.rs @@ -229,7 +229,7 @@ impl ShinkaiDB { let resources = self.vector_search_resources(query.clone(), num_of_resources, profile)?; let mut final_chunks = Vec::new(); for resource in resources { - let results = resource.as_trait_object().vector_search_tolerance_ranged_score( + let results = resource.as_trait_object()._vector_search_tolerance_ranged_score( query.clone(), tolerance_range, top_chunk.score,