Skip to content

Commit

Permalink
Added retrieval depth to retdatachunk
Browse files Browse the repository at this point in the history
  • Loading branch information
robkorn committed Sep 8, 2023
1 parent feb92eb commit c2eafd0
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,16 @@ impl DocumentVectorResource {
chunk: chunk.clone(),
score: 0.00,
resource_pointer: self.get_resource_pointer(),
retrieval_depth: 0,
});
}
}

Ok(chunks)
}

/// Returns all DataChunks with a matching key/value pair in the metadata hashmap
/// Returns all DataChunks with a matching key/value pair in the metadata hashmap.
/// Does not perform any traversal.
pub fn metadata_search(
&self,
metadata_key: &str,
Expand All @@ -199,6 +201,7 @@ impl DocumentVectorResource {
chunk: chunk.clone(),
score: 0.00,
resource_pointer: self.get_resource_pointer(),
retrieval_depth: 0,
}),
_ => (),
}
Expand Down
2 changes: 2 additions & 0 deletions shinkai-libs/shinkai-vector-resources/src/map_resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ impl MapVectorResource {
}

/// Returns all DataChunks with a matching key/value pair in the metadata hashmap.
/// Does not perform any traversal.
pub fn metadata_search(
&self,
metadata_key: &str,
Expand All @@ -149,6 +150,7 @@ impl MapVectorResource {
chunk: chunk.clone(),
score: 0.00,
resource_pointer: self.get_resource_pointer(),
retrieval_depth: 0,
}),
_ => (),
}
Expand Down
10 changes: 7 additions & 3 deletions shinkai-libs/shinkai-vector-resources/src/vector_resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub use crate::vector_resource_types::*;
#[derive(Debug, Clone, PartialEq)]
pub enum TraversalMethod {
/// Efficiently only goes deeper into Vector Resources if they are the highest scored DataChunks at their level.
/// Will go infinitely deep until hitting a level where no BaseVectorResources are parrt of the highest scored.
/// Will go infinitely deep until hitting a level where no BaseVectorResources are part of the highest scored.
Efficient,
/// Efficiently traverses until (and including) the specified depth is hit (or until there are no more levels to go).
/// Will return BaseVectorResource DataChunks if they are the highest scored at the specified depth.
Expand Down Expand Up @@ -223,6 +223,7 @@ pub trait VectorResource {
chunk: data_chunk,
score: 0.0,
resource_pointer,
retrieval_depth: 0,
};
matching_data_chunks.push(retrieved_data_chunk);
}
Expand Down Expand Up @@ -273,6 +274,7 @@ pub trait VectorResource {
chunk: chunk.clone(),
score,
resource_pointer: self.get_resource_pointer(),
retrieval_depth: depth,
};
current_level_results.push(ret_chunk);
continue;
Expand Down Expand Up @@ -341,6 +343,7 @@ pub trait VectorResource {
chunk: chunk.clone(),
score,
resource_pointer: self.get_resource_pointer(),
retrieval_depth: depth,
});
}
}
Expand All @@ -349,6 +352,7 @@ pub trait VectorResource {

/// Performs a vector search using a query embedding and returns
/// the most similar data chunks within a specific range.
/// Automatically uses Efficient Traversal.
///
/// * `tolerance_range` - A float between 0 and 1, inclusive, that
/// determines the range of acceptable similarity scores as a percentage
Expand All @@ -361,14 +365,14 @@ pub trait VectorResource {
let top_similarity_score = results.first().map_or(0.0, |ret_chunk| ret_chunk.score);

// Find the range of acceptable similarity scores
self.vector_search_tolerance_ranged_score(query, tolerance_range, top_similarity_score)
self._vector_search_tolerance_ranged_score(query, tolerance_range, top_similarity_score)
}

/// Performs a vector search using a query embedding and returns
/// the most similar data chunks within a specific range of the provided top similarity score.
///
/// * `top_similarity_score` - A float that represents the top similarity score.
fn vector_search_tolerance_ranged_score(
fn _vector_search_tolerance_ranged_score(
&self,
query: Embedding,
tolerance_range: f32,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub struct RetrievedDataChunk {
pub chunk: DataChunk,
pub score: f32,
pub resource_pointer: VectorResourcePointer,
pub retrieval_depth: u64,
}

impl RetrievedDataChunk {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ fn test_manual_resource_vector_search() {

assert_eq!(fact6, res[0].chunk.get_data_string().unwrap());

//
// Traversal Tests
//
// Perform UntilDepth(0) traversal to ensure it is working properly, assert the dog fact1 can't be found
let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 5, &TraversalMethod::UntilDepth(0));
assert_ne!(fact1, res[0].chunk.get_data_string().unwrap());
Expand All @@ -218,6 +221,11 @@ fn test_manual_resource_vector_search() {
// Perform UntilDepth(2) traversal to ensure it is working properly, assert dog fact1 is found at the correct depth
let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 5, &TraversalMethod::UntilDepth(2));
assert_eq!(DataContent::Data(fact1.to_string()), res[0].chunk.data);

// Perform Exhaustive traversal to ensure it is working properly, assert dog fact1 is found at the correct depth
// By requesting only 1 result, Efficient traversal does not go deeper, while Exhaustive makes it all the way to the bottom
let res = fruit_doc.vector_search_with_traversal(query_embedding1.clone(), 1, &TraversalMethod::Exhaustive);
assert_eq!(DataContent::Data(fact1.to_string()), res[0].chunk.data);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/db/db_resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ impl ShinkaiDB {
let resources = self.vector_search_resources(query.clone(), num_of_resources, profile)?;
let mut final_chunks = Vec::new();
for resource in resources {
let results = resource.as_trait_object().vector_search_tolerance_ranged_score(
let results = resource.as_trait_object()._vector_search_tolerance_ranged_score(
query.clone(),
tolerance_range,
top_chunk.score,
Expand Down

0 comments on commit c2eafd0

Please sign in to comment.