Skip to content

Commit

Permalink
Add effective_search_ratio to vectorstore
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasonjo committed Dec 6, 2024
1 parent 7b6f4ef commit 4aae0b9
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
14 changes: 11 additions & 3 deletions libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,17 @@ def _get_search_index_query(
if index_type == IndexType.NODE:
if search_type == SearchType.VECTOR:
return (
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
"CALL db.index.vector.queryNodes($index, $k * $ef, $embedding) "
"YIELD node, score "
"WITH node, score LIMIT $k "
)
elif search_type == SearchType.HYBRID:
call_prefix = "CALL () { " if neo4j_version_is_5_23_or_above else "CALL { "

query_body = (
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
"CALL db.index.vector.queryNodes($index, $k * $ef, $embedding) "
"YIELD node, score "
"WITH node, score LIMIT $k "
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
"UNWIND nodes AS n "
"RETURN n.node AS node, (n.score / max) AS score UNION "
Expand All @@ -117,8 +119,9 @@ def _get_search_index_query(
raise ValueError(f"Unsupported SearchType: {search_type}")
else:
return (
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
"CALL db.index.vector.queryRelationships($index, $k * $ef, $embedding) "
"YIELD relationship, score "
"WITH relationship, score LIMIT $k "
)


Expand Down Expand Up @@ -461,6 +464,8 @@ class Neo4jVector(VectorStore):
'NODE' or 'RELATIONSHIP'
pre_delete_collection: If True, will delete existing data if it exists.
(default: False). Useful for testing.
effective_search_ratio: Multiplier applied to k to size the candidate pool
requested from the vector index before results are limited to k, balancing
query accuracy against performance. (default: 1)
Example:
.. code-block:: python
Expand Down Expand Up @@ -504,6 +509,7 @@ def __init__(
relevance_score_fn: Optional[Callable[[float], float]] = None,
index_type: IndexType = DEFAULT_INDEX_TYPE,
graph: Optional[Neo4jGraph] = None,
effective_search_ratio: int = 1,
) -> None:
try:
import neo4j
Expand Down Expand Up @@ -587,6 +593,7 @@ def __init__(
self.retrieval_query = retrieval_query
self.search_type = search_type
self._index_type = index_type
self.effective_search_ratio = effective_search_ratio
# Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo"))

Expand Down Expand Up @@ -1154,6 +1161,7 @@ def similarity_search_with_score_by_vector(
"embedding": embedding,
"keyword_index": self.keyword_index_name,
"query": remove_lucene_chars(kwargs["query"]),
"ef": self.effective_search_ratio,
**params,
**filter_params,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ def test_hybrid_score_normalization() -> None:
"index": "vector",
"k": 1,
"embedding": FakeEmbeddingsWithOsDimension().embed_query("foo"),
"ef": 1,
"query": "foo",
"keyword_index": "keyword",
},
Expand Down Expand Up @@ -992,6 +993,21 @@ def test_neo4j_max_marginal_relevance_search() -> None:

drop_vector_indexes(docsearch)

def test_neo4jvector_effective_search_ratio() -> None:
    """Test that a search still returns k results with effective_search_ratio set.

    Builds a store with ``effective_search_ratio=2`` and checks that a
    ``k=2`` similarity search yields exactly two documents.
    """
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        effective_search_ratio=2,
    )
    try:
        output = docsearch.similarity_search("foo", k=2)
        assert len(output) == 2
    finally:
        # Drop the indexes even when the assertion fails, so a failure here
        # does not leave them behind in the database.
        drop_vector_indexes(docsearch)

def test_neo4jvector_passing_graph_object() -> None:
"""Test end to end construction and search with passing graph object."""
Expand Down

0 comments on commit 4aae0b9

Please sign in to comment.