Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add effective_search_ratio to vectorstore #18

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,17 @@ def _get_search_index_query(
if index_type == IndexType.NODE:
if search_type == SearchType.VECTOR:
return (
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
"CALL db.index.vector.queryNodes($index, $k * $ef, $embedding) "
"YIELD node, score "
"WITH node, score LIMIT $k "
)
elif search_type == SearchType.HYBRID:
call_prefix = "CALL () { " if neo4j_version_is_5_23_or_above else "CALL { "

query_body = (
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
"CALL db.index.vector.queryNodes($index, $k * $ef, $embedding) "
"YIELD node, score "
"WITH node, score LIMIT $k "
alexthomas93 marked this conversation as resolved.
Show resolved Hide resolved
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
"UNWIND nodes AS n "
"RETURN n.node AS node, (n.score / max) AS score UNION "
Expand All @@ -117,8 +119,9 @@ def _get_search_index_query(
raise ValueError(f"Unsupported SearchType: {search_type}")
else:
return (
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
"CALL db.index.vector.queryRelationships($index, $k * $ef, $embedding) "
"YIELD relationship, score "
"WITH relationship, score LIMIT $k "
)


Expand Down Expand Up @@ -461,6 +464,8 @@ class Neo4jVector(VectorStore):
'NODE' or 'RELATIONSHIP'
pre_delete_collection: If True, will delete existing data if it exists.
(default: False). Useful for testing.
effective_search_ratio: Controls the candidate pool size by multiplying $k
to balance query accuracy and performance.

Example:
.. code-block:: python
Expand Down Expand Up @@ -504,6 +509,7 @@ def __init__(
relevance_score_fn: Optional[Callable[[float], float]] = None,
index_type: IndexType = DEFAULT_INDEX_TYPE,
graph: Optional[Neo4jGraph] = None,
effective_search_ratio: int = 1,
) -> None:
try:
import neo4j
Expand Down Expand Up @@ -587,6 +593,7 @@ def __init__(
self.retrieval_query = retrieval_query
self.search_type = search_type
self._index_type = index_type
self.effective_search_ratio = effective_search_ratio
# Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo"))

Expand Down Expand Up @@ -1154,6 +1161,7 @@ def similarity_search_with_score_by_vector(
"embedding": embedding,
"keyword_index": self.keyword_index_name,
"query": remove_lucene_chars(kwargs["query"]),
"ef": self.effective_search_ratio,
willtai marked this conversation as resolved.
Show resolved Hide resolved
**params,
**filter_params,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ def test_hybrid_score_normalization() -> None:
"index": "vector",
"k": 1,
"embedding": FakeEmbeddingsWithOsDimension().embed_query("foo"),
"ef": 1,
"query": "foo",
"keyword_index": "keyword",
},
Expand Down Expand Up @@ -993,6 +994,23 @@ def test_neo4j_max_marginal_relevance_search() -> None:
drop_vector_indexes(docsearch)


def test_neo4jvector_effective_search_ratio() -> None:
    """Search with effective_search_ratio=2 still yields exactly k documents."""
    requested = 2

    # Start from a clean collection so earlier tests cannot affect the count.
    store = Neo4jVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        effective_search_ratio=2,
    )

    hits = store.similarity_search("foo", k=requested)
    assert len(hits) == requested

    # Remove the indexes created by this test.
    drop_vector_indexes(store)


def test_neo4jvector_passing_graph_object() -> None:
"""Test end to end construction and search with passing graph object."""
graph = Neo4jGraph(url=url, username=username, password=password)
Expand Down
Loading