Skip to content

Commit

Permalink
Update subqueries building import query based on neo4j version
Browse files Browse the repository at this point in the history
  • Loading branch information
willtai committed Nov 20, 2024
1 parent f619c6f commit 9c01b92
Show file tree
Hide file tree
Showing 5 changed files with 842 additions and 290 deletions.
130 changes: 130 additions & 0 deletions libs/neo4j/examples/graph_prompting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import os

from langchain_neo4j.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_neo4j.graphs.neo4j_graph import Neo4jGraph
from langchain_neo4j.vectorstores.neo4j_vector import Neo4jVector

# Connection settings for the local Neo4j instance. Neo4jGraph() below is
# constructed without explicit credentials, so it presumably picks these up
# from the environment (NOTE(review): confirm against Neo4jGraph's constructor).
os.environ.update(
    {
        "NEO4J_URI": "bolt://localhost:7687",
        "NEO4J_USERNAME": "neo4j",
        "NEO4J_PASSWORD": "password",
    }
)

graph = Neo4jGraph()

# Import movie information: one Movie node per CSV row, plus Person nodes
# (linked by DIRECTED / ACTED_IN) and Genre nodes (linked by IN_GENRE) built
# from the '|'-separated columns.
movies_query = """
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'
AS row
MERGE (m:Movie {id:row.movieId})
SET m.released = date(row.released),
m.title = row.title,
m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') |
MERGE (p:Person {name:trim(director)})
MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actors, '|') |
MERGE (p:Person {name:trim(actor)})
MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') |
MERGE (g:Genre {name:trim(genre)})
MERGE (m)-[:IN_GENRE]->(g))
"""
graph.query(movies_query)

# Refresh the cached schema after the import, then show it.
graph.refresh_schema()
print(graph.schema)

from langchain_openai import ChatOpenAI

# Baseline chain using the default Cypher prompt. "Genre" is passed as an
# excluded type; the schema the chain will actually use is printed so the
# effect of that setting can be inspected.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    exclude_types=["Genre"],
    allow_dangerous_requests=True,
    verbose=True,
)
print(chain.graph_schema)

# Few-shot examples pairing a natural-language question with the Cypher query
# that answers it.
#
# Curly braces inside the queries are doubled ("{{" / "}}") because the text is
# later passed through prompt-template formatting, where single braces would
# be read as template variables.
#
# Every query must only use labels and relationship types that the import
# query creates (Movie, Person, Genre; DIRECTED, ACTED_IN, IN_GENRE). People
# are stored under the single label `Person` -- there is no `Actor` label.
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        # Actors are `Person` nodes with an ACTED_IN relationship; matching on
        # a non-existent `Actor` label would return no rows.
        "query": "MATCH (a:Person)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
]

from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_openai import OpenAIEmbeddings

# Text placed before and after the rendered examples. Both the static and the
# dynamic few-shot prompt below use exactly the same wording, so it is defined
# once here rather than repeated in each constructor call.
CYPHER_PROMPT_PREFIX = (
    "You are a Neo4j expert. Given an input question, create a syntactically "
    "correct Cypher query to run.\n\nHere is the schema information\n{schema}."
    "\n\nBelow are a number of examples of questions and their corresponding "
    "Cypher queries."
)
CYPHER_PROMPT_SUFFIX = "User input: {question}\nCypher query: "

# How a single example is rendered inside the prompt.
example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)

# Static few-shot prompt: always embeds the first five examples.
prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix=CYPHER_PROMPT_PREFIX,
    suffix=CYPHER_PROMPT_SUFFIX,
    input_variables=["question", "schema"],
)

print(prompt.format(question="How many artists are there?", schema="foo"))

# Dynamic few-shot selection: embed the examples' questions, store them in a
# Neo4jVector store, and pick the k=5 most similar examples per input question.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    Neo4jVector,
    k=5,
    input_keys=["question"],
)

print(example_selector.select_examples({"question": "how many artists are there?"}))

# Same prompt layout as above, but the examples come from the selector.
prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix=CYPHER_PROMPT_PREFIX,
    suffix=CYPHER_PROMPT_SUFFIX,
    input_variables=["question", "schema"],
)
print(prompt.format(question="how many artists are there?", schema="foo"))

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    cypher_prompt=prompt,
    verbose=True,
    allow_dangerous_requests=True,
)

# Every earlier step prints what it produced; print the final answer as well
# instead of discarding the chain's return value.
response = chain.invoke("How many actors are in the graph?")
print(response["result"])
37 changes: 26 additions & 11 deletions libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,17 +905,7 @@ def add_embeddings(
if not metadatas:
metadatas = [{} for _ in texts]

import_query = (
"UNWIND $data AS row "
"CALL (row) { WITH row "
f"MERGE (c:`{self.node_label}` {{id: row.id}}) "
"WITH c, row "
f"CALL db.create.setNodeVectorProperty(c, "
f"'{self.embedding_node_property}', row.embedding) "
f"SET c.`{self.text_node_property}` = row.text "
"SET c += row.metadata "
"} IN TRANSACTIONS OF 1000 ROWS "
)
import_query = self._build_import_query()

parameters = {
"data": [
Expand All @@ -930,6 +920,31 @@ def add_embeddings(

return ids

def _build_import_query(self) -> str:
"""
Build the Cypher import query string based on the Neo4j version.
Returns:
str: The constructed Cypher query string.
"""
if self.neo4j_version_is_5_23_or_above:
call_prefix = "CALL (row) { "
else:
call_prefix = "CALL { WITH row "

import_query = (
"UNWIND $data AS row "
f"{call_prefix}"
f"MERGE (c:`{self.node_label}` {{id: row.id}}) "
"WITH c, row "
f"CALL db.create.setNodeVectorProperty(c, '{self.embedding_node_property}', row.embedding) "

Check failure on line 940 in libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py

View workflow job for this annotation

GitHub Actions / cd libs/neo4j / make lint #3.12

Ruff (E501)

langchain_neo4j/vectorstores/neo4j_vector.py:940:89: E501 Line too long (104 > 88)
f"SET c.`{self.text_node_property}` = row.text "
"SET c += row.metadata "
"} IN TRANSACTIONS OF 1000 ROWS "
)

return import_query

def add_texts(
self,
texts: Iterable[str],
Expand Down
Loading

0 comments on commit 9c01b92

Please sign in to comment.