Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(llm): added the process of intelligent generated gremlin for retrieval before subgraph retrieval #105

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions hugegraph-llm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,18 @@ graph systems and large language models.
```bash
python3 -m hugegraph_llm.demo.rag_demo.app --host 127.0.0.1 --port 18001
```
6. Or start the gradio interactive demo of **Text2Gremlin**, you can run with the following command, and open http://127.0.0.1:8002 after starting. You can also change the default host `0.0.0.0` and port `8002` as above. (🚧ing)
```bash
python3 -m hugegraph_llm.demo.gremlin_generate_web_demo
```
7. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`. Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.

6. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`. Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.
You can modify the content on the web page, and it will be automatically saved to the configuration file after the corresponding feature is triggered. You can also modify the file directly without restarting the web application; simply refresh the page to load your latest changes.
(Optional) To regenerate the config file, you can use `config.generate` with `-u` or `--update`.
```bash
python3 -m hugegraph_llm.config.generate --update
```
8. (__Optional__) You could use
7. (__Optional__) You could use
[hugegraph-hubble](https://hugegraph.apache.org/docs/quickstart/hugegraph-hubble/#21-use-docker-convenient-for-testdev)
to visit the graph data; you can run it via [Docker/Docker-Compose](https://hub.docker.com/r/hugegraph/hubble)
for guidance. (Hubble is a graph-analysis dashboard that includes data loading/schema management/graph traverser/display).
9. (__Optional__) offline download NLTK stopwords
8. (__Optional__) offline download NLTK stopwords
```bash
python ./hugegraph_llm/operators/common_op/nltk_helper.py
```
Expand Down
208 changes: 0 additions & 208 deletions hugegraph-llm/src/hugegraph_llm/demo/gremlin_generate_web_demo.py

This file was deleted.

10 changes: 6 additions & 4 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
apply_graph_config,
)
from hugegraph_llm.demo.rag_demo.other_block import create_other_block
from hugegraph_llm.demo.rag_demo.text2gremlin_block import create_text2gremlin_block
from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
from hugegraph_llm.demo.rag_demo.vector_graph_block import create_vector_graph_block
from hugegraph_llm.demo.rag_demo.admin_block import create_admin_block, log_stream
Expand Down Expand Up @@ -93,11 +94,12 @@ def init_rag_ui() -> gr.Interface:
textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
textbox_inp, textbox_answer_prompt_input = create_rag_block()
with gr.Tab(label="3. Graph Tools 🚧"):
with gr.Tab(label="3. Text2gremlin ⚙️"):
create_text2gremlin_block()
with gr.Tab(label="4. Graph Tools 🚧"):
create_other_block()
with gr.Tab(label="4. Admin Tools ⚙️"):
with gr.Tab(label="5. Admin Tools ⚙️"):
create_admin_block()


def refresh_ui_config_prompt() -> tuple:
settings.from_env()
Expand Down Expand Up @@ -146,7 +148,7 @@ def refresh_ui_config_prompt() -> tuple:
apply_reranker_config)

admin_http_api(api_auth, log_stream)

app.include_router(api_auth)

# TODO: support multi-user login when need
Expand Down
4 changes: 2 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pandas as pd
from gradio.utils import NamedString

from hugegraph_llm.config import resource_path, prompt
from hugegraph_llm.config import resource_path, prompt, settings
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
from hugegraph_llm.utils.log import log

Expand Down Expand Up @@ -66,7 +66,7 @@ def rag_answer(
if vector_search:
rag.query_vector_index()
if graph_search:
rag.extract_keywords().keywords_to_vid().query_graphdb()
rag.extract_keywords().keywords_to_vid().import_schema(settings.graph_name).query_graphdb()
# TODO: add more user-defined search strategies
rag.merge_dedup_rerank(graph_ratio, rerank_method, near_neighbor_first, custom_related_information)
rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt)
Expand Down
108 changes: 108 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import json

import pandas as pd
import gradio as gr

from hugegraph_llm.models.embeddings.init_embedding import Embeddings
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.operators.gremlin_generate_task import GremlinGenerator


def build_example_vector_index(temp_file):
    """Build the example vector index from an uploaded query/gremlin file.

    The file is selected by extension (case-insensitive): ``.json`` is parsed
    with ``json.load`` and ``.csv`` is read with pandas and converted to a
    list of row dicts. The parsed examples are handed to
    ``GremlinGenerator.example_index_build(...).run()``.

    Args:
        temp_file: Uploaded file object whose ``name`` attribute is the path
            on disk. May be ``None`` -- presumably what the UI passes when
            nothing has been uploaded (confirm against the gradio version in
            use).

    Returns:
        Whatever the index-build pipeline's ``run()`` returns, or an
        ``"ERROR: ..."`` message string when no file was given or the
        extension is not supported.
    """
    # Guard against a click on the build button before any file is uploaded;
    # without it the attribute access below raises AttributeError.
    if temp_file is None:
        return "ERROR: please upload a json or csv file."

    full_path = temp_file.name
    # Compare on a lower-cased copy so "EXAMPLES.JSON" / "data.CSV" also work,
    # while the original path is still used to open the file.
    lowered_path = full_path.lower()
    if lowered_path.endswith(".json"):
        with open(full_path, "r", encoding="utf-8") as f:
            examples = json.load(f)
    elif lowered_path.endswith(".csv"):
        examples = pd.read_csv(full_path).to_dict('records')
    else:
        # Both formats are accepted, so the message must mention both.
        return "ERROR: please input json or csv file."

    builder = GremlinGenerator(
        llm=LLMs().get_llm(),
        embedding=Embeddings().get_embedding(),
    )
    return builder.example_index_build(examples).run()


def gremlin_generate(inp, use_schema, use_example, example_num, schema):
    """Run the text-to-gremlin pipeline for a single query.

    Args:
        inp: Query text, passed to the pipeline as ``query``.
        use_schema: Forwarded to ``GremlinGenerator.gremlin_generate``.
        use_example: When truthy, an example-index query step for
            ``example_num`` examples is added first; also forwarded to
            ``GremlinGenerator.gremlin_generate``.
        example_num: Argument for ``example_index_query``.
        schema: Forwarded to ``GremlinGenerator.gremlin_generate``.

    Returns:
        A 2-tuple ``(match_result, result)`` taken from the pipeline context;
        ``match_result`` falls back to ``"No Results"`` when absent.

    Raises:
        KeyError: If the pipeline context has no ``"result"`` entry.
    """
    llm = LLMs().get_llm()
    embedding = Embeddings().get_embedding()
    pipeline = GremlinGenerator(llm=llm, embedding=embedding)

    if use_example:
        pipeline.example_index_query(example_num)

    generate_step = pipeline.gremlin_generate(use_schema, use_example, schema)
    context = generate_step.run(query=inp)

    matched_examples = context.get("match_result", "No Results")
    gremlin_query = context["result"]
    return matched_examples, gremlin_query


def create_text2gremlin_block() -> None:
    """Lay out the Text2gremlin tab: index building plus query generation.

    The block has two parts:

    1. A file upload with a button that calls ``build_example_vector_index``
       and shows its return value in a result textbox.
    2. A schema box, a query box, two output boxes and the option controls,
       with a button that calls ``gremlin_generate``.

    The function only creates components and registers click handlers; it
    returns nothing (the previous ``-> list`` annotation did not match the
    body). It is assumed to be called inside an active gradio layout context
    -- confirm against the caller.
    """
    gr.Markdown("""## Text2gremlin Tools """)

    # --- Part 1: build the example vector index from an uploaded file ---
    gr.Markdown("## Build Example Vector Index")
    gr.Markdown("Uploaded json file should be in format below:\n"
                "[{\"query\":\"who is peter\", \"gremlin\":\"g.V().has('name', 'peter')\"}]\n"
                "Uploaded csv file should be in format below:\n"
                "query,gremlin\n\"who is peter\",\"g.V().has('name', 'peter')\"")
    with gr.Row():
        example_file = gr.File(label="Upload Example Query-Gremlin Pairs Json")
        build_result_box = gr.Textbox(label="Result Message")
    with gr.Row():
        build_btn = gr.Button("Build Example Vector Index")
    build_btn.click(  # pylint: disable=no-member
        build_example_vector_index,
        inputs=[example_file],
        outputs=[build_result_box],
    )

    # --- Part 2: natural language -> gremlin ---
    gr.Markdown("## Nature Language To Gremlin")
    # Default schema text shown in the schema box; the literal is kept
    # verbatim because it is the initial value users see and edit.
    default_schema = """{
"vertices": [
{"vertex_label": "entity", "properties": []}
],
"edges": [
{
"edge_label": "relation",
"source_vertex_label": "entity",
"target_vertex_label": "entity",
"properties": {}
}
]
}"""
    with gr.Row():
        with gr.Column(scale=1):
            schema_box = gr.Textbox(value=default_schema, label="Schema")
        with gr.Column(scale=1):
            input_box = gr.Textbox(value="Tell me about Al Pacino.",
                                   label="Nature Language Query")
            match_box = gr.Textbox(label="Best-Matched Examples")
            gremlin_box = gr.Textbox(label="Structured Query Language: Gremlin")
        with gr.Column(scale=1):
            use_example_radio = gr.Radio(choices=[True, False], value=False,
                                         label="Use example")
            use_schema_radio = gr.Radio(choices=[True, False], value=False,
                                        label="Use schema")
            example_num_slider = gr.Slider(
                minimum=1,
                maximum=10,
                step=1,
                value=5,
                label="Number of examples"
            )
            generate_btn = gr.Button("Text2Gremlin")
    # Input order must match the parameter order of gremlin_generate:
    # (inp, use_schema, use_example, example_num, schema).
    generate_btn.click(  # pylint: disable=no-member
        fn=gremlin_generate,
        inputs=[input_box, use_schema_radio, use_example_radio, example_num_slider, schema_box],
        outputs=[match_box, gremlin_box]
    )
Loading
Loading