diff --git a/hugegraph-llm/src/hugegraph_llm/config/config.py b/hugegraph-llm/src/hugegraph_llm/config/config.py
index 6a4c5078..8d691f74 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -33,6 +33,16 @@
yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{F_NAME}")
+def read_dotenv() -> dict[str, Optional[str]]:
+ """Read a .env file in the given root path."""
+ env_config = dotenv_values(f"{env_path}")
+ log.info("Loading %s successfully!", env_path)
+ for key, value in env_config.items():
+ if key not in os.environ:
+ os.environ[key] = value or ""
+ return env_config
+
+
@dataclass
class Config(ConfigData):
def from_env(self):
@@ -77,16 +87,6 @@ def update_env(self):
set_key(env_path, k, v, quote_mode="never")
-def read_dotenv() -> dict[str, Optional[str]]:
- """Read a .env file in the given root path."""
- env_config = dotenv_values(f"{env_path}")
- log.info("Loading %s successfully!", env_path)
- for key, value in env_config.items():
- if key not in os.environ:
- os.environ[key] = value or ""
- return env_config
-
-
class PromptConfig(PromptData):
def __init__(self):
diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
index 4e429cb7..07421908 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config_data.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -95,15 +95,19 @@ class PromptData:
# Data is detached from hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
extract_graph_prompt = """## Main Task
Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
+
## Basic Rules
### Schema Format
Graph Schema:
- Vertices: [List of vertex labels and their properties]
- Edges: [List of edge labels, their source and target vertex labels, and properties]
+
### Content Rule
Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
+
#### Vertex Format:
{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}
+
#### Edge Format:
{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
Also follow the rules:
@@ -112,12 +116,15 @@ class PromptData:
3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema
5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)
+
## Example
### Input example:
#### text
Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
+
#### graph schema
{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]]}
+
### Output example:
[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
"""
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
index 0b5285aa..912b064f 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -25,6 +25,7 @@
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from hugegraph_llm.api.rag_api import rag_http_api
+from hugegraph_llm.config import settings, prompt
from hugegraph_llm.demo.rag_demo.configs_block import (
create_configs_block,
apply_llm_config,
@@ -61,14 +62,64 @@ def init_rag_ui() -> gr.Interface:
) as hugegraph_llm_ui:
gr.Markdown("# HugeGraph LLM RAG Demo")
- create_configs_block()
+ """
+ TODO: leave a general idea of the unresolved part
+ graph_config_input = textbox_array_graph_config
+ = [settings.graph_ip, settings.graph_port, settings.graph_name, graph_user, settings.graph_pwd, settings.graph_space]
+
+ llm_config_input = textbox_array_llm_config
+ = if settings.llm_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_language_model, settings.openai_max_tokens]
+ = else if settings.llm_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_language_model, ""]
+ = else if settings.llm_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_language_model, ""]
+ = else ["","","", ""]
+
+ embedding_config_input = textbox_array_embedding_config
+ = if settings.embedding_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_embedding_model]
+ = else if settings.embedding_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_embedding_model]
+ = else if settings.embedding_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_embedding_model]
+ = else ["","",""]
+
+ reranker_config_input = textbox_array_reranker_config
+ = if settings.reranker_type == cohere [settings.reranker_api_key, settings.reranker_model, settings.cohere_base_url]
+ = else if settings.reranker_type == siliconflow [settings.reranker_api_key, "BAAI/bge-reranker-v2-m3", ""]
+ = else ["","",""]
+ """
+
+
+ textbox_array_graph_config = create_configs_block()
with gr.Tab(label="1. Build RAG Index 💡"):
- create_vector_graph_block()
+ textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
- create_rag_block()
+ textbox_inp, textbox_answer_prompt_input = create_rag_block()
with gr.Tab(label="3. Others Tools 🚧"):
create_other_block()
+
+
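+ # Called by Gradio on page load (see hugegraph_llm_ui.load below) to repopulate
+ # the config and prompt textboxes with the current persisted values.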
+ def refresh_ui_config_prompt() -> tuple:
+ settings.from_env()
+ prompt.ensure_yaml_file_exists()
+ return (
+ settings.graph_ip, settings.graph_port, settings.graph_name, settings.graph_user,
+ settings.graph_pwd, settings.graph_space, prompt.graph_schema, prompt.extract_graph_prompt,
+ prompt.default_question, prompt.answer_prompt
+ )
+
+
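+ # Note: the outputs below must stay in the same order as the tuple returned
+ # by refresh_ui_config_prompt().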
+ hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[
+ textbox_array_graph_config[0],
+ textbox_array_graph_config[1],
+ textbox_array_graph_config[2],
+ textbox_array_graph_config[3],
+ textbox_array_graph_config[4],
+ textbox_array_graph_config[5],
+
+ textbox_input_schema,
+ textbox_info_extract_template,
+
+ textbox_inp,
+ textbox_answer_prompt_input
+ ])
return hugegraph_llm_ui
@@ -89,7 +140,6 @@ def init_rag_ui() -> gr.Interface:
auth_enabled = os.getenv("ENABLE_LOGIN", "False").lower() == "true"
log.info("(Status) Authentication is %s now.", "enabled" if auth_enabled else "disabled")
# TODO: support multi-user login when need
-
app = gr.mount_gradio_app(app, hugegraph_llm, path="/", auth=("rag", os.getenv("TOKEN")) if auth_enabled else None)
# TODO: we can't use reload now due to the config 'app' of uvicorn.run
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
index be161a80..824f41a8 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
@@ -179,8 +179,8 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int:
settings.update_env()
return status_code
-
-def create_configs_block():
+# TODO: refactor the function to reduce the number of statements & separate the logic
+def create_configs_block() -> list:
# pylint: disable=R0915 (too-many-statements)
with gr.Accordion("1. Set up the HugeGraph server.", open=False):
with gr.Row():
@@ -226,10 +226,16 @@ def llm_settings(llm_type):
gr.Textbox(value="", visible=False),
]
else:
- llm_config_input = []
- llm_config_button = gr.Button("Apply Configuration")
+ llm_config_input = [
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ ]
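+ # Hidden placeholder textboxes (instead of an empty list) keep the input count
+ # fixed at 4 for every llm_type, matching apply_llm_config(arg1..arg4).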
+ llm_config_button = gr.Button("Apply Configuration")
llm_config_button.click(apply_llm_config, inputs=llm_config_input) # pylint: disable=no-member
+
with gr.Accordion("3. Set up the Embedding.", open=False):
embedding_dropdown = gr.Dropdown(
choices=["openai", "qianfan_wenxin", "ollama"], value=settings.embedding_type, label="Embedding"
@@ -245,13 +251,6 @@ def embedding_settings(embedding_type):
gr.Textbox(value=settings.openai_api_base, label="api_base"),
gr.Textbox(value=settings.openai_embedding_model, label="model_name"),
]
- elif embedding_type == "qianfan_wenxin":
- with gr.Row():
- embedding_config_input = [
- gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
- gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
- gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
- ]
elif embedding_type == "ollama":
with gr.Row():
embedding_config_input = [
@@ -259,8 +258,19 @@ def embedding_settings(embedding_type):
gr.Textbox(value=str(settings.ollama_port), label="port"),
gr.Textbox(value=settings.ollama_embedding_model, label="model_name"),
]
+ elif embedding_type == "qianfan_wenxin":
+ with gr.Row():
+ embedding_config_input = [
+ gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
+ gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
+ gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
+ ]
else:
- embedding_config_input = []
+ embedding_config_input = [
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ ]
embedding_config_button = gr.Button("Apply Configuration")
@@ -296,10 +306,15 @@ def reranker_settings(reranker_type):
label="model",
info="Please refer to https://siliconflow.cn/pricing",
),
+ gr.Textbox(value="", visible=False),
]
else:
- reranker_config_input = []
- reranker_config_button = gr.Button("Apply Configuration")
+ reranker_config_input = [
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ gr.Textbox(value="", visible=False),
+ ]
+ reranker_config_button = gr.Button("Apply Configuration")
# TODO: use "gr.update()" or other way to update the config in time (refactor the click event)
# Call the separate apply_reranker_configuration function here
@@ -307,3 +322,5 @@ def reranker_settings(reranker_type):
fn=apply_reranker_config,
inputs=reranker_config_input, # pylint: disable=no-member
)
+ # Only the graph config inputs are returned because the page-load UI refresh needs them.
+ return graph_config_input
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
index f514a833..66c9b19f 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -99,10 +99,9 @@ def create_rag_block():
vector_only_out = gr.Textbox(label="Vector-only Answer", show_copy_button=True)
graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
graph_vector_out = gr.Textbox(label="Graph-Vector Answer", show_copy_button=True)
- from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE
answer_prompt_input = gr.Textbox(
- value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True, lines=7
+ value=prompt.answer_prompt, label="Custom Prompt", show_copy_button=True, lines=7
)
with gr.Column(scale=1):
with gr.Row():
@@ -266,3 +265,4 @@ def several_rag_answer(
)
questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
+ return inp, answer_prompt_input
\ No newline at end of file
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
index 1319bd93..1af8a518 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
@@ -22,7 +22,6 @@
import gradio as gr
from hugegraph_llm.config import resource_path, prompt
-from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT
from hugegraph_llm.utils.graph_index_utils import (
get_graph_index_info,
clean_all_graph_index,
@@ -33,22 +32,31 @@
from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info
+def store_prompt(schema, example_prompt):
+ # Persist the schema & example prompt to the prompt YAML file when either has changed
+ if prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt:
+ prompt.graph_schema = schema
+ prompt.extract_graph_prompt = example_prompt
+ prompt.update_yaml_file()
+
+
def create_vector_graph_block():
+ # pylint: disable=no-member
gr.Markdown(
"""## 1. Build Vector/Graph Index & Extract Knowledge Graph
- Docs:
- text: Build rag index from plain text
- file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together)
- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**)
- - User-defined Schema (JSON format, follow the template to modify it)
+ - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
+ to modify it)
- Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
**"hugegraph"**)
- Graph extract head: The user-defined prompt of graph extracting
+- If the graph data already exists, click "**Rebuild vid Index**" to update the index
"""
)
- schema = prompt.graph_schema
-
with gr.Row():
with gr.Column():
with gr.Tab("text") as tab_upload_text:
@@ -59,9 +67,9 @@ def create_vector_graph_block():
label="Docs (multi-files can be selected together)",
file_count="multiple",
)
- input_schema = gr.Textbox(value=schema, label="Schema", lines=15, show_copy_button=True)
+ input_schema = gr.Textbox(value=prompt.graph_schema, label="Schema", lines=15, show_copy_button=True)
info_extract_template = gr.Textbox(
- value=SCHEMA_EXAMPLE_PROMPT, label="Graph extract head", lines=15, show_copy_button=True
+ value=prompt.extract_graph_prompt, label="Graph extract head", lines=15, show_copy_button=True
)
out = gr.Code(label="Output", language="json", elem_classes="code-container-edit")
@@ -80,21 +88,40 @@ def create_vector_graph_block():
graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True)
- vector_index_btn0.click(get_vector_index_info, outputs=out) # pylint: disable=no-member
- vector_index_btn1.click(clean_vector_index) # pylint: disable=no-member
- vector_import_bt.click(
- build_vector_index, inputs=[input_file, input_text], outputs=out
- ) # pylint: disable=no-member
- graph_index_btn0.click(get_graph_index_info, outputs=out) # pylint: disable=no-member
- graph_index_btn1.click(clean_all_graph_index) # pylint: disable=no-member
- graph_index_rebuild_bt.click(fit_vid_index, outputs=out) # pylint: disable=no-member
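+ # Chain store_prompt via .then() so edits in the schema / extract-prompt textboxes
+ # are persisted after every action (this logic moved here from extract_graph).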
+ vector_index_btn0.click(get_vector_index_info, outputs=out).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
+ vector_index_btn1.click(clean_vector_index).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
+ vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
+ graph_index_btn0.click(get_graph_index_info, outputs=out).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
+ graph_index_btn1.click(clean_all_graph_index).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
+ graph_index_rebuild_bt.click(fit_vid_index, outputs=out).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
# origin_out = gr.Textbox(visible=False)
- graph_extract_bt.click( # pylint: disable=no-member
+ graph_extract_bt.click(
extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out]
- )
+ ).then(store_prompt, inputs=[input_schema, info_extract_template])
- graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]) # pylint: disable=no-member
+ graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(
+ store_prompt,
+ inputs=[input_schema, info_extract_template],
+ )
def on_tab_select(input_f, input_t, evt: gr.SelectData):
print(f"You selected {evt.value} at {evt.index} from {evt.target}")
@@ -104,9 +131,7 @@ def on_tab_select(input_f, input_t, evt: gr.SelectData):
return [], input_t
return [], ""
- tab_upload_file.select(
- fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
- ) # pylint: disable=no-member
- tab_upload_text.select(
- fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
- ) # pylint: disable=no-member
+ tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
+ tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
+
+ return input_schema, info_extract_template
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
index 2771ceff..baf61e64 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
@@ -25,6 +25,11 @@
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.utils.log import log
+"""
+TODO: It is not clear whether there is any other dependence on the SCHEMA_EXAMPLE_PROMPT variable.
+Because the SCHEMA_EXAMPLE_PROMPT variable will no longer change based on
+prompt.extract_graph_prompt changes after the system loads, this does not seem to meet expectations.
+"""
DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
index 523a8b84..945fd303 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
@@ -26,8 +26,15 @@
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.utils.log import log
+
+"""
+TODO: It is not clear whether there is any other dependence on the SCHEMA_EXAMPLE_PROMPT variable.
+Because the SCHEMA_EXAMPLE_PROMPT variable will no longer change based on
+prompt.extract_graph_prompt changes after the system loads, this does not seem to meet expectations.
+"""
SCHEMA_EXAMPLE_PROMPT = prompt.extract_graph_prompt
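+# For example, after this module is imported, `prompt.extract_graph_prompt = "new"`
+# will NOT be reflected in SCHEMA_EXAMPLE_PROMPT; it keeps the value captured above.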
+
def generate_extract_property_graph_prompt(text, schema=None) -> str:
return f"""---
Following the full instructions above, try to extract the following text from the given schema, output the JSON result:
@@ -83,7 +90,7 @@ class PropertyGraphExtract:
def __init__(
self,
llm: BaseLLM,
- example_prompt: str = SCHEMA_EXAMPLE_PROMPT
+ example_prompt: str = prompt.extract_graph_prompt
) -> None:
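+ # NOTE: like the former SCHEMA_EXAMPLE_PROMPT constant, this default is evaluated
+ # once at class-definition time; pass example_prompt explicitly to pick up live changes.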
self.llm = llm
self.example_prompt = example_prompt
diff --git a/hugegraph-llm/src/hugegraph_llm/utils/graph_index_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/graph_index_utils.py
index 5735eae7..a8ea1156 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/graph_index_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/graph_index_utils.py
@@ -26,7 +26,7 @@
from .hugegraph_utils import get_hg_client, clean_hg_data
from .log import log
from .vector_index_utils import read_documents
-from ..config import resource_path, settings, prompt
+from ..config import resource_path, settings
from ..indices.vector_index import VectorIndex
from ..models.embeddings.init_embedding import Embeddings
from ..models.llms.init_llm import LLMs
@@ -52,11 +52,6 @@ def clean_all_graph_index():
def extract_graph(input_file, input_text, schema, example_prompt) -> str:
- # update env variables: schema and example_prompt
- if prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt:
- prompt.graph_schema = schema
- prompt.extract_graph_prompt = example_prompt
- prompt.update_yaml_file()
texts = read_documents(input_file, input_text)
builder = KgBuilder(LLMs().get_llm(), Embeddings().get_embedding(), get_hg_client())
diff --git a/hugegraph-llm/src/hugegraph_llm/utils/vector_index_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/vector_index_utils.py
index edd1902f..a7afdf8a 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/vector_index_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/vector_index_utils.py
@@ -19,6 +19,7 @@
import docx
import gradio as gr
+
from hugegraph_llm.config import resource_path, settings
from hugegraph_llm.indices.vector_index import VectorIndex
from hugegraph_llm.models.embeddings.init_embedding import Embeddings