fix(llm): update config in time after apply changes in UI (#90)
## Brief description
1. Fixed an issue where the RAG web demo could not update the system configuration in time after the configuration file changed, in file `hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py` (see the refresh-on-load sketch after this list); affected areas:
	- graph config, as shown below:
![image](https://github.com/user-attachments/assets/64e012f4-01e6-4f8f-af43-b79b80cf96b1)
	- prompt, as shown below:
![image](https://github.com/user-attachments/assets/a6ac0610-fa50-48d2-9990-4987d8d26cfd)
	- RAG with HugeGraph, as shown below:
![image](https://github.com/user-attachments/assets/fefe5a65-99f7-46d0-a7da-3aaa914bb521)


Also, hidden placeholder components are now returned instead of empty lists in file `hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py`, to avoid errors caused by an empty return value (see the placeholder sketch after this list).

2. Adjusted the order of functions and classes in file `hugegraph-llm/src/hugegraph_llm/config/config.py`

3. Adjusted the prompt formatting for readability in file `hugegraph-llm/src/hugegraph_llm/config/config_data.py`

4. Added the ability to save the prompt to the configuration file after a button click in the KG build section, in file `hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py` (see the chaining sketch after this list); the button position is shown below:
![image](https://github.com/user-attachments/assets/fb819c13-55c9-4c67-942e-e7ac57509f8f)
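
A minimal sketch of the refresh-on-load pattern behind fix 1 — hedged: the component names and the reduced output set here are illustrative, while the real `app.py` wires all six graph-config textboxes plus the schema and prompt fields:

```python
import gradio as gr

from hugegraph_llm.config import prompt, settings


def refresh_ui_config_prompt() -> tuple:
    # Re-read the .env file and the prompt YAML so the UI reflects on-disk changes
    settings.from_env()
    prompt.ensure_yaml_file_exists()
    return settings.graph_ip, settings.graph_port, prompt.answer_prompt


with gr.Blocks() as demo:
    ip_box = gr.Textbox(label="ip")  # illustrative subset of the real inputs
    port_box = gr.Textbox(label="port")
    answer_box = gr.Textbox(label="Custom Prompt")
    # Blocks.load fires on every page (re)load, replacing any stale values
    demo.load(fn=refresh_ui_config_prompt, outputs=[ip_box, port_box, answer_box])
```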

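A sketch of the placeholder fix in `configs_block.py`: every branch of a settings callback now returns the same number of components, substituting hidden textboxes for the former empty list so downstream code never indexes past a missing element (branch contents abbreviated):

```python
import gradio as gr


def llm_settings(llm_type: str) -> list:
    if llm_type == "openai":
        return [
            gr.Textbox(label="api_key", type="password"),
            gr.Textbox(label="api_base"),
            gr.Textbox(label="model_name"),
            gr.Textbox(label="max_tokens"),
        ]
    # Unknown backends used to return []; hidden placeholders keep the arity stable
    return [gr.Textbox(value="", visible=False) for _ in range(4)]
```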

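And a sketch of the `.then(...)` chaining behind fix 4 — the lambda stands in for the real `extract_graph` handler, while `store_prompt` mirrors the committed helper:

```python
import gradio as gr

from hugegraph_llm.config import prompt


def store_prompt(schema: str, example_prompt: str) -> None:
    # Only rewrite the YAML file when the user actually edited something
    if prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt:
        prompt.graph_schema = schema
        prompt.extract_graph_prompt = example_prompt
        prompt.update_yaml_file()


with gr.Blocks() as demo:
    input_schema = gr.Textbox(label="Schema")
    info_extract_template = gr.Textbox(label="Graph extract head")
    out = gr.Code(language="json")
    graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
    # The main handler runs first; the chained step then persists the prompt
    graph_extract_bt.click(
        lambda s, t: "{}",  # stand-in for the real extract_graph handler
        inputs=[input_schema, info_extract_template],
        outputs=[out],
    ).then(store_prompt, inputs=[input_schema, info_extract_template])
```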
## Issue
Fixes part of #91

---------

Co-authored-by: imbajin <[email protected]>
returnToInnocence and imbajin authored Oct 15, 2024
1 parent aff3bbe commit 8bb13f9
Showing 10 changed files with 167 additions and 60 deletions.
20 changes: 10 additions & 10 deletions hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -33,6 +33,16 @@
yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{F_NAME}")


def read_dotenv() -> dict[str, Optional[str]]:
"""Read a .env file in the given root path."""
env_config = dotenv_values(f"{env_path}")
log.info("Loading %s successfully!", env_path)
for key, value in env_config.items():
if key not in os.environ:
os.environ[key] = value or ""
return env_config


@dataclass
class Config(ConfigData):
def from_env(self):
@@ -77,16 +87,6 @@ def update_env(self):
set_key(env_path, k, v, quote_mode="never")


def read_dotenv() -> dict[str, Optional[str]]:
"""Read a .env file in the given root path."""
env_config = dotenv_values(f"{env_path}")
log.info("Loading %s successfully!", env_path)
for key, value in env_config.items():
if key not in os.environ:
os.environ[key] = value or ""
return env_config


class PromptConfig(PromptData):

def __init__(self):
7 changes: 7 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -95,15 +95,19 @@ class PromptData:
# This data was moved out of hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
extract_graph_prompt = """## Main Task
Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
## Basic Rules
### Schema Format
Graph Schema:
- Vertices: [List of vertex labels and their properties]
- Edges: [List of edge labels, their source and target vertex labels, and properties]
### Content Rule
Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
#### Vertex Format:
{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}
#### Edge Format:
{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
Also follow the rules:
@@ -112,12 +116,15 @@ class PromptData:
3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema
5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)
## Example
### Input example:
#### text
Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
#### graph schema
{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]]}
### Output example:
[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
"""
58 changes: 54 additions & 4 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -25,6 +25,7 @@
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from hugegraph_llm.api.rag_api import rag_http_api
from hugegraph_llm.config import settings, prompt
from hugegraph_llm.demo.rag_demo.configs_block import (
create_configs_block,
apply_llm_config,
@@ -61,14 +62,64 @@ def init_rag_ui() -> gr.Interface:
) as hugegraph_llm_ui:
gr.Markdown("# HugeGraph LLM RAG Demo")

create_configs_block()
"""
TODO: a general sketch of the unresolved config-mapping logic (kept for reference)
graph_config_input = textbox_array_graph_config
= [settings.graph_ip, settings.graph_port, settings.graph_name, settings.graph_user, settings.graph_pwd, settings.graph_space]
llm_config_input = textbox_array_llm_config
= if settings.llm_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_language_model, settings.openai_max_tokens]
= else if settings.llm_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_language_model, ""]
= else if settings.llm_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_language_model, ""]
= else ["","","", ""]
embedding_config_input = textbox_array_embedding_config
= if settings.embedding_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_embedding_model]
= else if settings.embedding_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_embedding_model]
= else if settings.embedding_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_embedding_model]
= else ["","",""]
reranker_config_input = textbox_array_reranker_config
= if settings.reranker_type == cohere [settings.reranker_api_key, settings.reranker_model, settings.cohere_base_url]
= else if settings.reranker_type == siliconflow [settings.reranker_api_key, "BAAI/bge-reranker-v2-m3", ""]
= else ["","",""]
"""


textbox_array_graph_config = create_configs_block()

with gr.Tab(label="1. Build RAG Index 💡"):
create_vector_graph_block()
textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
create_rag_block()
textbox_inp, textbox_answer_prompt_input = create_rag_block()
with gr.Tab(label="3. Others Tools 🚧"):
create_other_block()


def refresh_ui_config_prompt() -> tuple:
settings.from_env()
prompt.ensure_yaml_file_exists()
return (
settings.graph_ip, settings.graph_port, settings.graph_name, settings.graph_user,
settings.graph_pwd, settings.graph_space, prompt.graph_schema, prompt.extract_graph_prompt,
prompt.default_question, prompt.answer_prompt
)


hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[
textbox_array_graph_config[0],
textbox_array_graph_config[1],
textbox_array_graph_config[2],
textbox_array_graph_config[3],
textbox_array_graph_config[4],
textbox_array_graph_config[5],

textbox_input_schema,
textbox_info_extract_template,

textbox_inp,
textbox_answer_prompt_input
])

return hugegraph_llm_ui

@@ -89,7 +140,6 @@ def init_rag_ui() -> gr.Interface:
auth_enabled = os.getenv("ENABLE_LOGIN", "False").lower() == "true"
log.info("(Status) Authentication is %s now.", "enabled" if auth_enabled else "disabled")
# TODO: support multi-user login when need

app = gr.mount_gradio_app(app, hugegraph_llm, path="/", auth=("rag", os.getenv("TOKEN")) if auth_enabled else None)

# TODO: we can't use reload now due to the config 'app' of uvicorn.run
45 changes: 31 additions & 14 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
@@ -179,8 +179,8 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int:
settings.update_env()
return status_code


def create_configs_block():
# TODO: refactor the function to reduce the number of statements & separate the logic
def create_configs_block() -> list:
# pylint: disable=R0915 (too-many-statements)
with gr.Accordion("1. Set up the HugeGraph server.", open=False):
with gr.Row():
@@ -226,10 +226,16 @@ def llm_settings(llm_type):
gr.Textbox(value="", visible=False),
]
else:
llm_config_input = []
llm_config_button = gr.Button("Apply Configuration")
llm_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]
llm_config_button = gr.Button("Apply configuration")
llm_config_button.click(apply_llm_config, inputs=llm_config_input) # pylint: disable=no-member


with gr.Accordion("3. Set up the Embedding.", open=False):
embedding_dropdown = gr.Dropdown(
choices=["openai", "qianfan_wenxin", "ollama"], value=settings.embedding_type, label="Embedding"
@@ -245,22 +251,26 @@ def embedding_settings(embedding_type):
gr.Textbox(value=settings.openai_api_base, label="api_base"),
gr.Textbox(value=settings.openai_embedding_model, label="model_name"),
]
elif embedding_type == "qianfan_wenxin":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
]
elif embedding_type == "ollama":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.ollama_host, label="host"),
gr.Textbox(value=str(settings.ollama_port), label="port"),
gr.Textbox(value=settings.ollama_embedding_model, label="model_name"),
]
elif embedding_type == "qianfan_wenxin":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
]
else:
embedding_config_input = []
embedding_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]

embedding_config_button = gr.Button("Apply Configuration")

@@ -296,14 +306,21 @@ def reranker_settings(reranker_type):
label="model",
info="Please refer to https://siliconflow.cn/pricing",
),
gr.Textbox(value="", visible=False),
]
else:
reranker_config_input = []
reranker_config_button = gr.Button("Apply Configuration")
reranker_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]
reranker_config_button = gr.Button("Apply configuration")

# TODO: use "gr.update()" or other way to update the config in time (refactor the click event)
# Call the separate apply_reranker_configuration function here
reranker_config_button.click( # pylint: disable=no-member
fn=apply_reranker_config,
inputs=reranker_config_input, # pylint: disable=no-member
)
# Only the graph config inputs are returned here; app.py needs them to refresh the UI on load
return graph_config_input
4 changes: 2 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -99,10 +99,9 @@ def create_rag_block():
vector_only_out = gr.Textbox(label="Vector-only Answer", show_copy_button=True)
graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
graph_vector_out = gr.Textbox(label="Graph-Vector Answer", show_copy_button=True)
from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE

answer_prompt_input = gr.Textbox(
value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True, lines=7
value=prompt.answer_prompt, label="Custom Prompt", show_copy_button=True, lines=7
)
with gr.Column(scale=1):
with gr.Row():
@@ -266,3 +265,4 @@ def several_rag_answer(
)
questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
return inp, answer_prompt_input
71 changes: 48 additions & 23 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
@@ -22,7 +22,6 @@
import gradio as gr

from hugegraph_llm.config import resource_path, prompt
from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT
from hugegraph_llm.utils.graph_index_utils import (
get_graph_index_info,
clean_all_graph_index,
@@ -33,22 +32,31 @@
from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info


def store_prompt(schema, example_prompt):
# update env variables: schema and example_prompt
if prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt:
prompt.graph_schema = schema
prompt.extract_graph_prompt = example_prompt
prompt.update_yaml_file()


def create_vector_graph_block():
# pylint: disable=no-member
gr.Markdown(
"""## 1. Build Vector/Graph Index & Extract Knowledge Graph
- Docs:
- text: Build rag index from plain text
- file: Upload file(s) which should be <u>TXT</u> or <u>.docx</u> (Multiple files can be selected together)
- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**)
- User-defined Schema (JSON format, follow the template to modify it)
- User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
to modify it)
- Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
**"hugegraph"**)
- Graph extract head: The user-defined prompt of graph extracting
- If the graph data already exists, you should click "**Rebuild vid Index**" to update the index
"""
)

schema = prompt.graph_schema

with gr.Row():
with gr.Column():
with gr.Tab("text") as tab_upload_text:
@@ -59,9 +67,9 @@ def create_vector_graph_block():
label="Docs (multi-files can be selected together)",
file_count="multiple",
)
input_schema = gr.Textbox(value=schema, label="Schema", lines=15, show_copy_button=True)
input_schema = gr.Textbox(value=prompt.graph_schema, label="Schema", lines=15, show_copy_button=True)
info_extract_template = gr.Textbox(
value=SCHEMA_EXAMPLE_PROMPT, label="Graph extract head", lines=15, show_copy_button=True
value=prompt.extract_graph_prompt, label="Graph extract head", lines=15, show_copy_button=True
)
out = gr.Code(label="Output", language="json", elem_classes="code-container-edit")

@@ -80,21 +88,40 @@ def create_vector_graph_block():
graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True)

vector_index_btn0.click(get_vector_index_info, outputs=out) # pylint: disable=no-member
vector_index_btn1.click(clean_vector_index) # pylint: disable=no-member
vector_import_bt.click(
build_vector_index, inputs=[input_file, input_text], outputs=out
) # pylint: disable=no-member
graph_index_btn0.click(get_graph_index_info, outputs=out) # pylint: disable=no-member
graph_index_btn1.click(clean_all_graph_index) # pylint: disable=no-member
graph_index_rebuild_bt.click(fit_vid_index, outputs=out) # pylint: disable=no-member
vector_index_btn0.click(get_vector_index_info, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
vector_index_btn1.click(clean_vector_index).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_btn0.click(get_graph_index_info, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_btn1.click(clean_all_graph_index).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_rebuild_bt.click(fit_vid_index, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)

# origin_out = gr.Textbox(visible=False)
graph_extract_bt.click( # pylint: disable=no-member
graph_extract_bt.click(
extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out]
)
).then(store_prompt, inputs=[input_schema, info_extract_template])

graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]) # pylint: disable=no-member
graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)

def on_tab_select(input_f, input_t, evt: gr.SelectData):
print(f"You selected {evt.value} at {evt.index} from {evt.target}")
@@ -104,9 +131,7 @@ def on_tab_select(input_f, input_t, evt: gr.SelectData):
return [], input_t
return [], ""

tab_upload_file.select(
fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
) # pylint: disable=no-member
tab_upload_text.select(
fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
) # pylint: disable=no-member
tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])

return input_schema, info_extract_template
hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
Expand Up @@ -25,6 +25,11 @@
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.utils.log import log

"""
TODO: It is unclear whether anything else depends on the SCHEMA_EXAMPLE_PROMPT variable.
Since SCHEMA_EXAMPLE_PROMPT no longer changes when prompt.extract_graph_prompt changes
after the system loads, this does not seem to meet expectations.
"""
DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt


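
One possible follow-up for the TODO above — an illustration only, not part of this commit — is to look the template up at call time instead of freezing it in a module-level constant at import:

```python
from hugegraph_llm.config import prompt


def default_answer_template() -> str:
    # Read on every call, so later edits to the prompt YAML are picked up,
    # unlike DEFAULT_ANSWER_TEMPLATE, which is captured once at import time
    return prompt.answer_prompt
```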