fix(llm): update config in time after apply changes in UI #90

Merged

Changes from all commits · 28 commits
c441160
chore(rag): Test SCHEMA EXAMPLE PROMPT variable problem
returnToInnocence Sep 27, 2024
97b53a4
Merge branch 'fix-config-schema-graph-head-update' of https://github.…
returnToInnocence Sep 27, 2024
cf6862b
chore(rag): add gradio refresh demo
returnToInnocence Sep 27, 2024
a49d4eb
Merge branch 'apache:main' into fix-config-schema-graph-head-update
returnToInnocence Sep 30, 2024
0aa6730
chore: Add component refresh example
returnToInnocence Oct 1, 2024
49e3e8b
chore: Determine the ui refresh event
returnToInnocence Oct 2, 2024
0cb1821
chore(rag): Complete the function validation of the gradio fetch page…
returnToInnocence Oct 2, 2024
5d88f7b
fix(rag): Change the location of the rag dependent SCHEMA_EXAMPLE_PRO…
returnToInnocence Oct 2, 2024
cb27a49
chore(rag): Order of adjustment functions
returnToInnocence Oct 3, 2024
dbedf34
fix(rag): Resolved prompt saving issue
returnToInnocence Oct 7, 2024
9dc6416
chore(rag): Fine tune prompt text formatting
returnToInnocence Oct 8, 2024
84d6bbd
chore(rag): A submission with an error cannot get the value of llm co…
returnToInnocence Oct 9, 2024
9fdae57
fix(rag): Fixed the issue that ui refresh could not get the latest gr…
returnToInnocence Oct 9, 2024
bf99d8f
fix(rag): Fixed the issue that ui refresh could not get the latest co…
returnToInnocence Oct 9, 2024
7b00dec
chore(rag): Modify the comments to leave a general idea of the unreso…
returnToInnocence Oct 9, 2024
9e25198
tiny fix
returnToInnocence Oct 9, 2024
895c5fa
Merge branch 'main' into fix-config-schema-graph-head-update
imbajin Oct 10, 2024
23dde4d
change #pylint comment to method level
imbajin Oct 10, 2024
d2b9907
Merge branch 'main' into fix-config-schema-graph-head-update
imbajin Oct 11, 2024
ed6792e
chore(llm): Comment format changes
returnToInnocence Oct 11, 2024
2c2176e
Merge branch 'fix-config-schema-graph-head-update' of https://github.…
returnToInnocence Oct 11, 2024
e92697b
chore(llm): Comment format changes
returnToInnocence Oct 11, 2024
fed7150
Merge branch 'main' into fix-config-schema-graph-head-update
imbajin Oct 14, 2024
8d4408c
tiny improve
imbajin Oct 14, 2024
e1f3dc2
fix(rag): RAG with HugeGraph refresh rendering issue
returnToInnocence Oct 15, 2024
b70964e
doc: add a refer for original prompt
imbajin Oct 15, 2024
c02ac19
clean unused imports
imbajin Oct 15, 2024
a4f5c2a
Merge branch 'main' into fix-config-schema-graph-head-update
imbajin Oct 15, 2024
20 changes: 10 additions & 10 deletions hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -33,6 +33,16 @@
yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{F_NAME}")


def read_dotenv() -> dict[str, Optional[str]]:
"""Read a .env file in the given root path."""
env_config = dotenv_values(f"{env_path}")
log.info("Loading %s successfully!", env_path)
for key, value in env_config.items():
if key not in os.environ:
os.environ[key] = value or ""
return env_config


@dataclass
class Config(ConfigData):
def from_env(self):
@@ -77,16 +87,6 @@ def update_env(self):
set_key(env_path, k, v, quote_mode="never")


def read_dotenv() -> dict[str, Optional[str]]:
"""Read a .env file in the given root path."""
env_config = dotenv_values(f"{env_path}")
log.info("Loading %s successfully!", env_path)
for key, value in env_config.items():
if key not in os.environ:
os.environ[key] = value or ""
return env_config


class PromptConfig(PromptData):

def __init__(self):
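The hunk above moves `read_dotenv()` from below `update_env()` to above the `Config` dataclass, so the `.env` values are exported into `os.environ` before any config code reads them. A minimal sketch of that load-then-read ordering, assuming python-dotenv's `dotenv_values` (which the module already imports); the `env_path` value here is illustrative:

```python
import os
from dotenv import dotenv_values

env_path = ".env"  # illustrative; the real path is derived from the package root

def read_dotenv() -> dict:
    """Read a .env file and export any unset keys into the process environment."""
    env_config = dotenv_values(env_path)
    for key, value in env_config.items():
        if key not in os.environ:
            os.environ[key] = value or ""
    return env_config

read_dotenv()                               # 1. populate os.environ first...
api_key = os.environ.get("OPENAI_API_KEY")  # 2. ...so later reads see the values
```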
7 changes: 7 additions & 0 deletions hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -95,15 +95,19 @@ class PromptData:
# Data is detached from hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
extract_graph_prompt = """## Main Task
Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.

## Basic Rules
### Schema Format
Graph Schema:
- Vertices: [List of vertex labels and their properties]
- Edges: [List of edge labels, their source and target vertex labels, and properties]

### Content Rule
Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:

#### Vertex Format:
{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}

#### Edge Format:
{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
Also follow the rules:
@@ -112,12 +116,15 @@ class PromptData:
3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema
5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)

## Example
### Input example:
#### text
Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.

#### graph schema
{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]]}

### Output example:
[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
"""
58 changes: 54 additions & 4 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -25,6 +25,7 @@
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from hugegraph_llm.api.rag_api import rag_http_api
from hugegraph_llm.config import settings, prompt
from hugegraph_llm.demo.rag_demo.configs_block import (
create_configs_block,
apply_llm_config,
@@ -61,14 +62,64 @@ def init_rag_ui() -> gr.Interface:
) as hugegraph_llm_ui:
gr.Markdown("# HugeGraph LLM RAG Demo")

create_configs_block()
"""
TODO: leave a general idea of the unresolved part
graph_config_input = textbox_array_graph_config
= [settings.graph_ip, settings.graph_port, settings.graph_name, graph_user, settings.graph_pwd, settings.graph_space]

llm_config_input = textbox_array_llm_config
= if settings.llm_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_language_model, settings.openai_max_tokens]
= else if settings.llm_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_language_model, ""]
= else if settings.llm_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_language_model, ""]
= else ["","","", ""]

embedding_config_input = textbox_array_embedding_config
= if settings.embedding_type == openai [settings.openai_api_key, settings.openai_api_base, settings.openai_embedding_model]
= else if settings.embedding_type == ollama [settings.ollama_host, settings.ollama_port, settings.ollama_embedding_model]
= else if settings.embedding_type == qianfan_wenxin [settings.qianfan_api_key, settings.qianfan_secret_key, settings.qianfan_embedding_model]
= else ["","",""]

reranker_config_input = textbox_array_reranker_config
= if settings.reranker_type == cohere [settings.reranker_api_key, settings.reranker_model, settings.cohere_base_url]
= else if settings.reranker_type == siliconflow [settings.reranker_api_key, "BAAI/bge-reranker-v2-m3", ""]
= else ["","",""]
"""


textbox_array_graph_config = create_configs_block()

with gr.Tab(label="1. Build RAG Index 💡"):
create_vector_graph_block()
textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
create_rag_block()
textbox_inp, textbox_answer_prompt_input = create_rag_block()
with gr.Tab(label="3. Others Tools 🚧"):
create_other_block()


def refresh_ui_config_prompt() -> tuple:
settings.from_env()
prompt.ensure_yaml_file_exists()
return (
settings.graph_ip, settings.graph_port, settings.graph_name, settings.graph_user,
settings.graph_pwd, settings.graph_space, prompt.graph_schema, prompt.extract_graph_prompt,
prompt.default_question, prompt.answer_prompt
)


hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[
textbox_array_graph_config[0],
textbox_array_graph_config[1],
textbox_array_graph_config[2],
textbox_array_graph_config[3],
textbox_array_graph_config[4],
textbox_array_graph_config[5],

textbox_input_schema,
textbox_info_extract_template,

textbox_inp,
textbox_answer_prompt_input
])

return hugegraph_llm_ui
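The block comment earlier in this hunk sketches, in pseudocode, how each provider maps to a fixed-width tuple of settings that the `hugegraph_llm_ui.load(...)` event would push back into the textboxes on every page refresh. A hedged Python rendering of that pseudocode (the attribute names come from the comment itself; the helper is illustrative rather than PR code):

```python
# Hypothetical helper mirroring the TODO pseudocode; not part of the PR.
def llm_config_values(settings) -> list:
    if settings.llm_type == "openai":
        return [settings.openai_api_key, settings.openai_api_base,
                settings.openai_language_model, settings.openai_max_tokens]
    if settings.llm_type == "ollama":
        return [settings.ollama_host, settings.ollama_port,
                settings.ollama_language_model, ""]
    if settings.llm_type == "qianfan_wenxin":
        return [settings.qianfan_api_key, settings.qianfan_secret_key,
                settings.qianfan_language_model, ""]
    return ["", "", "", ""]  # unknown provider: blank out all four textboxes
```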

@@ -89,7 +140,6 @@ def init_rag_ui() -> gr.Interface:
auth_enabled = os.getenv("ENABLE_LOGIN", "False").lower() == "true"
log.info("(Status) Authentication is %s now.", "enabled" if auth_enabled else "disabled")
# TODO: support multi-user login when need

app = gr.mount_gradio_app(app, hugegraph_llm, path="/", auth=("rag", os.getenv("TOKEN")) if auth_enabled else None)

# TODO: we can't use reload now due to the config 'app' of uvicorn.run
45 changes: 31 additions & 14 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/configs_block.py
@@ -179,8 +179,8 @@ def apply_llm_config(arg1, arg2, arg3, arg4, origin_call=None) -> int:
settings.update_env()
return status_code


def create_configs_block():
# TODO: refactor the function to reduce the number of statements & separate the logic
def create_configs_block() -> list:
# pylint: disable=R0915 (too-many-statements)
with gr.Accordion("1. Set up the HugeGraph server.", open=False):
with gr.Row():
@@ -226,10 +226,16 @@ def llm_settings(llm_type):
gr.Textbox(value="", visible=False),
]
else:
llm_config_input = []
llm_config_button = gr.Button("Apply Configuration")
llm_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]
llm_config_button = gr.Button("Apply configuration")
llm_config_button.click(apply_llm_config, inputs=llm_config_input) # pylint: disable=no-member


with gr.Accordion("3. Set up the Embedding.", open=False):
embedding_dropdown = gr.Dropdown(
choices=["openai", "qianfan_wenxin", "ollama"], value=settings.embedding_type, label="Embedding"
@@ -245,22 +251,26 @@ def embedding_settings(embedding_type):
gr.Textbox(value=settings.openai_api_base, label="api_base"),
gr.Textbox(value=settings.openai_embedding_model, label="model_name"),
]
elif embedding_type == "qianfan_wenxin":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
]
elif embedding_type == "ollama":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.ollama_host, label="host"),
gr.Textbox(value=str(settings.ollama_port), label="port"),
gr.Textbox(value=settings.ollama_embedding_model, label="model_name"),
]
elif embedding_type == "qianfan_wenxin":
with gr.Row():
embedding_config_input = [
gr.Textbox(value=settings.qianfan_api_key, label="api_key", type="password"),
gr.Textbox(value=settings.qianfan_secret_key, label="secret_key", type="password"),
gr.Textbox(value=settings.qianfan_embedding_model, label="model_name"),
]
else:
embedding_config_input = []
embedding_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]

embedding_config_button = gr.Button("Apply Configuration")

@@ -296,14 +306,21 @@ def reranker_settings(reranker_type):
label="model",
info="Please refer to https://siliconflow.cn/pricing",
),
gr.Textbox(value="", visible=False),
]
else:
reranker_config_input = []
reranker_config_button = gr.Button("Apply Configuration")
reranker_config_input = [
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
gr.Textbox(value="", visible=False),
]
reranker_config_button = gr.Button("Apply configuration")

# TODO: use "gr.update()" or other way to update the config in time (refactor the click event)
# Call the separate apply_reranker_configuration function here
reranker_config_button.click( # pylint: disable=no-member
fn=apply_reranker_config,
inputs=reranker_config_input, # pylint: disable=no-member
)
# The reason for returning this partial value is the functional need to refresh the ui
return graph_config_input
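A side note on the invisible-textbox padding introduced above: a Gradio `.click()` handler receives exactly one argument per component in its `inputs` list, so every branch of the settings UI must yield the same number of components. A minimal, self-contained sketch of the pattern (hypothetical demo code, not the PR's):

```python
import gradio as gr

def apply_config(host, port, extra):
    # Always receives three values, even when the third box is hidden.
    return f"applied: host={host!r}, port={port!r}, extra={extra!r}"

with gr.Blocks() as demo:
    config_input = [
        gr.Textbox(label="host"),
        gr.Textbox(label="port"),
        gr.Textbox(value="", visible=False),  # placeholder keeps the arity stable
    ]
    status = gr.Textbox(label="status")
    gr.Button("Apply Configuration").click(apply_config, inputs=config_input, outputs=status)

# demo.launch()  # uncomment to try it locally
```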
4 changes: 2 additions & 2 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -99,10 +99,9 @@ def create_rag_block():
vector_only_out = gr.Textbox(label="Vector-only Answer", show_copy_button=True)
graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
graph_vector_out = gr.Textbox(label="Graph-Vector Answer", show_copy_button=True)
from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE

answer_prompt_input = gr.Textbox(
value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True, lines=7
value=prompt.answer_prompt, label="Custom Prompt", show_copy_button=True, lines=7
)
with gr.Column(scale=1):
with gr.Row():
@@ -266,3 +265,4 @@ def several_rag_answer(
)
questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
return inp, answer_prompt_input
71 changes: 48 additions & 23 deletions hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
@@ -22,7 +22,6 @@
import gradio as gr

from hugegraph_llm.config import resource_path, prompt
from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT
from hugegraph_llm.utils.graph_index_utils import (
get_graph_index_info,
clean_all_graph_index,
@@ -33,22 +32,31 @@
from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info


def store_prompt(schema, example_prompt):
# update env variables: schema and example_prompt
if prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt:
prompt.graph_schema = schema
prompt.extract_graph_prompt = example_prompt
prompt.update_yaml_file()


def create_vector_graph_block():
# pylint: disable=no-member
gr.Markdown(
"""## 1. Build Vector/Graph Index & Extract Knowledge Graph
- Docs:
- text: Build rag index from plain text
- file: Upload file(s) which should be <u>TXT</u> or <u>.docx</u> (Multiple files can be selected together)
- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**)
- User-defined Schema (JSON format, follow the template to modify it)
- User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
to modify it)
- Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
**"hugegraph"**)
- Graph extract head: The user-defined prompt of graph extracting
- If already exist the graph data, you should click "**Rebuild vid Index**" to update the index
"""
)

schema = prompt.graph_schema

with gr.Row():
with gr.Column():
with gr.Tab("text") as tab_upload_text:
@@ -59,9 +67,9 @@ def create_vector_graph_block():
label="Docs (multi-files can be selected together)",
file_count="multiple",
)
input_schema = gr.Textbox(value=schema, label="Schema", lines=15, show_copy_button=True)
input_schema = gr.Textbox(value=prompt.graph_schema, label="Schema", lines=15, show_copy_button=True)
info_extract_template = gr.Textbox(
value=SCHEMA_EXAMPLE_PROMPT, label="Graph extract head", lines=15, show_copy_button=True
value=prompt.extract_graph_prompt, label="Graph extract head", lines=15, show_copy_button=True
)
out = gr.Code(label="Output", language="json", elem_classes="code-container-edit")

@@ -80,21 +88,40 @@
graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True)

vector_index_btn0.click(get_vector_index_info, outputs=out) # pylint: disable=no-member
vector_index_btn1.click(clean_vector_index) # pylint: disable=no-member
vector_import_bt.click(
build_vector_index, inputs=[input_file, input_text], outputs=out
) # pylint: disable=no-member
graph_index_btn0.click(get_graph_index_info, outputs=out) # pylint: disable=no-member
graph_index_btn1.click(clean_all_graph_index) # pylint: disable=no-member
graph_index_rebuild_bt.click(fit_vid_index, outputs=out) # pylint: disable=no-member
vector_index_btn0.click(get_vector_index_info, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
vector_index_btn1.click(clean_vector_index).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_btn0.click(get_graph_index_info, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_btn1.click(clean_all_graph_index).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)
graph_index_rebuild_bt.click(fit_vid_index, outputs=out).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)

# origin_out = gr.Textbox(visible=False)
graph_extract_bt.click( # pylint: disable=no-member
graph_extract_bt.click(
extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out]
)
).then(store_prompt, inputs=[input_schema, info_extract_template], )

graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]) # pylint: disable=no-member
graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(
store_prompt,
inputs=[input_schema, info_extract_template],
)

def on_tab_select(input_f, input_t, evt: gr.SelectData):
print(f"You selected {evt.value} at {evt.index} from {evt.target}")
@@ -104,9 +131,7 @@ def on_tab_select(input_f, input_t, evt: gr.SelectData):
return [], input_t
return [], ""

tab_upload_file.select(
fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
) # pylint: disable=no-member
tab_upload_text.select(
fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]
) # pylint: disable=no-member
tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])

return input_schema, info_extract_template
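The `.then()` chaining used throughout this file runs `store_prompt` only after the primary handler finishes, so every index/extract action also persists the latest schema and extract prompt. A hedged sketch of the chaining pattern (handler names here are illustrative):

```python
import gradio as gr

def do_work(text):
    return text.upper()            # stand-in for build_vector_index / extract_graph

def persist(text):
    print(f"persisting {text!r}")  # stand-in for prompt.update_yaml_file()

with gr.Blocks() as demo:
    box = gr.Textbox(label="input")
    out = gr.Textbox(label="output")
    # Run the primary action first, then persist state once it completes.
    gr.Button("Go").click(do_work, inputs=box, outputs=out).then(persist, inputs=box)
```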
hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
@@ -25,6 +25,11 @@
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.utils.log import log

"""
TODO: It is not clear whether there is any other dependence on the SCHEMA_EXAMPLE_PROMPT variable.
Because the SCHEMA_EXAMPLE_PROMPT variable will no longer change based on
prompt.extract_graph_prompt changes after the system loads, this does not seem to meet expectations.
"""
DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt
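The TODO above flags a real pitfall: a module-level constant copies the prompt's value at import time and never sees later edits. A self-contained illustration (hypothetical names, not the project's code):

```python
class Prompt:
    answer_prompt = "v1"

prompt = Prompt()
DEFAULT_ANSWER_TEMPLATE = prompt.answer_prompt  # snapshot, not a live reference

prompt.answer_prompt = "v2"       # e.g. edited via the UI later
print(DEFAULT_ANSWER_TEMPLATE)    # still prints "v1"; the edit is invisible
```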

