apache · vichayturen · Nov 1, 2024 · Nov 2, 2024 · Nov 5, 2024 · Nov 12, 2024
diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md
@@ -45,21 +45,18 @@ graph systems and large language models.
     ```bash
     python3 -m hugegraph_llm.demo.rag_demo.app --host 127.0.0.1 --port 18001
     ```
-6. Or start the gradio interactive demo of **Text2Gremlin**, you can run with the following command, and open http://127.0.0.1:8002 after starting. You can also change the default host `0.0.0.0` and port `8002` as above. (🚧ing)
-    ```bash
-    python3 -m hugegraph_llm.demo.gremlin_generate_web_demo
-   ```
-7. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`.    Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.
+
+6. After running the web demo, the config file `.env` will be automatically generated at the path `hugegraph-llm/.env`.    Additionally, a prompt-related configuration file `config_prompt.yaml` will also be generated at the path `hugegraph-llm/src/hugegraph_llm/resources/demo/config_prompt.yaml`.
     You can modify the content on the web page, and it will be automatically saved to the configuration file after the corresponding feature is triggered.  You can also modify the file directly without restarting the web application;  simply refresh the page to load your latest changes.  
     (Optional) To regenerate the config file, you can use `config.generate` with `-u` or `--update`.  
     ```bash
     python3 -m hugegraph_llm.config.generate --update
     ```
-8. (__Optional__) You could use 
+7. (__Optional__) You could use 
     [hugegraph-hubble](https://hugegraph.apache.org/docs/quickstart/hugegraph-hubble/#21-use-docker-convenient-for-testdev) 
     to visit the graph data, could run it via [Docker/Docker-Compose](https://hub.docker.com/r/hugegraph/hubble) 
     for guidance. (Hubble is a graph-analysis dashboard that includes data loading, schema management, graph traversal, and display.)
-9. (__Optional__) offline download NLTK stopwords  
+8. (__Optional__) offline download NLTK stopwords  
     ```bash
     python ./hugegraph_llm/operators/common_op/nltk_helper.py
     ```

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/gremlin_generate_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/gremlin_generate_web_demo.py
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -35,6 +35,7 @@
     apply_graph_config,
 )
 from hugegraph_llm.demo.rag_demo.other_block import create_other_block
+from hugegraph_llm.demo.rag_demo.text2gremlin_block import create_text2gremlin_block
 from hugegraph_llm.demo.rag_demo.rag_block import create_rag_block, rag_answer
 from hugegraph_llm.demo.rag_demo.vector_graph_block import create_vector_graph_block
 from hugegraph_llm.demo.rag_demo.admin_block import create_admin_block, log_stream
@@ -93,11 +94,12 @@ def init_rag_ui() -> gr.Interface:
             textbox_input_schema, textbox_info_extract_template = create_vector_graph_block()
         with gr.Tab(label="2. (Graph)RAG & User Functions 📖"):
             textbox_inp, textbox_answer_prompt_input = create_rag_block()
-        with gr.Tab(label="3. Graph Tools 🚧"):
+        with gr.Tab(label="3. Text2gremlin ⚙️"):
+            create_text2gremlin_block()
+        with gr.Tab(label="4. Graph Tools 🚧"):
             create_other_block()
-        with gr.Tab(label="4. Admin Tools ⚙️"):
+        with gr.Tab(label="5. Admin Tools ⚙️"):
             create_admin_block()
-
 
         def refresh_ui_config_prompt() -> tuple:
             settings.from_env()
@@ -146,7 +148,7 @@ def refresh_ui_config_prompt() -> tuple:
                  apply_reranker_config)
 
     admin_http_api(api_auth, log_stream)
-    
+
     app.include_router(api_auth)
 
     # TODO: support multi-user login when need

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -24,7 +24,7 @@
 import pandas as pd
 from gradio.utils import NamedString
 
-from hugegraph_llm.config import resource_path, prompt
+from hugegraph_llm.config import resource_path, prompt, settings
 from hugegraph_llm.operators.graph_rag_task import RAGPipeline
 from hugegraph_llm.utils.log import log
 
@@ -66,7 +66,7 @@ def rag_answer(
     if vector_search:
         rag.query_vector_index()
     if graph_search:
-        rag.extract_keywords().keywords_to_vid().query_graphdb()
+        rag.extract_keywords().keywords_to_vid().import_schema(settings.graph_name).query_graphdb()
     # TODO: add more user-defined search strategies
     rag.merge_dedup_rerank(graph_ratio, rerank_method, near_neighbor_first, custom_related_information)
     rag.synthesize_answer(raw_answer, vector_only_answer, graph_only_answer, graph_vector_answer, answer_prompt)

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+
+import pandas as pd
+import gradio as gr
+
+from hugegraph_llm.models.embeddings.init_embedding import Embeddings
+from hugegraph_llm.models.llms.init_llm import LLMs
+from hugegraph_llm.operators.gremlin_generate_task import GremlinGenerator
+
+
+def build_example_vector_index(temp_file):
+    """Build the example vector index from an uploaded query-gremlin pairs file.
+
+    Args:
+        temp_file: Uploaded file object exposing a ``name`` attribute that holds
+            the path on disk, or ``None`` when nothing was uploaded.
+
+    Returns:
+        The result of running the example-index build, or an ``ERROR: ...``
+        message string when no file was given or its extension is neither
+        ``.json`` nor ``.csv``.
+    """
+    # The click handler can fire before anything is uploaded; report that
+    # instead of failing on ``None.name``.
+    if temp_file is None:
+        return "ERROR: please upload a json or csv file."
+    full_path = temp_file.name
+    if full_path.endswith(".json"):
+        with open(full_path, "r", encoding="utf-8") as f:
+            examples = json.load(f)
+    elif full_path.endswith(".csv"):
+        # One dict per CSV row, keyed by the header names.
+        examples = pd.read_csv(full_path).to_dict('records')
+    else:
+        # Both formats are accepted above, so the message names both.
+        return "ERROR: please input json or csv file."
+    builder = GremlinGenerator(
+        llm=LLMs().get_llm(),
+        embedding=Embeddings().get_embedding(),
+    )
+    return builder.example_index_build(examples).run()
+
+
+def gremlin_generate(inp, use_schema, use_example, example_num, schema):
+    """Translate a natural-language query into Gremlin.
+
+    Args:
+        inp: The natural-language query, passed to the pipeline as ``query``.
+        use_schema: Forwarded to the generation step.
+        use_example: When truthy, an example-index query step is added first;
+            also forwarded to the generation step.
+        example_num: Number of examples requested from the example index.
+        schema: Schema text forwarded to the generation step.
+
+    Returns:
+        A ``(match_result, result)`` tuple taken from the pipeline context;
+        ``match_result`` falls back to ``"No Results"`` when absent.
+    """
+    llm_client = LLMs().get_llm()
+    embedding_client = Embeddings().get_embedding()
+    generator = GremlinGenerator(llm=llm_client, embedding=embedding_client)
+    if use_example:
+        generator.example_index_query(example_num)
+    pipeline = generator.gremlin_generate(use_schema, use_example, schema)
+    context = pipeline.run(query=inp)
+    matched_examples = context.get("match_result", "No Results")
+    gremlin_query = context["result"]
+    return matched_examples, gremlin_query
+
+
+def create_text2gremlin_block() -> list:
+    """Create the Gradio components of the Text2gremlin tab and wire their events.
+
+    Two sections are built: one that uploads query-gremlin example pairs and
+    builds the example vector index, and one that turns a natural-language
+    query into Gremlin.
+
+    NOTE(review): the signature is annotated ``-> list`` but the body has no
+    ``return`` statement, so the call evaluates to ``None``.
+    """
+    gr.Markdown("""## Text2gremlin Tools """)
+
+    # --- Section 1: build the example vector index from an uploaded file ---
+    gr.Markdown("## Build Example Vector Index")
+    gr.Markdown("Uploaded json file should be in format below:\n"
+                "[{\"query\":\"who is peter\", \"gremlin\":\"g.V().has('name', 'peter')\"}]\n"
+                "Uploaded csv file should be in format below:\n"
+                "query,gremlin\n\"who is peter\",\"g.V().has('name', 'peter')\"")
+    with gr.Row():
+        file = gr.File(label="Upload Example Query-Gremlin Pairs Json")
+        out = gr.Textbox(label="Result Message")
+    with gr.Row():
+        btn = gr.Button("Build Example Vector Index")
+    # The uploaded file is the only input; the handler's return value is shown
+    # in the "Result Message" textbox.
+    btn.click(build_example_vector_index, inputs=[file], outputs=[out])  # pylint: disable=no-member
+
+    # --- Section 2: natural language to Gremlin ---
+    gr.Markdown("## Nature Language To Gremlin")
+    # Default text pre-filled into the editable "Schema" textbox.
+    SCHEMA = """{
+        "vertices": [
+            {"vertex_label": "entity", "properties": []}
+        ],
+        "edges": [
+            {
+                "edge_label": "relation",
+                "source_vertex_label": "entity",
+                "target_vertex_label": "entity",
+                "properties": {}
+            }
+        ]
+    }"""
+    with gr.Row():
+        with gr.Column(scale=1):
+            schema_box = gr.Textbox(value=SCHEMA, label="Schema")
+        with gr.Column(scale=1):
+            input_box = gr.Textbox(value="Tell me about Al Pacino.",
+                                   label="Nature Language Query")
+            match = gr.Textbox(label="Best-Matched Examples")
+            # `out` is rebound here; the first section's click handler was
+            # already registered with the earlier textbox.
+            out = gr.Textbox(label="Structured Query Language: Gremlin")
+        with gr.Column(scale=1):
+            use_example_radio = gr.Radio(choices=[True, False], value=False,
+                                         label="Use example")
+            use_schema_radio = gr.Radio(choices=[True, False], value=False,
+                                        label="Use schema")
+            example_num_slider = gr.Slider(
+                minimum=1,
+                maximum=10,
+                step=1,
+                value=5,
+                label="Number of examples"
+            )
+            # `btn` is rebound as well; it now refers to the Text2Gremlin button.
+            btn = gr.Button("Text2Gremlin")
+    # Input order follows gremlin_generate's parameters
+    # (inp, use_schema, use_example, example_num, schema); its two return
+    # values fill the matched-examples and Gremlin textboxes respectively.
+    btn.click(  # pylint: disable=no-member
+        fn=gremlin_generate,
+        inputs=[input_box, use_schema_radio, use_example_radio, example_num_slider, schema_box],
+        outputs=[match, out]
+    )