allow list of texts as input to pure free text
george1459 committed Jun 10, 2024
1 parent bae7e09 · commit e8cd09b
Showing 4 changed files with 48 additions and 6 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
 
 # Package metadata
 name = "suql"
-version = "1.1.7a6"
+version = "1.1.7a7"
 description = "Structured and Unstructured Query Language (SUQL) Python API"
 author = "Shicheng Liu"
 author_email = "[email protected]"
13 changes: 10 additions & 3 deletions src/suql/free_text_fcns_server.py
@@ -19,7 +19,14 @@
 # engine = "gpt-3.5-turbo-0613"
 
 
-def _answer(source, query, type_prompt = None, k=5, max_input_token=3800, engine="gpt-3.5-turbo-0613"):
+def _answer(
+    source,
+    query,
+    type_prompt = None,
+    k=5,
+    max_input_token=10000,
+    engine="gpt-3.5-turbo-0125"
+):
     from suql.prompt_continuation import llm_generate
     if not source:
         return {"result": "no information"}
@@ -52,9 +59,9 @@ def _answer(source, query, type_prompt = None, k=5, max_input_token=3800, engine="gpt-3.5-turbo-0613"):
             "type_prompt": type_prompt,
         },
         engine=engine,
-        max_tokens=200,
+        max_tokens=1000,
         temperature=0.0,
-        stop_tokens=["\n"],
+        stop_tokens=[],
         postprocess=False,
     )
     return {"result": continuation}
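In context, `_answer` backs the standalone free-text `answer(...)` call, and per the commit message its `source` argument may now be either one block of raw text or a list of pre-chunked texts. A minimal sketch of the two call shapes, assuming an OpenAI key is configured and using made-up review text (the exact prompt template and answers depend on the server setup):

```python
from suql.free_text_fcns_server import _answer

# Source as a single block of raw free text
print(_answer("Top Thai serves casual family-style dishes ...",
              "is this restaurant kid-friendly?"))

# Source as a list of pre-chunked texts (what this commit enables)
chunks = [
    "Top Thai serves casual family-style dishes ...",
    "Reviewers praise the pad see ew and quick service ...",
]
print(_answer(chunks, "is this restaurant kid-friendly?"))

# Both return a dict like {"result": "..."}; an empty source
# short-circuits to {"result": "no information"}.
```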
23 changes: 23 additions & 0 deletions src/suql/loaders/utils.py
@@ -0,0 +1,23 @@
+import json
+
+def chunk_store_documents(data, output_file):
+    from llama_index.core.schema import Document
+    data = [Document(text=data)] # llama index expects a list
+
+    from llama_index.embeddings.fastembed import FastEmbedEmbedding
+    embed_model = FastEmbedEmbedding(model_name="BAAI/bge-large-en-v1.5")
+
+    from llama_index.core.node_parser import SemanticSplitterNodeParser
+    splitter = SemanticSplitterNodeParser(
+        embed_model=embed_model
+    )
+    nodes = splitter.get_nodes_from_documents(data)
+
+    chunked_documents = [node.text for node in nodes]
+
+    with open(output_file, "w") as fd:
+        json.dump(
+            chunked_documents,
+            fd,
+            indent=2
+        )
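This new loader turns one long document into a JSON list of semantically split chunks, which is exactly the "list of texts" shape the rest of the commit consumes. A usage sketch with hypothetical file names (the BAAI/bge-large-en-v1.5 embedding model is downloaded on first use):

```python
from suql.loaders.utils import chunk_store_documents

with open("course_info.txt") as fd:  # hypothetical raw free-text source
    raw_text = fd.read()

chunk_store_documents(raw_text, "course_info_chunked.json")
# course_info_chunked.json now holds a JSON list such as
# ["The course meets twice a week ...", "Grading is based on ..."]
```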
16 changes: 14 additions & 2 deletions src/suql/sql_free_text_support/execute_free_text_sql.py
@@ -9,6 +9,7 @@
 from collections import defaultdict
 from copy import deepcopy
 from typing import List, Union
+from functools import lru_cache
 
 import pglast
 import requests
@@ -1656,13 +1657,24 @@ def _parse_standalone_answer(suql):
     else:
         return None
 
 
+@lru_cache(maxsize=16)
+def _read_source_file(filename):
+    try:
+        with open(filename, "r") as fd:
+            content = json.load(fd)
+        return content
+    except json.JSONDecodeError:
+        with open(filename, "r") as fd:
+            return fd.read()
+
+
 def _execute_standalone_answer(suql, source_file_mapping):
     source, query = _parse_standalone_answer(suql)
     if source not in source_file_mapping:
         return None
 
-    with open(source_file_mapping[source], "r") as fd:
-        source_content = fd.read()
+    source_content = _read_source_file(source_file_mapping[source])
 
     return _answer(source_content, query)
 
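The cached reader is what wires the pieces together for standalone `answer(...)` queries: a source file that parses as JSON (e.g. the chunk list written by `chunk_store_documents`) is handed to `_answer` as a Python list, while anything else falls back to one raw string. A small sketch of that dispatch, using the internal helper with hypothetical file names:

```python
from suql.sql_free_text_support.execute_free_text_sql import _read_source_file

source_file_mapping = {"course_info": "course_info_chunked.json"}  # hypothetical

content = _read_source_file(source_file_mapping["course_info"])
# JSON file   -> a list of chunks, e.g. ["The course meets ...", "Grading ..."]
# plain text  -> the file's contents as one string

# lru_cache(maxsize=16) means repeated queries against the same source
# skip re-reading and re-parsing the file within a process.
```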
