Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix feedback logging #23

Merged
merged 1 commit into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: black
name: tests

on: [push, pull_request]
on: [push]

jobs:
lint:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Defog Python

[![black](https://github.com/defog-ai/defog-python/actions/workflows/main.yml/badge.svg)](https://github.com/defog-ai/defog-python/actions/workflows/main.yml)
[![tests](https://github.com/defog-ai/defog-python/actions/workflows/main.yml/badge.svg)](https://github.com/defog-ai/defog-python/actions/workflows/main.yml)

Defog converts your natural language text queries into SQL and other machine readable code. This library allows you to easily integrate Defog into your python application, and has a CLI to help you get started.

Expand Down
12 changes: 0 additions & 12 deletions defog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,18 +1005,6 @@ def get_predefined_queries(self):
else:
return []

def execute_predefined_query(self, query):
"""
Executes a predefined query
"""
resp = execute_query(
query["query"],
self.api_key,
self.db_type,
self.db_creds,
)
return resp

def update_db_schema_csv(self, path_to_csv):
"""
Update the DB schema via a CSV, rather than by via a Google Sheet
Expand Down
123 changes: 32 additions & 91 deletions defog/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
import requests

import defog
from defog import Defog
from defog.util import parse_update
from defog.util import get_feedback, parse_update
from prompt_toolkit import prompt

USAGE_STRING = """
Expand Down Expand Up @@ -327,8 +326,6 @@ def query():
else:
query = sys.argv[2]

feedback_mode = False
feedback = ""
user_question = ""
sql_generated = ""
while True:
Expand All @@ -338,100 +335,44 @@ def query():
elif query == "":
print("Your query cannot be empty.")
query = prompt("Enter a query, or type 'e' to exit: ")
if feedback_mode:
if feedback not in ["y", "n"]:
pass
elif feedback == "y":
# send data to /feedback endpoint
try:
requests.post(
"https://api.defog.ai/feedback",
json={
"api_key": df.api_key,
"feedback": "good",
"db_type": df.db_type,
"question": user_question,
"query": sql_generated,
},
timeout=1,
)
print("Thank you for the feedback!")
feedback_mode = False
except:
pass

elif feedback == "n":
# send data to /feedback endpoint
feedback_text = prompt(
"Could you tell us why this was a bad query? This will help us improve the model for you. Just hit enter if you want to leave this blank.\n"
)
try:
requests.post(
"https://api.defog.ai/feedback",
json={
"api_key": df.api_key,
"feedback": "bad",
"text": feedback_text,
"db_type": df.db_type,
"question": user_question,
"query": sql_generated,
},
timeout=1,
)
except:
pass
user_question = query
resp = df.run_query(query, retries=3)
if not resp["ran_successfully"]:
if "query_generated" in resp:
print("Defog generated the following query to answer your question:\n")
print(f"\033[1m{resp['query_generated']}\033[0m\n")

print(
"Thank you for the feedback! We retrain our models every week, and you should see much better performance on these kinds of queries in another week.\n"
f"However, your query did not run successfully. The error message generated while running the query on your database was\n\n\033[1m{resp['error_message']}\033[0m\n."
)

print(
f"If you continue to get these errors, please consider updating the metadata in your schema by editing the google sheet generated and running `defog update <url>`, or by updating your glossary.\n"
)
feedback_mode = False
query = prompt("Enter another query, or type 'e' to exit: ")
else:
user_question = query
resp = df.run_query(query, retries=3)
if not resp["ran_successfully"]:
if "query_generated" in resp:
print(
"Defog generated the following query to answer your question:\n"
)
print(f"\033[1m{resp['query_generated']}\033[0m\n")

print(
f"However, your query did not run successfully. The error message generated while running the query on your database was\n\n\033[1m{resp['error_message']}\033[0m\n."
)

print(
f"If you continue to get these errors, please consider updating the metadata in your schema by editing the google sheet generated and running `defog update <url>`, or by updating your glossary.\n"
)
else:
print(
f"Defog was unable to generate a query for your question. The error message generated while running the query on your database was\n\n\033[1m{resp.get('error_message')}\033[0m\n."
)
query = prompt("Enter another query, or type 'e' to exit: ")
else:
sql_generated = resp.get("query_generated")
print("Defog generated the following query to answer your question:\n")
print(f"\033[1m{resp['query_generated']}\033[0m\n")
reason_for_query = resp.get("reason_for_query", "")
reason_for_query = reason_for_query.replace(". ", "\n\n")

print("This was its reasoning for generating this query:\n")
print(f"\033[1m{reason_for_query}\033[0m\n")

print("Results:\n")
# print results in tabular format using 'columns' and 'data' keys
try:
print_table(resp["columns"], resp["data"])
except:
print(resp)

print()
feedback_mode = True
feedback = prompt(
"Did Defog answer your question well? Just hit enter to skip (y/n):\n"
print(
f"Defog was unable to generate a query for your question. The error message generated while running the query on your database was\n\n\033[1m{resp.get('error_message')}\033[0m\n."
)
else:
sql_generated = resp.get("query_generated")
print("Defog generated the following query to answer your question:\n")
print(f"\033[1m{resp['query_generated']}\033[0m\n")
reason_for_query = resp.get("reason_for_query", "")
reason_for_query = reason_for_query.replace(". ", "\n\n")

print("This was its reasoning for generating this query:\n")
print(f"\033[1m{reason_for_query}\033[0m\n")

print("Results:\n")
# print results in tabular format using 'columns' and 'data' keys
try:
print_table(resp["columns"], resp["data"])
except:
print(resp)

print()
get_feedback(df.api_key, df.db_type, user_question, sql_generated)
query = prompt("Please enter another query, or type 'e' to exit: ")


def deploy():
Expand Down
27 changes: 20 additions & 7 deletions defog/query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import re
import requests
from defog.util import write_logs

Expand Down Expand Up @@ -139,18 +140,22 @@ def execute_query(
retries: int = 3,
schema: dict = None,
):
"""
Execute the query and retry with adaptive learning if there is an error.
Raises an Exception if there are no retries left, or if the error is a connection error.
"""
err_msg = None
try:
return execute_query_once(db_type, db_creds, query) + (query,)
except Exception as e:
err_msg = str(e)
print(
"There was an error when running the previous query. Retrying with adaptive learning..."
)

# log this error to our feedback system
if is_connection_error(err_msg):
raise Exception(
f"There was a connection issue to your database:\n{err_msg}\n\nPlease check your database credentials and try again."
)
# log this error to our feedback system first (this is a 1-way side-effect)
try:
r = requests.post(
requests.post(
"https://api.defog.ai/feedback",
json={
"api_key": api_key,
Expand All @@ -164,8 +169,9 @@ def execute_query(
)
except:
pass

# log locally
write_logs(str(e))
# retry with adaptive learning
while retries > 0:
write_logs(f"Retries left: {retries}")
try:
Expand Down Expand Up @@ -196,3 +202,10 @@ def execute_query(
write_logs(str(e))
retries -= 1
raise Exception(err_msg)


def is_connection_error(err_msg: str) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! This works very well for Postgres, though may not work as well for Snowflake and other flavors

250001 (08001): Failed to connect to DB. Verify the account name is correct: [`REDACTED`/). HTTP 403: Forbidden

MySQL

1045 (28000): Access denied for user 'root'@'localhost' (using password: YES)

etc. Can fix later, though!

return (
isinstance(err_msg, str)
and re.search(r"connection.*failed", err_msg) is not None
)
51 changes: 51 additions & 0 deletions defog/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
from typing import List

from prompt_toolkit import prompt
import requests


def parse_update(
args_list: List[str], attributes_list: List[str], config_dict: dict
Expand Down Expand Up @@ -94,3 +97,51 @@ def identify_categorical_columns(
top_values = [i[0] for i in top_values if i[0] is not None]
rows[idx]["top_values"] = top_values
return rows


def get_feedback(api_key: str, db_type: str, user_question: str, sql_generated: str):
"""
Get feedback from the user on whether the query was good or bad, and why.
"""
feedback = prompt(
"Did Defog answer your question well? Just hit enter to skip (y/n):\n"
)
while feedback not in ["y", "n", ""]:
feedback = prompt("Please enter y or n:\n")
if feedback == "":
# user skipped feedback
return
# get explanation for negative feedback
if feedback == "n":
feedback_text = prompt(
"Could you tell us why this was a bad query? This will help us improve the model for you. Just hit enter if you want to leave this blank.\n"
)
else:
feedback_text = ""
try:
data = {
"api_key": api_key,
"feedback": "good" if feedback == "y" else "bad",
"db_type": db_type,
"question": user_question,
"query": sql_generated,
}
if feedback_text != "":
data["feedback_text"] = feedback_text
requests.post(
"https://api.defog.ai/feedback",
json=data,
timeout=1,
)
if feedback == "y":
print("Thank you for the feedback!")
else:
print(
"Thank you for the feedback! We retrain our models every week, and you should see much better performance on these kinds of queries in another week.\n"
)
print(
f"If you continue to get these errors, please consider updating the metadata in your schema by editing the google sheet generated and running `defog update <url>`, or by updating your glossary.\n"
)
except Exception as e:
write_logs(f"Error in get_feedback:\n{e}")
pass
9 changes: 5 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
requests>=2.28.2
psycopg2-binary>=2.9.5
pandas
tabulate
pwinput
prompt_toolkit
psycopg2-binary>=2.9.5
pwinput
requests>=2.28.2
tabulate
33 changes: 32 additions & 1 deletion tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unittest
from unittest import mock

from defog.query import execute_query_once, execute_query
from defog.query import is_connection_error, execute_query_once, execute_query


class ExecuteQueryOnceTestCase(unittest.TestCase):
Expand Down Expand Up @@ -166,5 +166,36 @@ def side_effect(db_type, db_creds, query):
self.assertIn(json.dumps(json_req), lines[2])


class TestConnectionError(unittest.TestCase):
def test_connection_failed(self):
self.assertTrue(
is_connection_error(
"""connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such file or directory
Is the server running locally and accepting connections on that socket?"""
)
)

def test_not_connection_failed(self):
self.assertFalse(
is_connection_error(
'psycopg2.errors.UndefinedTable: relation "nonexistent_table" does not exist'
)
)
self.assertFalse(
is_connection_error(
'psycopg2.errors.SyntaxError: syntax error at or near "nonexistent_table"'
)
)
self.assertFalse(
is_connection_error(
'psycopg2.errors.UndefinedColumn: column "nonexistent_column" does not exist'
)
)

def test_empty_string(self):
self.assertFalse(is_connection_error(""))
self.assertFalse(is_connection_error(None))


if __name__ == "__main__":
unittest.main()
Loading
Loading