Skip to content

Commit

Permalink
feat(ibis): introduce the relationship validation (#815)
Browse files Browse the repository at this point in the history
* implement relationship validation

* fix tests for other source

* fix the method argument

* fix test

* fix test

* address comment
  • Loading branch information
goldmedal authored Oct 8, 2024
1 parent f792e27 commit 5be3385
Show file tree
Hide file tree
Showing 14 changed files with 314 additions and 25 deletions.
121 changes: 117 additions & 4 deletions ibis-server/app/model/validator.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,31 @@
from __future__ import annotations

import base64
import json

from app.mdl.rewriter import Rewriter
from app.model import UnprocessableEntityError
from app.model.connector import Connector

rules = ["column_is_valid"]
rules = ["column_is_valid", "relationship_is_valid"]


class Validator:
def __init__(self, connector: Connector, rewriter: Rewriter):
self.connector = connector
self.rewriter = rewriter

def validate(self, rule: str, parameters: dict[str, str]):
def validate(self, rule: str, parameters: dict[str, str], manifest_str: str):
if rule not in rules:
raise RuleNotFoundError(rule)
try:
getattr(self, f"_validate_{rule}")(parameters)
getattr(self, f"_validate_{rule}")(parameters, manifest_str)
except ValidationError as e:
raise e
except Exception as e:
raise ValidationError(f"Unknown exception: {type(e)}, message: {e!s}")

def _validate_column_is_valid(self, parameters: dict[str, str]):
def _validate_column_is_valid(self, parameters: dict[str, str], manifest_str: str):
model_name = parameters.get("modelName")
column_name = parameters.get("columnName")
if model_name is None:
Expand All @@ -37,6 +40,116 @@ def _validate_column_is_valid(self, parameters: dict[str, str]):
except Exception as e:
raise ValidationError(f"Exception: {type(e)}, message: {e!s}")

def _validate_relationship_is_valid(
self, parameters: dict[str, str], manifest_str: str
):
relationship_name = parameters.get("relationshipName")
if relationship_name is None:
raise MissingRequiredParameterError("relationship")
decoded_manifest = base64.b64decode(manifest_str).decode("utf-8")
manifest = json.loads(decoded_manifest)

relationship = list(
filter(lambda r: r["name"] == relationship_name, manifest["relationships"])
)

if len(relationship) == 0:
raise ValidationError(
f"Relationship {relationship_name} not found in manifest"
)

left_model = self._get_model(manifest, relationship[0]["models"][0])
right_model = self._get_model(manifest, relationship[0]["models"][1])
relationship_type = relationship[0]["joinType"].lower()
condition = relationship[0]["condition"]
columns = condition.split("=")
left_column = columns[0].strip().split(".")[1]
right_column = columns[1].strip().split(".")[1]

def generate_column_is_unique_sql(model_name, column_name):
return f'SELECT count(*) = count(distinct {column_name}) AS result FROM "{model_name}"'

def generate_is_exist_join_sql(
left_model, right_model, left_column, right_column
):
return f'SELECT count(*) > 0 AS result FROM "{left_model}" JOIN "{right_model}" ON "{left_model}"."{left_column}" = "{right_model}"."{right_column}"'

def generate_sql_from_type(
relationship_type, left_model, right_model, left_column, right_column
):
if relationship_type == "one_to_one":
return f"""WITH
lefttable AS ({generate_column_is_unique_sql(left_model, left_column)}),
righttable AS ({generate_column_is_unique_sql(right_model, right_column)}),
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
SELECT lefttable.result AND righttable.result AND joinexist.result result,
lefttable.result left_table_unique,
righttable.result right_table_unique,
joinexist.result is_related
FROM lefttable, righttable, joinexist"""
elif relationship_type == "many_to_one":
return f"""WITH
righttable AS ({generate_column_is_unique_sql(right_model, right_column)}),
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
SELECT righttable.result AND joinexist.result result,
righttable.result right_table_unique,
joinexist.result is_related
FROM righttable, joinexist"""
elif relationship_type == "one_to_many":
return f"""WITH
lefttable AS ({generate_column_is_unique_sql(left_model, left_column)}),
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
SELECT lefttable.result AND joinexist.result result,
lefttable.result left_table_unique,
joinexist.result is_related
FROM lefttable, joinexist"""
elif relationship_type == "many_to_many":
return f"""WITH
joinexist AS ({generate_is_exist_join_sql(left_model, right_model, left_column, right_column)})
SELECT joinexist.result result,
joinexist.result is_related
FROM joinexist"""
else:
raise ValidationError(f"Unknown relationship type: {relationship_type}")

def format_result(result):
output = {}
output["result"] = str(result.get("result").get(0))
output["is_related"] = str(result.get("is_related").get(0))
if result.get("left_table_unique") is not None:
output["left_table_unique"] = str(
result.get("left_table_unique").get(0)
)
if result.get("right_table_unique") is not None:
output["right_table_unique"] = str(
result.get("right_table_unique").get(0)
)
return output

sql = generate_sql_from_type(
relationship_type,
left_model["name"],
right_model["name"],
left_column,
right_column,
)
try:
rewritten_sql = self.rewriter.rewrite(sql)
result = self.connector.query(rewritten_sql, limit=1)
if not result.get("result").get(0):
raise ValidationError(
f"Relationship {relationship_name} is not valid: {format_result(result)}"
)

except Exception as e:
raise ValidationError(f"Exception: {type(e)}, message: {e!s}")

def _get_model(self, manifest, model_name):
models = list(filter(lambda m: m["name"] == model_name, manifest["models"]))
if len(models) == 0:
raise ValidationError(f"Model {model_name} not found in manifest")
return models[0]


class ValidationError(UnprocessableEntityError):
pass
Expand Down
2 changes: 1 addition & 1 deletion ibis-server/app/routers/v2/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def validate(data_source: DataSource, rule_name: str, dto: ValidateDTO) -> Respo
Connector(data_source, dto.connection_info, dto.manifest_str),
Rewriter(dto.manifest_str, data_source=data_source),
)
validator.validate(rule_name, dto.parameters)
validator.validate(rule_name, dto.parameters, dto.manifest_str)
return Response(status_code=204)


Expand Down
2 changes: 1 addition & 1 deletion ibis-server/app/routers/v3/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ def validate(data_source: DataSource, rule_name: str, dto: ValidateDTO) -> Respo
Connector(data_source, dto.connection_info, dto.manifest_str),
Rewriter(dto.manifest_str, data_source=data_source, experiment=True),
)
validator.validate(rule_name, dto.parameters)
validator.validate(rule_name, dto.parameters, dto.manifest_str)
return Response(status_code=204)
19 changes: 17 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fastapi.testclient import TestClient

from app.main import app
from app.model.validator import rules

pytestmark = pytest.mark.bigquery

Expand Down Expand Up @@ -185,6 +186,20 @@ def test_query_with_dry_run_and_invalid_sql():
assert response.text is not None


def test_query_values():
response = client.post(
url=f"{base_url}/query",
params={"dryRun": True},
json={
"connectionInfo": connection_info,
"manifestStr": manifest_str,
"sql": "SELECT * FROM (VALUES (1, 2), (3, 4))",
},
)

assert response.status_code == 204


def test_validate_with_unknown_rule():
response = client.post(
url=f"{base_url}/validate/unknown_rule",
Expand All @@ -194,10 +209,10 @@ def test_validate_with_unknown_rule():
"parameters": {"modelName": "Orders", "columnName": "orderkey"},
},
)

assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from testcontainers.clickhouse import ClickHouseContainer

from app.main import app
from app.model.validator import rules
from tests.confest import file_path

pytestmark = pytest.mark.clickhouse
Expand Down Expand Up @@ -410,8 +411,7 @@ def test_validate_with_unknown_rule(clickhouse: ClickHouseContainer):
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from testcontainers.mssql import SqlServerContainer

from app.main import app
from app.model.validator import rules
from tests.confest import file_path

pytestmark = pytest.mark.mssql
Expand Down Expand Up @@ -251,8 +252,7 @@ def test_validate_with_unknown_rule(mssql: SqlServerContainer):
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from testcontainers.mysql import MySqlContainer

from app.main import app
from app.model.validator import rules
from tests.confest import file_path

pytestmark = pytest.mark.mysql
Expand Down Expand Up @@ -257,8 +258,7 @@ def test_validate_with_unknown_rule(mysql: MySqlContainer):
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from testcontainers.postgres import PostgresContainer

from app.main import app
from app.model.validator import rules
from tests.confest import file_path

pytestmark = pytest.mark.postgres
Expand Down Expand Up @@ -288,8 +289,7 @@ def test_validate_with_unknown_rule(postgres: PostgresContainer):
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fastapi.testclient import TestClient

from app.main import app
from app.model.validator import rules

pytestmark = pytest.mark.snowflake

Expand Down Expand Up @@ -192,8 +193,7 @@ def test_validate_with_unknown_rule():
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
4 changes: 2 additions & 2 deletions ibis-server/tests/routers/v2/connector/test_trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from trino.dbapi import connect

from app.main import app
from app.model.validator import rules

pytestmark = pytest.mark.trino

Expand Down Expand Up @@ -268,8 +269,7 @@ def test_validate_with_unknown_rule(trino: TrinoContainer):
)
assert response.status_code == 422
assert (
response.text
== "The rule `unknown_rule` is not in the rules, rules: ['column_is_valid']"
response.text == f"The rule `unknown_rule` is not in the rules, rules: {rules}"
)


Expand Down
Loading

0 comments on commit 5be3385

Please sign in to comment.