Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
fabioz committed Dec 11, 2024
1 parent d6dcb27 commit dd04c16
Show file tree
Hide file tree
Showing 20 changed files with 759 additions and 238 deletions.
7 changes: 7 additions & 0 deletions sema4ai/codegen/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,13 @@ def __init__(
add_to_package_json=True,
icon="$(trash)",
),
Command(
"sema4ai.setupDataSource",
"Setup Data Source",
server_handled=False,
hide_from_command_palette=False,
icon="$(diff-added)",
),
]


Expand Down
5 changes: 5 additions & 0 deletions sema4ai/codegen/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@ def __init__(
MenuGroup.NAVIGATION,
when="viewItem == robotItem",
),
Menu(
"sema4ai.setupDataSource",
MenuGroup.INLINE,
when="viewItem == datasourceItem",
),
Menu(
"sema4ai.openRobotTreeSelection",
MenuGroup.NAVIGATION,
Expand Down
12 changes: 12 additions & 0 deletions sema4ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@
"onCommand:sema4ai.runActionPackageDevTask",
"onCommand:sema4ai.getActionsMetadata",
"onCommand:sema4ai.dropDataSource",
"onCommand:sema4ai.setupDataSource",
"onDebugInitialConfigurations",
"onDebugResolve:sema4ai",
"onView:sema4ai-task-packages-tree",
Expand Down Expand Up @@ -1013,6 +1014,12 @@
"title": "Drop Data Sources",
"category": "Sema4.ai",
"icon": "$(trash)"
},
{
"command": "sema4ai.setupDataSource",
"title": "Setup Data Source",
"category": "Sema4.ai",
"icon": "$(diff-added)"
}
],
"menus": {
Expand Down Expand Up @@ -1144,6 +1151,11 @@
"when": "view == sema4ai-task-packages-tree && viewItem == robotItem",
"group": "navigation"
},
{
"command": "sema4ai.setupDataSource",
"when": "view == sema4ai-task-packages-tree && viewItem == datasourceItem",
"group": "inline"
},
{
"command": "sema4ai.openRobotTreeSelection",
"when": "view == sema4ai-task-packages-tree && viewItem == robotItem",
Expand Down
1 change: 1 addition & 0 deletions sema4ai/src/sema4ai_code/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
SEMA4AI_RUN_ACTION_PACKAGE_DEV_TASK = "sema4ai.runActionPackageDevTask" # Run dev-task (from Action Package)
SEMA4AI_GET_ACTIONS_METADATA = "sema4ai.getActionsMetadata" # Get Actions Metadata
SEMA4AI_DROP_DATA_SOURCE = "sema4ai.dropDataSource" # Drop Data Sources
SEMA4AI_SETUP_DATA_SOURCE = "sema4ai.setupDataSource" # Setup Data Source

ALL_SERVER_COMMANDS = [
SEMA4AI_GET_PLUGINS_DIR,
Expand Down
6 changes: 6 additions & 0 deletions sema4ai/src/sema4ai_code/data/data_server_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from pathlib import Path
from typing import Any, Optional

from sema4ai_ls_core.protocols import DatasourceInfoTypedDict

if typing.TYPE_CHECKING:
from .result_set import ResultSet

Expand Down Expand Up @@ -211,3 +213,7 @@ def run_sql(

def upload_file(self, file_path: Path, table_name: str) -> None:
    """Upload a file through the underlying HTTP connection.

    Args:
        file_path: Path of the file to upload.
        table_name: Table name passed along with the file.
    """
    # Pure delegation: both arguments are forwarded unchanged.
    self._http_connection.upload_file(file_path, table_name)

def get_data_sources(self, where: str) -> list[dict[str, Any]]:
    """Run ``SHOW DATABASES`` and return every resulting row as a dict.

    Args:
        where: Text appended verbatim after ``SHOW DATABASES`` (for
            instance a ``WHERE ...`` clause). It is interpolated as-is, so
            callers are responsible for passing well-formed, trusted SQL.

    Returns:
        A list with one dict per row yielded by the result set.
    """
    statement = f"SHOW DATABASES {where}"
    rows = self.query("", statement).iter_as_dicts()
    return [*rows]
144 changes: 144 additions & 0 deletions sema4ai/src/sema4ai_code/data/data_source_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import typing
from pathlib import Path

from sema4ai_ls_core.protocols import DatasourceInfoTypedDict

if typing.TYPE_CHECKING:
from sema4ai_code.data.data_server_connection import DataServerConnection


class DataSourceHelper:
def __init__(
self,
root_path: Path,
datasource: "DatasourceInfoTypedDict",
connection: "DataServerConnection",
):
# model datasource: a model created in a project
self._is_model_datasource = False

# table datasource: it's either a file or a custom engine that creates a table
self._is_table_datasource = False
self._root_path = root_path
self._custom_sql: tuple[str, ...] | None = None

self.datasource = datasource
self.connection = connection

# Do as the last thing as it may update fields (such as custom_sql, is_model_datasource, is_table_datasource, etc.)
error = self._compute_validation_error()
self._validation_errors: tuple[str, ...] = (error,) if error else ()

@property
def is_model_datasource(self) -> bool:
return self._is_model_datasource

@property
def is_table_datasource(self) -> bool:
return self._is_table_datasource

@property
def custom_sql(self) -> tuple[str, ...] | None:
return self._custom_sql

@property
def root_path(self) -> Path:
return self._root_path

def get_validation_errors(self) -> tuple[str, ...]:
return self._validation_errors

def _compute_validation_error(self) -> str | None:
datasource = self.datasource
datasource_name = datasource.get("name")

if not datasource_name:
return "It was not possible to statically discover the name of a datasource. Please specify the name of the datasource directly in the datasource definition."

datasource_engine = datasource.get("engine")
if not datasource_engine:
return f"It was not possible to statically discover the engine of a datasource ({datasource_name}). Please specify the engine of the datasource directly in the datasource definition."

created_table = datasource.get("created_table")
model_name = datasource.get("model_name")

if created_table and model_name:
return f"DataSource: {datasource_name} - The datasource cannot specify both the created_table and model_name fields."

if datasource_engine == "custom":
# Custom engine must have sql
error = self._update_custom_sql(datasource)
if error:
return error

if created_table:
self._is_model_datasource = True
elif model_name:
self._is_model_datasource = True
return None

if datasource_engine == "files":
if not created_table:
return f"DataSource: {datasource_name} - The files engine requires the created_table field to be set."

relative_path = datasource.get("file")
if not relative_path:
return f"DataSource: {datasource_name} - The files engine requires the file field to be set."

full_path = Path(self.root_path) / relative_path
if not full_path.exists():
return f"DataSource: {datasource_name} - The files engine requires the file field to be set to a valid file path. File does not exist: {full_path}"

self._is_table_datasource = True
return None
else:
if created_table:
return f"DataSource: {datasource_name} - The engine: {datasource_engine} does not support the created_table field."

if datasource_engine.startswith("prediction:"):
error = self._update_custom_sql(datasource)
if error:
return error

if not model_name:
return f"DataSource: {datasource_name} - The prediction engine requires the model_name field to be set."
self._is_model_datasource = True
return None
else:
if model_name:
return f"DataSource: {datasource_name} - The engine: {datasource_engine} does not support the model_name field."

return None

def _update_custom_sql(self, datasource: "DatasourceInfoTypedDict") -> None | str:
datasource_name = datasource.get("name")
setup_sql = datasource.get("setup_sql")
setup_sql_files = datasource.get("setup_sql_files")
if not setup_sql and not setup_sql_files:
return f"DataSource: {datasource_name} - The custom engine requires the setup_sql or setup_sql_files field to be set."

if setup_sql_files and setup_sql:
return f"DataSource: {datasource_name} - The custom engine cannot specify both the setup_sql and setup_sql_files fields."

if setup_sql:
if isinstance(setup_sql, str):
setup_sql = [setup_sql]
if not isinstance(setup_sql, list):
return f"DataSource: {datasource_name} - The setup_sql field must be a string or a list of strings."
self._custom_sql = tuple(setup_sql)
else:
if isinstance(setup_sql_files, str):
setup_sql_files = [setup_sql_files]
if not isinstance(setup_sql_files, list):
return f"DataSource: {datasource_name} - The setup_sql_files field must be a string or a list of strings."

# read the files
sqls = []
for file in setup_sql_files:
full_path = Path(self.root_path) / file
if not full_path.exists():
return f"DataSource: {datasource_name} - The setup_sql_files field must be set to a list of valid file paths. File does not exist: {full_path}"
txt = full_path.read_text()
sqls.append(txt)
self._custom_sql = tuple(sqls)
return None
6 changes: 4 additions & 2 deletions sema4ai/src/sema4ai_code/robo/collect_actions_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def _resolve_value(node: ast_module.AST, variable_values: dict[str, str]) -> str
return node.value
elif isinstance(node, ast_module.Name) and node.id in variable_values:
return variable_values[node.id]
elif isinstance(node, ast_module.List):
return [_resolve_value(item, variable_values) for item in node.elts]
return None


Expand Down Expand Up @@ -148,8 +150,8 @@ def _extract_datasource_info(
],
keyword.arg,
)
name = _resolve_value(keyword.value, variable_values)
info[key] = name
value = _resolve_value(keyword.value, variable_values)
info[key] = value

if info.get("engine") == "files":
info["name"] = "files"
Expand Down
Loading

0 comments on commit dd04c16

Please sign in to comment.