Skip to content

Commit

Permalink
feat: port over initial python schema extraction PoC (#3332)
Browse files Browse the repository at this point in the history
# Summary
This PR ports python schema extraction PoC into this repo

> [!WARNING]
> Still mostly unusable, but this lays down the vast majority of the
foundation.

# Changes
- refactor `verb` decorator. switched to using a pattern that doesn't
require dynamic dangling mutable properties on functions. Instead
encapsulates decorated verb functions in a class and extracts type
information useful for runtime grpc server and schema extraction
- sets up initial stab at a directory structure. will attempt to create
1 package per schema feature. each package will contain:
    - encapsulating class
    - decorator
    - schema extraction
- minor refactor of `VerbExtractor` to use `isinstance(func, Verb)`
instead of `getattr(func, "_is_ftl_verb", False)`.
- set up runnable `schema_extractor` (note: this will change in the next
PR to a standalone executable. temporarily in a `cli` dir)
- added TODO in python language plugin's `Build` method

props to @worstell for doing the actual schema extraction work! will
co-author the merge commit


# Usage
> [!NOTE]
> From the `python-runtime/ftl` dir

```
❯ uv run python -m ftl.cli.schema_extractor $(pwd)/../../examples/python/echo
Error importing module decorator: attempted relative import with no known parent package
/Users/moe/code/tbd/ose/ftl/python-runtime/ftl/src/ftl/extract/context.py:76: UserWarning: google.protobuf.service module is deprecated. RPC implementations should provide code generator plugins which generate code specific to the RPC implementation. service.py will be removed in Jan 2025
  spec.loader.exec_module(module)
Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 17
}
name: "echo"
request {
  ref {
    name: "EchoRequest"
    module: "echo"
  }
}
response {
  ref {
    name: "EchoResponse"
    module: "echo"
  }
}

Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 7
}
name: "EchoRequest"
fields {
  name: "name"
  type {
    string {
    }
  }
}

Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 12
}
name: "EchoResponse"
fields {
  name: "message"
  type {
    string {
    }
  }
}
```

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Alec Thomas <[email protected]>
Co-authored-by: worstell <[email protected]>
  • Loading branch information
mistermoe authored Nov 6, 2024
1 parent fdaa8bb commit d3c73f5
Show file tree
Hide file tree
Showing 18 changed files with 545 additions and 34 deletions.
1 change: 1 addition & 0 deletions .github/workflows/workflow-roadmap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ jobs:
["jvm"]=2439
["security"]=2438
["dx"]=2436
["python"]=3339
)
issue_number=${label_to_issue_map["${{ matrix.label }}"]}
Expand Down
Empty file added backend/protos/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions examples/python/echo/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions python-runtime/compile/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ func Build(ctx context.Context, projectRootDir, stubsRoot string, config modulec

buildDir := buildDir(config.Dir)

// TODO: call the python schema extractor, capture the script's output, unmarshal it into the schema proto, then unmarshal that into the Go type and return it
// same with build errors

if err := internal.ScaffoldZip(buildTemplateFiles(), buildDir, mctx, scaffolder.Functions(scaffoldFuncs)); err != nil {
return moduleSch, nil, fmt.Errorf("failed to scaffold build template: %w", err)
}
Expand Down
14 changes: 14 additions & 0 deletions python-runtime/ftl/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,17 @@ dependencies = [
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


[tool.ruff]
select = [
"ANN001", # missing type annotation for function argument
"ANN002", # missing type annotation for *args
"ANN003", # missing type annotation for **kwargs
"ANN102", # missing type annotation for cls in classmethod
"ANN201", # missing return type annotation for public function
"ANN202", # missing return type annotation for private function
"ANN204", # missing return type annotation for special method
"ANN205", # missing return type annotation for staticmethod
"ANN206", # missing return type annotation for classmethod
]
2 changes: 1 addition & 1 deletion python-runtime/ftl/src/ftl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .decorators import verb
from .verb import verb

__all__ = ["verb"]
Empty file.
105 changes: 105 additions & 0 deletions python-runtime/ftl/src/ftl/cli/schema_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import argparse
import ast
import concurrent.futures
import os
import sys
from contextlib import contextmanager

from ftl.extract import (
GlobalExtractionContext,
TransitiveExtractor,
)
from ftl.verb import (
VerbExtractor,
)

# Schema-extraction passes, grouped into batches: analyzers within the same
# sublist may run in parallel, while the batches themselves run sequentially.
# NOTE(review): presumably ordered so TransitiveExtractor sees refs queued by
# VerbExtractor — confirm.
analyzers = [
    [VerbExtractor],
    [TransitiveExtractor],
]


@contextmanager
def set_analysis_mode(path):
    """Temporarily append *path* to ``sys.path`` for the duration of the block.

    The original search path is restored on exit, even if the body raises.
    """
    saved_path = list(sys.path)
    sys.path.append(path)
    try:
        yield
    finally:
        sys.path = saved_path


def analyze_directory(module_dir):
    """Analyze all Python files under *module_dir*, one analyzer batch at a time.

    Batches in ``analyzers`` run sequentially; within a batch, files are
    processed in parallel across worker processes. All extracted decls are
    printed once every batch has completed.
    """
    global_ctx = GlobalExtractionContext()

    # Collect every .py file in the directory tree up front.
    file_paths = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(module_dir)
        for filename in filenames
        if filename.endswith(".py")
    ]

    for analyzer_batch in analyzers:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            future_to_file = {
                executor.submit(
                    analyze_file, global_ctx, file_path, analyzer_batch
                ): file_path
                for file_path in file_paths
            }

            for future in concurrent.futures.as_completed(future_to_file):
                file_path = future_to_file[future]
                try:
                    # Surface any exception raised in the worker process.
                    future.result()
                except Exception as exc:
                    print(f"failed to extract schema from {file_path}: {exc};")

    # Only the decls are needed for output; the ref keys are ignored.
    for decl in global_ctx.deserialize().values():
        print(f"Extracted Decl:\n{decl}")


def analyze_file(global_ctx: GlobalExtractionContext, file_path, analyzer_batch):
    """Run every analyzer in *analyzer_batch* against a single Python file.

    Analyzers share one local extraction context derived from *global_ctx*
    and run concurrently in threads; each receives the parsed AST of the file.
    """
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    # Read via a context manager so the file handle is always closed
    # (the previous open(...).read() leaked the handle).
    with open(file_path) as f:
        source = f.read()
    # filename= attributes SyntaxError messages to the real file.
    file_ast = ast.parse(source, filename=file_path)
    local_ctx = global_ctx.init_local_context()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                run_analyzer,
                analyzer_class,
                local_ctx,
                module_name,
                file_path,
                file_ast,
            )
            for analyzer_class in analyzer_batch
        ]

        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                print(f"Analyzer generated an exception: {exc} in {file_path}")


def run_analyzer(analyzer_class, context, module_name, file_path, file_ast):
    """Instantiate *analyzer_class* and walk *file_ast* with it."""
    analyzer_class(context, module_name, file_path).visit(file_ast)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "module_dir", type=str, help="The Python module directory to analyze."
    )
    args = parser.parse_args()

    # Named `module_dir` rather than `dir` to avoid shadowing the builtin.
    module_dir = args.module_dir
    with set_analysis_mode(module_dir):
        analyze_directory(module_dir)
3 changes: 0 additions & 3 deletions python-runtime/ftl/src/ftl/decorators/__init__.py

This file was deleted.

30 changes: 0 additions & 30 deletions python-runtime/ftl/src/ftl/decorators/verb.py

This file was deleted.

22 changes: 22 additions & 0 deletions python-runtime/ftl/src/ftl/extract/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from .common import (
extract_basic_type,
extract_class_type,
extract_function_type,
extract_map,
extract_slice,
extract_type,
)
from .context import GlobalExtractionContext, LocalExtractionContext
from .transitive import TransitiveExtractor

# Public surface of the extract package: type-mapping helpers, the two
# extraction contexts, and the transitive-dependency extractor.
__all__ = [
    "extract_type",
    "extract_slice",
    "extract_map",
    "extract_basic_type",
    "extract_class_type",
    "extract_function_type",
    "LocalExtractionContext",
    "GlobalExtractionContext",
    "TransitiveExtractor",
]
99 changes: 99 additions & 0 deletions python-runtime/ftl/src/ftl/extract/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from typing import Any, Dict, List, Optional, Type

from ftl.protos.xyz.block.ftl.v1.schema import schema_pb2 as schemapb

from .context import LocalExtractionContext


def extract_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Extracts type information from Python type hints and maps it to schema types.

    Returns None for unsupported hints. NOTE(review): the list/dict branches
    match runtime *instances* (e.g. ``[int]``, ``{str: int}``), not
    ``typing.List``/``typing.Dict`` generics — confirm callers pass hints in
    this literal form.
    """
    if isinstance(type_hint, list):
        return extract_slice(local_ctx, type_hint)

    elif isinstance(type_hint, dict):
        return extract_map(local_ctx, type_hint)

    elif type_hint is Any:
        return schemapb.Type(any=schemapb.Any())

    elif isinstance(type_hint, type):
        # Primitives map directly to scalar schema types.
        if (
            type_hint is str
            or type_hint is int
            or type_hint is bool
            or type_hint is float
        ):
            return extract_basic_type(type_hint)

        # Any remaining class becomes a Ref and is queued for extraction.
        if hasattr(type_hint, "__bases__"):
            return extract_class_type(local_ctx, type_hint)

        # NOTE(review): unreachable — every class has __bases__, so the
        # branch above always returns first. Confirm whether plain functions
        # were meant to reach extract_function_type here.
        if callable(type_hint):
            return extract_function_type(local_ctx, type_hint)

    # Handle parametric types (e.g., List[int], Dict[str, int]) - Optional, uncomment if needed
    # elif hasattr(type_hint, "__origin__"):
    #     return extract_parametric_type(local_ctx, type_hint)

    # TODO: raise exception for unsupported types
    return None


def extract_slice(
    local_ctx: LocalExtractionContext, type_hint: List[Any]
) -> Optional[schemapb.Type]:
    """Map a non-empty list hint (e.g. ``[int]``) to a schema Array type.

    Returns None for non-list hints, empty lists, or unsupported elements.
    """
    if not isinstance(type_hint, list) or not type_hint:
        return None
    # The first element stands in for the array's element type.
    element = extract_type(local_ctx, type_hint[0])
    return schemapb.Type(array=schemapb.Array(element=element)) if element else None


def extract_map(
    local_ctx: LocalExtractionContext, type_hint: Dict[Any, Any]
) -> Optional[schemapb.Type]:
    """Map a dict hint (e.g. ``{str: int}``) to a schema Map type.

    The first key/value pair determines the key and value types. Returns
    None for non-dict hints, empty dicts, or unsupported key/value types.
    """
    # Guard the empty dict, which previously raised IndexError on keys()[0].
    if isinstance(type_hint, dict) and type_hint:
        key_hint, value_hint = next(iter(type_hint.items()))
        key_type = extract_type(local_ctx, key_hint)
        value_type = extract_type(local_ctx, value_hint)
        if key_type and value_type:
            return schemapb.Type(map=schemapb.Map(key=key_type, value=value_type))
    return None


def extract_basic_type(type_hint: Type[Any]) -> Optional[schemapb.Type]:
    """Return the scalar schema type for a Python primitive, or None."""
    if type_hint is str:
        return schemapb.Type(string=schemapb.String())
    if type_hint is int:
        return schemapb.Type(int=schemapb.Int())
    if type_hint is bool:
        return schemapb.Type(bool=schemapb.Bool())
    if type_hint is float:
        return schemapb.Type(float=schemapb.Float())
    return None


# Uncomment and implement parametric types if needed
# def extract_parametric_type(local_ctx: LocalExtractionContext, type_hint: Type[Any]) -> Optional[schemapb.Type]:
# if hasattr(type_hint, "__args__"):
# base_type = extract_type(local_ctx, type_hint.__origin__)
# param_types = [extract_type(local_ctx, arg) for arg in type_hint.__args__]
# if isinstance(base_type, schemapb.Ref):
# base_type.type_parameters.extend(param_types)
# return base_type
# return None


def extract_class_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Emit a Ref to the class and queue it for transitive extraction."""
    name, module = type_hint.__name__, type_hint.__module__
    ref = schemapb.Ref(name=name, module=module)
    local_ctx.add_needs_extraction(ref)
    return schemapb.Type(ref=ref)


def extract_function_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Emit a Ref to a function/verb and queue it for transitive extraction.

    Functions and classes are referenced identically in the schema, so this
    delegates to extract_class_type rather than duplicating its body.
    """
    return extract_class_type(local_ctx, type_hint)
Loading

0 comments on commit d3c73f5

Please sign in to comment.