generated from TBD54566975/tbd-project-template
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: port over initial python schema extraction PoC (#3332)
# Summary

This PR ports the Python schema extraction PoC into this repo.

> [!WARNING]
> Still mostly unusable, but this lays down the vast majority of a foundation.

# Changes

- Refactor the `verb` decorator. Switched to a pattern that doesn't require dynamic, dangling mutable properties on functions. Instead, it encapsulates decorated verb functions in a class and extracts type information useful for the runtime gRPC server and schema extraction.
- Set up an initial stab at a directory structure. Will attempt to create 1 package per schema feature. Each package will contain:
  - encapsulating class
  - decorator
  - schema extraction
- Minor refactor of `VerbExtractor` to use `isinstance(func, Verb)` instead of `getattr(func, "_is_ftl_verb", False)`.
- Set up a runnable `schema_extractor` (note: this will change in the next PR to a standalone executable; temporarily in a `cli` dir).
- Added a TODO in the Python language plugin's `Build` method.

Props to @worstell for doing the actual schema extraction work! Will co-author the merge commit.

# Usage

> [!NOTE]
> From the `python-runtime/ftl` dir

```
❯ uv run python -m ftl.cli.schema_extractor $(pwd)/../../examples/python/echo
Error importing module decorator: attempted relative import with no known parent package
/Users/moe/code/tbd/ose/ftl/python-runtime/ftl/src/ftl/extract/context.py:76: UserWarning: google.protobuf.service module is deprecated. RPC implementations should provide code generator plugins which generate code specific to the RPC implementation. service.py will be removed in Jan 2025
  spec.loader.exec_module(module)
Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 17
}
name: "echo"
request {
  ref {
    name: "EchoRequest"
    module: "echo"
  }
}
response {
  ref {
    name: "EchoResponse"
    module: "echo"
  }
}
Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 7
}
name: "EchoRequest"
fields {
  name: "name"
  type {
    string {
    }
  }
}
Extracted Decl:
pos {
  filename: "/path/to/ftl/python-runtime/ftl/../../examples/python/echo/echo.py"
  line: 12
}
name: "EchoResponse"
fields {
  name: "message"
  type {
    string {
    }
  }
}
```

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Alec Thomas <[email protected]>
Co-authored-by: worstell <[email protected]>
- Loading branch information
Showing
18 changed files
with
545 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from .decorators import verb | ||
from .verb import verb | ||
|
||
__all__ = ["verb"] |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import argparse | ||
import ast | ||
import concurrent.futures | ||
import os | ||
import sys | ||
from contextlib import contextmanager | ||
|
||
from ftl.extract import ( | ||
GlobalExtractionContext, | ||
TransitiveExtractor, | ||
) | ||
from ftl.verb import ( | ||
VerbExtractor, | ||
) | ||
|
||
# Analyzer batches, processed in list order by analyze_directory. Each inner
# list groups analyzer classes that can run in parallel on the same file.
analyzers = [[VerbExtractor], [TransitiveExtractor]]
|
||
|
||
@contextmanager
def set_analysis_mode(path):
    """Temporarily append ``path`` to ``sys.path``.

    The previous contents of ``sys.path`` are restored when the ``with``
    block exits, whether it finishes normally or raises.

    Args:
        path: Directory to add to the end of the module search path.
    """
    original_sys_path = sys.path.copy()
    sys.path.append(path)
    try:
        yield
    finally:
        # Restore in place rather than rebinding sys.path: any code holding a
        # reference to the list object keeps seeing the live search path, and
        # the original list is not left behind with the extra entry in it.
        sys.path[:] = original_sys_path
|
||
|
||
def analyze_directory(module_dir):
    """Run every analyzer batch over the ``.py`` files under module_dir.

    Files are discovered recursively with ``os.walk``. For each batch in
    ``analyzers`` the files are handed to a process pool, one
    ``analyze_file`` task per file; a failure in one file is printed and
    does not stop the others. Once all batches have finished, every
    declaration returned by ``global_ctx.deserialize()`` is printed.
    """
    global_ctx = GlobalExtractionContext()

    file_paths = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(module_dir)
        for filename in filenames
        if filename.endswith(".py")
    ]

    for analyzer_batch in analyzers:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Remember which file each future belongs to for error reporting.
            future_to_file = {}
            for file_path in file_paths:
                pending = executor.submit(
                    analyze_file, global_ctx, file_path, analyzer_batch
                )
                future_to_file[pending] = file_path

            for future in concurrent.futures.as_completed(future_to_file):
                file_path = future_to_file[future]
                try:
                    # Re-raises any exception that occurred in the worker.
                    future.result()
                except Exception as exc:
                    print(f"failed to extract schema from {file_path}: {exc};")

    for _, decl in global_ctx.deserialize().items():
        print(f"Extracted Decl:\n{decl}")
|
||
|
||
def analyze_file(global_ctx: GlobalExtractionContext, file_path, analyzer_batch):
    """Parse one Python file and run each analyzer in analyzer_batch over it.

    Args:
        global_ctx: Shared extraction context; a per-file local context is
            obtained from it via ``init_local_context()``.
        file_path: Path of the Python source file to analyze.
        analyzer_batch: Iterable of analyzer classes. Each one is run against
            the file's AST on a thread pool.

    Raises:
        OSError: If the file cannot be opened or read.
        SyntaxError: If the file is not valid Python source.

    Exceptions raised by an individual analyzer are printed, not propagated.
    """
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    # Read the raw bytes inside a with-block: the handle is always closed, and
    # ast.parse decodes using the file's own encoding declaration (PEP 263,
    # UTF-8 by default) instead of whatever the locale's default encoding is.
    with open(file_path, "rb") as source_file:
        file_ast = ast.parse(source_file.read(), filename=file_path)
    local_ctx = global_ctx.init_local_context()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                run_analyzer,
                analyzer_class,
                local_ctx,
                module_name,
                file_path,
                file_ast,
            )
            for analyzer_class in analyzer_batch
        ]

        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                print(f"Analyzer generated an exception: {exc} in {file_path}")
|
||
|
||
def run_analyzer(analyzer_class, context, module_name, file_path, file_ast):
    """Instantiate analyzer_class for one file and walk file_ast with it.

    The analyzer is constructed as
    ``analyzer_class(context, module_name, file_path)`` and its ``visit``
    method is called with ``file_ast``; the result of ``visit`` is discarded.
    """
    analyzer_class(context, module_name, file_path).visit(file_ast)
|
||
|
||
# Script entry point: analyze the module directory given on the command line.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "module_dir", type=str, help="The Python module directory to analyze."
    )
    args = parser.parse_args()

    # Named module_dir rather than dir so the builtin dir() is not shadowed.
    module_dir = args.module_dir
    # The directory is put on sys.path for the duration of the analysis.
    with set_analysis_mode(module_dir):
        analyze_directory(module_dir)
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from .common import ( | ||
extract_basic_type, | ||
extract_class_type, | ||
extract_function_type, | ||
extract_map, | ||
extract_slice, | ||
extract_type, | ||
) | ||
from .context import GlobalExtractionContext, LocalExtractionContext | ||
from .transitive import TransitiveExtractor | ||
|
||
__all__ = [ | ||
"extract_type", | ||
"extract_slice", | ||
"extract_map", | ||
"extract_basic_type", | ||
"extract_class_type", | ||
"extract_function_type", | ||
"LocalExtractionContext", | ||
"GlobalExtractionContext", | ||
"TransitiveExtractor", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
from typing import Any, Dict, List, Optional, Type | ||
|
||
from ftl.protos.xyz.block.ftl.v1.schema import schema_pb2 as schemapb | ||
|
||
from .context import LocalExtractionContext | ||
|
||
|
||
def extract_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Map a Python type hint to a schema type, or None if it is unsupported.

    List and dict instances are delegated to extract_slice and extract_map,
    ``Any`` maps to the schema ``any`` type, and classes are resolved either
    as one of the primitives (str, int, bool, float) or through
    extract_class_type.
    """
    if isinstance(type_hint, list):
        return extract_slice(local_ctx, type_hint)
    if isinstance(type_hint, dict):
        return extract_map(local_ctx, type_hint)
    if type_hint is Any:
        return schemapb.Type(any=schemapb.Any())
    if not isinstance(type_hint, type):
        # Parametric types (e.g. List[int], Dict[str, int]) are not handled.
        # TODO: raise exception for unsupported types
        return None

    is_primitive = any(
        type_hint is primitive for primitive in (str, int, bool, float)
    )
    if is_primitive:
        return extract_basic_type(type_hint)
    if hasattr(type_hint, "__bases__"):
        return extract_class_type(local_ctx, type_hint)
    # NOTE(review): classes normally always have __bases__, so this branch
    # looks unreachable - confirm whether plain callables were meant to be
    # handled outside the isinstance(type_hint, type) check.
    if callable(type_hint):
        return extract_function_type(local_ctx, type_hint)

    # TODO: raise exception for unsupported types
    return None
|
||
|
||
def extract_slice(
    local_ctx: LocalExtractionContext, type_hint: List[Any]
) -> Optional[schemapb.Type]:
    """Build a schema array type from a list-style hint such as ``[int]``.

    The first list item is taken as the element type. Returns None when
    type_hint is not a list, is empty, or the element type cannot be
    extracted.
    """
    if not isinstance(type_hint, list) or not type_hint:
        return None

    element_type = extract_type(local_ctx, type_hint[0])
    if not element_type:
        return None
    return schemapb.Type(array=schemapb.Array(element=element_type))
|
||
|
||
def extract_map(
    local_ctx: LocalExtractionContext, type_hint: Dict[Any, Any]
) -> Optional[schemapb.Type]:
    """Build a schema map type from a dict-style hint such as ``{str: int}``.

    The first key/value pair of the dict supplies the key and value types.

    Returns:
        The schema map type, or None when type_hint is not a dict, is empty,
        or either the key or the value type cannot be extracted.
    """
    # An empty dict has no pair to read the types from; return None, the same
    # way extract_slice treats an empty list, instead of raising IndexError.
    if not isinstance(type_hint, dict) or not type_hint:
        return None

    first_key, first_value = next(iter(type_hint.items()))
    # Both sides are always extracted, even if the key turns out unsupported.
    key_type = extract_type(local_ctx, first_key)
    value_type = extract_type(local_ctx, first_value)
    if key_type and value_type:
        return schemapb.Type(map=schemapb.Map(key=key_type, value=value_type))
    return None
|
||
|
||
def extract_basic_type(type_hint: Type[Any]) -> Optional[schemapb.Type]:
    """Return the schema type for str, int, bool or float.

    Any other hint yields None.
    """
    builders = {
        str: lambda: schemapb.Type(string=schemapb.String()),
        int: lambda: schemapb.Type(int=schemapb.Int()),
        bool: lambda: schemapb.Type(bool=schemapb.Bool()),
        float: lambda: schemapb.Type(float=schemapb.Float()),
    }
    build = builders.get(type_hint)
    if build is None:
        return None
    return build()
|
||
|
||
# Uncomment and implement parametric types if needed | ||
# def extract_parametric_type(local_ctx: LocalExtractionContext, type_hint: Type[Any]) -> Optional[schemapb.Type]: | ||
# if hasattr(type_hint, "__args__"): | ||
# base_type = extract_type(local_ctx, type_hint.__origin__) | ||
# param_types = [extract_type(local_ctx, arg) for arg in type_hint.__args__] | ||
# if isinstance(base_type, schemapb.Ref): | ||
# base_type.type_parameters.extend(param_types) | ||
# return base_type | ||
# return None | ||
|
||
|
||
def extract_class_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Return a schema reference type pointing at the given class.

    The reference is built from the class's ``__name__`` and ``__module__``
    and is also registered with local_ctx via ``add_needs_extraction``.
    """
    class_ref = schemapb.Ref(
        name=type_hint.__name__,
        module=type_hint.__module__,
    )
    local_ctx.add_needs_extraction(class_ref)
    return schemapb.Type(ref=class_ref)
|
||
|
||
def extract_function_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Return a schema reference type pointing at the given callable.

    The reference is built from the callable's ``__name__`` and
    ``__module__`` and is also registered with local_ctx via
    ``add_needs_extraction``.
    """
    function_ref = schemapb.Ref(
        name=type_hint.__name__,
        module=type_hint.__module__,
    )
    local_ctx.add_needs_extraction(function_ref)
    return schemapb.Type(ref=function_ref)
Oops, something went wrong.