Skip to content

Commit

Permalink
Include docstrings in schemas (#133)
Browse files Browse the repository at this point in the history
* incl docstrings in schema

* lint

* incl docstring in schema

* lint

* fix lint

* lint

* add include_documentation flag

* lint

* lint

* lint

* add test

* lint

* lint

* fix test

* fix

* fix indentation

* fix indentation

* remove commented lines

* rename variables

* rename variables

* rename variables

* rename variables

* rename variables

* add docstring
  • Loading branch information
jana-starkova authored Aug 2, 2023
1 parent 05dc4b3 commit d3c4781
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 7 deletions.
32 changes: 30 additions & 2 deletions tests/_schema/test_get_schema_definition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
from typedspark._core.datatypes import DayTimeIntervalType
from typedspark._core.literaltype import IntervalType
from typing import Annotated

from pyspark.sql.types import IntegerType, StringType

from typedspark import Column, DayTimeIntervalType, IntervalType, Schema
from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals


# Fixture schema used by test_get_schema_definition_as_string below.
# Its docstring and annotations are compared verbatim against the generated
# source, so they must not be reworded.
class A(Schema):
    """This is a docstring for A."""

    # Carries a plain-string ``Annotated`` metadata entry; the test expects it
    # to be emitted as ``ColumnMeta(comment="Some column")``.
    a: Annotated[Column[IntegerType], "Some column"]
    # Has no metadata; the test expects an empty ``ColumnMeta(comment="")``.
    b: Column[StringType]


def test_replace_literal():
result = _replace_literal(
"DayTimeIntervalType[Literal[0], Literal[1]]",
Expand All @@ -24,3 +34,21 @@ def test_replace_literals():
expected = "DayTimeIntervalType[IntervalType.DAY, IntervalType.HOUR]"

assert result == expected


def test_get_schema_definition_as_string():
    """Check the source generated for ``A`` when documentation is included."""
    # NOTE(review): the literal below is reproduced exactly as it appears in
    # this view; its indentation and blank lines look collapsed -- confirm it
    # against the generator's real output before relying on it.
    expected = '''from typing import Annotated
from pyspark.sql.types import IntegerType, StringType
from typedspark import Column, ColumnMeta, Schema
class A(Schema):
"""This is a docstring for A."""
a: Annotated[Column[IntegerType], ColumnMeta(comment="Some column")]
b: Annotated[Column[StringType], ColumnMeta(comment="")]
'''
    generated = A.get_schema_definition_as_string(include_documentation=True)

    assert generated == expected
28 changes: 23 additions & 5 deletions typedspark/_schema/get_schema_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,18 @@ def get_schema_definition_as_string(
return imports + schema_string


def _get_comment(schema: Type[Schema], col_name: str) -> str:
    """Return the comment attached to a column's ``Annotated`` type hint.

    Only the first ``Annotated`` metadata entry of
    ``schema.__annotations__[col_name]`` is inspected:

    * a plain string is returned unchanged;
    * an object exposing a ``comment`` attribute contributes that attribute
      (``None`` is treated as "no comment");
    * anything else, or a hint without metadata, yields ``""``.

    Args:
        schema: Class whose ``__annotations__`` holds the raw column hints.
        col_name: Name of the column to look up.

    Returns:
        The comment text, or an empty string when none is available.

    Raises:
        KeyError: If ``col_name`` is not a key of ``schema.__annotations__``.
    """
    # Look the hint up once; hints that are not ``Annotated`` (including
    # string annotations) simply have no ``__metadata__``.
    metadata = getattr(schema.__annotations__[col_name], "__metadata__", None)
    if not metadata:
        return ""

    first = metadata[0]
    if isinstance(first, str):
        return first

    # NOTE(review): presumably ``ColumnMeta`` exposes the ``comment`` it is
    # constructed with -- confirm. Unwrapping it keeps generated schemas
    # (which carry ``ColumnMeta(comment=...)`` metadata) round-trippable
    # instead of embedding the metadata object's repr as the comment.
    comment = getattr(first, "comment", None)
    return "" if comment is None else str(comment)


def _build_schema_definition_string(
schema: Type[Schema],
include_documentation: bool,
Expand All @@ -44,11 +56,14 @@ def _build_schema_definition_string(
"""Return the code for a given ``Schema`` as a string."""
lines = f"class {class_name}(Schema):\n"
if include_documentation:
lines += ' """Add documentation here."""\n\n'
if schema.get_docstring() is not None:
lines += f' """{schema.get_docstring()}"""\n\n'
else:
lines += ' """Add documentation here."""\n\n'

for k, val in get_type_hints(schema).items():
for col_name, col_object in get_type_hints(schema).items():
typehint = (
str(val)
str(col_object)
.replace("typedspark._core.column.", "")
.replace("typedspark._core.datatypes.", "")
.replace("typedspark._schema.schema.", "")
Expand All @@ -59,9 +74,12 @@ def _build_schema_definition_string(
typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType
)
if include_documentation:
lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n'
col_annotated_start = f" {col_name}: Annotated[{typehint}, "
if col_name in schema.__annotations__:
comment = _get_comment(schema, col_name)
lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n'
else:
lines += f" {k}: {typehint}\n"
lines += f" {col_name}: {typehint}\n"

if add_subschemas:
lines += _add_subschemas(schema, add_subschemas, include_documentation)
Expand Down

0 comments on commit d3c4781

Please sign in to comment.