Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include docstrings in schemas - improved readability #136

Merged
Merged
Changes from 28 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d181e75
incl docstrings in schema
jana-starkova Jul 28, 2023
3465e0b
lint
jana-starkova Jul 28, 2023
03e0dfe
incl docstring in schema
jana-starkova Jul 28, 2023
6e50246
lint
jana-starkova Jul 28, 2023
dd51597
fix lint
jana-starkova Jul 28, 2023
5d9f288
lint
jana-starkova Jul 28, 2023
1abcc48
add include_documentation flag
jana-starkova Jul 28, 2023
269ed54
lint
jana-starkova Jul 28, 2023
12f22ce
lint
jana-starkova Jul 28, 2023
5a09c99
lint
jana-starkova Jul 28, 2023
2070f43
add test
jana-starkova Jul 28, 2023
80664d8
lint
jana-starkova Jul 28, 2023
0a0293e
lint
jana-starkova Jul 28, 2023
0967325
fix test
jana-starkova Jul 28, 2023
1c5de10
fix
jana-starkova Jul 28, 2023
ae8a619
fix indentation
jana-starkova Jul 31, 2023
08b1c50
fix indentation
jana-starkova Jul 31, 2023
4aa5d21
Merge branch 'main' into include_docstrings_in_schemas
jana-starkova Jul 31, 2023
e14fdeb
Merge branch 'main' into include_docstrings_in_schemas
jana-starkova Aug 2, 2023
a495667
remove commented lines
jana-starkova Aug 2, 2023
bb871e9
rename variables
jana-starkova Aug 2, 2023
806756d
rename variables
jana-starkova Aug 2, 2023
cbbaf78
rename variables
jana-starkova Aug 2, 2023
f3841e9
rename variables
jana-starkova Aug 2, 2023
9d4b0f0
rename variables
jana-starkova Aug 2, 2023
3ed98b4
add docstring
jana-starkova Aug 2, 2023
761419f
improve readability
jana-starkova Aug 2, 2023
65d782b
improve readability
jana-starkova Aug 2, 2023
f697f5a
improve readability
jana-starkova Aug 2, 2023
55eab23
fix lint
jana-starkova Aug 2, 2023
cf2e849
fix lint
jana-starkova Aug 2, 2023
a17a99f
rename
jana-starkova Aug 2, 2023
c358596
rename
jana-starkova Aug 2, 2023
541aae4
improve readability
jana-starkova Aug 2, 2023
d92da01
improve readability
jana-starkova Aug 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 50 additions & 33 deletions typedspark/_schema/get_schema_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,53 @@ def get_schema_definition_as_string(
return imports + schema_string


def _get_comment(schema: Type[Schema], col_name: str) -> str:
"""Return the comment of a given column."""
if (
hasattr(schema.__annotations__[col_name], "__metadata__")
and schema.__annotations__[col_name].__metadata__ is not None
):
comment = schema.__annotations__[col_name].__metadata__[0]
def _create_docstring(schema: Type[Schema]) -> str:
jana-starkova marked this conversation as resolved.
Show resolved Hide resolved
"""Create the docstring for a given ``Schema``."""
if schema.get_docstring() is not None:
docstring = f' """{schema.get_docstring()}"""\n\n'
else:
comment = ""
return comment
docstring = ' """Add documentation here."""\n\n'
return docstring


def _extract_comment(typehint: str) -> tuple[str, str]:
"""Extract the comment from a typehint."""
comment = ""
if "Annotated" in typehint:
typehint, comment = re.search(r"Annotated\[(.*), '(.*)'\]", typehint).groups()
return typehint, comment


def _create_typehint_comment(col_type) -> list[str]:
    """Create a typehint and comment for a given column.

    Strips fully-qualified module paths from the repr of the column's type so
    the generated code reads cleanly, extracts any comment stored in the
    ``Annotated`` metadata, and rewrites ``DayTimeIntervalType`` literals as
    ``IntervalType``.
    """
    typehint = str(col_type)
    # Drop module prefixes one by one (same order as the original chain).
    for prefix in (
        "typedspark._core.column.",
        "typedspark._core.datatypes.",
        "typedspark._schema.schema.",
        "pyspark.sql.types.",
        "typing.",
    ):
        typehint = typehint.replace(prefix, "")
    # Collapse a doubly-nested Annotated repr into a single Annotated.
    typehint = typehint.replace("Annotated[Annotated", "Annotated")
    typehint, comment = _extract_comment(typehint)
    typehint = _replace_literals(
        typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType
    )
    return [typehint, comment]


def _add_lines_with_typehint(include_documentation, schema):
    """Add a line with the typehint for each column in the ``Schema``.

    When ``include_documentation`` is set, each column is rendered as an
    ``Annotated`` typehint carrying its comment in a ``ColumnMeta``;
    otherwise the bare typehint is used.
    """
    rendered = []
    for col_name, col_type in get_type_hints(schema, include_extras=True).items():
        typehint, comment = _create_typehint_comment(col_type)
        if include_documentation:
            rendered.append(
                f'    {col_name}: Annotated[{typehint}, ColumnMeta(comment="{comment}")]\n'
            )
        else:
            rendered.append(f"    {col_name}: {typehint}\n")
    # Join once instead of repeated string concatenation.
    return "".join(rendered)


def _build_schema_definition_string(
Expand All @@ -55,31 +92,11 @@ def _build_schema_definition_string(
) -> str:
"""Return the code for a given ``Schema`` as a string."""
lines = f"class {class_name}(Schema):\n"

if include_documentation:
if schema.get_docstring() is not None:
lines += f' """{schema.get_docstring()}"""\n\n'
else:
lines += ' """Add documentation here."""\n\n'

for col_name, col_object in get_type_hints(schema).items():
typehint = (
str(col_object)
.replace("typedspark._core.column.", "")
.replace("typedspark._core.datatypes.", "")
.replace("typedspark._schema.schema.", "")
.replace("pyspark.sql.types.", "")
.replace("typing.", "")
)
typehint = _replace_literals(
typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType
)
if include_documentation:
col_annotated_start = f" {col_name}: Annotated[{typehint}, "
if col_name in schema.__annotations__:
comment = _get_comment(schema, col_name)
lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n'
else:
lines += f" {col_name}: {typehint}\n"
lines += _create_docstring(schema)

lines += _add_lines_with_typehint(include_documentation, schema)

if add_subschemas:
lines += _add_subschemas(schema, add_subschemas, include_documentation)
Expand Down
Loading