Skip to content

Commit

Permalink
Add typedspark_schema property to a dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
hahamark1 committed Aug 8, 2023
1 parent 5cda894 commit f474ce5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
5 changes: 5 additions & 0 deletions tests/_core/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ def test_inherrited_functions_with_other_dataset(spark: SparkSession):

df_a.join(df_b, A.a.str)
df_a.unionByName(df_b)


def test_schema_property_of_dataset(spark: SparkSession):
    """Check that ``typedspark_schema`` on an empty ``A`` dataset compares equal to ``A``."""
    dataset = create_empty_dataset(spark, A)
    exposed_schema = dataset.typedspark_schema
    assert exposed_schema == A
5 changes: 5 additions & 0 deletions typedspark/_core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def _add_schema_metadata(self) -> None:
for field in self._schema_annotations.get_structtype().fields:
self.schema[field.name].metadata = field.metadata

@property
def typedspark_schema(self) -> Schema:
    """Expose the ``Schema`` held by this ``DataSet``.

    This is a read-only view of the ``_schema_annotations`` attribute; the
    stored object is returned as-is, without copying or conversion.
    """
    schema = self._schema_annotations
    return schema

"""The following functions are equivalent to their parents in ``DataFrame``, but since they
don't affect the ``Schema``, we can add type annotations here. We're omitting docstrings,
such that the docstring from the parent will appear."""
Expand Down

0 comments on commit f474ce5

Please sign in to comment.