From d181e75fbd7cfd069e97753c84342a96a98a7afe Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 14:26:06 +0200 Subject: [PATCH 01/32] incl docstrings in schema --- typedspark/_schema/get_schema_definition.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 738b810..9c16c2e 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -44,7 +44,10 @@ def _build_schema_definition_string( """Return the code for a given ``Schema`` as a string.""" lines = f"class {class_name}(Schema):\n" if include_documentation: - lines += ' """Add documentation here."""\n\n' + if schema.get_docstring() is not None: + lines += f' """{schema.get_docstring()}"""\n\n' + else: + lines += ' """Add documentation here."""\n\n' for k, val in get_type_hints(schema).items(): typehint = ( @@ -57,9 +60,14 @@ def _build_schema_definition_string( ) typehint = _replace_literals( typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType - ) + ) if include_documentation: - lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' + if hasattr(schema.__annotations__[k], "__metadata__"): + print("attribute exists") + if schema.__annotations__[k].__metadata__ is not None: + lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' + else: + lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' else: lines += f" {k}: {typehint}\n" From 3465e0b988ba70877d93e898d98fa949fcfee95a Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 14:27:24 +0200 Subject: [PATCH 02/32] lint --- typedspark/_schema/get_schema_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 9c16c2e..83503c4 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -60,7 +60,7 @@ def _build_schema_definition_string( ) typehint = _replace_literals( typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType - ) + ) if include_documentation: if hasattr(schema.__annotations__[k], "__metadata__"): print("attribute exists") From 03e0dfeda4aaf7ec3464ca52112388658a42c97d Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 14:35:14 +0200 Subject: [PATCH 03/32] incl docstring in schema --- typedspark/_schema/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 7d8cc67..5f52175 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -50,7 +50,7 @@ def __repr__(cls) -> str: return f"\n{str(cls)}" def __str__(cls) -> str: - return cls.get_schema_definition_as_string(add_subschemas=False) + return cls.get_schema_definition_as_string(include_documentation=True, add_subschemas=False) def __getattribute__(cls, name: str) -> Any: """Python base function that gets attributes. From 6e50246db7554ef7dee1fe6de01ca70140ea575e Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 14:41:35 +0200 Subject: [PATCH 04/32] lint --- typedspark/_schema/get_schema_definition.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 83503c4..810044a 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -65,7 +65,8 @@ def _build_schema_definition_string( if hasattr(schema.__annotations__[k], "__metadata__"): print("attribute exists") if schema.__annotations__[k].__metadata__ is not None: - lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' + lines += f' {k}: Annotated[{typehint}, ' + + 'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' else: lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' else: From dd51597e8387e5d5ed2614a089e4c2fd0de4d4c8 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 14:52:03 +0200 Subject: [PATCH 05/32] fix lint --- typedspark/_schema/get_schema_definition.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 810044a..78c64fd 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -65,8 +65,10 @@ def _build_schema_definition_string( if hasattr(schema.__annotations__[k], "__metadata__"): print("attribute exists") if schema.__annotations__[k].__metadata__ is not None: - lines += f' {k}: Annotated[{typehint}, ' - + 'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' + lines += ( + f' {k}: Annotated[{typehint}, ' + + f'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' + ) else: lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' else: From 5d9f2885ccef405e928c09f334f3b1d730a01e65 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 15:02:57 +0200 Subject: [PATCH 06/32] lint --- typedspark/_schema/get_schema_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 78c64fd..896c926 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -66,7 +66,7 @@ def _build_schema_definition_string( print("attribute exists") if schema.__annotations__[k].__metadata__ is not None: lines += ( - f' {k}: Annotated[{typehint}, ' + f" {k}: Annotated[{typehint}, " + f'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' ) else: From 1abcc48b918c00c68fd757b7623c18d4d2a2f2e2 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 15:47:10 +0200 Subject: [PATCH 07/32] add include_documentation flag --- typedspark/_schema/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 5f52175..936489e 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -49,8 +49,8 @@ def __new__(cls, name: str, bases: Any, dct: Dict[str, Any]): def __repr__(cls) -> str: return f"\n{str(cls)}" - def __str__(cls) -> str: - return cls.get_schema_definition_as_string(include_documentation=True, add_subschemas=False) + def __str__(cls, include_documentation=False) -> str: + return cls.get_schema_definition_as_string(include_documentation=include_documentation, add_subschemas=False) def __getattribute__(cls, name: str) -> Any: """Python base function that gets attributes. From 269ed54b399e4a6d905db0c4ae60a7c282f909b9 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 16:21:25 +0200 Subject: [PATCH 08/32] lint --- typedspark/_schema/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 936489e..40bd3fc 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -50,7 +50,9 @@ def __repr__(cls) -> str: return f"\n{str(cls)}" def __str__(cls, include_documentation=False) -> str: - return cls.get_schema_definition_as_string(include_documentation=include_documentation, add_subschemas=False) + return cls.get_schema_definition_as_string( + include_documentation=include_documentation, add_subschemas=False + ) def __getattribute__(cls, name: str) -> Any: """Python base function that gets attributes. From 12f22ce110ec86ce3a2afa1c5f941f7badceee38 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 16:25:47 +0200 Subject: [PATCH 09/32] lint --- typedspark/_schema/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 40bd3fc..be19108 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -51,7 +51,7 @@ def __repr__(cls) -> str: def __str__(cls, include_documentation=False) -> str: return cls.get_schema_definition_as_string( - include_documentation=include_documentation, add_subschemas=False + include_documentation=include_documentation, add_subschemas=False ) def __getattribute__(cls, name: str) -> Any: From 5a09c99ae91258df8d22b749ba7aaad2e56ea609 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 16:25:57 +0200 Subject: [PATCH 10/32] lint --- typedspark/_schema/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index be19108..40bd3fc 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -51,7 +51,7 @@ def __repr__(cls) -> str: def __str__(cls, include_documentation=False) -> str: return cls.get_schema_definition_as_string( - include_documentation=include_documentation, add_subschemas=False + include_documentation=include_documentation, add_subschemas=False ) def __getattribute__(cls, name: str) -> Any: From 2070f43f6bdfc0babd439632119f3695f25d958e Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 16:53:37 +0200 Subject: [PATCH 11/32] add test --- tests/_schema/test_get_schema_definition.py | 33 +++++++++++++++++++++ typedspark/_schema/schema.py | 20 ++++++++++--- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tests/_schema/test_get_schema_definition.py b/tests/_schema/test_get_schema_definition.py index 266b2f1..dddfa31 100644 --- a/tests/_schema/test_get_schema_definition.py +++ b/tests/_schema/test_get_schema_definition.py @@ -1,6 +1,17 @@ +from typing import Annotated +from pyspark.sql import Column +from pyspark.sql.types import IntegerType, StringType +from typedspark._core.column_meta import ColumnMeta from typedspark._core.datatypes import DayTimeIntervalType from typedspark._core.literaltype import IntervalType from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals +from typedspark._schema.schema import Schema + + +class A(Schema): + """"This is a docstring for A.""" + a: Annotated[Column[IntegerType], "Some column"] + b: Column[StringType] def test_replace_literal(): @@ -24,3 +35,25 @@ def test_replace_literals(): expected = "DayTimeIntervalType[IntervalType.DAY, IntervalType.HOUR]" assert result == expected + + +def test_get_schema_definition_as_string( + # schema: Type[Schema], + # include_documentation: bool, + # generate_imports: bool, + # add_subschemas: bool, + # class_name: str = "MyNewSchema", + ): + result = A.get_schema_definition_as_string(include_documentation=True) + expected = (''' + from pyspark.sql.types import IntegerType, StringType + + from typedspark import Column, Schema + + + class A(Schema): + """This is a docstring for A.""" + a: Annotated[Column[IntegerType], ColumnMeta(comment="Some column")] + b: Annotated[Column[StringType], ColumnMeta(comment="")]"' + ''') + assert result == expected diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 40bd3fc..9e66dea 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -1,10 +1,10 @@ """Module containing classes and functions related to TypedSpark Schemas.""" import inspect import re -from typing import Any, Dict, List, Optional, Type, Union, get_args, get_type_hints +from typing import Annotated, Any, Dict, List, Optional, Type, Union, get_args, get_type_hints from pyspark.sql import DataFrame -from pyspark.sql.types import DataType, StructType +from pyspark.sql.types import DataType, IntegerType, StructType from typedspark._core.column import Column from typedspark._schema.dlt_kwargs import DltKwargs @@ -49,9 +49,9 @@ def __new__(cls, name: str, bases: Any, dct: Dict[str, Any]): def __repr__(cls) -> str: return f"\n{str(cls)}" - def __str__(cls, include_documentation=False) -> str: + def __str__(cls) -> str: return cls.get_schema_definition_as_string( - include_documentation=include_documentation, add_subschemas=False + add_subschemas=False ) def __getattribute__(cls, name: str) -> Any: @@ -190,3 +190,15 @@ class Schema(metaclass=MetaSchema): # to not add a docstring to the Schema class (otherwise the Schema # docstring would be added to any schema without a docstring). pass + + +new_schema = type("SomeModel", (Schema,), {}) +cols = { + "a": Annotated[Column[IntegerType], "Some column"], + "b": Column[IntegerType] +} +new_schema.__annotations__ = cols +new_schema.__doc__ = "This is a docstring for SomeModel." + +print(new_schema.get_schema_definition_as_string(include_documentation=True)) +print(new_schema) \ No newline at end of file From 80664d83b626e8619b7d876d073ec08364ce4b96 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 17:02:51 +0200 Subject: [PATCH 12/32] lint --- tests/_schema/test_get_schema_definition.py | 18 +++++++----------- typedspark/_schema/schema.py | 11 +++-------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/tests/_schema/test_get_schema_definition.py b/tests/_schema/test_get_schema_definition.py index dddfa31..dca1242 100644 --- a/tests/_schema/test_get_schema_definition.py +++ b/tests/_schema/test_get_schema_definition.py @@ -1,7 +1,8 @@ from typing import Annotated + from pyspark.sql import Column from pyspark.sql.types import IntegerType, StringType -from typedspark._core.column_meta import ColumnMeta + from typedspark._core.datatypes import DayTimeIntervalType from typedspark._core.literaltype import IntervalType from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals @@ -9,7 +10,8 @@ class A(Schema): - """"This is a docstring for A.""" + """ "This is a docstring for A.""" + a: Annotated[Column[IntegerType], "Some column"] b: Column[StringType] @@ -37,15 +39,9 @@ def test_replace_literals(): assert result == expected -def test_get_schema_definition_as_string( - # schema: Type[Schema], - # include_documentation: bool, - # generate_imports: bool, - # add_subschemas: bool, - # class_name: str = "MyNewSchema", - ): +def test_get_schema_definition_as_string(): result = A.get_schema_definition_as_string(include_documentation=True) - expected = (''' + expected = ''' from pyspark.sql.types import IntegerType, StringType from typedspark import Column, Schema @@ -55,5 +51,5 @@ class A(Schema): """This is a docstring for A.""" a: Annotated[Column[IntegerType], ColumnMeta(comment="Some column")] b: Annotated[Column[StringType], ColumnMeta(comment="")]"' - ''') + ''' assert result == expected diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index 9e66dea..fcd2948 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -50,9 +50,7 @@ def __repr__(cls) -> str: return f"\n{str(cls)}" def __str__(cls) -> str: - return cls.get_schema_definition_as_string( - add_subschemas=False - ) + return cls.get_schema_definition_as_string(add_subschemas=False) def __getattribute__(cls, name: str) -> Any: """Python base function that gets attributes. @@ -193,12 +191,9 @@ class Schema(metaclass=MetaSchema): new_schema = type("SomeModel", (Schema,), {}) -cols = { - "a": Annotated[Column[IntegerType], "Some column"], - "b": Column[IntegerType] -} +cols = {"a": Annotated[Column[IntegerType], "Some column"], "b": Column[IntegerType]} new_schema.__annotations__ = cols new_schema.__doc__ = "This is a docstring for SomeModel." print(new_schema.get_schema_definition_as_string(include_documentation=True)) -print(new_schema) \ No newline at end of file +print(new_schema) From 0a0293e967aed6fe1c24eb0e9eba9edaada33935 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 17:03:55 +0200 Subject: [PATCH 13/32] lint --- typedspark/_schema/schema.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/typedspark/_schema/schema.py b/typedspark/_schema/schema.py index fcd2948..7d8cc67 100644 --- a/typedspark/_schema/schema.py +++ b/typedspark/_schema/schema.py @@ -1,10 +1,10 @@ """Module containing classes and functions related to TypedSpark Schemas.""" import inspect import re -from typing import Annotated, Any, Dict, List, Optional, Type, Union, get_args, get_type_hints +from typing import Any, Dict, List, Optional, Type, Union, get_args, get_type_hints from pyspark.sql import DataFrame -from pyspark.sql.types import DataType, IntegerType, StructType +from pyspark.sql.types import DataType, StructType from typedspark._core.column import Column from typedspark._schema.dlt_kwargs import DltKwargs @@ -188,12 +188,3 @@ class Schema(metaclass=MetaSchema): # to not add a docstring to the Schema class (otherwise the Schema # docstring would be added to any schema without a docstring). pass - - -new_schema = type("SomeModel", (Schema,), {}) -cols = {"a": Annotated[Column[IntegerType], "Some column"], "b": Column[IntegerType]} -new_schema.__annotations__ = cols -new_schema.__doc__ = "This is a docstring for SomeModel." - -print(new_schema.get_schema_definition_as_string(include_documentation=True)) -print(new_schema) From 096732583bcde6917dc782dcf6a2361268576e68 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 17:57:47 +0200 Subject: [PATCH 14/32] fix test --- tests/_schema/test_get_schema_definition.py | 26 +++++++++++---------- typedspark/_schema/get_schema_definition.py | 1 - 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/_schema/test_get_schema_definition.py b/tests/_schema/test_get_schema_definition.py index dca1242..abd3f63 100644 --- a/tests/_schema/test_get_schema_definition.py +++ b/tests/_schema/test_get_schema_definition.py @@ -1,16 +1,16 @@ from typing import Annotated -from pyspark.sql import Column from pyspark.sql.types import IntegerType, StringType -from typedspark._core.datatypes import DayTimeIntervalType -from typedspark._core.literaltype import IntervalType +from typedspark import Column, DayTimeIntervalType, IntervalType, Schema + +# from typedspark._core.datatypes import DayTimeIntervalType +# from typedspark._core.literaltype import IntervalType from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals -from typedspark._schema.schema import Schema class A(Schema): - """ "This is a docstring for A.""" + """This is a docstring for A.""" a: Annotated[Column[IntegerType], "Some column"] b: Column[StringType] @@ -41,15 +41,17 @@ def test_replace_literals(): def test_get_schema_definition_as_string(): result = A.get_schema_definition_as_string(include_documentation=True) - expected = ''' - from pyspark.sql.types import IntegerType, StringType + expected = '''from typing import Annotated + +from pyspark.sql.types import IntegerType, StringType - from typedspark import Column, Schema +from typedspark import Column, ColumnMeta, Schema - class A(Schema): +class A(Schema): """This is a docstring for A.""" - a: Annotated[Column[IntegerType], ColumnMeta(comment="Some column")] - b: Annotated[Column[StringType], ColumnMeta(comment="")]"' - ''' + + a: Annotated[Column[IntegerType], ColumnMeta(comment="Some column")] + b: Annotated[Column[StringType], ColumnMeta(comment="")] +''' assert result == expected diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 896c926..18d7034 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -63,7 +63,6 @@ def _build_schema_definition_string( ) if include_documentation: if hasattr(schema.__annotations__[k], "__metadata__"): - print("attribute exists") if schema.__annotations__[k].__metadata__ is not None: lines += ( f" {k}: Annotated[{typehint}, " From 1c5de10924e0e47912fe35841d371cf9df5195b8 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Fri, 28 Jul 2023 18:16:22 +0200 Subject: [PATCH 15/32] fix --- typedspark/_schema/get_schema_definition.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 18d7034..d0cc32b 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -62,8 +62,11 @@ def _build_schema_definition_string( typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType ) if include_documentation: - if hasattr(schema.__annotations__[k], "__metadata__"): - if schema.__annotations__[k].__metadata__ is not None: + if k in schema.__annotations__: + if ( + hasattr(schema.__annotations__[k], "__metadata__") + and schema.__annotations__[k].__metadata__ is not None + ): lines += ( f" {k}: Annotated[{typehint}, " + f'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' From ae8a619980467aca5054f5a57e236f4c8d63d67e Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Mon, 31 Jul 2023 09:54:59 +0200 Subject: [PATCH 16/32] fix indentation --- typedspark/_schema/get_schema_definition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index d0cc32b..eed90cb 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -71,10 +71,10 @@ def _build_schema_definition_string( f" {k}: Annotated[{typehint}, " + f'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' ) + else: + lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' else: - lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' - else: - lines += f" {k}: {typehint}\n" + lines += f" {k}: {typehint}\n" if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From 08b1c504c092ce4d36082333740409c1ba761f1d Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Mon, 31 Jul 2023 10:06:08 +0200 Subject: [PATCH 17/32] fix indentation --- typedspark/_schema/get_schema_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index eed90cb..3299ba5 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -73,8 +73,8 @@ def _build_schema_definition_string( ) else: lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' - else: - lines += f" {k}: {typehint}\n" + else: + lines += f" {k}: {typehint}\n" if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From a495667b9ab20469eda2088484e20b05ea4bbd1a Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 10:24:59 +0200 Subject: [PATCH 18/32] remove commented lines --- tests/_schema/test_get_schema_definition.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/_schema/test_get_schema_definition.py b/tests/_schema/test_get_schema_definition.py index abd3f63..c08c03f 100644 --- a/tests/_schema/test_get_schema_definition.py +++ b/tests/_schema/test_get_schema_definition.py @@ -4,8 +4,6 @@ from typedspark import Column, DayTimeIntervalType, IntervalType, Schema -# from typedspark._core.datatypes import DayTimeIntervalType -# from typedspark._core.literaltype import IntervalType from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals From bb871e9c7dd39334b9b5028abc657259e00f8c1e Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 10:52:16 +0200 Subject: [PATCH 19/32] rename variables --- typedspark/_schema/get_schema_definition.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 3299ba5..3c98c66 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -49,9 +49,9 @@ def _build_schema_definition_string( else: lines += ' """Add documentation here."""\n\n' - for k, val in get_type_hints(schema).items(): + for col_name, col in get_type_hints(schema).items(): typehint = ( - str(val) + str(col) .replace("typedspark._core.column.", "") .replace("typedspark._core.datatypes.", "") .replace("typedspark._schema.schema.", "") @@ -62,19 +62,19 @@ def _build_schema_definition_string( typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType ) if include_documentation: - if k in schema.__annotations__: + if col_name in schema.__annotations__: if ( - hasattr(schema.__annotations__[k], "__metadata__") - and schema.__annotations__[k].__metadata__ is not None + hasattr(schema.__annotations__[col_name], "__metadata__") + and schema.__annotations__[col_name].__metadata__ is not None ): lines += ( - f" {k}: Annotated[{typehint}, " - + f'ColumnMeta(comment="{schema.__annotations__[k].__metadata__[0]}")]\n' + f" {col_name}: Annotated[{typehint}, " + + f'ColumnMeta(comment="{schema.__annotations__[col_name].__metadata__[0]}")]\n' ) else: - lines += f' {k}: Annotated[{typehint}, ColumnMeta(comment="")]\n' + lines += f' {col_name}: Annotated[{typehint}, ColumnMeta(comment="")]\n' else: - lines += f" {k}: {typehint}\n" + lines += f" {col_name}: {typehint}\n" if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From 806756d8855f76568bf393bfe93850b916f34e19 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 11:04:48 +0200 Subject: [PATCH 20/32] rename variables --- tests/_schema/test_get_schema_definition.py | 1 - typedspark/_schema/get_schema_definition.py | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/_schema/test_get_schema_definition.py b/tests/_schema/test_get_schema_definition.py index c08c03f..2ab79a8 100644 --- a/tests/_schema/test_get_schema_definition.py +++ b/tests/_schema/test_get_schema_definition.py @@ -3,7 +3,6 @@ from pyspark.sql.types import IntegerType, StringType from typedspark import Column, DayTimeIntervalType, IntervalType, Schema - from typedspark._schema.get_schema_definition import _replace_literal, _replace_literals diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 3c98c66..6f35ca8 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -49,9 +49,9 @@ def _build_schema_definition_string( else: lines += ' """Add documentation here."""\n\n' - for col_name, col in get_type_hints(schema).items(): + for col_name, col_object in get_type_hints(schema).items(): typehint = ( - str(col) + str(col_object) .replace("typedspark._core.column.", "") .replace("typedspark._core.datatypes.", "") .replace("typedspark._schema.schema.", "") @@ -62,17 +62,18 @@ def _build_schema_definition_string( typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType ) if include_documentation: + col_annotated_start = f" {col_name}: Annotated[{typehint}, " if col_name in schema.__annotations__: if ( hasattr(schema.__annotations__[col_name], "__metadata__") and schema.__annotations__[col_name].__metadata__ is not None ): lines += ( - f" {col_name}: Annotated[{typehint}, " + col_annotated_start + f'ColumnMeta(comment="{schema.__annotations__[col_name].__metadata__[0]}")]\n' ) else: - lines += f' {col_name}: Annotated[{typehint}, ColumnMeta(comment="")]\n' + lines += col_annotated_start + 'ColumnMeta(comment="")]\n' else: lines += f" {col_name}: {typehint}\n" From cbbaf786a675c93c19159c76b617e221d5ddbdf1 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 11:07:35 +0200 Subject: [PATCH 21/32] rename variables --- typedspark/_schema/get_schema_definition.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 6f35ca8..c70112a 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -68,15 +68,15 @@ def _build_schema_definition_string( hasattr(schema.__annotations__[col_name], "__metadata__") and schema.__annotations__[col_name].__metadata__ is not None ): - lines += ( + comment == ( col_annotated_start + f'ColumnMeta(comment="{schema.__annotations__[col_name].__metadata__[0]}")]\n' ) else: - lines += col_annotated_start + 'ColumnMeta(comment="")]\n' + comment = col_annotated_start + 'ColumnMeta(comment="")]\n' else: - lines += f" {col_name}: {typehint}\n" - + comment = f" {col_name}: {typehint}\n" + lines += comment if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From f3841e9dd2ab98ca9ad4e74a2067c987a1b3043f Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 11:31:15 +0200 Subject: [PATCH 22/32] rename variables --- typedspark/_schema/get_schema_definition.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index c70112a..7c8d696 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -68,15 +68,14 @@ def _build_schema_definition_string( hasattr(schema.__annotations__[col_name], "__metadata__") and schema.__annotations__[col_name].__metadata__ is not None ): - comment == ( - col_annotated_start - + f'ColumnMeta(comment="{schema.__annotations__[col_name].__metadata__[0]}")]\n' - ) + comment = schema.__annotations__[col_name].__metadata__[0] else: - comment = col_annotated_start + 'ColumnMeta(comment="")]\n' + comment = "" + + lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n' else: - comment = f" {col_name}: {typehint}\n" - lines += comment + lines += f" {col_name}: {typehint}\n" + if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From 9d4b0f0240403e8d2f9672faea0da4786316c1ff Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 11:35:42 +0200 Subject: [PATCH 23/32] rename variables --- typedspark/_schema/get_schema_definition.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 7c8d696..ba06c0d 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -35,6 +35,17 @@ def get_schema_definition_as_string( return imports + schema_string +def _get_comment(schema: Type[Schema], col_name: str) -> str: + if ( + hasattr(schema.__annotations__[col_name], "__metadata__") + and schema.__annotations__[col_name].__metadata__ is not None + ): + comment = schema.__annotations__[col_name].__metadata__[0] + else: + comment = "" + return comment + + def _build_schema_definition_string( schema: Type[Schema], include_documentation: bool, @@ -64,14 +75,7 @@ def _build_schema_definition_string( if include_documentation: col_annotated_start = f" {col_name}: Annotated[{typehint}, " if col_name in schema.__annotations__: - if ( - hasattr(schema.__annotations__[col_name], "__metadata__") - and schema.__annotations__[col_name].__metadata__ is not None - ): - comment = schema.__annotations__[col_name].__metadata__[0] - else: - comment = "" - + comment = _get_comment(schema, col_name) lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n' else: lines += f" {col_name}: {typehint}\n" From 3ed98b40ea2829ccc3f05e18e76146c802f89e21 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 11:36:33 +0200 Subject: [PATCH 24/32] add docstring --- typedspark/_schema/get_schema_definition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index ba06c0d..9bf546c 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -36,6 +36,7 @@ def get_schema_definition_as_string( def _get_comment(schema: Type[Schema], col_name: str) -> str: + """Return the comment of a given column.""" if ( hasattr(schema.__annotations__[col_name], "__metadata__") and schema.__annotations__[col_name].__metadata__ is not None From 761419fb9917609fd3447a44c82c8afacc68bfc6 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 14:16:44 +0200 Subject: [PATCH 25/32] improve readability --- typedspark/_schema/get_schema_definition.py | 83 +++++++++++++-------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 9bf546c..93925e7 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -35,16 +35,53 @@ def get_schema_definition_as_string( return imports + schema_string -def _get_comment(schema: Type[Schema], col_name: str) -> str: - """Return the comment of a given column.""" - if ( - hasattr(schema.__annotations__[col_name], "__metadata__") - and schema.__annotations__[col_name].__metadata__ is not None - ): - comment = schema.__annotations__[col_name].__metadata__[0] +def _create_docstring(schema: Type[Schema]) -> str: + """Create the docstring for a given ``Schema``.""" + if schema.get_docstring() is not None: + docstring = f' """{schema.get_docstring()}"""\n\n' else: - comment = "" - return comment + docstring = ' """Add documentation here."""\n\n' + return docstring + + +def _extract_comment(typehint: str) -> tuple[str, str]: + """Extract the comment from a typehint.""" + comment = "" + if "Annotated" in typehint: + typehint, comment = re.search(r"Annotated\[(.*), '(.*)'\]", typehint).groups() + return typehint, comment + + +def _create_typehint_comment(col_type) -> list[str]: + """Create a typehint and comment for a given column.""" + typehint = ( + str(col_type) + .replace("typedspark._core.column.", "") + .replace("typedspark._core.datatypes.", "") + .replace("typedspark._schema.schema.", "") + .replace("pyspark.sql.types.", "") + .replace("typing.", "") + .replace("Annotated[Annotated", "Annotated") + ) + typehint, comment = _extract_comment(typehint) + typehint = _replace_literals( + typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType + ) + return [typehint, comment] + + +def _add_lines_with_typehint(include_documentation, schema): + """Add a line with the typehint for each column in the ``Schema``.""" + lines = "" + for col_name, col_type in get_type_hints(schema, include_extras=True).items(): + typehint, comment = _create_typehint_comment(col_type) + + if include_documentation: + col_annotated_start = f" {col_name}: Annotated[{typehint}, " + lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n' + else: + lines += f" {col_name}: {typehint}\n" + return lines def _build_schema_definition_string( @@ -55,31 +92,11 @@ def _build_schema_definition_string( ) -> str: """Return the code for a given ``Schema`` as a string.""" lines = f"class {class_name}(Schema):\n" + if include_documentation: - if schema.get_docstring() is not None: - lines += f' """{schema.get_docstring()}"""\n\n' - else: - lines += ' """Add documentation here."""\n\n' - - for col_name, col_object in get_type_hints(schema).items(): - typehint = ( - str(col_object) - .replace("typedspark._core.column.", "") - .replace("typedspark._core.datatypes.", "") - .replace("typedspark._schema.schema.", "") - .replace("pyspark.sql.types.", "") - .replace("typing.", "") - ) - typehint = _replace_literals( - typehint, replace_literals_in=DayTimeIntervalType, replace_literals_by=IntervalType - ) - if include_documentation: - col_annotated_start = f" {col_name}: Annotated[{typehint}, " - if col_name in schema.__annotations__: - comment = _get_comment(schema, col_name) - lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n' - else: - lines += f" {col_name}: {typehint}\n" + lines += _create_docstring(schema) + + lines += _add_lines_with_typehint(include_documentation, schema) if add_subschemas: lines += _add_subschemas(schema, add_subschemas, include_documentation) From f697f5acd576702538137f0679252fa384c90e33 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 14:55:00 +0200 Subject: [PATCH 26/32] improve readability --- typedspark/_schema/get_schema_definition.py | 72 +++++++++++---------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 93925e7..aae8b92 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -35,6 +35,26 @@ def get_schema_definition_as_string( return imports + schema_string +def _build_schema_definition_string( + schema: Type[Schema], + include_documentation: bool, + add_subschemas: bool, + class_name: str = "MyNewSchema", +) -> str: + """Return the code for a given ``Schema`` as a string.""" + lines = f"class {class_name}(Schema):\n" + + if include_documentation: + lines += _create_docstring(schema) + + lines += _add_lines_with_typehint(include_documentation, schema) + + if add_subschemas: + lines += _add_subschemas(schema, add_subschemas, include_documentation) + + return lines + + def _create_docstring(schema: Type[Schema]) -> str: """Create the docstring for a given ``Schema``.""" if schema.get_docstring() is not None: @@ -44,11 +64,27 @@ def _create_docstring(schema: Type[Schema]) -> str: return docstring +def _add_lines_with_typehint(include_documentation, schema): + """Add a line with the typehint for each column in the ``Schema``.""" + lines = "" + for col_name, col_type in get_type_hints(schema, include_extras=True).items(): + typehint, comment = _create_typehint_comment(col_type) + + if include_documentation: + lines += f' {col_name}: Annotated[{typehint}, ColumnMeta(comment="{comment}")]\n' + else: + lines += f" {col_name}: {typehint}\n" + return lines + + + def _extract_comment(typehint: str) -> tuple[str, str]: """Extract the comment from a typehint.""" comment = "" if "Annotated" in typehint: - typehint, comment = re.search(r"Annotated\[(.*), '(.*)'\]", typehint).groups() + m = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) + if m is not None: + typehint, comment = m.groups() return typehint, comment @@ -61,7 +97,6 @@ def _create_typehint_comment(col_type) -> list[str]: .replace("typedspark._schema.schema.", "") .replace("pyspark.sql.types.", "") .replace("typing.", "") - .replace("Annotated[Annotated", "Annotated") ) typehint, comment = _extract_comment(typehint) typehint = _replace_literals( @@ -70,39 +105,6 @@ def _create_typehint_comment(col_type) -> list[str]: return [typehint, comment] -def _add_lines_with_typehint(include_documentation, schema): - """Add a line with the typehint for each column in the ``Schema``.""" - lines = "" - for col_name, col_type in get_type_hints(schema, include_extras=True).items(): - typehint, comment = _create_typehint_comment(col_type) - - if include_documentation: - col_annotated_start = f" {col_name}: Annotated[{typehint}, " - lines += f'{col_annotated_start}ColumnMeta(comment="{comment}")]\n' - else: - lines += f" {col_name}: {typehint}\n" - return lines - - -def _build_schema_definition_string( - schema: Type[Schema], - include_documentation: bool, - add_subschemas: bool, - class_name: str = "MyNewSchema", -) -> str: - """Return the code for a given ``Schema`` as a string.""" - lines = f"class {class_name}(Schema):\n" - - if include_documentation: - lines += _create_docstring(schema) - - lines += _add_lines_with_typehint(include_documentation, schema) - - if add_subschemas: - lines += _add_subschemas(schema, add_subschemas, include_documentation) - - return lines - def _replace_literals( typehint: str, From 55eab235803137e8fcf5cd4ced46cfd3be1d596e Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 14:56:38 +0200 Subject: [PATCH 27/32] fix lint --- typedspark/_schema/get_schema_definition.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index aae8b92..cdfb8e5 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -77,7 +77,6 @@ def _add_lines_with_typehint(include_documentation, schema): return lines - def _extract_comment(typehint: str) -> tuple[str, str]: """Extract the comment from a typehint.""" comment = "" @@ -105,7 +104,6 @@ def _create_typehint_comment(col_type) -> list[str]: return [typehint, comment] - def _replace_literals( typehint: str, replace_literals_in: Type[TypedSparkDataType], From cf2e8498bf2917df8ea4c2c97fbc43bd8e22b750 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 15:01:19 +0200 Subject: [PATCH 28/32] fix lint --- typedspark/_schema/get_schema_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index cdfb8e5..36c2a20 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -81,8 +81,8 @@ def _extract_comment(typehint: str) -> tuple[str, str]: """Extract the comment from a typehint.""" comment = "" if "Annotated" in typehint: - m = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) - if m is not None: + match = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) + if match is not None: typehint, comment = m.groups() return typehint, comment From a17a99fdbb1f673aa06ead011876a07506f3da8b Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 15:09:09 +0200 Subject: [PATCH 29/32] rename --- typedspark/_schema/get_schema_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 36c2a20..ba1ec8f 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -68,7 +68,7 @@ def _add_lines_with_typehint(include_documentation, schema): """Add a line with the typehint for each column in the ``Schema``.""" lines = "" for col_name, col_type in get_type_hints(schema, include_extras=True).items(): - typehint, comment = _create_typehint_comment(col_type) + typehint, comment = _create_typehint_and_comment(col_type) if include_documentation: lines += f' {col_name}: Annotated[{typehint}, ColumnMeta(comment="{comment}")]\n' @@ -87,7 +87,7 @@ def _extract_comment(typehint: str) -> tuple[str, str]: return typehint, comment -def _create_typehint_comment(col_type) -> list[str]: +def _create_typehint_and_comment(col_type) -> list[str]: """Create a typehint and comment for a given column.""" typehint = ( str(col_type) From c3585964fe80be2fc1ca5cd0fcd0f1f0f93b239d Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 15:11:43 +0200 Subject: [PATCH 30/32] rename --- typedspark/_schema/get_schema_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index ba1ec8f..7cb27a5 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -83,7 +83,7 @@ def _extract_comment(typehint: str) -> tuple[str, str]: if "Annotated" in typehint: match = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) if match is not None: - typehint, comment = m.groups() + typehint, comment = match.groups() return typehint, comment From 541aae43c0cb8b96ffe45376ca78ee2945d5a9da Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 15:30:26 +0200 Subject: [PATCH 31/32] improve readability --- typedspark/_schema/get_schema_definition.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 7cb27a5..451dc17 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -77,16 +77,6 @@ def _add_lines_with_typehint(include_documentation, schema): return lines -def _extract_comment(typehint: str) -> tuple[str, str]: - """Extract the comment from a typehint.""" - comment = "" - if "Annotated" in typehint: - match = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) - if match is not None: - typehint, comment = match.groups() - return typehint, comment - - def _create_typehint_and_comment(col_type) -> list[str]: """Create a typehint and comment for a given column.""" typehint = ( @@ -104,6 +94,16 @@ def _create_typehint_and_comment(col_type) -> list[str]: return [typehint, comment] +def _extract_comment(typehint: str) -> tuple[str, str]: + """Extract the comment from a typehint.""" + comment = "" + if "Annotated" in typehint: + match = re.search(r"Annotated\[(.*), '(.*)'\]", typehint) + if match is not None: + typehint, comment = match.groups() + return typehint, comment + + def _replace_literals( typehint: str, replace_literals_in: Type[TypedSparkDataType], From d92da01c0aaad47e55fec9a0dde9d021a8249864 Mon Sep 17 00:00:00 2001 From: Jana Starkova Date: Wed, 2 Aug 2023 15:32:33 +0200 Subject: [PATCH 32/32] improve readability --- typedspark/_schema/get_schema_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typedspark/_schema/get_schema_definition.py b/typedspark/_schema/get_schema_definition.py index 451dc17..237b722 100644 --- a/typedspark/_schema/get_schema_definition.py +++ b/typedspark/_schema/get_schema_definition.py @@ -102,7 +102,7 @@ def _extract_comment(typehint: str) -> tuple[str, str]: if match is not None: typehint, comment = match.groups() return typehint, comment - + def _replace_literals( typehint: str,