diff --git a/fennel/CHANGELOG.md b/fennel/CHANGELOG.md index 21a48c7aa..03ac77843 100644 --- a/fennel/CHANGELOG.md +++ b/fennel/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## [1.5.13] - 2024-08-29 +- Allow not passing a column for optional types in struct + ## [1.5.12] - 2024-08-29 - Add support for timestamp dtype parsing in case of non timestamp field. diff --git a/fennel/testing/test_cast_df_to_schema.py b/fennel/testing/test_cast_df_to_schema.py index f2633d058..ab420cc66 100644 --- a/fennel/testing/test_cast_df_to_schema.py +++ b/fennel/testing/test_cast_df_to_schema.py @@ -514,6 +514,15 @@ def test_cast_col_to_pandas_dtype(): "d": b"hello world", } ] + parsed_value = [ + { + "a": 1, + "b": {"a": 1, "b": 2, "c": 3}, + "c": [1, 2, 3, 4], + "d": b"hello world", + "e": pd.NA, + } + ] data = pd.Series([value], name="testing") data_type = schema_proto.DataType( array_type=schema_proto.ArrayType( @@ -555,6 +564,16 @@ def test_cast_col_to_pandas_dtype(): bytes_type=schema_proto.BytesType() ), ), + schema_proto.Field( + name="e", + dtype=schema_proto.DataType( + optional_type=schema_proto.OptionalType( + of=schema_proto.DataType( + timestamp_type=schema_proto.TimestampType() + ) + ) + ), + ), ] ) ) @@ -565,7 +584,7 @@ def test_cast_col_to_pandas_dtype(): pandas_dtype_data = cast_col_to_pandas_dtype(arrow_dtype_data, data_type) assert pandas_dtype_data.dtype == object - assert pandas_dtype_data.tolist()[0] == value + assert pandas_dtype_data.tolist()[0] == parsed_value def test_optional_timestamp_cast_col_to_pandas_dtype(): diff --git a/fennel/testing/test_utils.py b/fennel/testing/test_utils.py index 6e1bd2892..40951c832 100644 --- a/fennel/testing/test_utils.py +++ b/fennel/testing/test_utils.py @@ -185,9 +185,14 @@ def parse_datetime_in_value( ) output: Dict[Any, Any] = {} for field in dtype.struct_type.fields: - output[field.name] = parse_datetime_in_value( - value[field.name], field.dtype - ) + dtype = field.dtype + name = field.name + if not dtype.HasField("optional_type") and name not in value: + raise ValueError( + f"value not found for non optional field : {field}" + ) + if name in value: + output[name] = parse_datetime_in_value(value[name], dtype) return output else: return value @@ -312,9 +317,16 @@ def convert_val_to_pandas_dtype( fields = data_type.struct_type.fields output = {} for field in fields: - output[field.name] = convert_val_to_pandas_dtype( - value[field.name], field.dtype, nullable - ) + dtype = field.dtype + name = field.name + if not dtype.HasField("optional_type") and name not in value: + raise ValueError( + f"value not found for non optional field : {field}" + ) + if name in value: + output[name] = convert_val_to_pandas_dtype( + value[name], dtype, nullable + ) return output diff --git a/pyproject.toml b/pyproject.toml index 8d948eea9..2dbe0cf06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fennel-ai" -version = "1.5.12" +version = "1.5.13" description = "The modern realtime feature engineering platform" authors = ["Fennel AI "] packages = [{ include = "fennel" }]