diff --git a/changelog.md b/changelog.md index a0eb9b6..60beaf4 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,10 @@ # Changelog +v0.0.9: + +- Pydantic bumped to `1.10` +- Bug Fix: Categorical constraints (`exact_set`, `oneof`, `include`) can now keep `int` and `float` values. This extends to legacy schemas as well. + v0.0.8: Legacy Schema Aliases (support for legacy schemas): - `min_value` now also supports `min` alias diff --git a/dfschema/core/column.py b/dfschema/core/column.py index 9a0ae1e..60ad7a9 100644 --- a/dfschema/core/column.py +++ b/dfschema/core/column.py @@ -1,6 +1,6 @@ import sys from datetime import date, datetime -from typing import List, Optional, Set, Union, Tuple # , Pattern +from typing import List, Optional, FrozenSet, Union, Tuple # , Pattern from warnings import warn import pandas as pd @@ -140,7 +140,7 @@ def validate_column(self, series: pd.Series, root, col_name: Optional[str] = Non class Categorical(BaseModel): # type: ignore - value_set: Optional[Set[str]] = None + value_set: Optional[Union[FrozenSet[int], FrozenSet[float], FrozenSet[str],]] = None mode: Optional[Literal["oneof", "exact_set", "include"]] = None unique: bool = Field( False, description="if true, the column must contain only unique values" diff --git a/dfschema/core/core.py b/dfschema/core/core.py index efc2d44..5fafba3 100644 --- a/dfschema/core/core.py +++ b/dfschema/core/core.py @@ -14,6 +14,7 @@ from .legacy import infer_protocol_version, LegacySchemaRegistry from .generate import generate_schema_dict_from_df +# from .utils import SchemaEncoder # from .base_config import BaseConfig @@ -40,13 +41,17 @@ class MetaData(BaseModel): ) -class DfSchema(BaseModel, extra=Extra.forbid, arbitrary_types_allowed=True): # type: ignore +class DfSchema(BaseModel): # type: ignore """Main class of the package Represents a Schema to check (validate) dataframe against. 
Schema is flavor-agnostic (does not specify what kind of dataframe it is) """ + class Config: + extra = Extra.forbid + arbitrary_types_allowed = True + metadata: Optional[MetaData] = Field( MetaData(), description="optional metadata, including version and protocol version", @@ -225,11 +230,14 @@ def to_file(self, path: Union[str, Path]) -> None: path = Path(path) try: - schema_dict = self.dict(exclude_none=True) + if path.suffix == ".json": + schema_json = self.json(exclude_none=True, indent=4) with path.open("w") as f: - json.dump(schema_dict, f, indent=4) + f.write(schema_json) elif path.suffix in (".yml", ".yaml"): + schema_dict = self.dict(exclude_none=True) + try: import yaml @@ -246,10 +254,7 @@ def to_file(self, path: Union[str, Path]) -> None: raise DataFrameSchemaError(f"Error wriging schema to file {path}") from e @classmethod - def from_dict( - cls, - dict_: dict, - ) -> "DfSchema": + def from_dict(cls, dict_: dict,) -> "DfSchema": """create DfSchema from dict. same as `DfSchema(**dict_)`, but will also migrate old protocol schemas if necessary. @@ -324,10 +329,7 @@ class SubsetSchema(BaseModel, extra=Extra.forbid, arbitrary_types_allowed=True): predicate to select subset. - If string, will be interpreted as query for `df.query()`. 
- If dict, keys should be column names, values should be values to exactly match""" - predicate: Union[ - dict, - str, - ] = Field(..., description=_predicate_description) + predicate: Union[dict, str,] = Field(..., description=_predicate_description) shape: Optional[ShapeSchema] = Field(None, description="shape expectations") columns: Optional[List[ColSchema]] = Field([], description="columns expectations") diff --git a/dfschema/core/legacy/v1.py b/dfschema/core/legacy/v1.py index ff63f51..f0040f3 100644 --- a/dfschema/core/legacy/v1.py +++ b/dfschema/core/legacy/v1.py @@ -2,7 +2,7 @@ # import json -from typing import Optional, Union, Dict, List, Tuple +from typing import Optional, Union, Dict, List, Tuple, Set from ..logger import logger from ..dtype import DtypeLiteral @@ -35,8 +35,8 @@ class Config: na_limit: Union[None, bool, float] = Field(None, gt=0, le=1.0) - include: Optional[List[str]] = None - oneof: Optional[List[str]] = Field(None, alias="one_of") + include: Optional[Union[Set[int], Set[float], Set[str]]] = None + oneof: Optional[Union[Set[int], Set[float], Set[str]]] = Field(None, alias="one_of") unique: Optional[bool] = None @@ -54,9 +54,7 @@ class Config: allow_population_by_field_name = True version: Optional[str] = Field( - None, - description="version of the schema", - example="2022-06-12", + None, description="version of the schema", example="2022-06-12", ) protocol_version: float = Field(1.0, description="version of the protocol") @@ -122,7 +120,10 @@ def migrate(self) -> Tuple[dict, float]: if col.get(k) is not None: categorical = col.get("categorical", dict()) try: - categorical["value_set"] = set(col.pop(k, {})) + categorical["value_set"] = set(col.pop(k, set())) + logger.debug( + f'Converting Categorical value set for mode={k}: {categorical["value_set"]}' + ) except TypeError as e: raise TypeError(k, col, e) categorical["mode"] = k diff --git a/poetry.lock b/poetry.lock index 9b44d28..f09058d 100644 --- a/poetry.lock +++ b/poetry.lock @@ 
-1196,14 +1196,14 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "pydantic" -version = "1.9.1" +version = "1.10.7" description = "Data validation and settings management using python type hints" category = "main" optional = false -python-versions = ">=3.6.1" +python-versions = ">=3.7" [package.dependencies] -typing-extensions = ">=3.7.4.3" +typing-extensions = ">=4.2.0" [package.extras] dotenv = ["python-dotenv (>=0.10.4)"] @@ -1693,7 +1693,7 @@ yaml = ["PyYAML"] [metadata] lock-version = "1.1" python-versions = ">=3.7.1,<4.0" -content-hash = "92b5fa29548fbd59ae9ac9e82af228a895da8a0639980f7e6f68b5b78db1d7d8" +content-hash = "c9f47bf3bfecb7360120114b02946d8ab9ff76ec17e9fc198678f11f0bf16e82" [metadata.files] anyio = [ @@ -2418,41 +2418,42 @@ pycparser = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] pydantic = [ - {file = "pydantic-1.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8098a724c2784bf03e8070993f6d46aa2eeca031f8d8a048dff277703e6e193"}, - {file = "pydantic-1.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c320c64dd876e45254bdd350f0179da737463eea41c43bacbee9d8c9d1021f11"}, - {file = "pydantic-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18f3e912f9ad1bdec27fb06b8198a2ccc32f201e24174cec1b3424dda605a310"}, - {file = "pydantic-1.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11951b404e08b01b151222a1cb1a9f0a860a8153ce8334149ab9199cd198131"}, - {file = "pydantic-1.9.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8bc541a405423ce0e51c19f637050acdbdf8feca34150e0d17f675e72d119580"}, - {file = "pydantic-1.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e565a785233c2d03724c4dc55464559639b1ba9ecf091288dd47ad9c629433bd"}, - {file = "pydantic-1.9.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:a4a88dcd6ff8fd47c18b3a3709a89adb39a6373f4482e04c1b765045c7e282fd"}, - {file = "pydantic-1.9.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:447d5521575f18e18240906beadc58551e97ec98142266e521c34968c76c8761"}, - {file = "pydantic-1.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:985ceb5d0a86fcaa61e45781e567a59baa0da292d5ed2e490d612d0de5796918"}, - {file = "pydantic-1.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059b6c1795170809103a1538255883e1983e5b831faea6558ef873d4955b4a74"}, - {file = "pydantic-1.9.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d12f96b5b64bec3f43c8e82b4aab7599d0157f11c798c9f9c528a72b9e0b339a"}, - {file = "pydantic-1.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ae72f8098acb368d877b210ebe02ba12585e77bd0db78ac04a1ee9b9f5dd2166"}, - {file = "pydantic-1.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:79b485767c13788ee314669008d01f9ef3bc05db9ea3298f6a50d3ef596a154b"}, - {file = "pydantic-1.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:494f7c8537f0c02b740c229af4cb47c0d39840b829ecdcfc93d91dcbb0779892"}, - {file = "pydantic-1.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0f047e11febe5c3198ed346b507e1d010330d56ad615a7e0a89fae604065a0e"}, - {file = "pydantic-1.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:969dd06110cb780da01336b281f53e2e7eb3a482831df441fb65dd30403f4608"}, - {file = "pydantic-1.9.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:177071dfc0df6248fd22b43036f936cfe2508077a72af0933d0c1fa269b18537"}, - {file = "pydantic-1.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9bcf8b6e011be08fb729d110f3e22e654a50f8a826b0575c7196616780683380"}, - {file = "pydantic-1.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a955260d47f03df08acf45689bd163ed9df82c0e0124beb4251b1290fa7ae728"}, - {file = 
"pydantic-1.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9ce157d979f742a915b75f792dbd6aa63b8eccaf46a1005ba03aa8a986bde34a"}, - {file = "pydantic-1.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0bf07cab5b279859c253d26a9194a8906e6f4a210063b84b433cf90a569de0c1"}, - {file = "pydantic-1.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d93d4e95eacd313d2c765ebe40d49ca9dd2ed90e5b37d0d421c597af830c195"}, - {file = "pydantic-1.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1542636a39c4892c4f4fa6270696902acb186a9aaeac6f6cf92ce6ae2e88564b"}, - {file = "pydantic-1.9.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a9af62e9b5b9bc67b2a195ebc2c2662fdf498a822d62f902bf27cccb52dbbf49"}, - {file = "pydantic-1.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fe4670cb32ea98ffbf5a1262f14c3e102cccd92b1869df3bb09538158ba90fe6"}, - {file = "pydantic-1.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:9f659a5ee95c8baa2436d392267988fd0f43eb774e5eb8739252e5a7e9cf07e0"}, - {file = "pydantic-1.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b83ba3825bc91dfa989d4eed76865e71aea3a6ca1388b59fc801ee04c4d8d0d6"}, - {file = "pydantic-1.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1dd8fecbad028cd89d04a46688d2fcc14423e8a196d5b0a5c65105664901f810"}, - {file = "pydantic-1.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02eefd7087268b711a3ff4db528e9916ac9aa18616da7bca69c1871d0b7a091f"}, - {file = "pydantic-1.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7eb57ba90929bac0b6cc2af2373893d80ac559adda6933e562dcfb375029acee"}, - {file = "pydantic-1.9.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4ce9ae9e91f46c344bec3b03d6ee9612802682c1551aaf627ad24045ce090761"}, - {file = "pydantic-1.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:72ccb318bf0c9ab97fc04c10c37683d9eea952ed526707fabf9ac5ae59b701fd"}, - {file = "pydantic-1.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:61b6760b08b7c395975d893e0b814a11cf011ebb24f7d869e7118f5a339a82e1"}, - {file = "pydantic-1.9.1-py3-none-any.whl", hash = "sha256:4988c0f13c42bfa9ddd2fe2f569c9d54646ce84adc5de84228cfe83396f3bd58"}, - {file = "pydantic-1.9.1.tar.gz", hash = "sha256:1ed987c3ff29fff7fd8c3ea3a3ea877ad310aae2ef9889a119e22d3f2db0691a"}, + {file = "pydantic-1.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e79e999e539872e903767c417c897e729e015872040e56b96e67968c3b918b2d"}, + {file = "pydantic-1.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:01aea3a42c13f2602b7ecbbea484a98169fb568ebd9e247593ea05f01b884b2e"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:516f1ed9bc2406a0467dd777afc636c7091d71f214d5e413d64fef45174cfc7a"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae150a63564929c675d7f2303008d88426a0add46efd76c3fc797cd71cb1b46f"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"}, + {file = "pydantic-1.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:a7cd2251439988b413cb0a985c4ed82b6c6aac382dbaff53ae03c4b23a70e80a"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:68792151e174a4aa9e9fc1b4e653e65a354a2fa0fed169f7b3d09902ad2cb6f1"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe2507b8ef209da71b6fb5f4e597b50c5a34b78d7e857c4f8f3115effaef5fe"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:10a86d8c8db68086f1e30a530f7d5f83eb0685e632e411dbbcf2d5c0150e8dcd"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75ae19d2a3dbb146b6f324031c24f8a3f52ff5d6a9f22f0683694b3afcb16fb"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:464855a7ff7f2cc2cf537ecc421291b9132aa9c79aef44e917ad711b4a93163b"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:193924c563fae6ddcb71d3f06fa153866423ac1b793a47936656e806b64e24ca"}, + {file = "pydantic-1.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:b4a849d10f211389502059c33332e91327bc154acc1845f375a99eca3afa802d"}, + {file = "pydantic-1.10.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cc1dde4e50a5fc1336ee0581c1612215bc64ed6d28d2c7c6f25d2fe3e7c3e918"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0cfe895a504c060e5d36b287ee696e2fdad02d89e0d895f83037245218a87fe"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:670bb4683ad1e48b0ecb06f0cfe2178dcf74ff27921cdf1606e527d2617a81ee"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:950ce33857841f9a337ce07ddf46bc84e1c4946d2a3bba18f8280297157a3fd1"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c15582f9055fbc1bfe50266a19771bbbef33dd28c45e78afbe1996fd70966c2a"}, + {file = "pydantic-1.10.7-cp37-cp37m-win_amd64.whl", hash = "sha256:82dffb306dd20bd5268fd6379bc4bfe75242a9c2b79fec58e1041fbbdb1f7914"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c7f51861d73e8b9ddcb9916ae7ac39fb52761d9ea0df41128e81e2ba42886cd"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6434b49c0b03a51021ade5c4daa7d70c98f7a79e95b551201fff682fc1661245"}, + {file = 
"pydantic-1.10.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64d34ab766fa056df49013bb6e79921a0265204c071984e75a09cbceacbbdd5d"}, + {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:701daea9ffe9d26f97b52f1d157e0d4121644f0fcf80b443248434958fd03dc3"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf135c46099ff3f919d2150a948ce94b9ce545598ef2c6c7bf55dca98a304b52"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0f85904f73161817b80781cc150f8b906d521fa11e3cdabae19a581c3606209"}, + {file = "pydantic-1.10.7-cp38-cp38-win_amd64.whl", hash = "sha256:9f6f0fd68d73257ad6685419478c5aece46432f4bdd8d32c7345f1986496171e"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c230c0d8a322276d6e7b88c3f7ce885f9ed16e0910354510e0bae84d54991143"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:976cae77ba6a49d80f461fd8bba183ff7ba79f44aa5cfa82f1346b5626542f8e"}, + {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d45fc99d64af9aaf7e308054a0067fdcd87ffe974f2442312372dfa66e1001d"}, + {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2a5ebb48958754d386195fe9e9c5106f11275867051bf017a8059410e9abf1f"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:abfb7d4a7cd5cc4e1d1887c43503a7c5dd608eadf8bc615413fc498d3e4645cd"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:80b1fab4deb08a8292d15e43a6edccdffa5377a36a4597bb545b93e79c5ff0a5"}, + {file = "pydantic-1.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:d71e69699498b020ea198468e2480a2f1e7433e32a3a99760058c6520e2bea7e"}, + {file = "pydantic-1.10.7-py3-none-any.whl", hash = 
"sha256:0cd181f1d0b1d00e2b705f1bf1ac7799a2d938cce3376b8007df62b29be3c2c6"}, + {file = "pydantic-1.10.7.tar.gz", hash = "sha256:cfc83c0678b6ba51b0532bea66860617c4cd4251ecf76e9846fa5a9f3454e97e"}, ] pyflakes = [ {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, diff --git a/property_benchmarks_v2.json b/property_benchmarks_v2.json new file mode 100644 index 0000000..df69b26 --- /dev/null +++ b/property_benchmarks_v2.json @@ -0,0 +1 @@ +{"metadata": {"protocol_version": 2.0, "version": "2023-01-30"}, "columns": [{"name": "ID", "dtype": "int", "na_limit": 0.99, "value_limits": {"min": 1.0}}, {"name": "LAST_SALE_ID", "dtype": "float", "na_limit": 0.9}, {"name": "BUILDING_ID", "dtype": "float", "na_limit": 0.9}, {"name": "UNITTYPE", "dtype": "str", "na_limit": 0.9}, {"name": "NEIGHBORHOOD_ID", "dtype": "int", "na_limit": 0.99}, {"name": "SUBMARKET_ID", "dtype": "number", "na_limit": 0.99}, {"name": "BOROUGH_ID", "dtype": "int", "na_limit": 0.99, "categorical": {"value_set": \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6a0934a..7db9969 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dfschema" -version = "0.0.8" +version = "0.0.9" description = "lightweight pandas.DataFrame schema" authors = ["Philipp "] readme = "README.md" @@ -13,7 +13,7 @@ python = ">=3.7.1,<4.0" pandas = "^1.2.4" sqlalchemy = {version = "1.*", optional = true} pandera = {version = "^0.6", optional = true} -pydantic = "^1.9.1" +pydantic = ">1.10" typer = {version = "^0.6.1", optional = true} PyYAML = {version = "^6.0", optional = true} diff --git a/tests/test_schemas/v1/good/property_benchmarks.json b/tests/test_schemas/v1/good/property_benchmarks.json new file mode 100644 index 0000000..d598e70 --- /dev/null +++ b/tests/test_schemas/v1/good/property_benchmarks.json @@ -0,0 +1,14 @@ +{ + "version": "2023-01-30", + "strict": true, + "columns": { + "ID": 
{"dtype":"int", "na_limit":1, "min_value":1}, + "LAST_SALE_ID": {"dtype":"float", "na_limit":0.9}, + "BUILDING_ID": {"dtype":"float", "na_limit":0.9}, + "UNITTYPE": {"dtype":"str", "na_limit":0.9}, + "NEIGHBORHOOD_ID": {"dtype":"int", "na_limit":1}, + "SUBMARKET_ID": {"dtype":"number", "na_limit":true}, + "BOROUGH_ID": {"dtype":"int", "na_limit":1, "oneof":[100,200,300,400,500]}, + "MEDIAN_NHOOD_PPSF": {"dtype":"number", "na_limit":0.9} + } +} diff --git a/tests/test_v1.py b/tests/test_v1.py index 76eb6fa..c8d86cd 100644 --- a/tests/test_v1.py +++ b/tests/test_v1.py @@ -17,3 +17,18 @@ def test_schema_objects(good_schema_v1: dict): new = S.dict() model_col = [c for c in new["columns"] if c["name"] == "model"][0] assert model_col.get("categorical", {}).get("mode") == "exact_set" + + +def test_categorical_dtypes(): + """Integer `oneof` values of a legacy (v1) schema must stay integers after migration.""" + from dfschema.core.core import DfSchema + import json + from pathlib import Path + + # Anchor the fixture on this test file (not on the current working directory) so the test passes from any cwd. + path = Path(__file__).resolve().parent / "test_schemas/v1/good/property_benchmarks.json" + schema = json.loads(path.read_text()) + + S = DfSchema.from_dict(schema) + catcol = [el for el in S.columns if el.name == "BOROUGH_ID"][0] + assert catcol.categorical.value_set == frozenset((100, 200, 300, 400, 500))