Skip to content

Commit

Permalink
Merge pull request #17 from StreetEasy/bug/datetime-dtype-generation
Browse files: browse the repository at this point in the history
Bug/datetime dtype generation
  • Loading branch information
Casyfill authored Oct 24, 2022
2 parents 61b33ae + 77107ce commit 800fa26
Show file tree
Hide file tree
Showing 13 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
max-line-length = 88
ignore = E501, E203, W503, E265
ignore = E501, E203, W503, E265, E231
per-file-ignores = __init__.py:F401
exclude =
.git
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
**/__pycache__/*
*.pyc
.DS_Store
**/.DS_Store
**/.DS_Store

.hypothesis/*
7 changes: 7 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog




v0.0.5:
- fix column dtype generation/validation bug

## Pre-Publication
v1.3.0
- renamed strict_column_set to additionalColumns
- renamed strict_column_order to exactColumnOrder
Expand Down
2 changes: 1 addition & 1 deletion dfschema/core/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _validate_dtype(self, series: pd.Series) -> None:
if not self._dtype_test_func[_dtype](series):
txt = _tmplt.format(self.name, series.dtype, _dtype, self.dtype)
raise DataFrameValidationError(txt)
elif series.dtype != self._dtype:
elif series.dtype != _dtype:
txt = _tmplt.format(self.name, series.dtype, _dtype, self.dtype)
raise DataFrameValidationError(txt)

Expand Down
1 change: 1 addition & 0 deletions dfschema/core/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"int32": "int",
"int16": "int",
# time
"datetime64[ns]": "datetime64[ns]",
"datetime": "datetime64[ns]",
"date": "datetime64[ns]",
"timedelta": "timedelta64[ns]",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dfschema"
version = "0.0.4" # set via gitlab-ci
version = "0.0.5" # set via gitlab-ci
description = "lightweight pandas.DataFrame schema"
authors = ["Philipp <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_jsonschema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
if __name__ == "__main__":
from dfs.core.core import DfSchema
from dfschema.core.core import DfSchema

with open("./jsonschemas/schema.json", "w") as f:
f.write(DfSchema.schema_json(indent=2))
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from dfs.core.core import DfSchema
from dfschema.core.core import DfSchema


def get_files(path: Path) -> list:
Expand Down
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ def df3():
return pd.DataFrame({"x": [np.nan] * 4, "y": ["foo", "bar", "baz", np.nan]})


@pytest.fixture()
def df4():
    """Return a four-row frame with columns ``x``, ``y`` and ``z``.

    ``x`` holds the integers 1-4, ``y`` holds three strings and one ``None``,
    and ``z`` holds the same date string four times, converted with
    ``pd.to_datetime``.
    """
    columns = {
        "x": [1, 2, 3, 4],
        "y": ["foo", "bar", "baz", None],
        "z": ["2022-10-23"] * 4,
    }
    frame = pd.DataFrame(columns)
    # Convert the date strings in place so "z" no longer holds plain strings.
    frame["z"] = pd.to_datetime(frame["z"])
    return frame


# This section for `test_jsonvalidate.py`


Expand Down
16 changes: 15 additions & 1 deletion tests/test_generate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# import pytest


def test_generate(df1):
def test_generate_df1(df1):
from dfschema.core import DfSchema

print(df1.dtypes)
Expand All @@ -13,3 +13,17 @@ def test_generate(df1):
raise Exception(sd, e)

S.validate_df(df1) # type: ignore


def test_generate_df4(df4):
    """Generate a schema from ``df4`` and validate ``df4`` against it.

    If schema generation fails, the failure is re-raised together with the
    dict form of the generated schema to make debugging easier.
    """
    from dfschema.core import DfSchema

    print(df4.dtypes)

    try:
        schema = DfSchema.from_df(df4)
    except Exception as err:  # for debugging
        # Attach the raw dict representation alongside the original error.
        raise Exception(DfSchema.from_df(df4, return_dict=True), err)

    schema.validate_df(df4)  # type: ignore
5 changes: 1 addition & 4 deletions tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,7 @@
],
"df2": [ # protocol 1.0
{
"columns": {
"x": {"min_value": 1},
"y": {"dtype": "string"},
},
"columns": {"x": {"min_value": 1}, "y": {"dtype": "string"},},
"strict_cols": True,
},
{
Expand Down
12 changes: 2 additions & 10 deletions tests/test_subsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@ def test_subset_dict(df_subset):
{
"predicate": {"y": "baz"},
"columns": [
{
"name": "x",
"dtype": "int",
"value_limits": {"min": 3, "max": 3},
}
{"name": "x", "dtype": "int", "value_limits": {"min": 3, "max": 3},}
],
},
],
Expand All @@ -49,11 +45,7 @@ def test_subset_query(df_subset):
"predicate": "x >= 3",
"shape": {"rows": 2},
"columns": [
{
"name": "x",
"dtype": "int",
"value_limits": {"max": 4, "min": 3},
}
{"name": "x", "dtype": "int", "value_limits": {"max": 4, "min": 3},}
],
},
],
Expand Down
7 changes: 1 addition & 6 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@ def test_validate_df(df1, schema):
{"shape": {"min_cols": 3}},
{"columns": ["x", "y", "z"]},
{"columns": {"x": {"dtype": "floating"}, "y": {"dtype": "floating"}}},
{
"columns": {
"x": {"dtype": "int"},
"y": {"dtype": "character", "na_limit": 0.2},
}
},
{"columns": {"x": {"dtype": "int"}, "y": {"dtype": "character", "na_limit": 0.2},}},
]


Expand Down

0 comments on commit 800fa26

Please sign in to comment.