Skip to content

Commit

Permalink
try pandera: add jupyterlite notebooks, add support for py3.7 (#951)
Browse files Browse the repository at this point in the history
* add jupyterlite to docs

* handle test errors

* add jupyterlite_sphinx to deps

* exclude 3.7 from docs build
  • Loading branch information
cosmicBboy authored Oct 4, 2022
1 parent dc6b39c commit 1bcfe01
Show file tree
Hide file tree
Showing 17 changed files with 217 additions and 67 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
pandas-version: ["1.2.0", "1.3.0", "latest"]
exclude:
- python-version: "3.10"
Expand Down Expand Up @@ -196,9 +196,9 @@ jobs:
uses: codecov/codecov-action@v3

- name: Check Docstrings
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }}
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }}
run: nox ${{ env.NOX_FLAGS }} --session doctests

- name: Check Docs
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }}
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }}
run: nox ${{ env.NOX_FLAGS }} --session docs
22 changes: 15 additions & 7 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"sphinx_copybutton",
"recommonmark",
"sphinx_panels",
"jupyterlite_sphinx",
]

doctest_global_setup = """
Expand Down Expand Up @@ -192,15 +193,17 @@ def filter(self, record: pylogging.LogRecord) -> bool:
# that dataclass name is in the message, so that you don't filter out
# other meaningful warnings
return not (
record.getMessage().startswith(
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame"'
)
# NOTE: forward reference false positive needs to be handled
# correctly
or record.getMessage().startswith(
"Cannot resolve forward reference in type annotations of "
'"pandera.schemas.DataFrameSchema'
record.getMessage().startswith(
(
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame"',
"Cannot resolve forward reference in type annotations of "
'"pandera.schemas.DataFrameSchema',
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame.style"',
)
)
)

Expand Down Expand Up @@ -259,3 +262,8 @@ def linkcode_resolve(domain, info):
)

return f"https://github.com/pandera-dev/pandera/blob/{tag}/pandera/{fn}{linespec}"


# jupyterlite config
# Notebook(s) handed to the jupyterlite_sphinx extension enabled in
# ``extensions`` above; the path is presumably resolved relative to the docs
# source directory -- confirm against the jupyterlite-sphinx docs.
jupyterlite_contents = ["notebooks/try_pandera.ipynb"]
# NOTE(review): presumably stops jupyterlite_sphinx from registering itself as
# the Sphinx source parser for ``.ipynb`` files -- confirm against the
# jupyterlite-sphinx configuration reference.
jupyterlite_bind_ipynb_suffix = False
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ page or reach out to the maintainers and pandera community on
:hidden:

self
Try Pandera ▶️ <try_pandera>

.. toctree::
:maxdepth: 6
Expand Down
11 changes: 11 additions & 0 deletions docs/source/jupyterlite_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"LiteBuildConfig": {
"federated_extensions": [
"https://conda.anaconda.org/conda-forge/noarch/pandera-0.12.0-hd8ed1ab_0.tar.bz2"
],
"ignore_sys_prefix": true,
"piplite_urls": [
"https://files.pythonhosted.org/packages/95/cc/e058935b0b34d50214596297f0a9edb0781fc5201bf2c6eb8cf1a026d710/pandera-0.12.0-py3-none-any.whl"
]
}
}
80 changes: 80 additions & 0 deletions docs/source/notebooks/try_pandera.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ac4294bb",
"metadata": {},
"outputs": [],
"source": [
"import piplite\n",
"\n",
"\n",
"for package in [\n",
" \"wrapt\",\n",
" \"typing_extensions\",\n",
" \"mypy_extensions\",\n",
" \"typing_inspect\",\n",
" \"pydantic\",\n",
" \"pandera\",\n",
"]:\n",
" await piplite.install(package, deps=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9a4eef5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pandera as pa\n",
"from pandera.typing import DataFrame, Series\n",
"\n",
"\n",
"class Schema(pa.SchemaModel):\n",
" item: Series[str] = pa.Field(isin=[\"apple\", \"orange\"], coerce=True)\n",
" price: Series[float] = pa.Field(gt=0)\n",
"\n",
"\n",
"@pa.check_types(lazy=True)\n",
"def transform_data(data: DataFrame[Schema]):\n",
" ...\n",
"\n",
"\n",
"data = pd.DataFrame.from_records([\n",
" {\"item\": \"applee\", \"price\": 0.5},\n",
" {\"item\": \"orange\", \"price\": -1000}\n",
"])\n",
"\n",
"\n",
"try:\n",
" transform_data(data)\n",
"except pa.errors.SchemaErrors as exc:\n",
" display(exc.failure_cases)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
7 changes: 7 additions & 0 deletions docs/source/try_pandera.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Try Pandera
===============

In the notebook below, you can get a sense of how to use pandera right in the
browser without having to install anything locally!

.. retrolite:: notebooks/try_pandera.ipynb
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ dependencies:
- python-multipart

# documentation
- jupyterlite_sphinx
- sphinx
- sphinx-panels
- sphinx-autodoc-typehints <= 1.14.1
Expand Down
6 changes: 5 additions & 1 deletion pandera/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@
Any,
Callable,
Iterable,
Literal,
Optional,
Tuple,
Type,
TypeVar,
Union,
)

try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore[misc]


class DataType(ABC):
"""Base class of all Pandera data types."""
Expand Down
14 changes: 14 additions & 0 deletions pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
from . import engine, numpy_engine, utils
from .type_aliases import PandasDataType, PandasExtensionType, PandasObject

try:
import pyarrow # pylint:disable=unused-import

PYARROW_INSTALLED = True
except ImportError:
PYARROW_INSTALLED = False


def pandas_version():
"""Return the pandas version."""
Expand Down Expand Up @@ -605,6 +612,13 @@ class STRING(DataType, dtypes.String):
storage: Optional[Literal["python", "pyarrow"]] = "python"

def __post_init__(self):
    """Build the pandas ``StringDtype`` for the configured storage backend.

    :raises ModuleNotFoundError: if ``storage`` is ``"pyarrow"`` but the
        module-level ``PYARROW_INSTALLED`` flag is false.
    """
    # Fail early with installation instructions instead of letting pandas
    # raise on the missing optional dependency.
    pyarrow_missing = self.storage == "pyarrow" and not PYARROW_INSTALLED
    if pyarrow_missing:
        raise ModuleNotFoundError(
            "pyarrow needs to be installed when using the "
            "string[pyarrow] pandas data type. Please "
            "`pip install pyarrow` or "
            "`conda install -c conda-forge pyarrow` before proceeding."
        )
    string_dtype = pd.StringDtype(self.storage)
    # Set through ``object.__setattr__`` rather than plain assignment
    # (presumably because the dataclass is frozen -- confirm against the
    # class decorator).
    object.__setattr__(self, "type", string_dtype)

Expand Down
7 changes: 6 additions & 1 deletion pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Any,
Dict,
List,
Literal,
Optional,
Type,
TypeVar,
Expand All @@ -41,6 +40,12 @@
from .error_handlers import SchemaErrorHandler
from .hypotheses import Hypothesis

try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore[misc]


if TYPE_CHECKING:
from pandera.schema_components import Column

Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ nox
importlib_metadata
uvicorn
python-multipart
jupyterlite_sphinx
sphinx
sphinx-panels
sphinx-autodoc-typehints <= 1.14.1
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@
"typing_extensions >= 3.7.4.3 ; python_version<'3.8'",
"typing_inspect >= 0.6.0",
"wrapt",
"pyarrow",
],
extras_require=extras_require,
python_requires=">=3.8",
python_requires=">=3.7",
platforms="any",
classifiers=[
"Development Status :: 5 - Production/Stable",
Expand All @@ -64,6 +63,7 @@
"Intended Audience :: Science/Research",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
}

nullable_string_dtypes = {pd.StringDtype: "string"}
if pa.PANDAS_1_3_0_PLUS:
if pa.PANDAS_1_3_0_PLUS and pandas_engine.PYARROW_INSTALLED:
nullable_string_dtypes.update(
{pd.StringDtype(storage="pyarrow"): "string[pyarrow]"}
)
Expand Down
60 changes: 43 additions & 17 deletions tests/core/test_from_to_format_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest

import pandera as pa
from pandera.engines import pandas_engine


class InSchema(pa.SchemaModel):
Expand Down Expand Up @@ -92,6 +93,19 @@ def invalid_input_dataframe() -> pd.DataFrame:
return pd.DataFrame({"str_col": ["a"]})


def _needs_pyarrow(schema) -> bool:
    """Return True when ``schema`` is a parquet/feather schema and pyarrow is absent.

    Only the parquet and feather schema models listed below are considered;
    every other schema yields ``False`` regardless of whether pyarrow is
    installed.
    """
    pyarrow_backed_schemas = {
        InSchemaParquet,
        InSchemaFeather,
        OutSchemaParquet,
        OutSchemaFeather,
    }
    if schema not in pyarrow_backed_schemas:
        return False
    return not pandas_engine.PYARROW_INSTALLED


@pytest.mark.parametrize(
"schema,to_fn,buf_cls",
[
Expand Down Expand Up @@ -122,24 +136,30 @@ def fn(df: pa.typing.DataFrame[schema]):
(mock_dataframe(), False),
(invalid_input_dataframe(), True),
]:

buf = None if buf_cls is None else buf_cls()
arg = to_fn(df, *([buf] if buf else []))
if buf:
if buf.closed:
pytest.skip(
"skip test for older pandas versions where to_pickle "
"closes user-provided buffers: "
"https://github.com/pandas-dev/pandas/issues/35679"
)
buf.seek(0)
arg = buf
if invalid:
with pytest.raises(pa.errors.SchemaError):
fn(arg)
return

out = fn(arg)
assert df.equals(out)

if _needs_pyarrow(schema):
with pytest.raises(ImportError):
to_fn(df, *([buf] if buf else []))
else:
arg = to_fn(df, *([buf] if buf else []))
if buf:
if buf.closed:
pytest.skip(
"skip test for older pandas versions where to_pickle "
"closes user-provided buffers: "
"https://github.com/pandas-dev/pandas/issues/35679"
)
buf.seek(0)
arg = buf
if invalid:
with pytest.raises(pa.errors.SchemaError):
fn(arg)
return

out = fn(arg)
assert df.equals(out)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -170,6 +190,12 @@ def invalid_fn(
return df

df = mock_dataframe()

if _needs_pyarrow(schema):
with pytest.raises((ImportError)):
fn(df)
return

try:
out = fn(df)
except IOError:
Expand Down
Loading

0 comments on commit 1bcfe01

Please sign in to comment.