diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 9471d2a4a..90fb01235 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -46,9 +46,12 @@ jobs: - uses: actions/setup-python@v3 with: python-version: ${{ matrix.python }} - - working-directory: ./python - run: | - pip install -e .[dev] - pip install -U tox-gh-actions - - working-directory: ./python - run: tox + - name: Install deps + working-directory: ./python + run: pip3 install -e .[dev,arrow] + - name: Run linters + working-directory: ./python + run: make lint + - name: Run tests + working-directory: ./python + run: make test diff --git a/.gitignore b/.gitignore index 07170446c..a7a414e74 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,5 @@ derby.log # Python stuff python_legacy/.mypy_cache/ python/.mypy_cache/ +python/htmlcov +python/coverage.xml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b9d9bf230..51ab47019 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -52,8 +52,6 @@ For Java styling, check out the section [Setting up IDE and Code Style](https://iceberg.apache.org/community/#setting-up-ide-and-code-style) from the documentation site. -For Python, please use the tox command `tox -e format` to apply autoformatting to the project. - ### Java style guidelines #### Line breaks diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml new file mode 100644 index 000000000..6fea287cb --- /dev/null +++ b/python/.pre-commit-config.yaml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +files: ^python/ + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-docstring-first + - id: debug-statements + - id: check-yaml + - id: check-ast + - repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: + - id: black + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.10.1 + hooks: + - id: isort + args: [ --settings-path=python/pyproject.toml ] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.950 + hooks: + - id: mypy + - repo: https://github.com/hadialqattan/pycln + rev: v1.3.2 + hooks: + - id: pycln + args: [--config=python/pyproject.toml] diff --git a/python/CONTRIBUTING.md b/python/CONTRIBUTING.md new file mode 100644 index 000000000..13506a7b8 --- /dev/null +++ b/python/CONTRIBUTING.md @@ -0,0 +1,44 @@ + + +# Contributing to the Iceberg Python libary + +## Linting + +We rely on `pre-commit` to apply autoformatting and linting: + +```bash +make install +make lint +``` + +By default, it only runs on the files known by git. + +Pre-commit will automatically fix the violations such as import orders, formatting etc. Pylint errors you need to fix yourself. + +In contrast to the name, it doesn't run on the git pre-commit hook by default. If this is something that you like, you can set this up by running `pre-commit install`. + +## Testing + +For Python, we use pytest in combination with coverage to maintain 90% code coverage + +```bash +make install +make test +``` diff --git a/python/Makefile b/python/Makefile new file mode 100644 index 000000000..c4ff2153d --- /dev/null +++ b/python/Makefile @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install: + pip install -e ".[dev,arrow]" + +lint: + pre-commit run --all-files + +test: + coverage run --source=src/ -m pytest tests/ + coverage report -m --fail-under=90 + coverage html + coverage xml diff --git a/python/README.md b/python/README.md index e2c2e38a4..e06ab40b7 100644 --- a/python/README.md +++ b/python/README.md @@ -17,7 +17,7 @@ # Iceberg Python -py-iceberg is a python library for programmatic access to iceberg table metadata as well as to table data in iceberg format. +py-iceberg is a python library for programmatic access to iceberg table metadata as well as to table data in iceberg format. It is an implementation of [iceberg table spec](https://iceberg.apache.org/spec/) in Python. ## Getting Started diff --git a/python/pyproject.toml b/python/pyproject.toml index 7389f493b..09b810c40 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -20,4 +20,30 @@ requires = [ "setuptools>=42", "wheel" ] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 130 +target-version = ['py38'] + +[tool.isort] +src_paths = ["src/", "tests/"] +multi_line_output = 3 +profile = 'black' +line_length = 130 +force_grid_wrap = 4 + +[tool.pycln] +all = true + +[tool.mypy] +no_implicit_optional = true +warn_redundant_casts = true +warn_unreachable = true + +[[tool.mypy.overrides]] +module = "mypy-pyarrow.*" +ignore_missing_imports = true + +[tool.coverage.run] +source = ['src/'] diff --git a/python/setup.cfg b/python/setup.cfg index ab1aa16e3..6a983275f 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -46,9 +46,14 @@ install_requires = mmh3 [options.extras_require] arrow = - pyarrow + pyarrow==8.0.0 dev= - tox-travis==0.12 - pytest + pip>=21.1 + coverage[toml]==6.3.3 + mock==4.0.3 + pytest==7.1.2 + pytest-checkdocs==2.7.1 + pyenchant==3.2.2 + pre-commit==2.19.0 [options.packages.find] where = src diff --git a/python/spellcheck-dictionary.txt b/python/spellcheck-dictionary.txt index 3d7092938..ef3ab6642 100644 --- a/python/spellcheck-dictionary.txt +++ b/python/spellcheck-dictionary.txt @@ -55,4 +55,4 @@ URI UnboundPredicate BoundPredicate BooleanExpression -BooleanExpressionVisitor \ No newline at end of file +BooleanExpressionVisitor diff --git a/python/src/iceberg/catalog/__init__.py b/python/src/iceberg/catalog/__init__.py index 13a83393a..d78cfc86a 100644 --- a/python/src/iceberg/catalog/__init__.py +++ b/python/src/iceberg/catalog/__init__.py @@ -14,3 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import Dict, Tuple + +Identifier = Tuple[str, ...] +Properties = Dict[str, str] diff --git a/python/src/iceberg/catalog/base.py b/python/src/iceberg/catalog/base.py index df541fcc6..40f2747ab 100644 --- a/python/src/iceberg/catalog/base.py +++ b/python/src/iceberg/catalog/base.py @@ -15,15 +15,20 @@ # specific language governing permissions and limitations # under the License. -from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Set, Tuple, Union +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import ( + List, + Optional, + Set, + Union, +) + +from iceberg.catalog import Identifier, Properties from iceberg.schema import Schema from iceberg.table.base import PartitionSpec, Table -Identifier = Tuple[str, ...] -Properties = Dict[str, str] - class Catalog(ABC): """Base Catalog for table operations like - create, drop, load, list and others. diff --git a/python/src/iceberg/io/pyarrow.py b/python/src/iceberg/io/pyarrow.py index a67ff5d36..008fd7449 100644 --- a/python/src/iceberg/io/pyarrow.py +++ b/python/src/iceberg/io/pyarrow.py @@ -28,7 +28,13 @@ from pyarrow.fs import FileInfo, FileSystem, FileType -from iceberg.io.base import FileIO, InputFile, InputStream, OutputFile, OutputStream +from iceberg.io.base import ( + FileIO, + InputFile, + InputStream, + OutputFile, + OutputStream, +) class PyArrowFile(InputFile, OutputFile): diff --git a/python/src/iceberg/schema.py b/python/src/iceberg/schema.py index 0ba7313ff..0b0295580 100644 --- a/python/src/iceberg/schema.py +++ b/python/src/iceberg/schema.py @@ -21,7 +21,15 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from functools import singledispatch -from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar +from typing import ( + Any, + Dict, + Generic, + List, + Optional, + Tuple, + TypeVar, +) from iceberg.files import StructProtocol from iceberg.types import ( diff --git a/python/src/iceberg/table/base.py b/python/src/iceberg/table/base.py index 023d1f3bb..f14ec6422 100644 --- a/python/src/iceberg/table/base.py +++ b/python/src/iceberg/table/base.py @@ -18,6 +18,9 @@ from __future__ import annotations from abc import ABC +from typing import Union + +from iceberg.catalog.base import Identifier class Table(ABC): @@ -26,6 +29,8 @@ class Table(ABC): To be implemented by https://github.com/apache/iceberg/issues/3227 """ + identifier: Union[str, Identifier] + class PartitionSpec: """Placeholder for Partition Specification diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py index 07e19e5f8..22aa6d8a6 100644 --- a/python/src/iceberg/types.py +++ b/python/src/iceberg/types.py @@ -31,7 +31,12 @@ """ from dataclasses import dataclass, field from functools import cached_property -from typing import ClassVar, Dict, Optional, Tuple +from typing import ( + ClassVar, + Dict, + Optional, + Tuple, +) class Singleton: diff --git a/python/src/iceberg/utils/schema_conversion.py b/python/src/iceberg/utils/schema_conversion.py index 11571f27b..e6fba45a0 100644 --- a/python/src/iceberg/utils/schema_conversion.py +++ b/python/src/iceberg/utils/schema_conversion.py @@ -20,7 +20,12 @@ from __future__ import annotations import logging -from typing import Any, Dict, List, Tuple +from typing import ( + Any, + Dict, + List, + Tuple, +) from iceberg.schema import Schema from iceberg.types import ( diff --git a/python/tests/catalog/test_base.py b/python/tests/catalog/test_base.py index ecb34ca1c..3e5606427 100644 --- a/python/tests/catalog/test_base.py +++ b/python/tests/catalog/test_base.py @@ -15,11 +15,18 @@ # specific language governing permissions and limitations # under the License. -from typing import Dict, List, Optional, Set, Union +from typing import ( + Dict, + List, + Optional, + Set, + Union, +) import pytest -from iceberg.catalog.base import Catalog, Identifier, Properties +from iceberg.catalog import Identifier, Properties +from iceberg.catalog.base import Catalog from iceberg.exceptions import ( AlreadyExistsError, NamespaceNotEmptyError, @@ -136,13 +143,13 @@ def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Optional[Properties] = None ) -> None: namespace = Catalog.identifier_to_tuple(namespace) - removals = {} if not removals else removals - updates = [] if not updates else updates if namespace in self.__namespaces: - for key in removals: - if key in self.__namespaces[namespace]: - del self.__namespaces[namespace][key] - self.__namespaces[namespace].update(updates) + if removals: + for key in removals: + if key in self.__namespaces[namespace]: + del self.__namespaces[namespace][key] + if updates: + self.__namespaces[namespace].update(updates) else: raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") diff --git a/python/tests/io/__init__.py b/python/tests/io/__init__.py new file mode 100644 index 000000000..00eaa2ffe --- /dev/null +++ b/python/tests/io/__init__.py @@ -0,0 +1,11 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/tests/io/test_io_base.py b/python/tests/io/test_io_base.py index 7bd61a0e0..3cccd90a0 100644 --- a/python/tests/io/test_io_base.py +++ b/python/tests/io/test_io_base.py @@ -22,7 +22,13 @@ import pytest -from iceberg.io.base import FileIO, InputFile, InputStream, OutputFile, OutputStream +from iceberg.io.base import ( + FileIO, + InputFile, + InputStream, + OutputFile, + OutputStream, +) from iceberg.io.pyarrow import PyArrowFile, PyArrowFileIO @@ -112,12 +118,13 @@ def new_input(self, location: str): def new_output(self, location: str): return LocalOutputFile(location=location) - def delete(self, location: Union[str, LocalInputFile, LocalOutputFile]): - parsed_location = location.parsed_location if isinstance(location, (InputFile, OutputFile)) else urlparse(location) + def delete(self, location: Union[str, InputFile, OutputFile]) -> None: + location = location.location if isinstance(location, (InputFile, OutputFile)) else location + parsed_location = urlparse(location) try: os.remove(parsed_location.path) except FileNotFoundError as e: - raise FileNotFoundError(f"Cannot delete file, does not exist: {parsed_location.path} - Caused by: " + str(e)) + raise FileNotFoundError(f"Cannot delete file, does not exist: {parsed_location.path} - Caused by: {e}") @pytest.mark.parametrize("CustomInputFile", [LocalInputFile, PyArrowFile]) diff --git a/python/tests/io/test_pyarrow.py b/python/tests/io/test_pyarrow.py index bbde45b39..79e88f3e2 100644 --- a/python/tests/io/test_pyarrow.py +++ b/python/tests/io/test_pyarrow.py @@ -180,7 +180,7 @@ def test_raise_on_creating_a_local_file_no_permission(): assert "Cannot get file info, access denied:" in str(exc_info.value) -def test_raise_on_checking_if_local_file_exists_no_permission(): +def test_raise_on_delete_file_with_no_permission(): """Test that a PyArrowFile raises when deleting a local file without permission""" with tempfile.TemporaryDirectory() as tmpdirname: diff --git a/python/tox.ini b/python/tox.ini deleted file mode 100644 index 488957edf..000000000 --- a/python/tox.ini +++ /dev/null @@ -1,111 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[tox] -envlist = py38,py39,py310,linters -skip_missing_interpreters = true - -[testenv] -usedevelop = true -extras = arrow -deps = - pip>=21.1 - coverage - mock - pytest - pytest-checkdocs - pyarrow -setenv = - COVERAGE_FILE = test-reports/{envname}/.coverage - PYTEST_ADDOPTS = --junitxml=test-reports/{envname}/junit.xml -vv -commands = - coverage run --source src --parallel-mode -m pytest {posargs} - coverage combine - coverage report -m --fail-under=90 - coverage html -d test-reports/{envname}/coverage-html - coverage xml -o test-reports/{envname}/coverage.xml - -[testenv:linters] -usedevelop = true -deps = - {[testenv:format-check]deps} - {[testenv:type-check]deps} -commands = - {[testenv:format-check]commands} - {[testenv:type-check]commands} - -[testenv:format-check] -deps = - black - isort - autoflake - pylint - pyenchant -commands = - autoflake -r --check --ignore-init-module-imports --remove-all-unused-imports src tests - isort --profile black --check-only src tests - black --line-length 130 --check --diff src tests - pylint src tests - -[testenv:format] -deps = - {[testenv:format-check]deps} -commands = - autoflake -r --in-place --ignore-init-module-imports --remove-all-unused-imports src tests - isort --profile black src tests - black --line-length 130 src tests - -[testenv:type-check] -deps = - mypy -commands = - mypy --install-types --non-interactive --config tox.ini src - -[testenv:docs] -basepython = python3 -deps = - -r docs/source/requirements.txt -commands = - sphinx-build -E -W -c docs/source/ -b html docs/source/ docs/build/html - sphinx-build -E -W -c docs/source/ -b man docs/source/ docs/build/man - -[testenv:serve-docs] -basepython = python3 -skip_install = true -changedir = docs/build/html -deps = -commands = - python -m http.server {posargs} - -[pytest] -norecursedirs=.* -addopts = --doctest-modules - -[gh-actions] -python = - 3.8: py38, linters - 3.9: py39 - 3.10: py310 - -[mypy] - -no_implicit_optional=True -warn_redundant_casts=True -warn_unreachable=True - -[mypy-pyarrow.*] -ignore_missing_imports = True