Skip to content

Commit

Permalink
ruff: update config and fix errors
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Aug 30, 2024
1 parent 1773802 commit 9bcea3f
Show file tree
Hide file tree
Showing 55 changed files with 399 additions and 263 deletions.
124 changes: 121 additions & 3 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,50 @@
target-version = "py38" # NOTE: inferred from pyproject.toml if present

lint.extend-select = [
"F", # flakes rules -- default, but extend just in case
"E", # pycodestyle -- default, but extend just in case
"W", # various warnings

"B", # 'bugbear' set -- various possible bugs
"C4", # flake8-comprehensions -- unnecessary list/map/dict calls
"COM", # trailing commas
"EXE", # various checks wrt executable files
# "I", # sort imports
"ICN", # various import conventions
"FBT", # detect use of boolean arguments
"FURB", # various rules
"PERF", # various potential performance speedups
"PD", # pandas rules
"PIE", # 'misc' lints
"PLC", # pylint convention rules
"PLR", # pylint refactor rules
"PLW", # pylint warnings
"PT", # pytest stuff
"PYI", # various type hinting rules
"RET", # early returns
"RUF", # various ruff-specific rules
"TID", # various imports suggestions
"TRY", # various exception handling rules
"UP", # detect deprecated python stdlib stuff
"FA", # suggest using from __future__ import annotations
"PTH", # pathlib migration
"ARG", # unused argument checks
"A", # builtin shadowing
# "EM", # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying

# "ALL", # uncomment this to check for new rules!
]

lint.ignore = [
"D", # annoying nags about docstrings
"N", # pep naming
"TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING
"S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks
"DTZ", # datetimes checks -- complaining about missing tz and mostly false positives
"FIX", # complains about fixmes/todos -- annoying
"TD", # complains about todo formatting -- too annoying
"ANN", # missing type annotations? seems way to strict though

### too opinionated style checks
"E501", # too long lines
"E702", # Multiple statements on one line (semicolon)
Expand All @@ -17,9 +63,81 @@ lint.ignore = [
"E402", # Module level import not at top of file

### maybe consider these soon
# sometimes it's useful to give a variable a name even if we don't use it as a documentation
# on the other hand, often is a sign of error
# sometimes it's useful to give a variable a name even if we don't use it as a documentation
# on the other hand, often is a sign of error
"F841", # Local variable `count` is assigned to but never used
"F401", # imported but unused
###

"RUF100", # unused noqa -- handle later
"RUF012", # mutable class attrs should be annotated with ClassVar... ugh pretty annoying for user configs

### these are just nitpicky, we usually know better
"PLR0911", # too many return statements
"PLR0912", # too many branches
"PLR0913", # too many function arguments
"PLR0915", # too many statements
"PLR1714", # consider merging multiple comparisons
"PLR2044", # line with empty comment
"PLR5501", # use elif instead of else if
"PLR2004", # magic value in comparison -- super annoying in tests
###
"PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check

"B009", # calling gettattr with constant attribute -- this is useful to convince mypy
"B010", # same as above, but setattr
"B011", # complains about assert False
"B017", # pytest.raises(Exception)
"B023", # seems to result in false positives?
"B028", # suggest using explicit stacklevel? TODO double check later, but not sure it's useful

# complains about useless pass, but has sort of a false positive if the function has a docstring?
# this is common for click entrypoints (e.g. in __main__), so disable
"PIE790",

# a bit too annoying, offers to convert for loops to list comprehension
# , which may heart readability
"PERF401",

# suggests no using exception in for loops
# we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost"
"PERF203",

"RET504", # unnecessary assignment before returning -- that can be useful for readability
"RET505", # unnecessary else after return -- can hurt readability

"PLW0603", # global variable update.. we usually know why we are doing this
"PLW2901", # for loop variable overwritten, usually this is intentional

"PT004", # deprecated rule, will be removed later
"PT011", # pytest raises should is too broad
"PT012", # pytest raises should contain a single statement

"COM812", # trailing comma missing -- mostly just being annoying with long multiline strings

"PD901", # generic variable name df

"TRY003", # suggests defining exception messages in exception class -- kinda annoying
"TRY004", # prefer TypeError -- don't see the point
"TRY201", # raise without specifying exception name -- sometimes hurts readability
"TRY400", # TODO double check this, might be useful
"TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging

"PGH", # TODO force error code in mypy instead

"TID252", # Prefer absolute imports over relative imports from parent modules

"UP038", # suggests using | (union) in isisntance checks.. but it results in slower code

## too annoying
"T20", # just complains about prints and pprints
"Q", # flake quotes, too annoying
"C90", # some complexity checking
"G004", # logging statement uses f string
"ERA001", # commented out code
"SLF001", # private member accessed
"BLE001", # do not catch 'blind' Exception
"INP001", # complains about implicit namespace packages
"SIM", # some if statements crap
"RSE102", # complains about missing parens in exceptions
##
]
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def main() -> None:

install_requires=[
'more-itertools',
'typing-extensions',
'click' , # nicer cli
'plumbum' , # nicer command composition/piping
'python-magic' , # better mimetype detection (could be optional?)
Expand Down
5 changes: 4 additions & 1 deletion src/bleanser/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from .common import logger
from .main import main

__all__ = [
'logger',
]
13 changes: 7 additions & 6 deletions src/bleanser/core/common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import NamedTuple, Sequence, Union, List
from typing import TYPE_CHECKING, Sequence, Union

from .utils import assert_never
from .ext.logging import LazyLogger


logger = LazyLogger(__name__, level='debug')


Expand Down Expand Up @@ -61,11 +61,12 @@ class Keep(Instruction):

### helper to define paramertized tests in function's body
from .utils import under_pytest
if under_pytest:

if TYPE_CHECKING or under_pytest:
import pytest
parametrize = pytest.mark.parametrize
else:
parametrize = lambda *args,**kwargs: (lambda f: f) # type: ignore
parametrize = lambda *_args, **_kwargs: (lambda f: f)
###


Expand Down Expand Up @@ -100,7 +101,7 @@ def divide_by_size(*, buckets: int, paths: Sequence[Path]) -> Sequence[Sequence[
with_size = [(p, p.stat().st_size) for p in paths]
bucket_size = sum(sz for _, sz in with_size) / buckets

group: List[Path] = []
group: list[Path] = []
group_size = 0

def dump() -> None:
Expand Down
6 changes: 6 additions & 0 deletions src/bleanser/core/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import sys

if sys.version_info[:2] >= (3, 11):
from typing import Never, assert_never, assert_type, Self # noqa: F401
else:
from typing_extensions import Never, assert_never, assert_type, Self # noqa: F401
8 changes: 5 additions & 3 deletions src/bleanser/core/ext/dummy_executor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

# https://stackoverflow.com/a/10436851/706389
from typing import Any, Optional
from typing import Any
from concurrent.futures import Future, Executor
class DummyExecutor(Executor):
def __init__(self, max_workers: Optional[int]=1) -> None:
def __init__(self, max_workers: int | None=1) -> None:
self._shutdown = False
self._max_workers = max_workers

Expand All @@ -22,5 +24,5 @@ def submit(self, fn, *args, **kwargs): # type: ignore[override,unused-ignore]

return f

def shutdown(self, wait: bool=True, **kwargs) -> None:
def shutdown(self, wait: bool = True, **kwargs) -> None: # noqa: FBT001,FBT002,ARG002
self._shutdown = True
1 change: 0 additions & 1 deletion src/bleanser/core/ext/logging.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
'''
Default logger is a bit meh, see 'test'/run this file for a demo
TODO name 'klogging' to avoid possible conflict with default 'logging' module
Expand Down
17 changes: 9 additions & 8 deletions src/bleanser/core/ext/sqlite_dumben.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@
# - constraints
# this is useful if you want to mess/cleanup the database, but don't want to trip over constraints/triggers
# NOTE: handling everything as bytes since not sure I wanna mess with encoding here (esp. row data encoding)
from __future__ import annotations

import hashlib
import os
from pathlib import Path
from tempfile import TemporaryDirectory, TemporaryFile
import re
import shutil
import sqlite3
import subprocess
from subprocess import check_output, check_call, DEVNULL
import sys
from typing import List, Dict, Optional
from pathlib import Path
from subprocess import DEVNULL, check_call, check_output
from tempfile import TemporaryDirectory
from typing import Dict


Tables = Dict[str, Dict[str, str]]
Expand All @@ -35,7 +36,7 @@ def _get_tables(db: Path) -> Tables:
tables.append(table)

for table in tables:
schema: Dict[str, str] = {}
schema: dict[str, str] = {}
for row in conn.execute(f'PRAGMA table_info({table})'):
col = row[1]
type_ = row[2]
Expand All @@ -45,7 +46,7 @@ def _get_tables(db: Path) -> Tables:


def _sqlite(*cmd):
return ['sqlite3', '-bail'] + list(cmd)
return ['sqlite3', '-bail', *cmd]


def _dumben_db(output_db: Path) -> None:
Expand Down Expand Up @@ -108,7 +109,7 @@ def _dumben_db(output_db: Path) -> None:
subprocess.check_call(_sqlite(output_db, 'PRAGMA integrity_check;'), stdout=DEVNULL)


def run(*, db: Path, output: Optional[Path], output_as_db: bool) -> None:
def run(*, db: Path, output: Path | None, output_as_db: bool) -> None:
if output is not None:
assert not output.exists(), output

Expand All @@ -118,7 +119,7 @@ def run(*, db: Path, output: Optional[Path], output_as_db: bool) -> None:
if output_as_db:
assert output is not None

dumben_cache: Optional[Path] = None
dumben_cache: Path | None = None
_DUMBEN_CACHE_BASE = os.environ.get('SQLITE_DUMBEN_USE_CACHE')
if _DUMBEN_CACHE_BASE is not None:
DUMBEN_CACHE_BASE = Path(_DUMBEN_CACHE_BASE)
Expand Down
38 changes: 23 additions & 15 deletions src/bleanser/core/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# not to confuse with __main__.py... meh
from __future__ import annotations

import os
from glob import glob as do_glob
from pathlib import Path
import os
from typing import Optional, List, Type, cast

from .common import logger, Dry, Move, Remove, Mode
from .processor import BaseNormaliser, compute_instructions, apply_instructions, bleanser_tmp_directory
from typing import cast

import click

from .common import Dry, Mode, Move, Remove, logger
from .processor import (
BaseNormaliser,
apply_instructions,
bleanser_tmp_directory,
compute_instructions,
)


# TODO use context and default_map
# https://click.palletsprojects.com/en/7.x/commands/#overriding-defaults
def main(*, Normaliser: Type[BaseNormaliser]) -> None:
def main(*, Normaliser: type[BaseNormaliser]) -> None:
# meh.. by default the width is stupid, like 80 chars
context_settings = {
'max_content_width': 120,
Expand All @@ -34,7 +41,7 @@ def call_main() -> None:
@click.option ('--difftool' , type=str , help='Custom difftool to use')
@click.option ('--from', 'from_', type=int , default=None)
@click.option ('--to' , type=int , default=None , help='non-inclusive, i.e. [from, to)')
def diff(path1: str, path2: Path, *, glob: bool, from_: Optional[int], to: Optional[int], vim: bool, difftool: str) -> None:
def diff(path1: str, path2: Path, *, glob: bool, from_: int | None, to: int | None, vim: bool, difftool: str) -> None:
path1_: Path
if glob:
assert path2 is cast(Path, _DEFAULT), path2
Expand All @@ -44,7 +51,8 @@ def diff(path1: str, path2: Path, *, glob: bool, from_: Optional[int], to: Optio
paths = _get_paths(path=path1, from_=from_, to=to, glob=glob)
else:
assert cast(str, path2) is not _DEFAULT
assert from_ is None and to is None # just for sanity
assert from_ is None
assert to is None
path1_ = Path(path1)
path2 = Path(path2)
paths = [path1_, path2]
Expand All @@ -63,7 +71,7 @@ def diff(path1: str, path2: Path, *, glob: bool, from_: Optional[int], to: Optio
@call_main.command(name='normalised', short_help='normalise file and dump to stdout')
@click.argument('path', type=Path)
@click.option('--stdout', is_flag=True, help='print normalised files to stdout instead of printing the path to it')
def normalised(path: Path, stdout: bool) -> None:
def normalised(*, path: Path, stdout: bool) -> None:
with bleanser_tmp_directory() as base_tmp_dir:
n = Normaliser(original=path, base_tmp_dir=base_tmp_dir)
with n.do_normalise() as cleaned:
Expand Down Expand Up @@ -94,8 +102,8 @@ def normalised(path: Path, stdout: bool) -> None:
##
@click.option ('--multiway' , is_flag=True, default=None , help='force "multiway" cleanup')
@click.option ('--prune-dominated', is_flag=True, default=None)
def prune(path: str, sort_by: str, glob: bool, dry: bool, move: Optional[Path], remove: bool, threads: Optional[int], from_: Optional[int], to: Optional[int], multiway: Optional[bool], prune_dominated: Optional[bool], yes: bool) -> None:
modes: List[Mode] = []
def prune(*, path: str, sort_by: str, glob: bool, dry: bool, move: Path | None, remove: bool, threads: int | None, from_: int | None, to: int | None, multiway: bool | None, prune_dominated: bool | None, yes: bool) -> None:
modes: list[Mode] = []
if dry is True:
modes.append(Dry())
if move is not None:
Expand Down Expand Up @@ -128,18 +136,18 @@ def prune(path: str, sort_by: str, glob: bool, dry: bool, move: Optional[Path],
call_main()


def _get_paths(*, path: str, from_: Optional[int], to: Optional[int], sort_by: str = "name", glob: bool=False) -> List[Path]:
def _get_paths(*, path: str, from_: int | None, to: int | None, sort_by: str = "name", glob: bool=False) -> list[Path]:
if not glob:
pp = Path(path)
assert pp.is_dir(), pp
path = str(pp) + os.sep + '**'
paths = [Path(p) for p in do_glob(path, recursive=True)]
paths = [Path(p) for p in do_glob(path, recursive=True)] # noqa: PTH207
paths = [p for p in paths if p.is_file()]
if sort_by == "name":
# assumes sort order is same as date order? guess it's reasonable
paths = list(sorted(paths))
paths = sorted(paths)
else:
paths = list(sorted(paths, key=lambda s: s.stat().st_size))
paths = sorted(paths, key=lambda s: s.stat().st_size)

if from_ is None:
from_ = 0
Expand Down
12 changes: 8 additions & 4 deletions src/bleanser/core/modules/extract.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator, Any


from bleanser.core.processor import BaseNormaliser, unique_file_in_tempdir, sort_file, Normalised
from typing import Any, Iterator

from bleanser.core.processor import (
BaseNormaliser,
Normalised,
sort_file,
unique_file_in_tempdir,
)


class ExtractObjectsNormaliser(BaseNormaliser):
Expand Down
Loading

0 comments on commit 9bcea3f

Please sign in to comment.