Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Duration/Memory Convenience Click Types #448

Merged
merged 7 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 193 additions & 5 deletions flepimop/gempyor_pkg/src/gempyor/shared_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@
supported options for config file overrides, and custom click decorators.
"""

__all__ = []


from datetime import timedelta
from math import ceil
import multiprocessing
import pathlib
import warnings
from typing import Callable, Any
import re
from typing import Any, Callable, Literal
import warnings

import click
import confuse

from .utils import config, as_list

__all__ = []
from .utils import as_list, config


@click.group()
Expand Down Expand Up @@ -124,6 +127,191 @@ def cli(ctx: click.Context) -> None:
}


class DurationParamType(click.ParamType):
    """
    A custom Click parameter type for parsing duration strings into `timedelta` objects.

    A duration is an optionally signed decimal number followed by an optional unit.
    Supported units are seconds, minutes, hours, days and weeks (plus the
    abbreviations listed in `_abbreviations`), matched case-insensitively. Units
    that `timedelta` has no argument for, such as years, are not accepted. When no
    unit is given the `default_unit` passed to the constructor is used.

    Attributes:
        name: The name of the parameter type.

    Examples:
        >>> from gempyor.shared_cli import DurationParamType
        >>> duration_param_type = DurationParamType(False)
        >>> duration_param_type.convert("23min", None, None)
        datetime.timedelta(seconds=1380)
        >>> duration_param_type.convert("2.5hr", None, None)
        datetime.timedelta(seconds=9000)
        >>> duration_param_type.convert("1.5H", None, None)
        datetime.timedelta(seconds=5400)
        >>> duration_param_type.convert("-2", None, None)
        datetime.timedelta(days=-1, seconds=86280)
    """

    name = "duration"
    # Maps every accepted (lowercase) unit spelling to the matching `timedelta`
    # keyword argument.
    _abbreviations = {
        "s": "seconds",
        "sec": "seconds",
        "secs": "seconds",
        "second": "seconds",
        "seconds": "seconds",
        "m": "minutes",
        "min": "minutes",
        "mins": "minutes",
        "minute": "minutes",
        "minutes": "minutes",
        "h": "hours",
        "hr": "hours",
        "hrs": "hours",
        "hour": "hours",
        "hours": "hours",
        "d": "days",
        "day": "days",
        "days": "days",
        "w": "weeks",
        "week": "weeks",
        "weeks": "weeks",
    }

    def __init__(
        self,
        nonnegative: bool,
        default_unit: Literal["seconds", "minutes", "hours", "days", "weeks"] = "minutes",
    ) -> None:
        """
        Initialize the instance based on parameter settings.

        Args:
            nonnegative: If `True` negative durations are not allowed.
            default_unit: The default unit to use if no unit is specified in the input
                string.
        """
        super().__init__()
        self._nonnegative = nonnegative
        # Groups: (1) signed number, (2) sign, (3) integral digits,
        # (4) fractional part including the dot, (5) unit.
        self._duration_regex = re.compile(
            rf"^((-)?([0-9]+)?(\.[0-9]+)?)({'|'.join(self._abbreviations.keys())})?$",
            flags=re.IGNORECASE,
        )
        self._default_unit = default_unit

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> timedelta:
        """
        Converts a string representation of a duration into a `timedelta` object.

        Args:
            value: The value to convert, expected to be a string like representation of
                a duration. A `timedelta` is also accepted and returned as is (after
                the sign check).
            param: The Click parameter object for context in errors.
            ctx: The Click context object for context in errors.

        Returns:
            The converted duration as a `timedelta` object.

        Raises:
            click.BadParameter: If the value is not a valid duration based on the
                format.
            click.BadParameter: If the duration is negative and the class was
                initialized with `nonnegative` set to `True`.
        """
        # Click may hand `convert` a value that is already of the target type
        # (e.g. an option default), so do not round-trip it through `str`.
        if isinstance(value, timedelta):
            if self._nonnegative and value < timedelta(0):
                self.fail(f"{value!r} is a negative duration", param, ctx)
            return value
        value = str(value).strip()
        if (m := self._duration_regex.match(value)) is None:
            self.fail(f"{value!r} is not a valid duration", param, ctx)
        number, posneg, integral, fractional, unit = m.groups()
        # Every numeric group is optional in the regex, so inputs such as "", "-"
        # or "min" match without any digits; reject them here rather than letting
        # `float` raise a bare `ValueError`.
        if integral is None and fractional is None:
            self.fail(f"{value!r} is not a valid duration", param, ctx)
        if self._nonnegative and posneg == "-":
            self.fail(f"{value!r} is a negative duration", param, ctx)
        # The regex matches units case-insensitively but the lookup table is keyed
        # in lowercase, so normalise before the lookup.
        if unit is None:
            timedelta_arg = self._default_unit
        else:
            timedelta_arg = self._abbreviations[unit.lower()]
        return timedelta(**{timedelta_arg: float(number)})


class MemoryParamType(click.ParamType):
    """
    A custom Click parameter type for parsing memory size strings into numerics.

    A memory size is an unsigned decimal number followed by an optional unit
    (`kb`/`k`, `mb`/`m`, `gb`/`g` or `tb`/`t`, matched case-insensitively). Units are
    binary multiples, i.e. 1mb is 1024kb. A value given without a unit is taken to
    already be in the output unit chosen at construction.

    Attributes:
        name: The name of the parameter type.

    Examples:
        >>> from gempyor.shared_cli import MemoryParamType
        >>> memory_param_type = MemoryParamType("mb")
        >>> memory_param_type.convert("1gb", None, None)
        1024.0
        >>> memory_param_type.convert("512", None, None)
        512.0
        >>> MemoryParamType("gb", as_int=True).convert("1.2gb", None, None)
        2
    """

    name = "memory"
    # Size of each accepted (lowercase) unit expressed in bytes.
    _units = {
        "kb": 1024.0**1.0,
        "k": 1024.0**1.0,
        "mb": 1024.0**2.0,
        "m": 1024.0**2.0,
        "gb": 1024.0**3.0,
        "g": 1024.0**3.0,
        "t": 1024.0**4.0,
        "tb": 1024.0**4.0,
    }

    def __init__(self, unit: str, as_int: bool = False) -> None:
        """
        Initialize the instance based on parameter settings.

        Args:
            unit: The output unit to use in the `convert` method.
            as_int: if `True` the `convert` method returns an integer instead of a
                float.

        Raises:
            ValueError: If `unit` is not a valid memory unit size.
        """
        super().__init__()
        if (unit := unit.lower()) not in self._units:
            raise ValueError(
                f"The `unit` given is not valid, given '{unit}' and "
                f"must be one of: {', '.join(self._units.keys())}."
            )
        self._unit = unit
        # Groups: (1) number, (2) integral digits, (3) fractional part including
        # the dot, (4) unit.
        self._regex = re.compile(
            rf"^(([0-9]+)?(\.[0-9]+)?)({'|'.join(self._units.keys())})?$",
            flags=re.IGNORECASE,
        )
        self._as_int = as_int

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> float | int:
        """
        Converts a string representation of a memory size into a numeric.

        Args:
            value: The value to convert, expected to be a string like representation of
                memory size.
            param: The Click parameter object for context in errors.
            ctx: The Click context object for context in errors.

        Returns:
            The converted memory size as a numeric. Specifically an integer if the
            `as_int` attribute is `True` (rounded up) and float otherwise.

        Raises:
            click.BadParameter: If the value is not a valid memory size based on the
                format.
        """
        value = str(value).strip()
        if (m := self._regex.match(value)) is None:
            self.fail(f"{value!r} is not a valid memory size.", param, ctx)
        number, _, _, unit = m.groups()
        # Both numeric groups are optional in the regex, so "" or a bare unit such
        # as "mb" match with an empty number; reject them here rather than letting
        # `float` raise a bare `ValueError`.
        if not number:
            self.fail(f"{value!r} is not a valid memory size.", param, ctx)
        unit = self._unit if unit is None else unit.lower()
        if unit == self._unit:
            # Skip the multiply/divide so same-unit input round-trips exactly.
            result = float(number)
        else:
            result = (self._units[unit] * float(number)) / self._units[self._unit]
        # Round up so an integer result never under-reports the requested size.
        return ceil(result) if self._as_int else result


def click_helpstring(
params: click.Parameter | list[click.Parameter],
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from datetime import timedelta
from typing import Any

from click.exceptions import BadParameter
import pytest

from gempyor.shared_cli import DurationParamType


@pytest.mark.parametrize("nonnegative", (True, False))
@pytest.mark.parametrize("value", ("abc", "$12.34", "12..3", "12years", "12.a2"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is forbidding "years" about that being conceptually wrong, or simply a duration we don't want to contemplate? It seems that, as a generic thing, it's appropriate for the tool to potentially support years, so it's a bit weird to explicitly test that it doesn't.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two issues with years: 1) years do not have a constant length (2024 was 366 days but most years are 365 days), and 2) Python's timedelta class doesn't have a years argument: https://docs.python.org/3/library/datetime.html#datetime.timedelta.

I could, however, see an argument for adding micro/milliseconds on conceptual grounds if you would like? I just excluded those since they are too short for most practical use cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is a duration we explicitly don't want to contemplate (primarily because it's an ambiguous unit for our case) - that's fine! It does feel worth noting somewhere that we don't allow this unit of time - like maybe in the method documentation? But I don't think we need, say, a special error message for years.

re ms - I think it's fine to not support it at this time. I think you're saying this would be one that we'd be willing to contemplate (i.e. we aren't testing to ensure its exclusion), but that we haven't yet bothered to support (because we don't think it's practically useful).

def test_invalid_duration_bad_parameter(nonnegative: bool, value: Any) -> None:
    """Check that a malformed duration string is rejected with `BadParameter`."""
    expected_message = "^'.*' is not a valid duration$"
    param_type = DurationParamType(nonnegative=nonnegative)
    with pytest.raises(BadParameter, match=expected_message):
        param_type.convert(value, None, None)


@pytest.mark.parametrize("value", ("-1", "-123", "-99.45", "-.9"))
def test_negative_duration_bad_parameter(value: Any) -> None:
    """Check that negative input is rejected when `nonnegative` is `True`."""
    expected_message = "^'.*' is a negative duration$"
    param_type = DurationParamType(nonnegative=True)
    with pytest.raises(BadParameter, match=expected_message):
        param_type.convert(value, None, None)


@pytest.mark.parametrize(
    ("value", "expected"),
    (
        ("1", timedelta(minutes=1)),
        ("2s", timedelta(seconds=2)),
        ("3hrs", timedelta(hours=3)),
        ("-4min", timedelta(minutes=-4)),
        ("-5d", timedelta(days=-5)),
        ("12.3", timedelta(minutes=12.3)),
        ("-45.6h", timedelta(hours=-45.6)),
        ("-.1w", timedelta(weeks=-0.1)),
    ),
)
def test_exact_results_for_select_inputs(value: Any, expected: timedelta) -> None:
    """Check hand-picked duration strings against their exact `timedelta` value."""
    # Negative inputs are part of the table, so the sign restriction is disabled.
    param_type = DurationParamType(nonnegative=False)
    actual = param_type.convert(value, None, None)
    assert actual == expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import random
from typing import Any

from click.exceptions import BadParameter
import pytest

from gempyor.shared_cli import MemoryParamType


@pytest.mark.parametrize("unit", ("Nope", "NO CHANCE", "wrong", "bb"))
def test_invalid_unit_value_error(unit: str) -> None:
    """Check that an unknown output unit raises `ValueError` naming the unit."""
    # The constructor lower-cases the unit before reporting it.
    expected_message = (
        "^The `unit` given is not valid, given "
        f"'{unit.lower()}' and must be one of:.*.$"
    )
    with pytest.raises(ValueError, match=expected_message):
        MemoryParamType(unit)


@pytest.mark.parametrize("value", ("1..2MB", "3.4cb", "56.abc", "-1GB"))
def test_invalid_value_bad_parameter(value: Any) -> None:
    """Check that a malformed memory size string is rejected with `BadParameter`."""
    expected_message = "^.* is not a valid memory size.$"
    param_type = MemoryParamType("mb")
    with pytest.raises(BadParameter, match=expected_message):
        param_type.convert(value, None, None)


@pytest.mark.parametrize("unit", MemoryParamType._units.keys())
@pytest.mark.parametrize("as_int", (True, False))
@pytest.mark.parametrize(
    "number",
    # Fixed values keep collection deterministic (stable test ids, reproducible
    # failures). Randomly drawn floats can also format in scientific notation
    # (e.g. 5e-05), which is not a memory size the parser accepts. Every float
    # below is exactly representable so the equality check is exact.
    (
        1,  # int
        37,
        1000,
        0.125,  # float without numbers left of the decimal
        0.5,
        0.875,
        1.5,  # float with numbers left of the decimal
        12.75,
        24.0625,
    ),
)
def test_convert_acts_as_identity(unit: str, as_int: bool, number: int | float) -> None:
    """Check that a value already in the output unit converts to itself.

    With `as_int` the result is rounded up, so it may differ from the input by up
    to one; otherwise it must equal the input exactly. Both the lowercase and
    uppercase spelling of the unit are exercised.
    """
    memory = MemoryParamType(unit, as_int=as_int)
    for u in (unit, unit.upper()):
        # Stripping the leading zero also covers inputs such as ".5mb".
        result = memory.convert(f"{number}{u}".lstrip("0"), None, None)
        assert isinstance(result, int if as_int else float)
        if as_int:
            assert abs(result - number) <= 1
        else:
            assert result == number


@pytest.mark.parametrize(
    ("unit", "as_int", "value", "expected"),
    (
        ("gb", False, "1.2gb", 1.2),
        ("gb", True, "1.2gb", 2),
        ("kb", False, "1mb", 1024.0),
        ("kb", True, "1mb", 1024),
        ("gb", False, "30mb", 30.0 / 1024.0),
        ("gb", True, "30mb", 1),
        ("kb", False, "2tb", 2.0 * (1024.0**3.0)),
        ("kb", True, "2tb", 2147483648),
        ("mb", False, "0.1gb", 0.1 * 1024.0),
        ("mb", True, "0.1gb", 103),
        ("gb", False, "4", 4.0),
        ("gb", True, "4", 4),
        ("mb", False, "1234.56", 1234.56),
        ("mb", True, "1234.56", 1235),
    ),
)
def test_exact_results_for_select_inputs(
    unit: str, as_int: bool, value: Any, expected: float | int
) -> None:
    """Check hand-picked memory strings against their exact converted value."""
    param_type = MemoryParamType(unit, as_int=as_int)
    actual = param_type.convert(value, None, None)
    assert actual == expected
Loading