diff --git a/.github/workflows/test-pytest.yml b/.github/workflows/test-pytest.yml
index 2993a2bd..59dc0cd1 100644
--- a/.github/workflows/test-pytest.yml
+++ b/.github/workflows/test-pytest.yml
@@ -26,4 +26,5 @@ jobs:
           python-version: ${{ matrix.python-version }}
           cache: "pip"
       - run: pip install ".[test]"
-      - run: pytest --doctest-modules --import-mode importlib
+      - run: pytest --hypothesis-profile ci
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8133575d..6e1424b7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
     rev: "v1.5.1"
     hooks:
       - id: mypy
-        additional_dependencies: [types-requests,scipy,pytest,types-PyYAML]
+        additional_dependencies: [types-requests,scipy,pytest,hypothesis,types-pyyaml]
         language_version: python3.8
         args:
           - "--namespace-packages"
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000..b02df288
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,4 @@
+from hypothesis import Verbosity, settings
+
+settings.register_profile("ci", max_examples=1000)
+settings.register_profile("debug", max_examples=10, verbosity=Verbosity.verbose)
diff --git a/pyproject.toml b/pyproject.toml
index 3e33acec..36046cd2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,8 +47,8 @@ dev = [
 ]
 test = [
     "pytest",
-    "hypothesis",
-    "autora-core[serializers]",
+    "hypothesis[pandas]",
+    "autora-core[serializers]"
 ]
 build = [
     "build",
@@ -102,3 +102,7 @@ requires = ["setuptools", "setuptools-scm"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools_scm]
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "--doctest-modules --import-mode importlib"
diff --git a/src/autora/serializer/yaml_.py b/src/autora/serializer/yaml_.py
new file mode 100644
index 00000000..fc7c24a8
--- /dev/null
+++ b/src/autora/serializer/yaml_.py
@@ -0,0 +1,31 @@
+"""Thin YAML wrappers exposing ``dump``/``load``/``dumps``/``loads`` functions.
+
+NOTE: ``yaml.Loader`` can construct arbitrary Python objects, so these loaders
+must only be used on data from trusted sources.
+"""
+
+import yaml
+
+
+def dump(data, file):
+    """Serialize ``data`` as YAML and write it to the open stream ``file``."""
+    yaml.dump(data, file, Dumper=yaml.Dumper)
+    return
+
+
+def load(file):
+    """Read YAML from the open stream ``file`` and return the loaded object."""
+    data = yaml.load(file, Loader=yaml.Loader)
+    return data
+
+
+def dumps(data):
+    """Return ``data`` serialized as a YAML string."""
+    string = yaml.dump(data, Dumper=yaml.Dumper)
+    return string
+
+
+def loads(string):
+    """Return the object loaded from the YAML document in ``string``."""
+    data = yaml.load(string, Loader=yaml.Loader)
+    return data
diff --git a/src/autora/variable/__init__.py b/src/autora/variable/__init__.py
index 4cdd8ff9..f42337ac 100644
--- a/src/autora/variable/__init__.py
+++ b/src/autora/variable/__init__.py
@@ -6,6 +6,8 @@
 class ValueType(str, Enum):
     """Specifies supported value types supported by Variables."""
 
+    BOOLEAN = "boolean"
+    INTEGER = "integer"
     REAL = "real"
     SIGMOID = "sigmoid"
     PROBABILITY = "probability"  # single probability
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_serializer.py b/tests/test_serializer.py
index 5ae34935..77f39920 100644
--- a/tests/test_serializer.py
+++ b/tests/test_serializer.py
@@ -1,24 +1,87 @@
+import importlib
+import logging
 import pathlib
 import tempfile
 import uuid
+from collections import namedtuple
 
-from hypothesis import Verbosity, given, settings
-from hypothesis import strategies as st
+import hypothesis.strategies as st
 
-from autora.serializer import SerializersSupported, dump_state, load_state
-from autora.state import StandardState
+logger = logging.getLogger(__name__)
 
+_SUPPORTED_SERIALIZERS = [
+    ("pickle", "b"),
+    ("dill", "b"),
+    ("autora.serializer.yaml_", ""),
+]
+_SERIALIZER_DEF = namedtuple("_SERIALIZER_DEF", ["name", "module", "file_type"])
+_AVAILABLE_SERIALIZERS = []
 
-@given(
-    st.builds(StandardState, st.text(), st.text(), st.text(), st.lists(st.integers())),
-    st.sampled_from(SerializersSupported),
-)
-@settings(verbosity=Verbosity.verbose)
-def test_load_inverts_dump(s, serializer):
-    """Test that each serializer can be used to serialize and deserialize a state object."""
-    with tempfile.TemporaryDirectory() as dir:
-        path = pathlib.Path(dir, f"{str(uuid.uuid4())}")
-        print(path, s)
+for module_name, file_type in _SUPPORTED_SERIALIZERS:
+    try:
+        module = importlib.import_module(module_name)
+    except ImportError:
+        logger.info(f"serializer {module_name} not available")
+        continue
+    _AVAILABLE_SERIALIZERS.append(_SERIALIZER_DEF(module_name, module, file_type))
 
-        dump_state(s, path, dumper=serializer)
-        assert load_state(path, loader=serializer) == s
+AVAILABLE_SERIALIZERS = st.sampled_from(_AVAILABLE_SERIALIZERS)
+
+
+@st.composite
+def serializer_loads_dumps_strategy(draw):
+    """Strategy returns the (loads, dumps) functions of an available serializer."""
+    serializer = draw(AVAILABLE_SERIALIZERS)
+    loads, dumps = serializer.module.loads, serializer.module.dumps
+    return loads, dumps
+
+
+@st.composite
+def serializer_dump_load_string_strategy(draw):
+    """Strategy returns a function which dumps an object and reloads it via a bytestream."""
+    serializer = draw(AVAILABLE_SERIALIZERS)
+    loads, dumps = serializer.module.loads, serializer.module.dumps
+
+    def _load_dump_via_string(o):
+        logger.info(f"load dump via string using {serializer.module=}")
+        return loads(dumps(o))
+
+    return _load_dump_via_string
+
+
+@st.composite
+def serializer_dump_load_binary_file_strategy(draw):
+    """Strategy returns a function which dumps an object and reloads it via a temporary file."""
+    serializer = draw(AVAILABLE_SERIALIZERS)
+    load, dump = serializer.module.load, serializer.module.dump
+
+    def _load_dump_via_disk(o):
+        logger.info(f"load dump via disk using {serializer.module=}")
+        with tempfile.TemporaryDirectory() as tempdir:
+            filename = str(uuid.uuid1())
+            with open(pathlib.Path(tempdir, filename), f"w{serializer.file_type}") as f:
+                dump(o, f)
+            with open(pathlib.Path(tempdir, filename), f"r{serializer.file_type}") as f:
+                o_loaded = load(f)
+        return o_loaded
+
+    return _load_dump_via_disk
+
+
+@st.composite
+def serializer_dump_load_strategy(draw):
+    """Strategy returns a function which dumps an object and reloads it via a supported method."""
+    _dump_load = draw(
+        st.one_of(
+            serializer_dump_load_string_strategy(),
+            serializer_dump_load_binary_file_strategy(),
+        )
+    )
+    return _dump_load
+
+
+if __name__ == "__main__":
+    o = list("abcde")
+    loader_dumper_disk = serializer_dump_load_strategy().example()
+    o_loaded = loader_dumper_disk(o)
+    print(o, o_loaded)
diff --git a/tests/test_state.py b/tests/test_state.py
new file mode 100644
index 00000000..7fd665c3
--- /dev/null
+++ b/tests/test_state.py
@@ -0,0 +1,20 @@
+import logging
+
+import pandas as pd
+from hypothesis import HealthCheck, given, settings
+
+from autora.state import StandardStateDataClass
+
+from .test_serializer import serializer_dump_load_strategy
+from .test_strategies import standard_state_dataclass_strategy
+
+logger = logging.getLogger(__name__)
+
+
+@given(standard_state_dataclass_strategy(), serializer_dump_load_strategy())
+@settings(suppress_health_check={HealthCheck.too_slow}, deadline=500)
+def test_state_serialize_deserialize(o: StandardStateDataClass, dump_load):
+    o_loaded = dump_load(o)
+    assert o.variables == o_loaded.variables
+    assert pd.DataFrame.equals(o.conditions, o_loaded.conditions)
+    assert pd.DataFrame.equals(o.experiment_data, o_loaded.experiment_data)
diff --git a/tests/test_strategies.py b/tests/test_strategies.py
new file mode 100644
index 00000000..62e95771
--- /dev/null
+++ b/tests/test_strategies.py
@@ -0,0 +1,471 @@
+import logging
+from typing import Optional, Sequence
+
+import numpy as np
+import pandas as pd
+import sklearn.dummy
+import sklearn.linear_model
+from hypothesis import HealthCheck, given, settings
+from hypothesis import strategies as st
+from hypothesis.extra import numpy as st_np
+from hypothesis.extra import pandas as st_pd
+
+from autora.state import StandardStateDataClass
+from autora.variable import ValueType, Variable, VariableCollection
+
+VALUE_TYPE_DTYPE_MAPPING = {
+    ValueType.BOOLEAN: bool,
+    ValueType.INTEGER: int,
+    ValueType.REAL: float,
+    ValueType.SIGMOID: float,
+    ValueType.PROBABILITY: float,
+    ValueType.PROBABILITY_SAMPLE: float,
+    ValueType.PROBABILITY_DISTRIBUTION: float,
+    ValueType.CLASS: str,
+}
+
+logger = logging.getLogger(__name__)
+
+AVAILABLE_SKLEARN_MODELS_STRATEGY = st.sampled_from(
+    [
+        sklearn.dummy.DummyRegressor,
+        sklearn.linear_model.LinearRegression,
+        sklearn.linear_model.Ridge,
+        sklearn.linear_model.BayesianRidge,
+    ]
+)
+
+
+@st.composite
+def _name_label_units_strategy(draw, name=None, label=None, units=None, covariate=None):
+    if name is None:
+        name = draw(st.text(min_size=1))
+    if label is None:
+        label = draw(st.text(min_size=0))
+    if units is None:
+        units = draw(st.text(min_size=0))
+    if covariate is None:
+        covariate = draw(st.booleans())
+    return name, label, units, covariate
+
+
+@st.composite
+def variable_boolean_strategy(draw, name=None, label=None, units=None, covariate=None):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.BOOLEAN
+    allowed_values = [True, False]
+    value_range = None
+    rescale = 1
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_integer_strategy(draw, name=None, label=None, units=None, covariate=None):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.INTEGER
+
+    value_range = draw(
+        st.one_of(
+            st.none(),
+            st.tuples(st.integers(), st.integers())
+            .filter(lambda x: x[0] != x[1])
+            .map(sorted),
+        )
+    )
+    if value_range is None:
+        allowed_values = draw(st.one_of(st.none(), st.sets(st.integers(), min_size=1)))
+    else:
+        allowed_values = None
+
+    rescale = draw(
+        st.one_of(
+            st.just(1),
+            st.integers(),
+            st.floats(allow_infinity=False, allow_subnormal=False, allow_nan=False),
+        )
+    )
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_real_strategy(draw, name=None, label=None, units=None, covariate=None):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.REAL
+    range_strategy = st.floats(allow_nan=False, allow_subnormal=False)
+    value_range = draw(
+        st.one_of(
+            st.none(),
+            st.tuples(range_strategy, range_strategy)
+            .filter(lambda x: x[0] != x[1])
+            .map(sorted),
+        )
+    )
+
+    if value_range is None:
+        allowed_values = draw(st.one_of(st.none(), st.sets(range_strategy, min_size=1)))
+    else:
+        allowed_values = None
+    rescale = draw(st.one_of(st.just(1), range_strategy))
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_probability_strategy(
+    draw, name=None, label=None, units=None, covariate=None
+):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.PROBABILITY
+    value_range = (0, 1)
+    allowed_values = None
+    rescale = 1
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_probability_sample_strategy(
+    draw, name=None, label=None, units=None, covariate=None
+):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.PROBABILITY_SAMPLE
+    value_range = (0, 1)
+    allowed_values = None
+    rescale = 1
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_probability_distribution_strategy(
+    draw, name=None, label=None, units=None, covariate=None
+):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.PROBABILITY_DISTRIBUTION
+    value_range = (0, 1)
+    allowed_values = None
+    rescale = 1
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_sigmoid_strategy(draw, name=None, label=None, units=None, covariate=None):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.SIGMOID
+    value_range = (-np.inf, +np.inf)
+    allowed_values = None
+    rescale = 1
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+@st.composite
+def variable_class_strategy(draw, name=None, label=None, units=None, covariate=None):
+    name, label, units, covariate = draw(
+        _name_label_units_strategy(
+            name=name, label=label, units=units, covariate=covariate
+        )
+    )
+    value_type = ValueType.CLASS
+    value_range = None
+    rescale = 1
+    allowed_values = draw(st.lists(st.text(min_size=1, max_size=16), unique=True))
+    return Variable(
+        name=name,
+        variable_label=label,
+        units=units,
+        type=value_type,
+        is_covariate=covariate,
+        value_range=value_range,
+        allowed_values=allowed_values,
+        rescale=rescale,
+    )
+
+
+VARIABLE_STRATEGIES = (
+    variable_boolean_strategy,
+    variable_integer_strategy,
+    variable_probability_strategy,
+    variable_probability_sample_strategy,
+    variable_probability_distribution_strategy,
+    variable_sigmoid_strategy,
+    variable_real_strategy,
+    variable_class_strategy,
+)
+
+
+@st.composite
+def variable_strategy(
+    draw, elements=VARIABLE_STRATEGIES, value_type: Optional[ValueType] = None, **kwargs
+):
+    if value_type is None:
+        strategy = draw(st.sampled_from(elements))
+
+    else:
+        strategy = {
+            ValueType.BOOLEAN: variable_boolean_strategy,
+            ValueType.INTEGER: variable_integer_strategy,
+            ValueType.REAL: variable_real_strategy,
+            ValueType.SIGMOID: variable_sigmoid_strategy,
+            ValueType.PROBABILITY: variable_probability_strategy,
+            ValueType.PROBABILITY_SAMPLE: variable_probability_sample_strategy,
+            ValueType.PROBABILITY_DISTRIBUTION: variable_probability_distribution_strategy,
+            ValueType.CLASS: variable_class_strategy,
+        }[value_type]
+    return draw(strategy(**kwargs))
+
+
+@given(variable_strategy())
+def test_variable_strategy_creation(o):
+    assert o
+
+
+@st.composite
+def variablecollection_strategy(
+    draw,
+    elements=VARIABLE_STRATEGIES,
+    value_type: Optional[ValueType] = None,
+    max_ivs=5,
+    max_dvs=1,
+    max_covariates=2,
+    name_max_length=32,
+    **kwargs,
+):
+    n_ivs, n_dvs, n_covariates = draw(
+        st.tuples(
+            st.integers(min_value=1, max_value=max_ivs),
+            st.integers(min_value=1, max_value=max_dvs),
+            st.integers(min_value=0, max_value=max_covariates),
+        )
+    )
+
+    n_variables = n_ivs + n_dvs + n_covariates
+
+    names = draw(
+        st.lists(
+            st.text(min_size=1, max_size=name_max_length),
+            unique=True,
+            min_size=n_variables,
+            max_size=n_variables,
+        )
+    )
+    independent_variables = [
+        draw(
+            variable_strategy(
+                name=names.pop(), value_type=value_type, elements=elements, **kwargs
+            )
+        )
+        for _ in range(n_ivs)
+    ]
+    dependent_variables = [
+        draw(
+            variable_strategy(
+                name=names.pop(), value_type=value_type, elements=elements, **kwargs
+            )
+        )
+        for _ in range(n_dvs)
+    ]
+    covariates = [
+        draw(
+            variable_strategy(
+                name=names.pop(), value_type=value_type, elements=elements, **kwargs
+            )
+        )
+        for _ in range(n_covariates)
+    ]
+
+    vc = VariableCollection(
+        independent_variables=independent_variables,
+        dependent_variables=dependent_variables,
+        covariates=covariates,
+    )
+    return vc
+
+
+@given(variablecollection_strategy())
+def test_variablecollection_strategy_creation(o):
+    assert o
+
+
+@st.composite
+def dataframe_strategy(
+    draw,
+    variables: Optional[Sequence[Variable]] = None,
+    value_type: Optional[ValueType] = None,
+):
+    if variables is None:
+        variable_collection = draw(variablecollection_strategy(value_type=value_type))
+        variables = (
+            variable_collection.independent_variables
+            + variable_collection.dependent_variables
+            + variable_collection.covariates
+        )
+
+    df: pd.DataFrame = draw(
+        st_pd.data_frames(
+            columns=[
+                st_pd.column(name=v.name, dtype=VALUE_TYPE_DTYPE_MAPPING[v.type])
+                for v in variables
+            ],
+        )
+    )
+
+    return df
+
+
+@given(dataframe_strategy())
+def test_dataframe_strategy_creation(o):
+    assert o is not None
+
+
+@st.composite
+def model_strategy(draw, models=AVAILABLE_SKLEARN_MODELS_STRATEGY):
+    model = draw(models)
+
+    n_x = draw(st.integers(min_value=1, max_value=5))
+    n_y = draw(st.integers(min_value=1, max_value=1))
+    n_measurements = draw(st.integers(min_value=5, max_value=100))
+
+    elements = st_np.from_dtype(
+        np.dtype(float),
+        allow_infinity=False,
+        allow_subnormal=False,
+        allow_nan=False,
+        # Include some reasonable extreme values. Values near the upper limit of the float
+        # ~10**308, and very small values broke the fitting
+        min_value=-1e5,
+        max_value=1e5,
+        min_magnitude=1e-3,
+    )
+    X = draw(st_np.arrays(float, shape=(n_measurements, n_x), elements=elements))
+    y = draw(st_np.arrays(float, shape=(n_measurements, n_y), elements=elements))
+
+    result = model().fit(X, y.ravel())
+    return result
+
+
+@given(model_strategy())
+def test_model_strategy_creation(o):
+    assert o
+
+
+@st.composite
+def standard_state_dataclass_strategy(draw):
+    variable_collection: VariableCollection = draw(variablecollection_strategy())
+    conditions = draw(
+        dataframe_strategy(variables=variable_collection.independent_variables)
+    )
+    experiment_data = draw(
+        dataframe_strategy(
+            variables=(
+                variable_collection.independent_variables
+                + variable_collection.dependent_variables
+                + variable_collection.covariates
+            )
+        )
+    )
+    models = draw(st.lists(model_strategy(), min_size=0, max_size=5))
+    s = StandardStateDataClass(
+        variables=variable_collection,
+        conditions=conditions,
+        experiment_data=experiment_data,
+        models=models,
+    )
+    return s
+
+
+@settings(suppress_health_check={HealthCheck.too_slow})
+@given(standard_state_dataclass_strategy())
+def test_standard_state_dataclass_strategy_creation(o):
+    assert o
+
+
+if __name__ == "__main__":
+    print(model_strategy().example())
diff --git a/tests/test_variable.py b/tests/test_variable.py
new file mode 100644
index 00000000..0a9180a1
--- /dev/null
+++ b/tests/test_variable.py
@@ -0,0 +1,21 @@
+import logging
+
+from hypothesis import given
+from hypothesis import strategies as st
+
+from .test_serializer import serializer_dump_load_strategy
+from .test_strategies import variable_strategy, variablecollection_strategy
+
+logger = logging.getLogger(__name__)
+
+
+@given(
+    st.one_of(
+        variable_strategy(),
+        variablecollection_strategy(),
+    ),
+    serializer_dump_load_strategy(),
+)
+def test_variable_serialize_deserialize(o, dump_load):
+    o_loaded = dump_load(o)
+    assert o_loaded == o