From 6954ae842a83440c7b6110c074bebb238a049cbd Mon Sep 17 00:00:00 2001 From: Dominik Jain Date: Fri, 4 Feb 2022 16:57:58 +0100 Subject: [PATCH 001/131] Sync dcs commit 6a9104f3e1a38ddf28d4f9346b24593288cc2edb Author: Dominik Jain Date: Fri Feb 4 16:27:09 2022 +0100 Allow to parametrise default TorchDataSetProviderFactories in order to enable dynamic tensorisation dcs/sensai/torch/torch_base.py dcs/sensai/torch/torch_data.py commit 9857b52fc6b479035547182defee82a209a63526 Author: Dominik Jain Date: Mon Jan 31 18:51:38 2022 +0100 EvaluationUtil.compareModelsCrossValidation: Fixed data frame string conversion (could contain ellipses) dcs/sensai/evaluation/eval_util.py commit e91067624eac7f736ae34613099d8c00b309ac02 Author: Dominik Jain Date: Sat Jan 29 21:03:42 2022 +0100 TorchDataSetFromTensors: Avoid last batch containing but one item dcs/sensai/torch/torch_data.py commit 5de914517185af04110ac44992c372bd5deea272 Author: Dominik Jain Date: Thu Jan 20 11:44:16 2022 +0100 TorchModel.apply: * Improved docstring * Report normalisation issue on a per input tensor basis dcs/sensai/torch/torch_base.py --- src/sensai/evaluation/eval_util.py | 3 ++- src/sensai/torch/torch_base.py | 29 +++++++++++++++++------------ src/sensai/torch/torch_data.py | 25 +++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/sensai/evaluation/eval_util.py b/src/sensai/evaluation/eval_util.py index 1c1ccb0a..5b29a77f 100644 --- a/src/sensai/evaluation/eval_util.py +++ b/src/sensai/evaluation/eval_util.py @@ -239,7 +239,8 @@ def performCrossValidation(self, model: TModel, showPlots=False, logResults=True crossValidationData = crossValidator.evalModel(model) aggStatsByVar = {varName: crossValidationData.getEvalStatsCollection(predictedVarName=varName).aggStats() for varName in crossValidationData.predictedVarNames} - strEvalResults = str(pd.DataFrame.from_dict(aggStatsByVar, orient="index")) + df = pd.DataFrame.from_dict(aggStatsByVar, orient="index") + strEvalResults = df.to_string() if logResults: log.info(f"Cross-validation results:\n{strEvalResults}") if resultWriter is not None: diff --git a/src/sensai/torch/torch_base.py b/src/sensai/torch/torch_base.py index 1dc18613..8d9bddc2 100644 --- a/src/sensai/torch/torch_base.py +++ b/src/sensai/torch/torch_base.py @@ -10,7 +10,8 @@ from torch.nn import functional as F from .torch_data import TensorScaler, VectorDataUtil, ClassificationVectorDataUtil, TorchDataSet, \ - TorchDataSetProviderFromDataUtil, TorchDataSetProvider, Tensoriser, TorchDataSetFromDataFrames, RuleBasedTensoriser + TorchDataSetProvider, Tensoriser, TorchDataSetFromDataFrames, RuleBasedTensoriser, \ + TorchDataSetProviderFromVectorDataUtil from .torch_enums import ClassificationOutputMode from .torch_opt import NNOptimiser, NNLossEvaluatorRegression, NNLossEvaluatorClassification, NNOptimiserParams, TrainingInfo from ..data import DataFrameSplitter @@ -185,7 +186,7 @@ def apply(self, X: Union[torch.Tensor, np.ndarray, TorchDataSet, Sequence[torch. mcDropoutSamples: Optional[int] = None, mcDropoutProbability: Optional[float] = None, scaleOutput: bool = False, scaleInput: bool = False) -> Union[torch.Tensor, np.ndarray, Tuple]: """ - Applies the model to the given input tensor and returns the result (normalized) + Applies the model to the given input tensor and returns the result :param X: the input tensor (either a batch or, if createBatch=True, a single data point), a data set or a tuple/list of tensors (if the model accepts more than one input). @@ -194,8 +195,8 @@ def apply(self, X: Union[torch.Tensor, np.ndarray, TorchDataSet, Sequence[torch. :param createBatch: whether to add an additional tensor dimension for a batch containing just one data point :param mcDropoutSamples: if not None, apply MC-Dropout-based inference with the respective number of samples; if None, apply regular inference :param mcDropoutProbability: the probability with which to apply dropouts in MC-Dropout-based inference; if None, use model's default - :param scaleOutput: whether to scale the output that is produced by the underlying model (using this instance's output scaler) - :param scaleInput: whether to scale the input (using this instance's input scaler) before applying the underlying model + :param scaleOutput: whether to scale the output that is produced by the underlying model (using this instance's output scaler, if any) + :param scaleInput: whether to scale the input (using this instance's input scaler, if any) before applying the underlying model :return: an output tensor or, if MC-Dropout is applied, a pair (y, sd) where y the mean output tensor and sd is a tensor of the same dimension containing standard deviations @@ -234,13 +235,11 @@ def extract(z): # check input normalisation if self.NORMALISATION_CHECK_THRESHOLD is not None: - maxValue = 0.0 - for t in inputs: + for i, t in enumerate(inputs): if t.is_floating_point() and t.numel() > 0: # skip any integer tensors (which typically contain lengths) and empty tensors - maxValue = max(t.abs().max().item(), maxValue) - if maxValue > self.NORMALISATION_CHECK_THRESHOLD: - log.warning("Received input which is likely to not be correctly normalised: maximum abs. value in input tensor is %f" % maxValue) - + maxValue = t.abs().max().item() + if maxValue > self.NORMALISATION_CHECK_THRESHOLD: + log.warning(f"Received value in input tensor {i} which is likely to not be correctly normalised: maximum abs. value in tensor is %f" % maxValue) if mcDropoutSamples is None: y = model(*inputs) return extract(y) @@ -621,22 +620,28 @@ def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, class TorchDataSetProviderFactoryClassificationDefault(TorchDataSetProviderFactory): + def __init__(self, tensoriseDynamically=False): + self.tensoriseDynamically = tensoriseDynamically + def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, model: TorchVectorClassificationModel, trainingContext: TrainingContext, inputTensoriser: Optional[Tensoriser], outputTensoriser: Optional[Tensoriser], dataFrameSplitter: Optional[DataFrameSplitter]) -> TorchDataSetProvider: dataUtil = ClassificationVectorDataUtil(inputs, outputs, model.model.cuda, len(model._labels), normalisationMode=model.normalisationMode, inputTensoriser=inputTensoriser, outputTensoriser=outputTensoriser, dataFrameSplitter=dataFrameSplitter) - return TorchDataSetProviderFromDataUtil(dataUtil, model.model.cuda) + return TorchDataSetProviderFromVectorDataUtil(dataUtil, model.model.cuda, tensoriseDynamically=self.tensoriseDynamically) class TorchDataSetProviderFactoryRegressionDefault(TorchDataSetProviderFactory): + def __init__(self, tensoriseDynamically=False): + self.tensoriseDynamically = tensoriseDynamically + def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, model: TorchVectorRegressionModel, trainingContext: TrainingContext, inputTensoriser: Optional[Tensoriser], outputTensoriser: Optional[Tensoriser], dataFrameSplitter: Optional[DataFrameSplitter]) -> TorchDataSetProvider: dataUtil = VectorDataUtil(inputs, outputs, model.model.cuda, normalisationMode=model.normalisationMode, inputTensoriser=inputTensoriser, outputTensoriser=outputTensoriser, dataFrameSplitter=dataFrameSplitter) - return TorchDataSetProviderFromDataUtil(dataUtil, model.model.cuda) + return TorchDataSetProviderFromVectorDataUtil(dataUtil, model.model.cuda, tensoriseDynamically=self.tensoriseDynamically) class OutputTensorToArrayConverter(ABC): diff --git a/src/sensai/torch/torch_data.py b/src/sensai/torch/torch_data.py index 51f58630..1a0dac6b 100644 --- a/src/sensai/torch/torch_data.py +++ b/src/sensai/torch/torch_data.py @@ -1,5 +1,6 @@ import logging from abc import ABC, abstractmethod +import math from typing import Tuple, Sequence, Optional, Union, List, Iterator import numpy as np @@ -485,7 +486,15 @@ def _get_batches(self, tensorTuples: Sequence[TensorTuple], batch_size, shuffle) index = torch.LongTensor(range(length)) start_idx = 0 while start_idx < length: - end_idx = min(length, start_idx + batch_size) + remaining_items = length - start_idx + is_second_last_batch = remaining_items <= 2*batch_size and remaining_items > batch_size + if is_second_last_batch: + # to avoid cases where the last batch is excessively small (1 item in the worst case, where e.g. batch + # normalisation would not be applicable), we evenly distribute the items across the last two batches + adjusted_batch_size = math.ceil(remaining_items / 2) + end_idx = min(length, start_idx + adjusted_batch_size) + else: + end_idx = min(length, start_idx + batch_size) excerpt = index[start_idx:end_idx] batch = [] for tensorTuple in tensorTuples: @@ -504,7 +513,7 @@ def _get_batches(self, tensorTuples: Sequence[TensorTuple], batch_size, shuffle) yield batch[0] else: yield tuple(batch) - start_idx += batch_size + start_idx = end_idx def size(self): return len(self.x) @@ -598,6 +607,18 @@ def provideSplit(self, fractionalSizeOfFirstSet: float) -> Tuple[TorchDataSet, T return TorchDataSetFromTensors(x1, y1, self.cuda), TorchDataSetFromTensors(x2, y2, self.cuda) +class TorchDataSetProviderFromVectorDataUtil(TorchDataSetProvider): + def __init__(self, dataUtil: VectorDataUtil, cuda: bool, tensoriseDynamically=False): + super().__init__(inputTensorScaler=dataUtil.getInputTensorScaler(), outputTensorScaler=dataUtil.getOutputTensorScaler(), + inputDim=dataUtil.inputDim(), modelOutputDim=dataUtil.modelOutputDim()) + self.dataUtil = dataUtil + self.cuda = cuda + self.tensoriseDynamically = tensoriseDynamically + + def provideSplit(self, fractionalSizeOfFirstSet: float) -> Tuple[TorchDataSet, TorchDataSet]: + return self.dataUtil.splitIntoDataSets(fractionalSizeOfFirstSet, self.cuda, tensoriseDynamically=self.tensoriseDynamically) + + class TensorTransformer(ABC): @abstractmethod def transform(self, t: torch.Tensor) -> torch.Tensor: From 437a912693423215b645e95ccddffa53496ad061 Mon Sep 17 00:00:00 2001 From: Dominik Jain Date: Mon, 14 Mar 2022 12:37:13 +0100 Subject: [PATCH 002/131] Sync dcs commit f6f0689ec30faeccaf129fac14a32d8ac372c4ca Author: Dominik Jain Date: Thu Mar 10 15:03:46 2022 +0100 SortedKeysAndValues: * Fixed valueSliceInner * Added parameter 'fallback' to valueSliceOuter dcs/sensai/util/datastruct.py commit 718cd1126aa16b37076f7afbda7b0a11dc20a704 Author: Dominik Jain Date: Thu Mar 10 15:03:18 2022 +0100 valueSliceOuter: Improved docstring, changed default value of fallbackBounds to False dcs/sensai/util/sequences.py commit d0c84bcda4655a75dbd7b2c08c86297a19f0f152 Author: Dominik Jain Date: Thu Mar 10 14:37:17 2022 +0100 Apply setstate dcs/sensai/torch/torch_base.py commit c6b2df5e708250e95a7ccc32a25c5939da5e22e4 Author: Dominik Jain Date: Thu Mar 10 14:29:46 2022 +0100 Added docstrings dcs/sensai/torch/torch_base.py commit feace45bebde85c02ef90cb9d7cf7ea75a9e2e75 Author: Dominik Jain Date: Tue Mar 8 15:20:07 2022 +0100 LogTime: Allow to configure logger dcs/sensai/util/logging.py commit cfa9bac815f3e785906720f00aa87c72c0c89cb4 Author: Dominik Jain Date: Tue Mar 8 15:14:10 2022 +0100 Vectoriser.applyMulti: Log application time in verbose mode dcs/sensai/vectoriser.py commit 96049012c66c7943b772911241d676ee9caba5b9 Author: Dominik Jain Date: Tue Mar 8 15:13:33 2022 +0100 LogTime: Allow logging to be disabled via a flag given at construction dcs/sensai/util/logging.py commit e59cba1f25307f8e00c4fc63588c4a38db9de560 Author: Dominik Jain Date: Tue Mar 8 13:29:03 2022 +0100 Vectoriser, SequenceVectoriser: Added optional caching of value-generating function when using applyMulti dcs/sensai/vectoriser.py commit f51a63826cca586df1dc4eab1d681076f7eb3e75 Author: Dominik Jain Date: Tue Mar 8 12:56:52 2022 +0100 Vectoriser, SequenceVectoriser: Added verbose mode for more information during training dcs/sensai/vectoriser.py commit 66c8c7948a3a94310763667f345a77331bd84135 Author: Dominik Jain Date: Tue Mar 8 11:50:28 2022 +0100 Fixed check for L-BFGS assuming string representation dcs/sensai/torch/torch_opt.py commit 1caf1b2907a126b48c5b37d9dde36299416baee8 Author: Dominik Jain Date: Tue Mar 8 11:34:56 2022 +0100 StopWatch: Extended time retrieval options LogTime: Improved logging of duration (depending on elapsed time) dcs/sensai/util/logging.py --- src/sensai/torch/torch_base.py | 82 +++++++++++++++++++++++++--------- src/sensai/torch/torch_opt.py | 16 ++++--- src/sensai/util/datastruct.py | 6 +-- src/sensai/util/logging.py | 44 +++++++++++++++--- src/sensai/util/sequences.py | 7 +-- src/sensai/vectoriser.py | 61 +++++++++++++++++++++---- 6 files changed, 167 insertions(+), 49 deletions(-) diff --git a/src/sensai/torch/torch_base.py b/src/sensai/torch/torch_base.py index 8d9bddc2..fabb16af 100644 --- a/src/sensai/torch/torch_base.py +++ b/src/sensai/torch/torch_base.py @@ -17,6 +17,7 @@ from ..data import DataFrameSplitter from ..normalisation import NormalisationMode from ..util.dtype import toFloatArray +from ..util.pickle import setstate from ..util.string import ToStringMixin from ..vector_model import VectorRegressionModel, VectorClassificationModel, TrainingContext @@ -389,7 +390,7 @@ def __init__(self, modelClass: Callable[..., TorchModel], modelArgs: Sequence = self.model: Optional[TorchModel] = None self.inputTensoriser: Optional[Tensoriser] = None self.outputTensoriser: Optional[Tensoriser] = None - self.outputTensorToArrayConverter = None + self.outputTensorToArrayConverter: Optional[OutputTensorToArrayConverter] = None self.torchDataSetProviderFactory: Optional[TorchDataSetProviderFactory] = None self.dataFrameSplitter: Optional[DataFrameSplitter] = None @@ -397,36 +398,40 @@ def __setstate__(self, state) -> None: state["nnOptimiserParams"] = NNOptimiserParams.fromDictOrInstance(state["nnOptimiserParams"]) newOptionalMembers = ["inputTensoriser", "torchDataSetProviderFactory", "dataFrameSplitter", "outputTensoriser", "outputTensorToArrayConverter"] - for m in newOptionalMembers: - if m not in state: - state[m] = None - s = super() - if hasattr(s, '__setstate__'): - s.__setstate__(state) - else: - self.__dict__ = state + setstate(TorchVectorRegressionModel, self, state, newOptionalProperties=newOptionalMembers) def withInputTensoriser(self, tensoriser: Tensoriser) -> __qualname__: + """ + :param tensoriser: tensoriser to use in order to convert input data frames to (one or more) tensors. + The default tensoriser directly converts the data frame's values (which is assumed to contain only scalars that + can be coerced to floats) to a float tensor. + The use of a custom tensoriser is necessary if a non-trivial conversion is necessary or if the data frame + is to be converted to more than one input tensor. + :return: self + """ self.inputTensoriser = tensoriser return self def withOutputTensoriser(self, tensoriser: RuleBasedTensoriser) -> __qualname__: """ :param tensoriser: tensoriser to use in order to convert the output data frame to a tensor. + The default output tensoriser directly converts the data frame's values to a float tensor. + NOTE: It is required to be a rule-based tensoriser, because mechanisms that require fitting on the data and thus perform a data-dependendent conversion are likely to cause problems because they would need to be reversed at inference time (since the model will be trained on the converted values). If you require a transformation, use a target transformer, which will be applied before the tensoriser. + :return: self """ self.outputTensoriser = tensoriser return self - def withOutputTensorToArrayConverter(self, outputTensorToArrayConverter) -> __qualname__: + def withOutputTensorToArrayConverter(self, outputTensorToArrayConverter: "OutputTensorToArrayConverter") -> __qualname__: """ Configures the use of a custom converter from tensors to numpy arrays, which is applied during inference. A custom converter can be required, for example, to handle variable-length outputs (where the output tensor will typically contain unwanted padding). Note that since the converter is for inference only, it may be - required to use a custom loss evaluator during training. + required to use a custom loss evaluator during training if the use of a custom converter is necessary. :param outputTensorToArrayConverter: the converter :return: self @@ -434,10 +439,23 @@ def withOutputTensorToArrayConverter(self, outputTensorToArrayConverter) -> __qu self.outputTensorToArrayConverter = outputTensorToArrayConverter def withTorchDataSetProviderFactory(self, torchDataSetProviderFactory: "TorchDataSetProviderFactory") -> __qualname__: + """ + :param torchDataSetProviderFactory: the torch data set provider factory, which is used to instantiate the provider which + will provide the training and validation data sets from the input data frame that is passed in for learning. + By default, TorchDataSetProviderFactoryRegressionDefault is used. + :return: self + """ self.torchDataSetProviderFactory = torchDataSetProviderFactory return self def withDataFrameSplitter(self, dataFrameSplitter: DataFrameSplitter) -> __qualname__: + """ + :param dataFrameSplitter: the data frame splitter which is used to split the input/output data frames that are passed for + learning into a data frame that is used for training and a data frame that is used for validation. + The input data frame is the data frame that is passed as input to the splitter, and the returned indices + are used to split both the input and output data frames in the same way. + :return: self + """ self.dataFrameSplitter = dataFrameSplitter return self @@ -525,18 +543,19 @@ def __init__(self, outputMode: ClassificationOutputMode, def __setstate__(self, state) -> None: state["nnOptimiserParams"] = NNOptimiserParams.fromDictOrInstance(state["nnOptimiserParams"]) newOptionalMembers = ["inputTensoriser", "torchDataSetProviderFactory", "dataFrameSplitter", "outputTensoriser"] - for m in newOptionalMembers: - if m not in state: - state[m] = None - if "outputMode" not in state: - state["outputMode"] = ClassificationOutputMode.PROBABILITIES - s = super() - if hasattr(s, '__setstate__'): - s.__setstate__(state) - else: - self.__dict__ = state + newDefaultProperties = {"outputMode": ClassificationOutputMode.PROBABILITIES} + setstate(TorchVectorClassificationModel, self, state, newOptionalProperties=newOptionalMembers, + newDefaultProperties=newDefaultProperties) def withInputTensoriser(self, tensoriser: Tensoriser) -> __qualname__: + """ + :param tensoriser: tensoriser to use in order to convert input data frames to (one or more) tensors. + The default tensoriser directly converts the data frame's values (which is assumed to contain only scalars that + can be coerced to floats) to a float tensor. + The use of a custom tensoriser is necessary if a non-trivial conversion is necessary or if the data frame + is to be converted to more than one input tensor. + :return: self + """ self.inputTensoriser = tensoriser return self @@ -552,10 +571,23 @@ def withOutputTensoriser(self, tensoriser: RuleBasedTensoriser) -> __qualname__: return self def withTorchDataSetProviderFactory(self, torchDataSetProviderFactory: "TorchDataSetProviderFactory") -> __qualname__: + """ + :param torchDataSetProviderFactory: the torch data set provider factory, which is used to instantiate the provider which + will provide the training and validation data sets from the input data frame that is passed in for learning. + By default, TorchDataSetProviderFactoryClassificationDefault is used. + :return: self + """ self.torchDataSetProviderFactory = torchDataSetProviderFactory return self def withDataFrameSplitter(self, dataFrameSplitter: DataFrameSplitter) -> __qualname__: + """ + :param dataFrameSplitter: the data frame splitter which is used to split the input/output data frames that are passed for + learning into a data frame that is used for training and a data frame that is used for validation. + The input data frame is the data frame that is passed as input to the splitter, and the returned indices + are used to split both the input and output data frames in the same way. + :return: self + """ self.dataFrameSplitter = dataFrameSplitter return self @@ -621,6 +653,10 @@ def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, class TorchDataSetProviderFactoryClassificationDefault(TorchDataSetProviderFactory): def __init__(self, tensoriseDynamically=False): + """ + :param tensoriseDynamically: whether tensorisation shall take place on the fly whenever the provided data sets are iterated; + if False, tensorisation takes place once in a precomputation stage (tensors must jointly fit into memory) + """ self.tensoriseDynamically = tensoriseDynamically def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, model: TorchVectorClassificationModel, @@ -634,6 +670,10 @@ def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, mod class TorchDataSetProviderFactoryRegressionDefault(TorchDataSetProviderFactory): def __init__(self, tensoriseDynamically=False): + """ + :param tensoriseDynamically: whether tensorisation shall take place on the fly whenever the provided data sets are iterated; + if False, tensorisation takes place once in a precomputation stage (tensors must jointly fit into memory) + """ self.tensoriseDynamically = tensoriseDynamically def createDataSetProvider(self, inputs: pd.DataFrame, outputs: pd.DataFrame, model: TorchVectorRegressionModel, diff --git a/src/sensai/torch/torch_opt.py b/src/sensai/torch/torch_opt.py index 3e155159..f1b6118e 100644 --- a/src/sensai/torch/torch_opt.py +++ b/src/sensai/torch/torch_opt.py @@ -41,13 +41,20 @@ class Optimiser(enum.Enum): LBFGS = ("lbfgs", optim.LBFGS) @classmethod - def fromName(cls, name: str): + def fromName(cls, name: str) -> "Optimiser": lname = name.lower() for o in cls: if o.value[0] == lname: return o raise ValueError(f"Unknown optimiser name '{name}'; known names: {[o.value[0] for o in cls]}") + @classmethod + def fromNameOrInstance(cls, nameOrInstance: Union[str, "Optimiser"]) -> "Optimiser": + if type(nameOrInstance) == str: + return cls.fromName(nameOrInstance) + else: + return nameOrInstance + class _Optimiser(object): """ @@ -61,10 +68,7 @@ def __init__(self, params, method: Union[str, Optimiser], lr, max_grad_norm, use :param max_grad_norm: gradient norm value beyond which to apply gradient shrinkage :param optimiserArgs: keyword arguments to be used in actual torch optimiser """ - if type(method) == str: - self.method = Optimiser.fromName(method) - else: - self.method = method + self.method = Optimiser.fromNameOrInstance(method) self.params = list(params) # careful: params may be a generator self.last_ppl = None self.lr = lr @@ -553,7 +557,7 @@ def __init__(self, lossEvaluator: NNLossEvaluator = None, gpu=None, optimiser: U :param shuffle: whether to shuffle the training data :param optimiserArgs: keyword arguments to be passed on to the actual torch optimiser """ - if optimiser == 'lbfgs': + if Optimiser.fromNameOrInstance(optimiser) == Optimiser.LBFGS: largeBatchSize = 1e12 if batchSize is not None: log.warning(f"LBFGS does not make use of batches, therefore using large batch size {largeBatchSize} to achieve use of a single batch") diff --git a/src/sensai/util/datastruct.py b/src/sensai/util/datastruct.py index 27ca13d2..1b0f22dc 100644 --- a/src/sensai/util/datastruct.py +++ b/src/sensai/util/datastruct.py @@ -229,10 +229,10 @@ def closestKeyAndValue(self, key) -> Optional[Tuple[TKey, TValue]]: return None if idx is None else (self.keys[idx], self.values[idx]) def valueSliceInner(self, lowerBoundKey, upperBoundKey): - return array_util.valueSliceOuter(self.keys, lowerBoundKey, upperBoundKey, values=self.values) + return array_util.valueSliceInner(self.keys, lowerBoundKey, upperBoundKey, values=self.values) - def valueSliceOuter(self, lowerBoundKey, upperBoundKey): - return array_util.valueSliceOuter(self.keys, lowerBoundKey, upperBoundKey, values=self.values) + def valueSliceOuter(self, lowerBoundKey, upperBoundKey, fallback=False): + return array_util.valueSliceOuter(self.keys, lowerBoundKey, upperBoundKey, values=self.values, fallbackBounds=fallback) class SortedKeyValuePairs(Generic[TKey, TValue]): diff --git a/src/sensai/util/logging.py b/src/sensai/util/logging.py index b90e533b..324392b1 100644 --- a/src/sensai/util/logging.py +++ b/src/sensai/util/logging.py @@ -7,7 +7,6 @@ import pandas as pd - log = getLogger(__name__) LOG_DEFAULT_FORMAT = '%(levelname)-5s %(asctime)-15s %(name)s:%(funcName)s - %(message)s' @@ -69,6 +68,16 @@ def restart(self): def getElapsedTimeSecs(self) -> float: return time.time() - self.startTime + def getElapsedTimedelta(self) -> pd.Timedelta: + return pd.Timedelta(self.getElapsedTimeSecs(), unit="s") + + def getElapsedTimeString(self) -> str: + secs = self.getElapsedTimeSecs() + if secs < 60: + return f"{secs:.3f} seconds" + else: + return str(pd.Timedelta(secs, unit="s")) + class StopWatchManager: """ @@ -93,7 +102,7 @@ def start(self, name): def stop(self, name) -> float: """ - :param name: the name of the time + :param name: the name of the stopwatch :return: the time that has passed in seconds """ timePassedSecs = time.time() - self._stopWatches[name] @@ -105,18 +114,39 @@ def isRunning(self, name): class LogTime: - def __init__(self, name): + """ + An execution time logger which can be conveniently applied using a with-statement - in order to log the executing time of the respective + with-block. + """ + + def __init__(self, name, enabled=True, logger: Logger = None): + """ + :param name: the name of the event whose time is to be logged upon completion as " completed in