From edd144dadf1705a8f1f5d85290d6563fbf32af22 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Sun, 27 Oct 2024 22:32:07 +0800 Subject: [PATCH] fix: lazy loading error for classification CSAI; --- pypots/classification/csai/data.py | 1 + pypots/classification/csai/model.py | 44 +++++++++++++++++------------ pypots/imputation/csai/model.py | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/pypots/classification/csai/data.py b/pypots/classification/csai/data.py index cd829882..3b93765c 100644 --- a/pypots/classification/csai/data.py +++ b/pypots/classification/csai/data.py @@ -6,6 +6,7 @@ # License: BSD-3-Clause from typing import Union + from ...imputation.csai.data import DatasetForCSAI as DatasetForCSAI_Imputation diff --git a/pypots/classification/csai/model.py b/pypots/classification/csai/model.py index a504cc1d..c65a3724 100644 --- a/pypots/classification/csai/model.py +++ b/pypots/classification/csai/model.py @@ -13,8 +13,11 @@ from .core import _BCSAI from .data import DatasetForCSAI from ..base import BaseNNClassifier +from ...data.checking import key_in_data_set +from ...data.saving.h5 import load_dict_from_h5 from ...optim.adam import Adam from ...optim.base import Optimizer +from ...utils.logging import logger class CSAI(BaseNNClassifier): @@ -171,6 +174,7 @@ def __init__( # set up the optimizer self.optimizer = optimizer + self.optimizer.init_optimizer(self.model.parameters()) def _assemble_input_for_training(self, data: list, training=True) -> dict: # extract data @@ -245,6 +249,12 @@ def fit( file_type: str = "hdf5", ) -> None: # Create dataset + if isinstance(train_set, str): + logger.warning( + "CSAI does not support lazy loading because normalise mean and std need to be calculated ahead. " + "Hence the whole train set will be loaded into memory." + ) + train_set = load_dict_from_h5(train_set) training_set = DatasetForCSAI( data=train_set, file_type=file_type, @@ -267,6 +277,15 @@ def fit( ) val_loader = None if val_set is not None: + if isinstance(val_set, str): + logger.warning( + "CSAI does not support lazy loading because normalise mean and std need to be calculated ahead. " + "Hence the whole val set will be loaded into memory." + ) + val_set = load_dict_from_h5(val_set) + + if not key_in_data_set("X_ori", val_set): + raise ValueError("val_set must contain 'X_ori' for model validation.") val_set = DatasetForCSAI( data=val_set, file_type=file_type, @@ -284,24 +303,6 @@ def fit( shuffle=False, num_workers=self.num_workers, ) - # Create model - self.model = _BCSAI( - n_steps=self.n_steps, - n_features=self.n_features, - rnn_hidden_size=self.rnn_hidden_size, - imputation_weight=self.imputation_weight, - consistency_weight=self.consistency_weight, - classification_weight=self.classification_weight, - n_classes=self.n_classes, - step_channels=self.step_channels, - dropout=self.dropout, - intervals=self.intervals, - ) - self._send_model_to_given_device() - self._print_model_size() - - # set up the optimizer - self.optimizer.init_optimizer(self.model.parameters()) # train the model self._train_model(train_loader, val_loader) @@ -317,6 +318,13 @@ def predict( ) -> dict: self.model.eval() + + if isinstance(test_set, str): + logger.warning( + "CSAI does not support lazy loading because normalise mean and std need to be calculated ahead. " + "Hence the whole test set will be loaded into memory." + ) + test_set = load_dict_from_h5(test_set) test_set = DatasetForCSAI( data=test_set, file_type=file_type, diff --git a/pypots/imputation/csai/model.py b/pypots/imputation/csai/model.py index 4eaab839..fe655ea2 100644 --- a/pypots/imputation/csai/model.py +++ b/pypots/imputation/csai/model.py @@ -6,7 +6,6 @@ # License: BSD-3-Clause from typing import Union, Optional -from venv import logger import numpy as np import torch @@ -19,6 +18,7 @@ from ...data.saving.h5 import load_dict_from_h5 from ...optim.adam import Adam from ...optim.base import Optimizer +from ...utils.logging import logger class CSAI(BaseNNImputer):