diff --git a/pyproject.toml b/pyproject.toml index f8d8975..5ad95f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,4 +3,9 @@ requires = [ "setuptools", "wheel" ] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.pytest.ini_options] +pythonpath = [ + ".", "spare_scores", +] diff --git a/spare_scores/classes.py b/spare_scores/classes.py index 2f3e74c..03511e0 100644 --- a/spare_scores/classes.py +++ b/spare_scores/classes.py @@ -2,11 +2,10 @@ from typing import Any import pandas as pd - -from spare_scores.data_prep import logging_basic_config -from spare_scores.mlp import MLPModel -from spare_scores.mlp_torch import MLPTorchModel -from spare_scores.svm import SVMModel +from data_prep import logging_basic_config +from mlp import MLPModel +from mlp_torch import MLPTorchModel +from svm import SVMModel class SpareModel: diff --git a/spare_scores/cli.py b/spare_scores/cli.py index 37602d2..a4c9f0b 100644 --- a/spare_scores/cli.py +++ b/spare_scores/cli.py @@ -2,7 +2,7 @@ import pkg_resources # type: ignore -from spare_scores.spare_scores import spare_test, spare_train +from spare_scores import spare_test, spare_train VERSION = pkg_resources.require("spare_scores")[0].version diff --git a/spare_scores/data_prep.py b/spare_scores/data_prep.py index f6b9ff9..c987ed4 100644 --- a/spare_scores/data_prep.py +++ b/spare_scores/data_prep.py @@ -6,16 +6,14 @@ import numpy as np import pandas as pd from scipy import stats - -from spare_scores.util import convert_to_number_if_possible +from util import convert_to_number_if_possible def check_train( df: pd.DataFrame, predictors: list, to_predict: str, - verbose: int = 1, # this needs to be removed(non used). 
If i remove - # it, then there are bugs to the test cases(check_train() unexpected argument verbose) + verbose: int = 1, # this needs to be removed (not used) pos_group: str = "", ) -> Union[str, Tuple[pd.DataFrame, list, str]]: """Checks training dataframe for errors. diff --git a/spare_scores/mlp.py index f1dc299..b07b1bc 100644 --- a/spare_scores/mlp.py +++ b/spare_scores/mlp.py @@ -4,13 +4,14 @@ import numpy as np import pandas as pd +from data_prep import logging_basic_config from sklearn import metrics +from sklearn.exceptions import ConvergenceWarning from sklearn.model_selection import GridSearchCV, KFold from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler - -from spare_scores.data_prep import logging_basic_config +from sklearn.utils._testing import ignore_warnings class MLPModel: @@ -105,7 +106,7 @@ def __init__( def set_parameters(self, **parameters: Any) -> None: self.__dict__.update(parameters) - # @ignore_warnings(category=RuntimeWarning) + @ignore_warnings(category=RuntimeWarning) # type: ignore def _fit(self, df: pd.DataFrame) -> None: X = df[self.predictors].astype("float64") @@ -152,7 +153,7 @@ def _fit(self, df: pd.DataFrame) -> None: self.get_stats(y, self.y_hat) - # @ignore_warnings(category=(ConvergenceWarning, UserWarning)) + @ignore_warnings(category=(ConvergenceWarning, UserWarning)) # type: ignore def fit(self, df: pd.DataFrame, verbose: int = 1) -> dict: """ Trains the model using the provided dataframe and default parameters. 
diff --git a/spare_scores/mlp_torch.py b/spare_scores/mlp_torch.py index 811952f..0defaee 100644 --- a/spare_scores/mlp_torch.py +++ b/spare_scores/mlp_torch.py @@ -8,6 +8,8 @@ import torch import torch.nn as nn import torch.optim as optim +from data_prep import logging_basic_config +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import ( accuracy_score, balanced_accuracy_score, @@ -23,10 +25,9 @@ ) from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler +from sklearn.utils._testing import ignore_warnings from torch.utils.data import DataLoader, Dataset -from spare_scores.data_prep import logging_basic_config - device = ( "cuda" if torch.cuda.is_available() @@ -383,7 +384,7 @@ def set_parameters(self, **parameters: Any) -> None: else: self.__dict__.update(parameters) - # @ignore_warnings(category=(ConvergenceWarning, UserWarning)) + @ignore_warnings(category=(ConvergenceWarning, UserWarning)) # type: ignore def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict: logger = logging_basic_config(verbose, content_only=True) diff --git a/spare_scores/spare_scores.py b/spare_scores/spare_scores.py index 83b9040..90bf90c 100644 --- a/spare_scores/spare_scores.py +++ b/spare_scores/spare_scores.py @@ -3,28 +3,21 @@ import numpy as np import pandas as pd - -from spare_scores.classes import MetaData, SpareModel -from spare_scores.data_prep import ( +from classes import MetaData, SpareModel +from data_prep import ( check_test, check_train, convert_cat_variables, logging_basic_config, ) -from spare_scores.util import ( - check_file_exists, - is_unique_identifier, - load_df, - load_model, - save_file, -) +from util import check_file_exists, is_unique_identifier, load_df, load_model, save_file def spare_train( df: Union[pd.DataFrame, str], to_predict: str, model_type: str = "SVM", - pos_group: Any = "", + pos_group: str = "", key_var: str = "", data_vars: list = [], ignore_vars: list = [], @@ 
-33,7 +26,7 @@ def spare_train( verbose: int = 1, logs: str = "", **kwargs: Any, -) -> Any: +) -> dict: """ Trains a SPARE model, either classification or regression @@ -114,7 +107,7 @@ def spare_train( try: df, predictors, mdl_task = check_train( df, predictors, to_predict, verbose, pos_group - ) # type: ignore + ) except Exception as e: err = "Dataset check failed before training was initiated." logger.error(err) @@ -207,6 +200,9 @@ def spare_train( if output != "" and output is not None: save_file(result, output, "train", logger) + print("###### PRINTING ########") + print(result) + print("####### END ###########") res["status"] = "OK" res["data"] = result res["status_code"] = 0 diff --git a/spare_scores/svm.py b/spare_scores/svm.py index b0f9708..7420d9b 100644 --- a/spare_scores/svm.py +++ b/spare_scores/svm.py @@ -4,13 +4,12 @@ import numpy as np import pandas as pd +from data_prep import logging_basic_config from sklearn import metrics from sklearn.model_selection import GridSearchCV, RepeatedKFold from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC, LinearSVC, LinearSVR - -from spare_scores.data_prep import logging_basic_config -from spare_scores.util import expspace +from util import expspace class SVMModel: @@ -150,7 +149,7 @@ def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict: sampled_df = df.sample(n=500, random_state=2023) sampled_df = sampled_df.reset_index(drop=True) self.train_initialize(sampled_df, self.to_predict) - self.run_CV(sampled_df, **kwargs) + self.run_CV(sampled_df) # Use the optimal parameters to train the model on the full data param_grid = { par: expspace( @@ -166,7 +165,7 @@ def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict: # Train the model on the full data, with the optimal parameters logger.info("Training SVM model...") self.train_initialize(df, self.to_predict) - self.run_CV(df, **kwargs) + self.run_CV(df) training_time = time.time() - start_time 
self.stats["training_time"] = round(training_time, 4) diff --git a/tests/unit/test_data_prep.py b/tests/unit/test_data_prep.py index 3ab22e1..24b0445 100644 --- a/tests/unit/test_data_prep.py +++ b/tests/unit/test_data_prep.py @@ -16,7 +16,7 @@ smart_unique, ) -from spare_scores.util import load_df +from util import load_df class CheckDataPrep(unittest.TestCase): diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py index 6f78169..f31f2b3 100644 --- a/tests/unit/test_spare_scores.py +++ b/tests/unit/test_spare_scores.py @@ -1,10 +1,11 @@ import unittest from pathlib import Path - +import sys import pandas as pd -from spare_scores.spare_scores import spare_test, spare_train -from spare_scores.util import load_df, load_model +sys.path.append("../spare_scores") +from spare_scores import spare_test, spare_train +from util import load_df, load_model class CheckSpareScores(unittest.TestCase): @@ -74,9 +75,9 @@ def test_spare_train(self): ], ) - status, result = result["status"], result["data"] - - metadata = result[1] + status, result_data = result["status"], result["data"] + + metadata = result_data[1] self.assertTrue(status == "OK") self.assertTrue(metadata["mdl_type"] == self.model_fixture[1]["mdl_type"]) self.assertTrue(metadata["kernel"] == self.model_fixture[1]["kernel"]) diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py index 85b2976..e2f73d9 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_util.py @@ -1,13 +1,14 @@ import logging import os -import re +import sys import unittest from pathlib import Path import numpy as np import pandas as pd -from spare_scores.util import ( +sys.path.append("../spare_scores") +from util import ( add_file_extension, check_file_exists, convert_to_number_if_possible,