
Commit

Fixed bug with pos_group
spirosmaggioros committed Jul 19, 2024
1 parent 8bf60d5 commit 70fbc63
Showing 11 changed files with 46 additions and 45 deletions.
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -3,4 +3,9 @@ requires = [
"setuptools",
"wheel"
]
build-backend = "setuptools.build_meta"
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
pythonpath = [
".", "spare_scores",
]
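
The new [tool.pytest.ini_options] block uses pytest's pythonpath setting (available since pytest 7.0) to prepend the listed directories to sys.path during test collection and execution; this is what lets the modules and tests below switch to bare imports such as "from util import load_df". A rough Python equivalent of that behaviour, for illustration only (not part of the commit):

    import sys
    from pathlib import Path

    # Approximate what pytest's pythonpath option does for this repository layout.
    for entry in (".", "spare_scores"):
        resolved = str(Path(entry).resolve())
        if resolved not in sys.path:
            sys.path.insert(0, resolved)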
9 changes: 4 additions & 5 deletions spare_scores/classes.py
@@ -2,11 +2,10 @@
from typing import Any

import pandas as pd

from spare_scores.data_prep import logging_basic_config
from spare_scores.mlp import MLPModel
from spare_scores.mlp_torch import MLPTorchModel
from spare_scores.svm import SVMModel
from data_prep import logging_basic_config
from mlp import MLPModel
from mlp_torch import MLPTorchModel
from svm import SVMModel


class SpareModel:
2 changes: 1 addition & 1 deletion spare_scores/cli.py
@@ -2,7 +2,7 @@

import pkg_resources # type: ignore

from spare_scores.spare_scores import spare_test, spare_train
from spare_scores import spare_test, spare_train

VERSION = pkg_resources.require("spare_scores")[0].version

6 changes: 2 additions & 4 deletions spare_scores/data_prep.py
@@ -6,16 +6,14 @@
import numpy as np
import pandas as pd
from scipy import stats

from spare_scores.util import convert_to_number_if_possible
from util import convert_to_number_if_possible


def check_train(
df: pd.DataFrame,
predictors: list,
to_predict: str,
verbose: int = 1, # this needs to be removed(non used). If i remove
# it, then there are bugs to the test cases(check_train() unexpected argument verbose)
verbose: int = 1, # this needs to be removed(non used)
pos_group: str = "",
) -> Union[str, Tuple[pd.DataFrame, list, str]]:
"""Checks training dataframe for errors.
9 changes: 5 additions & 4 deletions spare_scores/mlp.py
@@ -4,13 +4,14 @@

import numpy as np
import pandas as pd
from data_prep import logging_basic_config
from sklearn import metrics
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from spare_scores.data_prep import logging_basic_config
from sklearn.utils._testing import ignore_warnings


class MLPModel:
@@ -105,7 +106,7 @@ def __init__(
def set_parameters(self, **parameters: Any) -> None:
self.__dict__.update(parameters)

# @ignore_warnings(category=RuntimeWarning)
@ignore_warnings(category=RuntimeWarning) # type: ignore
def _fit(self, df: pd.DataFrame) -> None:

X = df[self.predictors].astype("float64")
@@ -152,7 +153,7 @@ def _fit(self, df: pd.DataFrame) -> None:

self.get_stats(y, self.y_hat)

# @ignore_warnings(category=(ConvergenceWarning, UserWarning))
@ignore_warnings(category=(ConvergenceWarning, UserWarning)) # type: ignore
def fit(self, df: pd.DataFrame, verbose: int = 1) -> dict:
"""
Trains the model using the provided dataframe and default parameters.
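
The two hunks above re-enable the previously commented-out ignore_warnings decorators from sklearn.utils._testing, adding "# type: ignore" so mypy accepts the untyped decorator. A minimal, standalone sketch of how that decorator silences a warning category around a call (the function here is made up for illustration):

    import warnings

    from sklearn.utils._testing import ignore_warnings

    @ignore_warnings(category=RuntimeWarning)
    def noisy() -> int:
        # Would normally surface a RuntimeWarning to the caller; the decorator
        # filters this category only for the duration of the call.
        warnings.warn("transient numerical issue", RuntimeWarning)
        return 42

    print(noisy())  # prints 42 with the warning suppressed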
7 changes: 4 additions & 3 deletions spare_scores/mlp_torch.py
@@ -8,6 +8,8 @@
import torch
import torch.nn as nn
import torch.optim as optim
from data_prep import logging_basic_config
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import (
accuracy_score,
balanced_accuracy_score,
@@ -23,10 +25,9 @@
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils._testing import ignore_warnings
from torch.utils.data import DataLoader, Dataset

from spare_scores.data_prep import logging_basic_config

device = (
"cuda"
if torch.cuda.is_available()
@@ -383,7 +384,7 @@ def set_parameters(self, **parameters: Any) -> None:
else:
self.__dict__.update(parameters)

# @ignore_warnings(category=(ConvergenceWarning, UserWarning))
@ignore_warnings(category=(ConvergenceWarning, UserWarning)) # type: ignore
def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict:
logger = logging_basic_config(verbose, content_only=True)

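
The first hunk above begins the standard PyTorch device-selection expression; the rest of it is collapsed in this view, so the following is only an assumed reconstruction of that common idiom, not necessarily the exact code in mlp_torch.py:

    import torch

    # Prefer CUDA, then Apple MPS, falling back to CPU (assumed full expression).
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps" if torch.backends.mps.is_available() else "cpu"
    )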
22 changes: 9 additions & 13 deletions spare_scores/spare_scores.py
@@ -3,28 +3,21 @@

import numpy as np
import pandas as pd

from spare_scores.classes import MetaData, SpareModel
from spare_scores.data_prep import (
from classes import MetaData, SpareModel
from data_prep import (
check_test,
check_train,
convert_cat_variables,
logging_basic_config,
)
from spare_scores.util import (
check_file_exists,
is_unique_identifier,
load_df,
load_model,
save_file,
)
from util import check_file_exists, is_unique_identifier, load_df, load_model, save_file


def spare_train(
df: Union[pd.DataFrame, str],
to_predict: str,
model_type: str = "SVM",
pos_group: Any = "",
pos_group: str = "",
key_var: str = "",
data_vars: list = [],
ignore_vars: list = [],
@@ -33,7 +26,7 @@ def spare_train(
verbose: int = 1,
logs: str = "",
**kwargs: Any,
) -> Any:
) -> dict:
"""
Trains a SPARE model, either classification or regression
@@ -114,7 +107,7 @@
try:
df, predictors, mdl_task = check_train(
df, predictors, to_predict, verbose, pos_group
) # type: ignore
)
except Exception as e:
err = "Dataset check failed before training was initiated."
logger.error(err)
@@ -207,6 +200,9 @@
if output != "" and output is not None:
save_file(result, output, "train", logger)

print("###### PRINTING ########")
print(result)
print("####### END ###########")
res["status"] = "OK"
res["data"] = result
res["status_code"] = 0
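
With the changes above, spare_train is annotated to return a dict and fills the keys "status", "data", and "status_code" before returning. A small sketch of how a caller might consume that result; the values below are placeholders standing in for real training output, and the (model, metadata) layout of "data" is inferred from the updated test further down:

    # Placeholder result mimicking the structure spare_train now returns.
    res = {
        "status": "OK",
        "status_code": 0,
        "data": (object(), {"mdl_type": "SVM", "kernel": "linear"}),
    }

    if res["status_code"] == 0:
        model, metadata = res["data"]  # the test reads metadata at index 1
        print(res["status"], metadata["mdl_type"])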
9 changes: 4 additions & 5 deletions spare_scores/svm.py
@@ -4,13 +4,12 @@

import numpy as np
import pandas as pd
from data_prep import logging_basic_config
from sklearn import metrics
from sklearn.model_selection import GridSearchCV, RepeatedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC, LinearSVR

from spare_scores.data_prep import logging_basic_config
from spare_scores.util import expspace
from util import expspace


class SVMModel:
@@ -150,7 +149,7 @@ def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict:
sampled_df = df.sample(n=500, random_state=2023)
sampled_df = sampled_df.reset_index(drop=True)
self.train_initialize(sampled_df, self.to_predict)
self.run_CV(sampled_df, **kwargs)
self.run_CV(sampled_df)
# Use the optimal parameters to train the model on the full data
param_grid = {
par: expspace(
@@ -166,7 +165,7 @@ def fit(self, df: pd.DataFrame, verbose: int = 1, **kwargs: Any) -> dict:
# Train the model on the full data, with the optimal parameters
logger.info("Training SVM model...")
self.train_initialize(df, self.to_predict)
self.run_CV(df, **kwargs)
self.run_CV(df)
training_time = time.time() - start_time
self.stats["training_time"] = round(training_time, 4)

2 changes: 1 addition & 1 deletion tests/unit/test_data_prep.py
@@ -16,7 +16,7 @@
smart_unique,
)

from spare_scores.util import load_df
from util import load_df


class CheckDataPrep(unittest.TestCase):
13 changes: 7 additions & 6 deletions tests/unit/test_spare_scores.py
@@ -1,10 +1,11 @@
import unittest
from pathlib import Path

import sys
import pandas as pd

from spare_scores.spare_scores import spare_test, spare_train
from spare_scores.util import load_df, load_model
sys.path.append("../spare_scores")
from spare_scores import spare_test, spare_train
from util import load_df, load_model


class CheckSpareScores(unittest.TestCase):
@@ -74,9 +75,9 @@ def test_spare_train(self):
],
)

status, result = result["status"], result["data"]

metadata = result[1]
status, result_data = result["status"], result["data"]
metadata = result_data[1]
self.assertTrue(status == "OK")
self.assertTrue(metadata["mdl_type"] == self.model_fixture[1]["mdl_type"])
self.assertTrue(metadata["kernel"] == self.model_fixture[1]["kernel"])
5 changes: 3 additions & 2 deletions tests/unit/test_util.py
@@ -1,13 +1,14 @@
import logging
import os
import re
import sys
import unittest
from pathlib import Path

import numpy as np
import pandas as pd

from spare_scores.util import (
sys.path.append("../spare_scores")
from util import (
add_file_extension,
check_file_exists,
convert_to_number_if_possible,