From 3268904a7340523139d2a770eb3ee0f9ce7ad9ba Mon Sep 17 00:00:00 2001
From: Spiros Maggioros
Date: Sat, 27 Jul 2024 01:00:41 +0300
Subject: [PATCH] Some changes to MLP model and some syntax fixes

---
 spare_scores/data_prep.py       |  6 ++++--
 spare_scores/mlp.py             | 10 +++++-----
 spare_scores/mlp_torch.py       |  2 +-
 spare_scores/svm.py             |  4 ++--
 test_data_prep.py               |  0
 tests/unit/test_spare_scores.py |  2 +-
 6 files changed, 13 insertions(+), 11 deletions(-)
 create mode 100644 test_data_prep.py

diff --git a/spare_scores/data_prep.py b/spare_scores/data_prep.py
index 64fe56f..a262c70 100644
--- a/spare_scores/data_prep.py
+++ b/spare_scores/data_prep.py
@@ -299,7 +299,9 @@ def age_sex_match(
         )[1]
     else:
         p_sex = 1
-    logging.debug(f" Original: P_age: {np.round(p_age,2)}/ P_sex: {np.round(p_sex,2)}")
+    logging.debug(
+        f" Original: P_age: {np.round(p_age, 2)}/ P_sex: {np.round(p_sex, 2)}"
+    )
     p_age_all, p_sex_all = np.array(p_age), np.array(p_sex)

     while np.min([p_age, p_sex]) < p_threshold:
@@ -345,7 +347,7 @@ def age_sex_match(
             df1, df2 = df2.copy(), df1.copy()

     logging.debug(f" {n_orig - len(df1.index) - len(df2.index)} participants excluded")
-    logging.debug(f" Final: P_age: {np.round(p_age,2)}/ P_sex {np.round(p_sex,2)}")
+    logging.debug(f" Final: P_age: {np.round(p_age, 2)}/ P_sex {np.round(p_sex, 2)}")
     logging.info("Age/Sex matched!")
     if no_df2:
         return pd.concat([df1, df2], ignore_index=True)
diff --git a/spare_scores/mlp.py b/spare_scores/mlp.py
index 419e48c..d101563 100644
--- a/spare_scores/mlp.py
+++ b/spare_scores/mlp.py
@@ -99,7 +99,7 @@ def __init__(
             "mlp__alpha": [0.001, 0.01, 0.05, 0.1],
             "mlp__learning_rate": ["constant", "adaptive"],
             "mlp__early_stopping": [True],
-            "mlp__max_iter": [5000],
+            "mlp__max_iter": [500],
         }

     def set_parameters(self, **parameters: Any) -> None:
@@ -112,11 +112,11 @@ def _fit(self, df: pd.DataFrame) -> None:
         y = df[self.to_predict].astype("float64")

         if self.task == "Regression":
-            mlp = MLPRegressor(early_stopping=True, max_iter=5000)
+            mlp = MLPRegressor(early_stopping=True, max_iter=500)
             scoring = "neg_mean_absolute_error"
             metrics = ["MAE", "RMSE", "R2"]
         else:
-            mlp = MLPClassifier(early_stopping=True, max_iter=5000)
+            mlp = MLPClassifier(early_stopping=True, max_iter=500)
             scoring = "balanced_accuracy"
             metrics = [
                 "AUC",
@@ -136,7 +136,7 @@ def _fit(self, df: pd.DataFrame) -> None:
             pipeline_obj,
             self.param_grid,
             scoring=scoring,
-            cv=KFold(n_splits=5, shuffle=True, random_state=10086),
+            cv=KFold(n_splits=5, shuffle=True, random_state=42),
             refit=True,
         )
         grid_search.fit(X, y)
@@ -260,5 +260,5 @@ def get_stats(self, y: np.ndarray, y_hat: np.ndarray) -> None:
     def output_stats(self) -> None:
         for key, value in self.stats.items():
             logging.info(
-                f">> {key} = {np.mean(value):#.4f} \u00B1 {np.std(value):#.4f}"
+                f">> {key} = {np.mean(value): #.4f} \u00B1 {np.std(value): #.4f}"
             )
diff --git a/spare_scores/mlp_torch.py b/spare_scores/mlp_torch.py
index 3815d94..3715432 100644
--- a/spare_scores/mlp_torch.py
+++ b/spare_scores/mlp_torch.py
@@ -488,5 +488,5 @@ def predict(self, df: pd.DataFrame) -> np.ndarray:
     def output_stats(self) -> None:
         for key, value in self.stats.items():
             logging.info(
-                f">> {key} = {np.mean(value):#.4f} \u00B1 {np.std(value):#.4f}"
+                f">> {key} = {np.mean(value): #.4f} \u00B1 {np.std(value): #.4f}"
             )
diff --git a/spare_scores/svm.py b/spare_scores/svm.py
index c33e375..5734927 100644
--- a/spare_scores/svm.py
+++ b/spare_scores/svm.py
@@ -272,7 +272,7 @@ def train_initialize(self, df: pd.DataFrame, to_predict: str) -> None:
     def run_CV(self, df: pd.DataFrame) -> None:
         for i, fold in enumerate(self.folds):
             if i % self.n_repeats == 0:
-                logging.info(f" FOLD {int(i/self.n_repeats+1)}...")
+                logging.info(f" FOLD {int(i / self.n_repeats + 1)}...")
             X_train, X_test, y_train, y_test = self.prepare_sample(
                 df, fold, self.scaler[i], classify=self.classify
             )
@@ -360,5 +360,5 @@ def correct_reg_bias(self, fold: Any, y_test: list) -> Any:
     def output_stats(self) -> None:
         for key, value in self.stats.items():
             logging.info(
-                f">> {key} = {np.mean(value):#.4f} \u00B1 {np.std(value):#.4f}"
+                f">> {key} = {np.mean(value): #.4f} \u00B1 {np.std(value): #.4f}"
             )
diff --git a/test_data_prep.py b/test_data_prep.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py
index f45dabf..46857c9 100644
--- a/tests/unit/test_spare_scores.py
+++ b/tests/unit/test_spare_scores.py
@@ -55,7 +55,7 @@ def test_spare_test_SVM(self):
         )
         self.assertTrue(result == ["ROI1"])

-    def test_spare_train_MLP(self): 
+    def test_spare_train_MLP(self):
         self.df_fixture = load_df("../fixtures/sample_data.csv")
         self.model_fixture = load_model("../fixtures/sample_model.pkl.gz")
         # Test case 1: Testing spare_train with MLP model
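
Note: for context, a minimal sketch of how the values touched in mlp.py fit together after this patch. The Pipeline/StandardScaler layout and the X/y names are assumptions for illustration, not code from the repository:

    # Illustrative sketch only -- mirrors the patched values in mlp.py;
    # the pipeline layout and variable names are assumptions, not repo code.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.neural_network import MLPRegressor
    from sklearn.model_selection import GridSearchCV, KFold

    param_grid = {
        "mlp__alpha": [0.001, 0.01, 0.05, 0.1],
        "mlp__learning_rate": ["constant", "adaptive"],
        "mlp__early_stopping": [True],
        "mlp__max_iter": [500],  # lowered from 5000 by this patch
    }
    pipeline_obj = Pipeline(
        [
            ("scaler", StandardScaler()),
            ("mlp", MLPRegressor(early_stopping=True, max_iter=500)),
        ]
    )
    grid_search = GridSearchCV(
        pipeline_obj,
        param_grid,
        scoring="neg_mean_absolute_error",
        cv=KFold(n_splits=5, shuffle=True, random_state=42),  # seed was 10086
        refit=True,
    )
    # grid_search.fit(X, y)  # X: feature DataFrame, y: float64 target column

Since early stopping is enabled everywhere, the lower max_iter cap mainly bounds worst-case grid-search time rather than changing converged results.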
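One behavioural note on the output_stats hunks: the space added inside the format spec (":#.4f" -> ": #.4f") is the ' ' sign option, so non-negative values now print with a leading space. A quick check, using only standard Python formatting (nothing repo-specific):

    # ' ' sign option: reserves a space where a minus sign would go.
    import numpy as np

    value = [0.1234, 0.5678]
    print(f">> MAE = {np.mean(value):#.4f}")   # >> MAE = 0.3456
    print(f">> MAE = {np.mean(value): #.4f}")  # >> MAE =  0.3456 (extra space)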