diff --git a/anai/supervised/__init__.py b/anai/supervised/__init__.py index b8be92d..161224b 100644 --- a/anai/supervised/__init__.py +++ b/anai/supervised/__init__.py @@ -267,7 +267,13 @@ def __init__( optuna.logging.set_verbosity(optuna.logging.WARNING) self.explainer = Explainer() self.ensembler = Ensembler( - "classification", n_estimators=3, n_clusters=3) + "classification", + n_estimators=3, + n_clusters=3, + estimators=None, + verbose=False, + random_state=self.random_state + ) self.fit_params = {} self.encoded_column_names = [] self.dimension_handler = DimensionHandler() @@ -504,64 +510,61 @@ def __fitall(self): self.result_df["Accuracy"] = self.acc self.result_df["Cross Validated Accuracy"] = self.k_fold_accuracy self.result_df["Model"] = self.classifier_model - if self.tune: - self.result_df["Best Parameters"] = self.best_params - self.result_df["Best Accuracy"] = self.bestacc - self.best_classifier = Best( - self.result_df.loc[self.result_df["Best Accuracy"].idxmax()], - self.tune, - ) - else: - self.best_classifier = Best( - self.result_df.loc[self.result_df["Cross Validated Accuracy"].idxmax( - )], - self.tune, - ) self.result_df = self.result_df.sort_values( - by="Cross Validated Accuracy", ascending=False - ) + by="Cross Validated Accuracy", ascending=False) top_result = self.result_df.sort_values( - by=["Cross Validated Accuracy"], ascending=False - ).head(5) + by=['Cross Validated Accuracy'], ascending=False).head(5) estimators = [] + est1 = [] for i in range(top_result.shape[0]): - if not top_result.iloc[i]["Name"] == "K-Nearest Neighbors": + if not top_result.iloc[i]['Name'] == 'K-Nearest Neighbors': estimators.append( - (top_result.iloc[i]["Name"], top_result.iloc[i]["Model"]) - ) - print(Fore.YELLOW + "Ensembling on top {} models\n".format(5 if len(estimators) > 5 else len(estimators))) + top_result.iloc[i]['Model']) + est1.append( + (top_result.iloc[i]['Name'], top_result.iloc[i]['Model'])) + print(Fore.YELLOW + "Ensembling on top {} models\n".format( + 5 if len(estimators) > 5 else len(estimators))) try: ens_result = self.ensembler.ensemble( - self.X_train, - self.y_train, - self.X_val, - self.y_val, - cv_folds=self.cv_folds, - estimators=estimators, - ) - self.result_df = pd.concat([self.result_df, ens_result], axis=0) + self.X_train, self.y_train, self.X_val, self.y_val, cv_folds=self.cv_folds, estimators=estimators, est=est1) + self.result_df = pd.concat( + [self.result_df, ens_result], axis=0) except Exception as error: - print(Fore.RED + "Ensembling Failed with error: ", error, "\n") + print(traceback.format_exc()) + print(Fore.RED+"Ensembling Failed with error: ", error, "\n") self.result_df = self.result_df.sort_values( - by=["Cross Validated Accuracy"], ascending=False - ).reset_index(drop=True) + by=['Cross Validated Accuracy'], ascending=False).reset_index(drop=True) print(Fore.GREEN + "Training Done [", "\u2713", "]\n") print(Fore.CYAN + "Results Below\n") - display(self.result_df.drop(["Model"], axis=1)) + if self.tune: + self.result_df["Best Parameters"] = self.best_params + self.result_df["Best Accuracy"] = self.bestacc + self.best_classifier = Best( + self.result_df.loc[self.result_df["Best Accuracy"].idxmax()], + self.tune, + ) + else: + self.best_classifier = Best( + self.result_df.loc[self.result_df["Cross Validated Accuracy"].idxmax( + )], self.tune + ) + display(self.result_df.drop(['Model'], axis=1)) print(Fore.GREEN + "\nCompleted ANAI Run [", "\u2713", "]\n") if len(self.model_to_predict) > 1: - self.best_classifier_path, self.scaler_path = self.save( - best=True, model=self.best_classifier.model, scaler=self.sc + self.model = self.best_classifier.model + self.model_name = self.best_classifier.name + self.end = time.time() + final_time = self.end - self.start + self.meta_path = self.save( + best=True ) print( Fore.CYAN - + "Saved Best Model to {} and its scaler to {}".format( - self.best_classifier_path, self.scaler_path + + "Saved Best Model at {} ".format( + self.meta_path ), "\n", ) - self.end = time.time() - final_time = self.end - self.start print(Fore.BLUE + "Time Elapsed : ", f"{final_time:.2f}", "seconds \n") return @@ -1006,7 +1009,7 @@ def __init__( n_clusters=3, estimators=None, verbose=False, - result_df=None, + random_state=self.random_state ) self.fit_params = {} self.dimension_handler = DimensionHandler() @@ -1281,46 +1284,26 @@ def __fitall(self): self.result_df["Root Mean Squared Error"] = self.rmse self.result_df["Cross Validated Accuracy"] = self.k_fold_accuracy self.result_df["Model"] = self.regressor_model - if self.tune: - self.result_df["Best Parameters"] = self.best_params - self.result_df["Best Accuracy"] = self.bestacc - self.result_df["Trained Model"] = self.tuned_trained_model - self.best_regressor = Best( - self.result_df.loc[self.result_df["Best Accuracy"].idxmax()], - self.tune, - isReg=True, - ) - else: - self.best_regressor = Best( - self.result_df.loc[self.result_df["Cross Validated Accuracy"].idxmax( - )], - self.tune, - isReg=True, - ) self.result_df = self.result_df.sort_values( - by=["Cross Validated Accuracy"], ascending=False - ) + by=['Cross Validated Accuracy'], ascending=False) top_result = self.result_df.sort_values( - by=["Cross Validated Accuracy"], ascending=False - ).head(5) + by=['Cross Validated Accuracy'], ascending=False).head(5) estimators = [] + est1 = [] for i in range(top_result.shape[0]): - if not top_result.iloc[i]["Name"] == "K-Nearest Neighbors": + if not top_result.iloc[i]['Name'] == 'K-Nearest Neighbors': estimators.append( - (top_result.iloc[i]["Name"], top_result.iloc[i]["Model"]) - ) + top_result.iloc[i]['Model']) + est1.append( + (top_result.iloc[i]['Name'], top_result.iloc[i]['Model'])) + self.estimators = estimators print(Fore.YELLOW + "Ensembling on top {} models\n".format( 5 if len(estimators) > 5 else len(estimators))) try: ens_result = self.ensembler.ensemble( - self.X_train, - self.y_train, - self.X_val, - self.y_val, - cv_folds=self.cv_folds, - estimators=estimators, - ) - self.result_df = pd.concat([self.result_df, ens_result], axis=0) + self.X_train, self.y_train, self.X_val, self.y_val, cv_folds=self.cv_folds, estimators=estimators, est=est1) + self.result_df = pd.concat( + [self.result_df, ens_result], axis=0) except Exception as error: print( Fore.RED + "Ensembling Failed with error: ", @@ -1328,25 +1311,41 @@ def __fitall(self): "\n", ) self.result_df = self.result_df.sort_values( - by=["Cross Validated Accuracy"], ascending=False - ).reset_index(drop=True) + by=['Cross Validated Accuracy'], ascending=False).reset_index(drop=True) print(Fore.GREEN + "Training Done [", "\u2713", "]\n") print(Fore.CYAN + "Results Below\n") - display(self.result_df.drop(["Model"], axis=1)) + if self.tune: + self.result_df["Best Parameters"] = self.best_params + self.result_df["Best Accuracy"] = self.bestacc + self.result_df["Trained Model"] = self.tuned_trained_model + self.best_regressor = Best( + self.result_df.loc[self.result_df["Best Accuracy"].idxmax()], + self.tune, + isReg=True, + ) + else: + self.best_regressor = Best( + self.result_df.loc[self.result_df["Cross Validated Accuracy"].idxmax( + )], + self.tune, + isReg=True, + ) + display(self.result_df.drop(['Model'], axis=1)) print(Fore.GREEN + "\nCompleted ANAI Run [", "\u2713", "]\n") if len(self.model_to_predict) > 1: - self.best_regressor_path, self.scaler_path = self.save( - best=True, model=self.best_regressor.model, scaler=self.sc + self.model = self.best_regressor.model + self.end = time.time() + final_time = self.end - self.start + self.meta_path = self.save( + best=True, ) print( Fore.CYAN - + "Saved Best Model to {} and its scaler to {}".format( - self.best_regressor_path, self.scaler_path + + "Saved Best Model at {} ".format( + self.meta_path ), "\n", ) - self.end = time.time() - final_time = self.end - self.start print(Fore.BLUE + "Time Elapsed : ", f"{final_time:.2f}", "seconds \n") return diff --git a/setup.py b/setup.py index 38ac1c2..48a8b80 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ ], include=["anai.*", "anai"], ), - version="0.1.2-alpha-2", + version="0.1.2-alpha-3", license="Apache License 2.0", description="Automated ML", url="https://github.com/Revca-ANAI/ANAI",