Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve user interface #195

Merged
merged 5 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions autoemulate/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from autoemulate.plotting import _plot_model
from autoemulate.plotting import _plot_results
from autoemulate.printing import _print_cv_results
from autoemulate.printing import _print_setup
from autoemulate.save import ModelSerialiser
from autoemulate.utils import get_mean_scores
from autoemulate.utils import get_model_name
Expand Down Expand Up @@ -125,8 +126,13 @@ def setup(
self.n_jobs = n_jobs
self.logger = _configure_logging(log_to_file=log_to_file)
self.is_set_up = True
self.dim_reducer = dim_reducer
self.reduce_dim = reduce_dim
self.folds = folds
self.cv_results = {}

self.print_setup()

def _check_input(self, X, y):
"""Checks and possibly converts the input data.

Expand Down Expand Up @@ -198,7 +204,12 @@ def compare(self):
self.scores_df = pd.DataFrame(
columns=["model", "metric", "fold", "score"]
).astype(
{"model": "object", "metric": "object", "fold": "int64", "score": "float64"}
{
"model": "object",
"metric": "object",
"fold": "int64",
"score": "float64",
}
)

for i in range(len(self.models)):
Expand Down Expand Up @@ -368,6 +379,17 @@ def load_model(self, path=None):

return serialiser._load_model(path)

def print_setup(self) -> None:
    """Display the current configuration of this AutoEmulate object.

    Thin wrapper: the instance is handed to ``_print_setup``, which does
    the actual formatting and output.

    Returns
    -------
    None
    """
    _print_setup(self)

def print_results(self, model=None, sort_by="r2"):
"""Print cv results.

Expand Down Expand Up @@ -470,5 +492,10 @@ def plot_model(self, model, plot="standard", n_cols=2, figsize=None):
Number of columns in the plot grid for multi-output. Default is 2.
"""
_plot_model(
model, self.X[self.test_idxs], self.y[self.test_idxs], plot, n_cols, figsize
model,
self.X[self.test_idxs],
self.y[self.test_idxs],
plot,
n_cols,
figsize,
)
92 changes: 92 additions & 0 deletions autoemulate/printing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import pandas as pd

from autoemulate.utils import get_mean_scores
from autoemulate.utils import get_model_name
from autoemulate.utils import get_model_scores

try:
__IPYTHON__
_in_ipython_session = True
except NameError:
_in_ipython_session = False
Comment on lines +7 to +11
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

works well for me


if _in_ipython_session:
from IPython.display import display, HTML


def _print_cv_results(models, scores_df, model=None, sort_by="r2"):
"""Print cv results.
Expand Down Expand Up @@ -34,3 +45,84 @@ def _print_cv_results(models, scores_df, model=None, sort_by="r2"):
scores = get_model_scores(scores_df, model)
print(f"Scores for {model} across all folds:")
print(scores)


def _print_setup(cls):
"""Print the setup of the AutoEmulate object.

If in an IPython session, the setup will be displayed as an HTML table.

Parameters
----------
cls : AutoEmulate
The AutoEmulate object.
"""
if not cls.is_set_up:
raise RuntimeError("Must run setup() before print_setup()")

models = "\n- " + "\n- ".join(
[
x[1].__class__.__name__
for pipeline in cls.models
for x in pipeline.steps
if x[0] == "model"
]
)
metrics = "\n- " + "\n- ".join([metric.__name__ for metric in cls.metrics])

settings = pd.DataFrame(
[
str(cls.X.shape),
str(cls.y.shape),
str(cls.train_idxs.shape[0]),
str(cls.test_idxs.shape[0]),
str(cls.param_search),
str(cls.search_type),
str(cls.param_search_iters),
str(cls.scale),
str(cls.scaler.__class__.__name__ if cls.scaler is not None else "None"),
str(cls.reduce_dim),
str(
cls.dim_reducer.__class__.__name__
if cls.dim_reducer is not None
else "None"
),
str(cls.cv.__class__.__name__ if cls.cv is not None else "None"),
str(cls.folds),
str(cls.n_jobs if cls.n_jobs is not None else "1"),
],
index=[
"Simulation input shape (X)",
"Simulation output shape (y)",
"# training set samples (train_idxs)",
"# test set samples (test_idxs)",
"Do hyperparameter search (param_search)",
"Type of hyperparameter search (search_type)",
"# sampled parameter settings (param_search_iters)",
"Scale data before fitting (scale)",
"Scaler (scaler)",
"Dimensionality reduction before fitting (reduce_dim)",
"Dimensionality reduction method (dim_reducer)",
"Cross-validation strategy (cv)",
"# folds (folds)",
"# parallel jobs (n_jobs)",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One afterthought here: maybe this would be most useful if the argument names (rather than the attribute names) were in brackets. Would you agree?

If so, the two things to change are:

"# training set samples (train_idxs)",
"# test set samples (test_idxs)",

to
"test set size (test_set_size)"
(and the value could be 20% or 0.2)

and
"Cross-validation strategy (cv)",
to
"Cross-validation strategy (fold_strategy)",

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that makes sense. I was thinking that these should refer to properties that exist on the object (hence cv instead of fold_strategy, since self.cv has been set; the same goes for train_idxs and test_idxs, as those also exist as properties, whereas test_set_size is not a property on the object)... But since we're just showing the setup settings, I suppose it makes sense!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See: c0504de

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, thanks @kallewesterling ! Feel free to merge!

],
columns=["Values"],
)

settings_str = settings.to_string(index=True, header=False)
width = len(settings_str.split("\n")[0])

if _in_ipython_session:
display(HTML("<p>AutoEmulate is set up with the following settings:</p>"))
display(HTML(settings.to_html()))
return

print("AutoEmulate is set up with the following settings:")
print("-" * width)
print(settings_str)
print("-" * width)
print("Models:" + models)
print("-" * width)
print("Metrics:" + metrics)
print("-" * width)
Loading