copying of configspace #7

Merged: 7 commits, merged on Feb 22, 2024
examples/main.py (1 addition, 1 deletion)

@@ -20,7 +20,7 @@
 f = synthetic_functions.StyblinskiTang.for_n_dimensions(3, seed=seed)
 cs = f.config_space

-selected_hyperparameter = cs.get_hyperparameter("x1")
+selected_hyperparameter = cs["x1"]

 # Sampler
 sampler = BayesianOptimizationSampler(f, cs, initial_points=f.ndim * 4, seed=seed)
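This hunk shows the migration pattern the rest of the PR repeats: recent ConfigSpace releases expose the configuration space through Python's Mapping protocol and deprecate the get_* accessors. A minimal sketch of the correspondence, assuming ConfigSpace >= 0.6; the space and names below are illustrative, not from the PR (and in 1.x releases add_hyperparameter is itself deprecated in favor of cs.add):

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace(seed=0)
cs.add_hyperparameter(CSH.UniformFloatHyperparameter("x1", lower=-5.0, upper=5.0))
cs.add_hyperparameter(CSH.UniformFloatHyperparameter("x2", lower=-5.0, upper=5.0))

hp = cs["x1"]                    # was: cs.get_hyperparameter("x1")
all_hps = list(cs.values())      # was: cs.get_hyperparameters()
names = list(cs.keys())          # was: cs.get_hyperparameter_names()
n_dims = len(list(cs.values()))  # dimensionality, as computed throughout this PR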
examples/main_meta_pdp.py (1 addition, 1 deletion)

@@ -85,7 +85,7 @@ def optimize_mc():
     f = blackbox_function

     # Optimize
-    n_dim = len(cs.get_hyperparameters())
+    n_dim = len(list(cs.values()))
     sampler = BayesianOptimizationSampler(
         f,
         cs,
examples/main_paper.py (2 additions, 1 deletion)

@@ -10,7 +10,7 @@
 from tqdm import tqdm

 from pyPDP.algorithms.partitioner.decision_tree_partitioner import DecisionTreePartitioner
-from pyPDP.algorithms.ice import ICE, ICECurve
+from pyPDP.algorithms.ice import ICE
 from pyPDP.algorithms.pdp import PDP
 from pyPDP.blackbox_functions import BlackboxFunction, BlackboxFunctionND
 from pyPDP.blackbox_functions.synthetic_functions import StyblinskiTang

@@ -34,6 +34,7 @@
 data_folder = Path(__file__).parent.parent / 'data'
 data_folder.mkdir(parents=True, exist_ok=True)

+
 def figure_1_3(f: BlackboxFunction = StyblinskiTang.for_n_dimensions(2, seed=seed),
                samplers: Dict[str, Sampler] = None,
                sampled_points=50):
examples/main_presentation.py (1 addition, 1 deletion)

@@ -9,7 +9,7 @@
 from pyPDP.algorithms.ice import ICE
 from pyPDP.algorithms.partitioner.decision_tree_partitioner import DecisionTreePartitioner
 from pyPDP.algorithms.pdp import PDP
-from pyPDP.blackbox_functions import BlackboxFunctionND, BlackboxFunction
+from pyPDP.blackbox_functions import BlackboxFunction
 from pyPDP.blackbox_functions.synthetic_functions import StyblinskiTang
 from pyPDP.sampler.acquisition_function import LowerConfidenceBound
 from pyPDP.sampler.bayesian_optimization import BayesianOptimizationSampler
examples/main_random_forest.py (1 addition, 1 deletion)

@@ -105,7 +105,7 @@ def plot_tree_data(log_filename: str, img_filename: str):
     # plt.boxplot(delta_mcs, positions=[0], manage_ticks=False)
     # plt.plot([], [], color='orange', label='Decision Tree')
     plt.plot(x, dt_mean, '*', color='red', label='Decision Tree Mean')
-    plt.plot(x, dt_mean + dt_std, '*', color='orange', label=f'Decision Tree $\mu\pm$ $\sigma$')
+    plt.plot(x, dt_mean + dt_std, '*', color='orange', label=f'Decision Tree $\\mu\\pm$ $\\sigma$')
    plt.plot(x, dt_mean - dt_std, '*', color='orange')

     # rf mean curve
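The doubled backslashes above are not cosmetic: in string literals (f-strings included), unrecognized escape sequences such as \m raise a DeprecationWarning that recent Python versions upgraded to SyntaxWarning, with an eventual error planned. A small sketch of the two equivalent fixes (not from the PR; the raw-string variant is an alternative the PR did not take):

# '\\mu' in a normal literal and '\mu' in a raw literal are the same characters,
# which is all matplotlib's mathtext needs.
label_escaped = f'Decision Tree $\\mu\\pm$ $\\sigma$'  # doubled backslashes
label_raw = rf'Decision Tree $\mu\pm$ $\sigma$'        # raw f-string alternative
assert label_escaped == label_raw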
examples/main_sampler_analysis.py (1 addition, 1 deletion)

@@ -149,7 +149,7 @@ def plot_sampling_bias(
     )[0]
     ax_variances.plot(x, np.sqrt(mean_pdp.y_variances))
     # Set titles
-    ax_pdp.set_title(f"{name}\n(mmd={np.mean(arr_mmd):.2f}$\pm${np.std(arr_mmd):.2f})")
+    ax_pdp.set_title(f"{name}\n(mmd={np.mean(arr_mmd):.2f}$\\pm${np.std(arr_mmd):.2f})")
     ax_variances.set_ylabel("Std")

     # fig1.savefig("Figure 1.png")
pyPDP/__init__.py (5 additions, 3 deletions)

@@ -1,8 +1,10 @@
 name = "pyPDPPartitioner"
 package_name = "pyPDP"
 author = "Yannik Mahlau and Dominik Woiwode"
-author_email = "[email protected]"
-description = "A python implementation of 'Explaining Hyperparameter Optimization via Partial Dependence Plots' by Moosbauer et al."
-version = "0.1.8"
+author_email = "[email protected]"
+description = ("A python implementation of "
+               "'Explaining Hyperparameter Optimization via Partial Dependence Plots'"
+               " by Moosbauer et al.")
+version = "0.1.9"
 license = "MIT"
 url = "https://github.com/dwoiwode/py-pdp-partitioner"
pyPDP/algorithms/__init__.py (1 addition, 1 deletion)

@@ -51,4 +51,4 @@ def n_selected_hyperparameter(self) -> int:

     @property
     def num_features(self) -> int:
-        return len(self.config_space.get_hyperparameters())
+        return len(list(self.config_space.values()))
pyPDP/algorithms/ice.py (3 additions, 3 deletions)

@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cached_property
-from typing import Optional, List
+from typing import Optional

 import ConfigSpace as CS
 import ConfigSpace.hyperparameters as CSH

@@ -142,7 +142,7 @@ def implied_config_space(self) -> CS.ConfigurationSpace:
         min_values = unscale(np.min(self.x_ice, axis=0), self.full_config_space)
         max_values = unscale(np.max(self.x_ice, axis=0), self.full_config_space)
         cs = CS.ConfigurationSpace()
-        for hp, min_, max_ in zip(self.full_config_space.get_hyperparameters(), min_values, max_values):
+        for hp, min_, max_ in zip(list(self.full_config_space.values()), min_values, max_values):
             assert isinstance(hp, CSH.NumericalHyperparameter)
             if min_ == max_:
                 hp_copy = CSH.Constant(hp.name, value=min_)

@@ -202,7 +202,7 @@ def _calculate(self):
         # Retrieve hp index from cs
         cs = self.config_space
         idx = get_selected_idx(self.selected_hyperparameter, cs)
-        num_features = len(cs.get_hyperparameters())
+        num_features = len(list(cs.values()))

         # retrieve x-values from config
         x_s = self.grid_points
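The implied_config_space hunk above shows only the degenerate branch; the full rule collapses a hyperparameter whose ICE slice spans a single value into a Constant and otherwise tightens its bounds. A sketch of that rule under the assumption (asserted in the diff) that every hyperparameter is numerical; shrink is a hypothetical helper name, not part of the codebase:

import ConfigSpace.hyperparameters as CSH

def shrink(hp: CSH.NumericalHyperparameter, min_: float, max_: float) -> CSH.Hyperparameter:
    # Degenerate range: the hyperparameter is effectively fixed.
    if min_ == max_:
        return CSH.Constant(hp.name, value=min_)
    # Otherwise keep the name and scale but tighten the bounds.
    return CSH.UniformFloatHyperparameter(hp.name, lower=min_, upper=max_, log=hp.log)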
pyPDP/algorithms/partitioner/__init__.py (5 additions, 4 deletions)

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import Tuple, Optional, List, Callable
+from typing import Tuple, Optional, List

 import ConfigSpace as CS
 import numpy as np

@@ -68,7 +68,7 @@ def negative_log_likelihood(self, true_function: BlackboxFunction) -> float:
         selected_hyperparameter_names = {hp.name for hp in self.selected_hyperparameter}
         not_selected_hp = [
             hp
-            for hp in true_function.config_space.get_hyperparameters()
+            for hp in list(true_function.config_space.values())
             if hp.name not in selected_hyperparameter_names
         ]

@@ -127,6 +127,7 @@ def plot_confidences(self,
                              confidence_max_sigma=confidence_max_sigma,
                              ax=ax)

+
 class Partitioner(Algorithm, ABC):
     def __init__(self, surrogate_model: SurrogateModel,
                  selected_hyperparameter: SelectedHyperparameterType,

@@ -159,7+160,7 @@ def __init__(self, surrogate_model: SurrogateModel,
         selected_hyperparameter_names = {hp.name for hp in self.selected_hyperparameter}
         selected_hyperparameter_names = selected_hyperparameter_names.union({hp.name for hp in self.not_splittable_hp})
         self.possible_split_parameters: List[CSH.Hyperparameter] = [
-            hp for hp in cs.get_hyperparameters()
+            hp for hp in list(cs.values())
             if hp.name not in selected_hyperparameter_names
         ]

@@ -175,5 +176,5 @@ def ice(self) -> ICE:
         return self._ice

     @abstractmethod
-    def partition(self, max_depth: int = 1): # -> List[Region]:
+    def partition(self, max_depth: int = 1):  # -> List[Region]:
         pass
pyPDP/algorithms/partitioner/decision_tree_partitioner.py (3 additions, 3 deletions)

@@ -10,7 +10,7 @@
 from pyPDP.surrogate_models import SurrogateModel
 from pyPDP.utils.plotting import get_ax, check_and_set_axis, get_random_color, plot_config_space
 from pyPDP.utils.typing import SelectedHyperparameterType, ColorType
-from pyPDP.utils.utils import scale_float, unscale_float, unscale, ConfigSpaceHolder, get_hyperparameters
+from pyPDP.utils.utils import scale_float, unscale_float, unscale, ConfigSpaceHolder


 class SplitCondition(ConfigSpaceHolder):

@@ -97,7 +97,7 @@ def __contains__(self, item: CS.Configuration) -> bool:
     def implied_config_space(self, seed: Optional[int] = None) -> CS.ConfigurationSpace:
         # copy cs
         hp_dic = {}
-        for hp in self.config_space.get_hyperparameters():
+        for hp in list(self.config_space.values()):
             if isinstance(hp, CSH.NumericalHyperparameter):
                 new_hp = CSH.UniformFloatHyperparameter(hp.name, lower=hp.lower, upper=hp.upper, log=hp.log)
                 hp_dic[hp.name] = new_hp

@@ -331,6 +331,6 @@ def plot_incumbent_cs(self,
         ax = get_ax(ax)
         region = self.get_incumbent_region(incumbent)
         new_cs = region.implied_config_space()
-        all_hp = new_cs.get_hyperparameters()
+        all_hp = list(new_cs.values())
         not_selected_hp = sorted(list(set(all_hp) - set(self.selected_hyperparameter)), key=lambda hp: hp.name)
         plot_config_space(new_cs, x_hyperparameters=not_selected_hp, color=color, alpha=alpha, ax=ax)
pyPDP/algorithms/partitioner/random_forest_partitioner.py (5 additions, 1 deletion)

@@ -88,7 +88,11 @@ def partition(self,
             not_splittable_hp = list(set(self.possible_split_parameters) - set(splittable_hp))

             # create dt
-            dt = DecisionTreePartitioner.from_ICE(subset_ice, min_points_per_node=1, not_splittable_hp=not_splittable_hp)
+            dt = DecisionTreePartitioner.from_ICE(
+                subset_ice,
+                min_points_per_node=1,
+                not_splittable_hp=not_splittable_hp
+            )
             dt.partition(max_depth=max_depth)
             self.trees.append(dt)
pyPDP/algorithms/pdp.py (1 addition, 2 deletions)

@@ -1,7 +1,6 @@
 from functools import cached_property
-from typing import Iterable, Optional
+from typing import Optional

 import ConfigSpace.hyperparameters as CSH
 import numpy as np
 from matplotlib import pyplot as plt

pyPDP/blackbox_functions/__init__.py (1 addition, 1 deletion)

@@ -10,7 +10,7 @@
 class BlackboxFunction(ConfigSpaceHolder, ABC):
     def __init__(self, config_space: CS.ConfigurationSpace):
         super().__init__(config_space, seed=True)
-        self.ndim = len(self.config_space.get_hyperparameters())
+        self.ndim = len(list(self.config_space.values()))
         self.__name__ = str(self)

     def __call__(self, **kwargs) -> float:
pyPDP/blackbox_functions/synthetic_functions.py (9 additions, 11 deletions)

@@ -1,7 +1,7 @@
 """
 Collection of blackbox functions that can be minimized
 """
-from typing import Union, List, Tuple
+from typing import Union

 import ConfigSpace as CS
 import ConfigSpace.hyperparameters as CSH

@@ -116,17 +116,20 @@ class StyblinskiTang(BlackboxFunctionND):
     """

     def value_from_config(self, config: CS.Configuration) -> float:
-        x = np.asarray([config[hp.name] for hp in self.config_space.get_hyperparameters()])
+        x = np.asarray([config[hp.name] for hp in list(self.config_space.values())])

         return np.sum(np.power(x, 4) - 16 * np.power(x, 2) + 5 * x) / 2

     @staticmethod
     def _styblinski_tang_integral(x: float) -> float:
         return 0.5 * (0.2 * np.power(x, 5) - 16 / 3 * np.power(x, 3) + 2.5 * np.power(x, 2))

-    def pd_integral(self, *hyperparameters: Union[str, CSH.Hyperparameter], seed=None,
-                    return_offset: bool = False) -> Union[
-        CallableBlackboxFunction, tuple[CallableBlackboxFunction, float]]:
+    def pd_integral(
+            self,
+            *hyperparameters: Union[str, CSH.Hyperparameter],
+            seed=None,
+            return_offset: bool = False
+    ) -> Union[CallableBlackboxFunction, tuple[CallableBlackboxFunction, float]]:
         if len(hyperparameters) == 0:
             raise ValueError("Requires at least one hyperparameter for pd_integral")

@@ -141,7 +144,7 @@ def pd_integral(self, *hyperparameters: Union[str, CSH.Hyperparameter], seed=None,
         integral_value = self._styblinski_tang_integral(upper) - self._styblinski_tang_integral(lower)
         integral_offset += integral_value / (upper - lower)

-        hps = self.config_space.get_hyperparameters()
+        hps = list(self.config_space.values())
         reduced_cs = CS.ConfigurationSpace(seed=seed)
         hyperparameter_names = {hp.name for hp in hyperparameters}
         for hp in hps:

@@ -158,8 +161,3 @@ def integral(config: CS.Configuration):
         else:
             return (CallableBlackboxFunction(integral, reduced_cs, name=f"{self.__name__} d({hyperparameter_names})"),
                     integral_offset)
-
-
-
-
-
pyPDP/sampler/__init__.py (1 addition, 1 deletion)

@@ -49,7 +49,7 @@ def __del__(self):
     def _hash(self, *args) -> str:
         md = hashlib.md5()
         md.update(bytes(str(self.__class__), encoding="latin"))
-        md.update(bytes(str(self.config_space.get_hyperparameters()), encoding="latin"))
+        md.update(bytes(str(list(self.config_space.values())), encoding="latin"))
         for arg in args:
             md.update(bytes(str(arg), encoding="latin"))
         md.update(bytes(str(self.obj_func), encoding="latin"))
pyPDP/sampler/acquisition_function.py (11 additions, 3 deletions)

@@ -41,7 +41,8 @@ def _get_optimum_uniform_distribution(self) -> Tuple[CS.Configuration, float]:

         return max(config_value_pairs, key=lambda x: x[1])

-    def convert_configs(self, configuration: Union[CS.Configuration, np.ndarray]):
+    @staticmethod
+    def convert_configs(configuration: Union[CS.Configuration, np.ndarray]):
         if isinstance(configuration, CS.Configuration):
             x = np.asarray(configuration.get_array())
             x = x.reshape([1, -1])

@@ -87,8 +88,14 @@ def plot(self,
             ax.plot(x, acquisition_y, color=color_acquisition, label=self.__class__.__name__)

             if show_optimum:
-                ax.plot(list(optimum.values())[0], self(optimum), "*", color=color_optimum, label=f"Optimum ({optimum})",
-                        markersize=15)
+                ax.plot(
+                    list(optimum.values())[0],
+                    self(optimum),
+                    "*",
+                    color=color_optimum,
+                    label=f"Optimum ({optimum})",
+                    markersize=15
+                )
         elif n_hyperparameters == 2:  # 2D
             idx = get_selected_idx(x_hyperparameters, self.config_space)
             raise NotImplementedError("2D currently not implemented (#TODO)")

@@ -166,6 +173,7 @@ def update(self, eta: float):

 class LowerConfidenceBound(AcquisitionFunction):
     """LCB"""
+
     def __init__(self,
                  config_space: CS.ConfigurationSpace,
                  surrogate_model: SurrogateModel,
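convert_configs reads no instance state, so the @staticmethod decorator is the natural fix; the normalization it performs can now be exercised without constructing an acquisition function. A minimal sketch of that mapping, with an illustrative one-dimensional space (not from the PR):

import numpy as np
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace(seed=0)
cs.add_hyperparameter(CSH.UniformFloatHyperparameter("x1", lower=0.0, upper=10.0))
config = cs.sample_configuration()

# Configuration -> unit-scaled row vector, as in the method above.
x = np.asarray(config.get_array()).reshape([1, -1])
assert x.shape == (1, 1) and 0.0 <= x[0, 0] <= 1.0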
pyPDP/sampler/bayesian_optimization.py (19 additions, 15 deletions)

@@ -1,6 +1,6 @@
 import hashlib
 import warnings
-from typing import Callable, Any, List, Tuple, Union, Optional
+from typing import Callable, Any, List, Tuple, Union, Optional, Type, Dict

 import ConfigSpace as CS
 import numpy as np

@@ -14,15 +14,17 @@


 class BayesianOptimizationSampler(Sampler):
-    def __init__(self,
-                 obj_func: Callable[[Any], float],
-                 config_space: CS.ConfigurationSpace,
-                 surrogate_model: Optional[SurrogateModel] = None,
-                 initial_points: int = 5,
-                 acq_class=None,
-                 acq_class_kwargs=None,
-                 minimize_objective: bool = True,
-                 seed=None):
+    def __init__(
+            self,
+            obj_func: Callable[[Any], float],
+            config_space: CS.ConfigurationSpace,
+            surrogate_model: Optional[SurrogateModel] = None,
+            initial_points: int = 5,
+            acq_class: Optional[Type[AcquisitionFunction]] = None,
+            acq_class_kwargs: Optional[Dict[str, Any]] = None,
+            minimize_objective: bool = True,
+            seed=None
+    ):
         super().__init__(obj_func, config_space, minimize_objective, seed=seed)
         # Initialize class
         self.initial_points = initial_points  # number of initial points to be sampled

@@ -39,11 +41,13 @@ def __init__(self,
             acq_class_kwargs = {}
         if acq_class is None:
             acq_class = LowerConfidenceBound  # Default Lower Confidence Bound
-        self.acq_func: AcquisitionFunction = acq_class(self.config_space,
-                                                       self.surrogate_model,
-                                                       minimize_objective=minimize_objective,
-                                                       seed=seed,
-                                                       **acq_class_kwargs)
+        self.acq_func: AcquisitionFunction = acq_class(
+            self.config_space,
+            self.surrogate_model,
+            minimize_objective=minimize_objective,
+            seed=seed,
+            **acq_class_kwargs
+        )

         # Update cache according to additional arguments
         self.hash = self._hash(seed, acq_class, acq_class_kwargs, initial_points, surrogate_model.__class__)
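With the new annotations, acq_class is documented as a class to instantiate, not an instance. A hedged usage sketch consistent with the retyped signature; the tau keyword and the sample() call mirror the project's example scripts but should be checked against the actual LowerConfidenceBound and Sampler definitions:

from pyPDP.blackbox_functions.synthetic_functions import StyblinskiTang
from pyPDP.sampler.acquisition_function import LowerConfidenceBound
from pyPDP.sampler.bayesian_optimization import BayesianOptimizationSampler

f = StyblinskiTang.for_n_dimensions(3, seed=0)
sampler = BayesianOptimizationSampler(
    f,
    f.config_space,
    initial_points=f.ndim * 4,
    acq_class=LowerConfidenceBound,  # pass the class itself; None also defaults to LCB
    acq_class_kwargs={"tau": 0.1},   # forwarded as acq_class(..., **acq_class_kwargs)
    minimize_objective=True,
    seed=0,
)
sampler.sample(40)  # assumption: Sampler exposes a sample(n) driver, as in the examples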
pyPDP/sampler/grid_sampler.py (2 additions, 2 deletions)

@@ -31,9 +31,9 @@ def __init__(
     def _sample(self, n_points: int = 1, pbar: Union[ProgressDummy, tqdm] = ProgressDummy()):
         expected_length = len(self) + n_points
         if self._grid is None or len(self) + len(self._grid) < expected_length:
-            n_dims = len(self.config_space.get_hyperparameters())
+            n_dims = len(list(self.config_space.values()))
             samplers_per_axis = int(np.ceil(expected_length ** (1 / n_dims)))
-            num_steps_dict = {param.name: samplers_per_axis for param in self.config_space.get_hyperparameters()}
+            num_steps_dict = {param.name: samplers_per_axis for param in list(self.config_space.values())}
             self._grid = generate_grid(self.config_space, num_steps_dict)
             self.rng.shuffle(self._grid)
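The sizing logic above picks the smallest per-axis resolution whose full grid covers the requested number of points. A worked instance of the arithmetic (a sketch, not from the PR):

import numpy as np

n_dims, expected_length = 2, 50
samplers_per_axis = int(np.ceil(expected_length ** (1 / n_dims)))
assert samplers_per_axis == 8             # ceil(sqrt(50)) = ceil(7.07...) = 8
assert samplers_per_axis ** n_dims == 64  # the grid overshoots to 64 points, then is shuffled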