diff --git a/bias_transfer/__init__.py b/__init__.py similarity index 100% rename from bias_transfer/__init__.py rename to __init__.py diff --git a/bias_transfer/analysis/plot_shahd.py b/bias_transfer/analysis/plot_shahd.py deleted file mode 100644 index 7a35f9d..0000000 --- a/bias_transfer/analysis/plot_shahd.py +++ /dev/null @@ -1,484 +0,0 @@ -import math -import string - -from .plot import plot -import json -import pickle as pkl -import pandas as pd -import seaborn as sns -import numpy as np -import matplotlib.pyplot as plt -import statsmodels.api as stats_model -import statsmodels.formula.api as smf -import matplotlib.transforms - - -def plot_robustness( - models, - folder_name, - test_set, - levels, - std=False, - name_map={}, - noises=[], - noise_grouping={}, - plot_overview=False, - plot_correlation=False, - plot_correlation_bootstrapped=False, - plot_individual=True, - plot_per_noise_robustness=False, - add_overview_to_groups=False, - **kwargs, -): - - if plot_individual: - means = {} - stds = {} - for model in models: - with open( - f"./{folder_name}/{model}_all_seeds_{test_set}_bootstrapped_stds.json", - "r", - ) as fp: - data = json.load(fp) - stds[model] = data - with open( - f"./{folder_name}/{model}_all_seeds_{test_set}_bootstrapped_means.json", - "r", - ) as fp: - data = json.load(fp) - means[model] = data - for k in stds: - stds[k] = stds[k]["model"] - means[k] = means[k]["model"] - else: - stds = None - means = None - models = models[1:] - - if plot_per_noise_robustness: - with open( - f"./{folder_name}/robust_scores_per_noise.json", - "r", - ) as fp: - per_noise_data_ = json.load(fp) - per_noise_data_ = { - name_map[k]: { - k2: {"mean": v2["mean"] * 100, "std": v2["std"] * 100} - for k2, v2 in v.items() - } - for k, v in per_noise_data_.items() - } - per_noise_data = {} - if noise_grouping: - for model in models: - model = name_map[model] - results = per_noise_data_[model] - per_noise_data[model] = {} - for group_name, group_items in noise_grouping.items(): - per_noise_data[model][name_change(group_name)] = { - "mean": 0, - "std": 0, - } - for noise in group_items: - per_noise_data[model][name_change(group_name)][ - "mean" - ] += results[noise]["mean"] - per_noise_data[model][name_change(group_name)]["std"] += ( - results[noise]["std"] ** 2 - ) - per_noise_data[model][name_change(group_name)]["mean"] /= len( - group_items - ) - per_noise_data[model][name_change(group_name)]["std"] = math.sqrt( - per_noise_data[model][name_change(group_name)]["std"] - / len(group_items) - ) - - else: - for model in models: - model = name_map[model] - results = per_noise_data_[model] - per_noise_data[model] = { - name_change(k): v - for k, v in sorted( - results.items(), key=lambda item: noises.index(item[0]) - ) - } - else: - per_noise_data = {} - - if plot_correlation_bootstrapped: - with open(f"./{folder_name}/mtl_bootstrapped" + ".pkl", "rb") as f: - clean_and_neural = pkl.load(f) - with open( - f"./{folder_name}/robust_scores_scatter_bootstrapped.json", "r" - ) as fp: - robust_scores = json.load(fp) - for br, scores in robust_scores.items(): - scores.update(clean_and_neural[int(br)]) - robust_scores[br] = scores - print("ROBUST", robust_scores) - corrupt_list = [] - corrupt_err_list = [] - neurals = [] - neurals_err_list = [] - imgcls = [] - for key in robust_scores.keys(): - corrupt_list.append(robust_scores[key]["mean_score"]) - corrupt_err_list.append(robust_scores[key]["score_standard_err"]) - neurals.append(robust_scores[key]["mean_neural"]) - 
neurals_err_list.append(robust_scores[key]["std_neural"]) - imgcls.append(robust_scores[key]["mean_cls"]) - corrupt_list = np.array(corrupt_list) * 100 - corrupt_err_list = np.array(corrupt_err_list) * 100 - - robustness_data = pd.DataFrame( - { - "Neural": neurals, - "Robustness": corrupt_list, - "robustness err": corrupt_err_list, - "neural err": neurals_err_list, - "Clean": imgcls, - "category": ["MTL" for _ in neurals], - } - ) - elif plot_correlation: - with open(f"./{folder_name}/mtl" + ".pkl", "rb") as f: - clean_and_neural = pkl.load(f) - with open(f"./{folder_name}/robust_scores_scatter_seeds.json", "r") as fp: - robust_scores = json.load(fp) - for br, scores in robust_scores.items(): - scores.update(clean_and_neural[int(br)]) - robust_scores[br] = scores - print("ROBUST", robust_scores) - corrupt_list = [] - neurals = [] - imgcls = [] - for key in robust_scores.keys(): - corrupt_list += robust_scores[key]["scores"] - neurals += robust_scores[key]["neural"] - imgcls += robust_scores[key]["mean_cls"] - corrupt_list = np.array(corrupt_list) * 100 - - robustness_data = pd.DataFrame( - { - "Neural": neurals, - "Robustness": corrupt_list, - "Clean": imgcls, - "category": ["MTL" for _ in neurals], - } - ) - else: - robustness_data = None - - if plot_overview or add_overview_to_groups: - overview_data = { - "tin_baseline": {"mean": 1.0, "std": 0.0}, - "tin_mtl": { - "mean": 1.141074196151724, - "std": 0.04030340549401311, - }, - "tin_mtl_shuffled": { - "mean": 0.9632811867144085, - "std": 0.027942464073949062, - }, - "tin_mtl_simulated": { - "mean": 1.2174372498105903, - "std": 0.044304298544084894, - }, - "tin_oracle": { - "mean": 1.2374721551434498, - "std": 0.037550730118767965, - }, - } - overview_data = { - name_map[k]: {k2: v2 * 100 for k2, v2 in v.items()} - for k, v in overview_data.items() - } - if add_overview_to_groups and per_noise_data: - for model, results in overview_data.items(): - per_noise_data[model]["Mean"] = results - overview_data = {} - else: - overview_data = {} - - _plot( - noises=noises, - means=means, - stds=stds, - levels_list=levels, - name_map=name_map, - robustness_data=robustness_data, - robustness_overview=overview_data, - robustness_per_noise=per_noise_data, - **kwargs, - ) - - -def name_change(old_name, prefix=""): - name = old_name.replace("->", " → ") - name = name.replace("_", " ") - name = " ".join([n.capitalize() for n in name.split()]) - return prefix + name - - -@plot -def _plot( - fig, - ax, - noises, - means, - stds, - levels_list, - name_map, - robustness_data, - robustness_overview, - robustness_per_noise, -): - colors = { - "Baseline": "#000000", - "MTL-Oracle": "#2578B3", - "Oracle": "#A6CEE3", - "MTL-Shuffled": "#FB9A99", - "MTL-Monkey": "#E31E1B", - } - - row, col = 0, 0 - if means is not None: - for i, cat in enumerate(noises): - levels = pd.DataFrame(columns=["model", "category", "level", "mean", "std"]) - for model in means.keys(): - means_ordered = { - float(level): v for level, v in means[model][cat].items() - } - stds_ordered = { - float(level): v for level, v in stds[model][cat].items() - } - levels = levels.append( - pd.DataFrame( - { - "model": name_map[model], - "category": cat, - "level": list(means_ordered.keys()), - "mean": list(means_ordered.values()), - "std": list(stds_ordered.values()), - } - ) - ) - d = levels.groupby("category").get_group(cat) - d_mean = d.pivot(index="level", columns="model", values=["mean", "std"]) - plot = d_mean["mean"].plot( - ax=ax[row][col], legend=False, yerr=d_mean["std"], color=colors - ) - 
ax[row][col].set_title(name_change(cat)) - plot.set_xticks(levels_list) - plot.set_xticklabels(levels_list) - if row == len(ax)-1: - plot.set_xlabel("Corruption Severity") - else: - plot.set_xlabel(None) - if col == 0: - plot.set_ylabel("Accuracy [%]") - - # ax[row][col].set_ylim([0, 50]) - ax[row][col].grid(True, linestyle=":") - - col = (col + 1) % len(ax[row]) - if col == 0: - row += 1 - - ax[-1][-1].axis("off") - - handles, labels = ax[0][0].get_legend_handles_labels() - new_labels = ["Baseline", "MTL-Monkey", "MTL-Shuffled", "MTL-Oracle", "Oracle"] - new_handles = [] - for label in new_labels: - new_handles.append(handles[labels.index(label)]) - fig.legend(new_handles, new_labels, loc=(0.05, 0.96), ncol=6, frameon=False) - fig.tight_layout() - - if robustness_per_noise: - plot_per_category( - ax, - col, - colors, - fig, - robustness_per_noise, - row, - despine=True, - ) - ax[row][col].set_ylabel("Robustness Score [%]") - box = ax[row][col].get_position() - box.x0 = box.x0 - 0.035 - box.x1 = box.x1 - 0.035 - ax[row][col].set_position(box) - col = (col + 1) % len(ax[row]) - if col == 0: - row += 1 - - if robustness_overview: - if robustness_per_noise: - overview_data = { - m: {"Total": result} for m, result in robustness_overview.items() - } - del overview_data["Baseline"] - plot_per_category(ax, col, colors, fig, overview_data, row, despine=False) - ax[row][col].set_ylabel("") - ax[row][col].axes.get_yaxis().set_visible(False) - box = ax[row][col].get_position() - box.x0 = box.x0 - 0.065 - box.x1 = box.x1 - 0.065 - ax[row][col].set_position(box) - col = (col + 1) % len(ax[row]) - if col == 0: - row += 1 - else: - ax[row][col].axhline(y=100, color=colors["Baseline"], label="Baseline") - plot = sns.barplot( - x=list(robustness_overview.keys())[1:], - y=[y["mean"] for y in robustness_overview.values()][1:], - yerr=[y["std"] for y in robustness_overview.values()][1:], - ax=ax[row][col], - palette=colors, - ) - plot.set_xlabel("") - plot.set_ylabel("Robustness Score [%]") - - ax[row][col].grid(True, linestyle=":") - # ax[row][col].set_ylim([50, 140]) - fig.tight_layout() - sns.despine(offset=3, trim=False) - # plt.setp(ax[row][col].xaxis.get_majorticklabels(), rotation=30, ha="right") - plt.setp(ax[row][col].xaxis.get_majorticklabels(), rotation=-40, ha="left") - # Create offset transform by 5 points in x direction - dx = -1 / 72.0 - dy = 0 / 72.0 - offset = matplotlib.transforms.ScaledTranslation( - dx, dy, fig.dpi_scale_trans - ) - # apply offset transform to all x ticklabels. 
- for label in ax[row][col].xaxis.get_majorticklabels(): - label.set_transform(label.get_transform() + offset) - col = (col + 1) % len(ax[row]) - if col == 0: - row += 1 - - if robustness_data is not None: - if "robustness err" in robustness_data.columns: - markers, caps, bars = ax[row][col].errorbar( - robustness_data["Neural"], - robustness_data["Robustness"], - yerr=robustness_data["robustness err"], - xerr=robustness_data["neural err"], - linestyle="None", - zorder=-32, - ) - # loop through bars and caps and set the alpha value - [bar.set_alpha(0.5) for bar in bars] - [cap.set_alpha(0.5) for cap in caps] - - m, b = np.polyfit(robustness_data["Neural"], robustness_data["Robustness"], 1) - ax[row][col].plot(robustness_data["Neural"], m * robustness_data["Neural"] + b, color="grey") - # Get significance values for correlation: - mod = smf.ols(formula="Robustness ~ Clean * Neural", data=robustness_data) - res = mod.fit() - print("Robustness ~ Clean * Neural") - print(res.summary()) - - mod = smf.ols(formula="Robustness ~ Clean + Neural", data=robustness_data) - res = mod.fit() - print("Robustness ~ Clean + Neural") - print(res.summary()) - for i in range(3): - print(res.pvalues[i]) - - plot = sns.scatterplot( - data=robustness_data, - x="Neural", - y="Robustness", - # sizes=(300, 900), - ax=ax[row][col], - hue="Clean", - palette="rocket_r", - legend=False, - ) - - norm = plt.Normalize( - robustness_data["Clean"].min(), - robustness_data["Clean"].max(), - ) - sm = plt.cm.ScalarMappable(cmap="rocket_r", norm=norm) - sm.set_array([]) - cbar = ax[row][col].figure.colorbar(sm) - cbar.set_label("Clean Accuracy [%]", rotation=270, labelpad=10) - - plot.set_xlabel("Neural Prediction [corr]") - plot.set_ylabel("Robustness Score [%]") - - ax[row][col].grid(True, linestyle=":") - - # fig.tight_layout() - - -def plot_per_category(ax, col, colors, fig, robustness_per_noise, row, despine=False): - df = pd.concat( - {k: pd.DataFrame(v).T for k, v in robustness_per_noise.items()}, axis=0 - ) - df.reset_index(inplace=True) - df.columns = ["Model", "Corruption", "Robustness", "std"] - data_up = df.copy() - data_down = df.copy() - data_up["Robustness"] = data_up["Robustness"] + data_up["std"] - data_down["Robustness"] = data_down["Robustness"] - data_down["std"] - df = pd.concat([data_up, data_down]) - ax[row][col].axhline(y=100, color=colors["Baseline"], label="Baseline") - plot = sns.barplot( - x="Corruption", - y="Robustness", - hue="Model", - data=df, - ax=ax[row][col], - # yerr=df["std"], - palette=colors, - ) - # patches = sorted(plot.patches, key=lambda patch: patch.get_x()) - # for i, bar in enumerate(patches[-4:]): - # if i == 0: - # plt.axvline(x=bar.get_x(), color="grey", linestyle=":") - # bar.set_x(bar.get_x()+ bar.get_width()) - plot.set_xlabel("") - ax[row][col].grid(True, linestyle=":") - ax[row][col].set_ylim([50, 150]) - handles, labels = ax[row][col].get_legend_handles_labels() - ax[row][col].get_legend().remove() - new_labels = ["Baseline", "MTL-Monkey", "MTL-Shuffled", "MTL-Oracle", "Oracle"] - new_handles = [] - for label in new_labels: - new_handles.append(handles[labels.index(label)]) - fig.legend(new_handles, new_labels, loc=(0.01, 0.92), ncol=6, frameon=False) - # fig.tight_layout() - if despine: - sns.despine(offset=3, trim=False) - # plt.setp(ax[row][col].xaxis.get_majorticklabels(), rotation=30) - # for label in ax[row][col].get_xticklabels(): - # label.set_horizontalalignment('center') - # ax[row][col].setp(ax[row][col].xaxis.get_majorticklabels(), rotation=-45) - # 
ax[row][col].set_xticklabels(ax[row][col].get_xticks(), rotation=-45) - - # for tick in ax[row][col].get_xticklabels(): - # tick.set_rotation(-45) - # dx = 1 / 72.0 - # dy = 5 / 72.0 - # offset = matplotlib.transforms.ScaledTranslation(dx, dy, fig.dpi_scale_trans) - # for label in ax[row][col].xaxis.get_majorticklabels(): - # label.set_transform(label.get_transform() + offset) - - for tick in ax[row][col].get_xticklabels(): - tick.set_rotation(-45) - # Create offset transform by 5 points in x direction - dx = 2 / 72.0 - dy = 4 / 72.0 - offset = matplotlib.transforms.ScaledTranslation(dx, dy, fig.dpi_scale_trans) - # apply offset transform to all x ticklabels. - for label in ax[row][col].xaxis.get_majorticklabels(): - label.set_transform(label.get_transform() + offset) diff --git a/bias_transfer/analysis/representation/analyzer.py b/bias_transfer/analysis/representation/analyzer.py deleted file mode 100644 index 7513084..0000000 --- a/bias_transfer/analysis/representation/analyzer.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -import numpy as np -import matplotlib as mpl -import matplotlib.pyplot as plt -import seaborn as sns -import pandas as pd -import torch -from torch import nn -from torch.backends import cudnn - -import bias_transfer.trainer.trainer -from bias_transfer.analysis.plot import plot_preparation, save_plot - - -class RepresentationAnalyzer: - def __init__( - self, - experiment, - table, - name: str, - dataset: str = "val", - base_path: str = "/work/analysis/", - ): - self.experiment = experiment - self.dataset = dataset - # data_loaders, self.model, self.trainer = ( - # table & experiment.get_restrictions() - # ).restore_saved_state(,, - # self.num_samples = -1 - # self.sample_loader = torch.utils.data.DataLoader( - # data_loaders[dataset]["img_classification"].dataset, - # sampler=data_loaders[dataset]["img_classification"].sampler, - # batch_size=64, - # shuffle=False, - # num_workers=1, - # pin_memory=False, - # ) - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.model = self.model.to(self.device) - self._reset_seed() - self.criterion = nn.CrossEntropyLoss() - self.base_path = base_path - self.name = name - - def _reset_seed(self): - torch.manual_seed(42) - np.random.seed(42) - if self.device == "cuda": - cudnn.benchmark = False - cudnn.deterministic = True - torch.cuda.manual_seed(42) - - def _compute_representation(self, main_loop_modules): - ( - acc, - loss, - module_losses, - collected_outputs, - ) = bias_transfer.trainer.main_loop.main_loop( - self.model, - self.criterion, - self.device, - None, - self.sample_loader, - 0, - main_loop_modules, - train_mode=False, - return_outputs=True, - ) - outputs = [o[self.rep_name] for o in collected_outputs] - print("Acc:", acc, "Loss:", loss, flush=True) - return torch.cat(outputs), acc - - def get_file_name(self, method, rep_name): - return os.path.join(self.base_path, "_".join([self.name, rep_name, method])) - - def save_matrix(self, to_save, method, rep_name): - name = self.get_file_name(method, rep_name) + ".npy" - if not os.path.isdir(self.base_path): - os.mkdir(self.base_path) - np.save(os.path.join(self.base_path, name), to_save) - - def load_matrix(self, method, rep_name): - name = self.get_file_name(method, rep_name) + ".npy" - file = os.path.join(self.base_path, name) - if os.path.isfile(file): - print("Found existing {} result that will be loaded now".format(method)) - return np.load(file) - return None - - def plot_matrix( - self, - matrix_df, - title, - fig=None, - axs=None, - save="", - 
min=None, - max=None, - cbar_outside=True, - ): - if not fig or not axs: - fig, axs = plot_preparation(ratio=(4, 4), style="nips") - fig.tight_layout() # Or equivalently, "plt.tight_layout()" - if cbar_outside: - cbar_ax = fig.add_axes([0.90, 0.2, 0.02, 0.4]) # [left, bottom, width, height] - sns.heatmap( - matrix_df, - cmap="YlGnBu", - xticklabels=10, - yticklabels=10, - vmin=min, - vmax=max, - ax=axs, - cbar=True, - cbar_ax=cbar_ax if cbar_outside else None, - ) - sns.despine(offset=10, trim=True) - if cbar_outside: - fig.tight_layout(rect=[0, 0, 0.9, 1]) - else: - fig.tight_layout() - - st = fig.suptitle(title, fontsize=12) - st.set_y(1.05) - if save: - save_plot(fig,save) diff --git a/bias_transfer/analysis/representation/correlation.py b/bias_transfer/analysis/representation/correlation.py deleted file mode 100644 index 328521d..0000000 --- a/bias_transfer/analysis/representation/correlation.py +++ /dev/null @@ -1,88 +0,0 @@ -import copy - -import torch - -from sklearn.cluster import AgglomerativeClustering -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns -import os - -from bias_transfer.analysis.representation.analyzer import RepresentationAnalyzer - -#TODO!!! - -class CorrelationAnalyzer(RepresentationAnalyzer): - def _plot_corr_matrix( - self, mat, title="", file_name="", n_clusters=10, indices=None, acc=None - ): - fig, ax = self._plot_preparation(1, 1) - if indices is None: - clusters = AgglomerativeClustering(n_clusters=n_clusters).fit(1 - mat) - indices = np.argsort(clusters.labels_) - sns.heatmap( - mat[indices][:, indices], - cmap="YlGnBu", - xticklabels=400, - yticklabels=400, - vmin=0.0, - vmax=1.0, - ) - # sns.heatmap(mat[indices][:, indices], cmap="YlGnBu", xticklabels=400, yticklabels=400) - sns.despine(offset=10, trim=True) - if title: - fig.suptitle(title, fontsize=16) - if acc: - ax.text( - 0.82, 0.93, "Accuracy: {:02.2f}".format(acc), transform=ax.transAxes - ) - if file_name: - fig.savefig( - os.path.join(self.base_path, file_name), - facecolor=fig.get_facecolor(), - edgecolor=fig.get_edgecolor(), - bbox_inches="tight", - ) - plt.close(fig) - return indices - - - def _compute_corr_matrix(self, x, mode, noise_level): - result = self._load_representation("corr", mode, noise_level) - if result is None: - x_flat = x.flatten(1, -1) - # centered = (x_flat - x_flat.mean()) / x_flat.std() - # result = (centered @ centered.transpose(0, 1)) / x_flat.size()[1] - centered = x_flat - x_flat.mean(dim=1).view(-1, 1) - result = (centered @ centered.transpose(0, 1)) / torch.ger( - torch.norm(centered, 2, dim=1), torch.norm(centered, 2, dim=1) - ) # see https://de.mathworks.com/help/images/ref/corr2.html - print(torch.max(result)) - result = result.detach().cpu() - self._save_representation(result, "corr", mode, noise_level) - return result - - - def corr_matrix( - self, mode="clean", noise_level=0.0, clean_rep=None, *args, **kwargs - ): - self.clean_vs_noisy(noise_level=noise_level) - title = "Correlation matrix for rep from {} data ".format(mode) - if mode == "noisy": - corr_matrix = self._compute_corr_matrix( - self.noisy_rep[0], mode, noise_level - ) - title += "(std = {:01.2f})".format(noise_level) - acc = self.noisy_rep[1] - else: - corr_matrix = self._compute_corr_matrix(self.clean_rep[0], mode, 0.0) - acc = self.clean_rep[1] - - clean_rep = self._plot_corr_matrix( - corr_matrix, - title=title + "\n" + "Model: " + self.experiment.comment, - file_name=self._get_name("corr", mode, noise_level) + "_plot", - indices=clean_rep, - acc=acc, - ) - return 
clean_rep diff --git a/bias_transfer/analysis/representation/dim_reduction.py b/bias_transfer/analysis/representation/dim_reduction.py deleted file mode 100644 index a445cf3..0000000 --- a/bias_transfer/analysis/representation/dim_reduction.py +++ /dev/null @@ -1,238 +0,0 @@ -import copy - -import torch - -import bias_transfer.trainer.trainer -from torch import nn -from bias_transfer.trainer.main_loop_modules.noise_augmentation import NoiseAugmentation -import numpy as np -import pandas as pd -from sklearn.decomposition import PCA -from sklearn.manifold import TSNE -import matplotlib.pyplot as plt -import seaborn as sns -import os - - -#TODO!!!! - -def run(self, method): - if method in ("pca", "tsne"): - to_run = self.dim_reduction - else: - to_run = self.corr_matrix - filenames = [] - clean_rep = to_run(noise_level=0.0, method=method, mode="clean") - filenames.append( - os.path.join(self.path, self._get_name(method, "clean", 0.0) + "_plot.png") - ) - for i in range(1, 21): - noise_level = 0.05 * i - to_run( - noise_level=noise_level, - method=method, - mode="noisy", - clean_rep=clean_rep, - ) - filenames.append( - os.path.join( - self.path, - self._get_name(method, "noisy", noise_level) + "_plot.png", - ) - ) - self._generate_gif(filenames, self._get_name(method=method)) - -def clean_vs_noisy(self, noise_level=0.0): - print("==> Computing Representations", flush=True) - self._reset_seed() - if self.clean_rep is None: - # Representations form clean data: - print("Compute representation of clean input", flush=True) - self.clean_rep = self._compute_representation([]) - else: - print("Representation of clean input already in memory") - - # Representations from noisy data: - print("Compute representation of noisy input", flush=True) - self._reset_seed() - experiment = copy.deepcopy(self.experiment) - bias_transfer.trainer.trainer.trainer.noise_std = {noise_level: 1.0} - main_loop_modules = [ - NoiseAugmentation( - config=bias_transfer.trainer.trainer.trainer, - device=self.device, - data_loader=self.sample_loader, - seed=42, - ) - ] - self.noisy_rep = self._compute_representation(main_loop_modules) - -def _cosine_loss(self, rep_1, rep_2): - # Compare - cosine_criterion = nn.CosineEmbeddingLoss() - return cosine_criterion( - rep_1, rep_2, torch.ones(rep_1.shape[:1], device=self.device) - ) - -def _mse_loss(self, rep_1, rep_2): - mse_criterion = nn.MSELoss() - return mse_criterion(rep_1, rep_2) - -def clean_vs_noisy_distance(self, noise_level=0.0): - self.clean_vs_noisy(noise_level) - cosine = self._cosine_loss(self.clean_rep[0], self.noisy_rep[0]) - mse = self._mse_loss(self.clean_rep[0], self.noisy_rep[0]) - print( - "Clean vs. 
Noisy: Cosine loss:", - cosine.item(), - "MSE loss:", - mse.item(), - flush=True, - ) - -def _convert_to_df(self, rep, noise_level=0.0): - torch.manual_seed(42) - np.random.seed(42) - if self.device == "cuda": - torch.cuda.manual_seed(42) - rep = rep.cpu() - targets = torch.cat([t for _, t in self.sample_loader]).cpu() - self.num_labels = max(targets) + 1 - feat_cols = ["dim" + str(i) for i in range(rep.shape[1])] - df = pd.DataFrame(rep, columns=feat_cols) - df["y"] = targets - df["label"] = df["y"].apply(lambda i: str(i)) - df["noise"] = np.ones_like(targets) * noise_level - if self.num_samples > 0: - # For reproducability of the results - np.random.seed(42) - rndperm = np.random.permutation(df.shape[0]) - df = df.loc[rndperm[: self.num_samples], :].copy() - return df, feat_cols - -def _clean_vs_noisy_df(self, noise_level=0.0): - self.clean_vs_noisy(noise_level=noise_level) - if self.clean_df is None: - self.clean_df, self.feat_cols = self._convert_to_df(self.clean_rep[0], 0.0) - self.noisy_df, _ = self._convert_to_df(self.noisy_rep[0], noise_level) - - -def _compute_pca(self, df, mode, noise_level, pca=None): - pca_result = self._load_representation("pca", mode, noise_level) - if pca_result is None: - if not pca: - pca = PCA(n_components=3) - pca.fit(df[self.feat_cols].values) - pca_result = pca.transform(df[self.feat_cols].values) - self._save_representation(pca_result, "pca", mode, noise_level) - print( - "Explained variation per principal component: {}".format( - pca.explained_variance_ratio_ - ), - flush=True, - ) - df["pca-one"] = pca_result[:, 0] - df["pca-two"] = pca_result[:, 1] - df["pca-three"] = pca_result[:, 2] - return pca - -def _compute_tsne(self, df, mode, noise_level): - tsne_result = self._load_representation("tsne", mode, noise_level) - if tsne_result is None: - tsne = TSNE( - n_components=2, verbose=1, perplexity=40, n_iter=250, init="pca" - ) - tsne_result = tsne.fit_transform(df[self.feat_cols].values) - self._save_representation(tsne_result, "tsne", mode, noise_level) - df["tsne-2d-one"] = tsne_result[:, 0] - df["tsne-2d-two"] = tsne_result[:, 1] - -def _plot_dim_reduction( - self, - df, - data_columns, - num_labels=100, - hue="y", - style=None, - title="", - file_name="", - legend=False, - acc=None, -): - fig, ax = self._plot_preparation(1, len(data_columns)) - if not isinstance(ax, list): - ax = [ax] - for i, (x, y) in enumerate(data_columns): - sns.scatterplot( - x=x, - y=y, - hue=hue, - style=style, - palette=sns.color_palette("hls", num_labels), - data=df, - legend=legend, - s=10, - # ec=None, - ax=ax[i], - ) - if acc: - ax[i].text( - 0.85, - 0.90, - "Accuracy: {:02.2f}".format(acc), - transform=ax[i].transAxes, - ) - sns.despine(offset=10, trim=True) - if title: - fig.suptitle(title, fontsize=16) - if file_name: - fig.savefig( - os.path.join(self.path, file_name), - facecolor=fig.get_facecolor(), - edgecolor=fig.get_edgecolor(), - bbox_inches="tight", - ) - plt.close(fig) - - -def dim_reduction( - self, method="tsne", mode="combined", noise_level=0.0, clean_rep=None -): - self._clean_vs_noisy_df(noise_level=noise_level) - if mode == "combined": - combined_df = pd.DataFrame(self.clean_df) - combined_df = combined_df.append(self.noisy_df, ignore_index=True) - df = combined_df - acc = self.noisy_rep[1] - title = "Rep noisy vs clean data " - elif mode == "noisy": - title = "Rep from noisy data (std = {:01.2f})".format(noise_level) - df = self.noisy_df - acc = self.noisy_rep[1] - else: - title = "Rep from clean data " - df = self.clean_df - acc = 
self.clean_rep[1] - - data_columns = [] - print("==> Computing {} representation".format(method)) - if "tsne" in method: - self._compute_tsne(df, mode, noise_level) - data_columns.append(("tsne-2d-one", "tsne-2d-two")) - if "pca" in method: - clean_rep = self._compute_pca(df, mode, noise_level, pca=clean_rep) - data_columns.append(("pca-one", "pca-two")) - - print("==> Plotting {} representation".format(method)) - self._plot_dim_reduction( - df, - data_columns, - num_labels=self.num_labels, - style="noise" if "combined" in mode else None, - hue="y", - title=title + "\n" + "Model: " + self.experiment.comment, - file_name=self._get_name(method, mode, noise_level) + "_plot", - acc=acc, - ) - return clean_rep - diff --git a/bias_transfer/analysis/representation/noise_stability.py b/bias_transfer/analysis/representation/noise_stability.py deleted file mode 100644 index 6dc29df..0000000 --- a/bias_transfer/analysis/representation/noise_stability.py +++ /dev/null @@ -1,410 +0,0 @@ -import os -import copy -import math -import shutil - -import torch -import numpy as np -import seaborn as sns -import pandas as pd -import matplotlib.pyplot as plt -from torch.autograd import Variable - -from bias_transfer.analysis.plot import plot_preparation, save_plot -from bias_transfer.models import IntermediateLayerGetter -from bias_transfer.trainer.main_loop_modules import NoiseAugmentation -from nnfabrik.utility.dj_helpers import make_hash -from .analyzer import RepresentationAnalyzer - -ALL_REPRESENTATIONS = { - "conv1": "layer0.conv1", - # "relu": "layer0.relu", - # layer1 - "layer1.0.conv1": "layer1.0.conv1", - "layer1.0.conv2": "layer1.0.conv2", - "layer1.0.conv3": "layer1.0.conv3", - # "layer1.0.relu": "layer1.0.relu", - "layer1.1.conv1": "layer1.1.conv1", - "layer1.1.conv2": "layer1.1.conv2", - "layer1.1.conv3": "layer1.1.conv3", - # "layer1.1.relu": "layer1.1.relu", - "layer1.2.conv1": "layer1.2.conv1", - "layer1.2.conv2": "layer1.2.conv2", - "layer1.2.conv3": "layer1.2.conv3", - # "layer1.2.relu": "layer1.2.relu", - # layer2 - "layer2.0.conv1": "layer2.0.conv1", - "layer2.0.conv2": "layer2.0.conv2", - "layer2.0.conv3": "layer2.0.conv3", - # "layer2.0.relu": "layer2.0.relu", - "layer2.1.conv1": "layer2.1.conv1", - "layer2.1.conv2": "layer2.1.conv2", - "layer2.1.conv3": "layer2.1.conv3", - # "layer2.1.relu": "layer2.1.relu", - "layer2.2.conv1": "layer2.2.conv1", - "layer2.2.conv2": "layer2.2.conv2", - "layer2.2.conv3": "layer2.2.conv3", - # "layer2.2.relu": "layer2.2.relu", - "layer2.3.conv1": "layer2.3.conv1", - "layer2.3.conv2": "layer2.3.conv2", - "layer2.3.conv3": "layer2.3.conv3", - # "layer2.3.relu": "layer2.3.relu", - # layer3 - "layer3.0.conv1": "layer3.0.conv1", - "layer3.0.conv2": "layer3.0.conv2", - "layer3.0.conv3": "layer3.0.conv3", - # "layer3.0.relu": "layer3.0.relu", - "layer3.1.conv1": "layer3.1.conv1", - "layer3.1.conv2": "layer3.1.conv2", - "layer3.1.conv3": "layer3.1.conv3", - # "layer3.1.relu": "layer3.1.relu", - "layer3.2.conv1": "layer3.2.conv1", - "layer3.2.conv2": "layer3.2.conv2", - "layer3.2.conv3": "layer3.2.conv3", - # "layer3.2.relu": "layer3.2.relu", - "layer3.3.conv1": "layer3.3.conv1", - "layer3.3.conv2": "layer3.3.conv2", - "layer3.3.conv3": "layer3.3.conv3", - # "layer3.3.relu": "layer3.3.relu", - "layer3.4.conv1": "layer3.4.conv1", - "layer3.4.conv2": "layer3.4.conv2", - "layer3.4.conv3": "layer3.4.conv3", - # "layer3.4.relu": "layer3.4.relu", - "layer3.5.conv1": "layer3.5.conv1", - "layer3.5.conv2": "layer3.5.conv2", - "layer3.5.conv3": "layer3.5.conv3", - # 
"layer3.5.relu": "layer3.5.relu", - # layer4 - "layer4.0.conv1": "layer4.0.conv1", - "layer4.0.conv2": "layer4.0.conv2", - "layer4.0.conv3": "layer4.0.conv3", - # "layer4.0.relu": "layer4.0.relu", - "layer4.1.conv1": "layer4.1.conv1", - "layer4.1.conv2": "layer4.1.conv2", - "layer4.1.conv3": "layer4.1.conv3", - # "layer4.1.relu": "layer4.1.relu", - "layer4.2.conv1": "layer4.2.conv1", - "layer4.2.conv2": "layer4.2.conv2", - "layer4.2.conv3": "layer4.2.conv3", - # "layer4.2.relu": "layer4.2.relu", - # core output - "flatten": "core", - "fc": "readout", -} - - -def centering(K): - n = K.shape[0] - unit = torch.ones([n, n], device=K.device) - I = torch.eye(n, device=K.device) - H = I - unit / n - - return torch.mm( - torch.mm(H, K), H - ) # HKH are the same with KH, KH is the first centering, H(KH) do the second time, results are the sme with one time centering - # return np.dot(H, K) # KH - - -def rbf(X, sigma=None): - GX = torch.dot(X, X.T) - KX = torch.diag(GX) - GX + (torch.diag(GX) - GX).T - if sigma is None: - mdist = torch.median(KX[KX != 0]) - sigma = math.sqrt(mdist) - KX *= -0.5 / (sigma * sigma) - KX = torch.exp(KX) - return KX - - -def kernel_HSIC(X, Y, sigma): - return torch.sum(centering(rbf(X, sigma)) * centering(rbf(Y, sigma))) - - -def linear_HSIC(X, Y): - L_X = torch.mm(X, X.T) - L_Y = torch.mm(Y, Y.T) - return torch.sum(centering(L_X) * centering(L_Y)) - - -def linear_CKA(X, Y): - hsic = linear_HSIC(X, Y) - var1 = torch.sqrt(linear_HSIC(X, X)) - var2 = torch.sqrt(linear_HSIC(Y, Y)) - - return hsic / (var1 * var2) - - -def kernel_CKA(X, Y, sigma=None): - hsic = kernel_HSIC(X, Y, sigma) - var1 = torch.sqrt(kernel_HSIC(X, X, sigma)) - var2 = torch.sqrt(kernel_HSIC(Y, Y, sigma)) - - return hsic / (var1 * var2) - - -def pairwise_l2_distances(x, y=None): - """ - see: https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065 - Input: x is a Nxd matrix - y is an optional Mxd matirx - Output: dist is a NxM matrix where dist[i,j] is the square norm between x[i,:] and y[j,:] - if y is not given then use 'y=x'. - i.e. 
dist[i,j] = ||x[i,:]-y[j,:]||^2 - """ - x_norm = (x ** 2).sum(1).view(-1, 1) - if y is not None: - y_norm = (y ** 2).sum(1).view(1, -1) - else: - y = x - y_norm = x_norm.view(1, -1) - - dist = x_norm + y_norm - 2.0 * torch.mm(x, torch.transpose(y, 0, 1)) - return dist - - -def RDM(X, dist_measure="corr"): - X = X - X.mean(dim=-1).unsqueeze(-1) - if dist_measure == "corr": - result = (X @ torch.transpose(X, 0, 1)) / torch.ger( - torch.norm(X, 2, dim=1), torch.norm(X, 2, dim=1) - ) - elif dist_measure == "l2": - result = pairwise_l2_distances(X) - return result - - -def RDM_comparison(X, Y, dist_measure="corr"): - RDM_X = RDM(X, dist_measure).flatten() - RDM_Y = RDM(Y, dist_measure).flatten() - result = RDM_X @ RDM_Y.T - result /= (X.shape[0]) ** 2 - return result - - -def similarity(X, Y, dist_measure="CKA"): - if dist_measure == "CKA": - return linear_CKA(X, Y) - else: - return RDM_comparison(X, Y, dist_measure) - - -class NoiseStabilityAnalyzer(RepresentationAnalyzer): - def __init__( - self, - num_samples=0, - num_repeats=4, - noise_std_max=0.51, - noise_std_step=0.01, - rep_names=None, - dist_measures=("CKA",), - *args, - **kwargs - ): - super().__init__(*args, **kwargs) - if rep_names is None: - rep_names = ALL_REPRESENTATIONS.values() - self.rep_names = rep_names - if not num_samples: - self.num_samples = len(self.sample_loader.sampler) - else: - self.num_samples = num_samples - self.num_repeats = num_repeats - self.dist_measures = dist_measures - self.noise_stds = np.arange(0, noise_std_max, noise_std_step) - if isinstance(self.model, IntermediateLayerGetter): - self.model = self.model._model - self.model = IntermediateLayerGetter(self.model, ALL_REPRESENTATIONS) - self.accuracy = None - self.tmp_path = os.path.join( - self.base_path, "tmp" + make_hash(self.name) - ) - self.num_batches = math.ceil(self.num_samples / self.sample_loader.batch_size) - - def run(self): - noise_stabilities = {d: [] for d in self.dist_measures} - for rep_name in self.rep_names: - shutil.rmtree(self.tmp_path, ignore_errors=True, onerror=None) - os.makedirs(self.tmp_path) - self._reset_seed() - self._compute_representation(rep_name) - torch.cuda.empty_cache() - for dist_measure in self.dist_measures: - stability_matrix = self.compute_stability_matrix(dist_measure, rep_name) - # compute stability measure - noise_stabilities[dist_measure].append( - np.average(stability_matrix[0, :]) - ) - # prepare plotting - stability_df = pd.DataFrame(stability_matrix) - stability_df.astype(float) - stability_df.columns = ["{:01.2f}".format(n) for n in self.noise_stds] - stability_df.index = ["{:01.2f}".format(n) for n in self.noise_stds] - fig, axs = plot_preparation( - nrows=2, - ncols=1, - fraction=0.5, - sharex=True, - ratio=(0.9, 1), # to make it quadratic - gridspec_kw={"height_ratios": [1, 4]}, - style="nips", - ) - self.plot_acc_over_noise(axs[0]) - self.plot_matrix( - matrix_df=stability_df, - title=self.name + ": " + rep_name + "(" + dist_measure + ")", - min=0 if dist_measure == "CKA" else None, - max=1 if dist_measure == "CKA" else None, - save=None, - fig=fig, - axs=axs[1], - cbar_outside=True, - ) - levels = np.arange(0, 1.0, 0.1) - contours = axs[1].contour( - stability_matrix, colors="white", levels=levels - ) - axs[1].clabel(contours, inline=True, fontsize=8) - save_plot( - fig, self.get_file_name(dist_measure, rep_name.replace(".", "_")) - ) - print("Finished {} {}-Analysis".format(rep_name, dist_measure)) - shutil.rmtree(self.tmp_path, ignore_errors=True, onerror=None) - for dist_measure, stability in 
noise_stabilities.items(): - fig = self.plot_noise_stability(stability) - save_plot(fig, self.get_file_name(dist_measure, "stability")) - - def _compute_representation(self, rep_name, *args, **kwargs): - test_input = next(iter(self.sample_loader))[0][:1].to(self.device) - test_out, _ = self.model(test_input) - test_out = test_out[rep_name] - if isinstance(test_out, list): - print(rep_name, len(test_out)) - test_out = test_out[0] - rep_size = test_out.flatten(1, -1).shape[-1] - - correct = torch.zeros((self.num_repeats, len(self.noise_stds))) - for batch_idx, (inputs, targets) in enumerate(self.sample_loader): - inputs, targets = ( - inputs.to(self.device, dtype=torch.float), - targets.to(self.device), - ) - self.batch_size = inputs.shape[0] - reps = torch.zeros( - ( - # self.batch_size, - min( - self.batch_size, self.num_samples - batch_idx * self.batch_size, - ), - self.num_repeats, - len(self.noise_stds), - rep_size, - ) - ) - for repeat in range(self.num_repeats): - for noise_idx, noise_std in enumerate(self.noise_stds): - # apply noise - trainer = copy.deepcopy(self.experiment.trainer) - trainer.noise_std = {noise_std: 1.0} - module = NoiseAugmentation( - self.model, - config=trainer, - device=self.device, - data_loader={"img_classification": self.sample_loader}, - seed=42, - ) - inputs_ = inputs.clone() - model, inputs_ = module.pre_forward( - self.model, inputs_, {}, train_mode=False - ) - # Forward - outputs = model(inputs_) - samples_start = 0 - samples_end = min( - self.batch_size, self.num_samples - batch_idx * self.batch_size - ) - # for rep_name in self.rep_names: - rep = outputs[0][rep_name].flatten(1, -1).detach().cpu() - reps[samples_start:samples_end, repeat, noise_idx] = rep.view( - (self.batch_size, -1) - )[: (samples_end - samples_start)] - - # track accuracy - _, predicted = outputs[1].max(1) - targets_ = targets[: (samples_end - samples_start)] - predicted_ = predicted[: (samples_end - samples_start)] - correct[repeat, noise_idx] += predicted_.eq(targets_).sum().item() - torch.save( - reps, - os.path.join(self.tmp_path, "reps_{}_{}".format(rep_name, batch_idx)), - ) - - if (batch_idx + 1) * self.batch_size >= self.num_samples: - break - - self.accuracy = ((correct / self.num_samples) * 100).numpy() - - def plot_noise_stability(self, stability): - df = pd.DataFrame( - {"Layer": range(1, len(stability) + 1), "Stability": stability} - ) - fig, axs = plot_preparation(nrows=1, ncols=1, fraction=0.5, style="nips",) - g = sns.lineplot(x="Layer", y="Stability", data=df, ax=axs) - xlabels = [str(int(x)) for x in g.get_xticks()] - g.set_xticklabels(xlabels) - return fig - - def plot_acc_over_noise(self, ax): - df = pd.DataFrame(self.accuracy) - df.astype(float) - df.columns = ["{:01.2f}".format(n) for n in self.noise_stds] - df = df.stack().reset_index() - df.columns = ["repeat", "noise", "Accuracy"] - sns.lineplot(x="noise", y="Accuracy", data=df, ax=ax) - ax.set_ylim(ymin=0, ymax=100) - - def compute_stability_matrix(self, dist_measure, rep_name): - result = self.load_matrix(dist_measure, rep_name.replace(".", "_")) - if result is None: - rep_pieces = [ - torch.load( - os.path.join( - self.tmp_path, "reps_{}_{}".format(rep_name, batch_idx) - ) - ) - for batch_idx in range(self.num_batches) - ] - rep = torch.cat(rep_pieces) - result = torch.zeros((len(self.noise_stds), len(self.noise_stds))) - first_loop = ( - range(self.num_repeats - 1) if self.num_repeats > 1 else range(1) - ) - for r in first_loop: - second_loop = ( - range(r + 1, self.num_repeats) if self.num_repeats > 1 
else range(1) - ) - for r2 in second_loop: - reps1 = rep[:, r].to(self.device) - reps2 = rep[:, r2].to(self.device) - for i in range(len(self.noise_stds)): - for j in range(i, len(self.noise_stds)): - res = similarity(reps1[:, i], reps2[:, j], dist_measure) - result[i, j] += res.detach().cpu() - if j != i: - result[j, i] += res.detach().cpu() - res = similarity(reps2[:, i], reps1[:, j], dist_measure) - result[i, j] += res.detach().cpu() - if j != i: - result[j, i] += res.detach().cpu() - del reps1 - del reps2 - result = ( - ( - result / (2 * (self.num_repeats * (self.num_repeats - 1) / 2)) - if self.num_repeats > 1 - else result - ) - .detach() - .numpy() - ) - self.save_matrix(result, dist_measure, rep_name.replace(".", "_")) - return result diff --git a/bias_transfer/analysis/results/bias_transfer_benchmark.py b/bias_transfer/analysis/results/bias_transfer_benchmark.py deleted file mode 100644 index 76d6bf6..0000000 --- a/bias_transfer/analysis/results/bias_transfer_benchmark.py +++ /dev/null @@ -1,171 +0,0 @@ -import pandas as pd -import seaborn as sns -import matplotlib.pyplot as plt - -from .base import Analyzer -from ..plot import plot - - -class BiasTransferAnalyzer(Analyzer): - def generate_table( - self, - objective=("Test", "img_classification", "accuracy"), - last_n=0, - label_steps=False, - ): - row_list = [] - for desc, results in self.data.items(): - if label_steps: - name_split = desc.name.split(" ") - name = " ".join(name_split[:-1]) - labels = name_split[-1][1:-1].split(";") - else: - name, labels = (desc.name, None) - row = {"name": name} - levels = sorted(list(results.keys())) - if last_n: - levels = levels[(-1) * last_n :] - for level, tracker in results.items(): - try: - if level in levels: - l = levels.index(level) - if labels: - l = labels[l] - row[l] = tracker.get_current_objective(*objective) - except: - pass # no valid entry for this objective - row_list.append(row) - df = pd.DataFrame(row_list) - if not df.empty: - df = df.groupby("name").first() - # Split off alpha from name - df = df.reset_index() - new = df["name"].str.split(":", n=1, expand=True) - if len(new.columns) > 1: - df.drop(columns=["name"], inplace=True) - df["name"] = new[0] - df["alpha"] = new[1] - df = df.set_index("name") - return df - - def generate_normalized_table(self): - df = self.generate_table(last_n=2, label_steps=True) - for i, c in enumerate(df.columns): - offset = "A" if i % 2 == 0 else "B" - baseline = df.at[f"Direct Training {offset}", c] - df.insert( - 2 * i + 1, c + " normalized", df[c].divide(baseline).multiply(100) - ) - return df - - @plot - def plot_frontier( - self, fig, ax, columns_range=(), title=False, hide_lines=False, - ): - df = self.generate_table(last_n=2, label_steps=True) - direct_a = ( - df.loc["Direct Training on Target"] - if "Direct Training on Target" in df.index - else None - ) - direct_b = ( - df.loc["Direct Training on Eval"] - if "Direct Training on Eval" in df.index - else None - ) - max_x, min_x, max_y, min_y = 0, 100, 0, 100 - for i, c in enumerate(df.columns): - if not columns_range[0] <= i <= columns_range[1]: - continue - if i % 2 == 1: - if True: - a = ax[i - 1 - columns_range[0]][i - 1 - columns_range[0]] - else: - a = ax[(i - 1) // 4][((i - 1) % 4) // 2] - colors = [ - "#a6cee3", - "#1f78b4", - "#b2df8a", - "#33a02c", - "#fb9a99", - "#e31a1c", - "#fdbf6f", - "#ff7f00", - "#cab2d6", - "#6a3d9a", - "#ffff99", - ] - models = sorted(list(set(df.index))) - print(models) - colors = dict(zip(models, colors[: len(models)])) - print(colors) - plot_res = 
sns.lineplot(
-                    data=df,
-                    x=df.columns[i - 1],
-                    y=c,
-                    hue="name",
-                    ax=a,
-                    legend="brief",
-                    style="name",
-                    markers=True,
-                    palette=colors,
-                )
-                for line in plot_res.lines[2:]:
-                    line.set_visible(not hide_lines)
-                # if i == 5 and legend_outside:
-                # a.legend(
-                # fontsize=14,
-                # title_fontsize="14",
-                # bbox_to_anchor=(1.05, 1),
-                # loc="upper left",
-                # borderaxespad=0.0,
-                # )
-                if direct_b is not None:
-                    a.axhline(
-                        y=direct_b[c], lw=0.7, color=colors["Direct Training on Eval"]
-                    )
-                if direct_a is not None:
-                    a.axvline(
-                        x=direct_a[df.columns[i - 1]],
-                        lw=0.7,
-                        color=colors["Direct Training on Target"],
-                    )
-                min_x = min(min_x, a.get_xlim()[0])
-                min_y = min(min_y, a.get_ylim()[0])
-                max_x = max(max_x, a.get_xlim()[1])
-                max_y = max(max_y, a.get_ylim()[1])
-                if title:
-                    a.set_title(self.name_map(a.get_xlabel()), fontweight="bold")
-                a.set_xlabel(
-                    self.name_map(a.get_xlabel().split("->")[1], "Target Task: ")
-                )
-                a.set_ylabel(self.name_map(a.get_ylabel(), "Evaluation: "))
-
-
-        for i in range(len(ax)):
-            for j in range(len(ax[i])):
-                # axs[i][j].set_xlim([min_x,max_x])
-                ax[i][j].set_ylim([min_y, max_y])
-
-        # sns.despine(offset=5, trim=False)
-        # plt.subplots_adjust(hspace=0.4)
-        # if "talk" in style:
-        # if legend_outside:
-        # pass
-        # # ax.legend(
-        # # fontsize=14,
-        # # title_fontsize="14",
-        # # bbox_to_anchor=(1.05, 1),
-        # # loc="upper left",
-        # # borderaxespad=0.0,
-        # # )
-        # else:
-        # plt.legend(fontsize=14, title_fontsize="14")
-        # elif legend_outside:
-        # plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
-        # if save:
-        # save_plot(
-        # fig,
-        # save + "_" + style,
-        # types=("png", "pdf", "pgf") if "nips" in style else ("png",),
-        # )
diff --git a/bias_transfer/analysis/results/c_test.py b/bias_transfer/analysis/results/c_test.py
deleted file mode 100644
index cba5a3a..0000000
--- a/bias_transfer/analysis/results/c_test.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
-import re
-
-from .base import Analyzer
-from ..plot import plot_preparation
-
-
-class CTestAnalyzer(Analyzer):
-    corruption_map = {
-        "shot_noise": "Shot Noise",
-        "impulse_noise": "Impulse Noise",
-        "speckle_noise": "Speckle Noise",
-        "gaussian_noise": "Gaussian Noise",
-        "defocus_blur": "Defocus Blur",
-        "gaussian_blur": "Gauss Blur",
-        "motion_blur": "Motion Blur",
-        "glass_blur": "Glass Blur",
-        "zoom_blur": "Zoom Blur",
-        "brightness": "Brightness",
-        "fog": "Fog",
-        "frost": "Frost",
-        "snow": "Snow",
-        "contrast": "Contrast",
-        "elastic_transform": "Elastic Transform",
-        "pixelate": "Pixelate",
-        "jpeg_compression": "JPEG Compression",
-        "saturate": "Saturate",
-        "spatter": "Spatter",
-    }
-
-    Res_Alex_Net_mean = dict()
-    Res_Alex_Net_mean["Gaussian Noise"] = 0.886
-    Res_Alex_Net_mean["Shot Noise"] = 0.894
-    Res_Alex_Net_mean["Impulse Noise"] = 0.923
-    Res_Alex_Net_mean["Defocus Blur"] = 0.820
-    Res_Alex_Net_mean["Gauss Blur"] = 0.826
-    Res_Alex_Net_mean["Glass Blur"] = 0.826
-    Res_Alex_Net_mean["Motion Blur"] = 0.786
-    Res_Alex_Net_mean["Zoom Blur"] = 0.798
-    Res_Alex_Net_mean["Snow"] = 0.867
-    Res_Alex_Net_mean["Frost"] = 0.827
-    Res_Alex_Net_mean["Fog"] = 0.819
-    Res_Alex_Net_mean["Brightness"] = 0.565
-    Res_Alex_Net_mean["Contrast"] = 0.853
-    Res_Alex_Net_mean["Elastic Transform"] = 0.646
-    Res_Alex_Net_mean["Pixelate"] = 0.718
-    Res_Alex_Net_mean["JPEG Compression"] = 0.607
-    Res_Alex_Net_mean["Speckle Noise"] = 0.845
-    Res_Alex_Net_mean["Spatter"] = 0.718
-    Res_Alex_Net_mean["Saturate"] = 0.658
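The AlexNet reference errors above are the normaliser that `calculate_c_scores` (further down in this file) divides by to turn a model's mean corruption error into a relative corruption-error score; a minimal sketch of that step for a single corruption, using made-up per-severity accuracies (`per_severity_acc` is hypothetical), could look like this:

import numpy as np

# hypothetical per-severity accuracies (%) under Gaussian Noise, severities 1-5
per_severity_acc = np.array([55.0, 48.0, 40.0, 31.0, 24.0])

# mean error in percent, mirroring the `100 - x` step in calculate_c_scores
mean_err = (100.0 - per_severity_acc).mean()

# divide by the AlexNet reference error for the same corruption
ce = mean_err / 0.886  # Res_Alex_Net_mean["Gaussian Noise"]

# averaging ce over all corruptions gives the mCE column that calculate_c_scores reports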
- - def extract_c_test_results(self): - corruptions = ( - "shot_noise", - "impulse_noise", - # "speckle_noise", - "gaussian_noise", - "defocus_blur", - # "gaussian_blur", - "motion_blur", - "glass_blur", - "zoom_blur", - "brightness", - "fog", - "frost", - "snow", - "contrast", - "elastic_transform", - "pixelate", - "jpeg_compression", - # "saturate", - # "spatter", - ) - data_to_plot = pd.DataFrame() - for corruption in corruptions: - row_list = [] - for desc, tracker in self.data.items(): - row = { - severity: tracker.get_current_objective( - corruption, str(severity), "accuracy" - ) - for severity in range(1, 6) - } - row[0] = tracker.get_current_objective( - "Test", "img_classification", "accuracy" - ) - row["name"] = desc.name - row_list.append(row) - df = pd.DataFrame(row_list) - df = df.groupby("name").mean() - df["Corruption"] = corruption - data_to_plot = pd.concat([data_to_plot, df], axis=0, sort=True) - return data_to_plot - - def calculate_c_scores(self): - c_data = self.extract_c_test_results() - df = c_data[c_data.columns[0:6]].apply(lambda x: 100 - x) - df_mean = df[df.columns[0:6]].mean(axis=1) - c_data = pd.concat([c_data, df_mean], axis=1) - c_data.columns = [1, 2, 3, 4, 5, 0, "Corruption", "Mean"] - - def normalize_alexnet(row): - mean_error = row["Mean"] - corruption = row["Corruption"] - ce = mean_error / self.Res_Alex_Net_mean[self.corruption_map[corruption]] - return pd.concat([row, pd.Series({"mCE": ce})]) - - c_data = c_data.apply(normalize_alexnet, axis=1) - c_data = c_data.groupby("name").mean() - return c_data - - def plot_grid(self, style, **kwargs): - fig, ax = plot_preparation(style) - data_to_plot = self.extract_c_test_results() - g = sns.FacetGrid( - data=data_to_plot, - col="Corruption", - col_wrap=4, - sharey=True, - sharex=True, - # height=4 - ) - - def draw_heatmap(data, *args, **kwargs): - del data["Corruption"] - # print(data) - sns.heatmap(data, annot=True, cbar=False) - - g.map_dataframe(draw_heatmap) - fig = g.fig - sns.despine(offset=10, trim=True) - # remove ticks again (see: https://stackoverflow.com/questions/37860163/seaborn-despine-brings-back-the-ytick-labels) - # loop over the non-left axes: - for i, ax in enumerate(g.axes.flat): - if i % 4 != 0: - # get the yticklabels from the axis and set visibility to False - for label in ax.get_yticklabels(): - label.set_visible(False) - ax.yaxis.offsetText.set_visible(False) - if i < len(g.axes) - 4: - # get the xticklabels from the axis and set visibility to False - for label in ax.get_xticklabels(): - label.set_visible(False) - ax.xaxis.offsetText.set_visible(False) - self._post_plot_operations(style, **kwargs) - diff --git a/bias_transfer/analysis/results/regression.py b/bias_transfer/analysis/results/regression.py deleted file mode 100644 index 43a5e13..0000000 --- a/bias_transfer/analysis/results/regression.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from bias_transfer.gp.nn_kernel import nn_kernel -import matplotlib.pyplot as plt - - -class Analyzer: - def __init__(self): - self.data_loaders = {} - self.model = None - self.trainer = None - - def load_model(self, config, table, transfer_level): - # Select data: - if transfer_level < len(config.get_restrictions()): - restricted = table & config.get_restrictions()[transfer_level] - else: - print("Nothing to load") - restricted = None - if restricted: # could be empty if entry is not computed yet - self.data_loaders, self.model, self.trainer = restricted.load_model( - include_dataloader=True, include_trainer=True, include_state_dict=True - ) 
- - def plot_eval(self, save=""): - self.model.eval() - x_test, y_test = self.data_loaders["test"]["regression"].dataset.tensors - x_train, y_train = self.data_loaders["train"]["regression"].dataset.tensors - plt.plot(x_test, y_test, color="orange", lw=2, label="True") - plt.plot(x_train, y_train, color="red", label="Traning data") - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.model.to(device) - prediction = self.model(x_test.to(device)) # input x and predict based on x - if isinstance(prediction, tuple): - prediction = prediction[1] - plt.plot(x_test, prediction.detach().cpu().numpy(), label="Prediction") - plt.legend() - if save: - fig = plt.gcf() - fig.savefig(save, dpi=200) - - def plot_kernel(self): - self.model.eval() - x_test, y_test = self.data_loaders["test"]["regression"].dataset.tensors - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.model.to(device) - K_plot = nn_kernel(x_test, x_test, net=self.model, device=device) - plt.imshow(K_plot) - # if np.count_nonzero(x) > 0: - # _ = plt.xticks(np.arange(0,x.shape[0], 15),x[::15,0].astype(np.int)) - # _ = plt.yticks(np.arange(0,x.shape[0], 15),x[::15,0].astype(np.int)) - plt.colorbar() diff --git a/bias_transfer/analysis/train_path/base.py b/bias_transfer/analysis/train_path/base.py deleted file mode 100644 index a023c8f..0000000 --- a/bias_transfer/analysis/train_path/base.py +++ /dev/null @@ -1,80 +0,0 @@ -import tempfile - -import numpy as np -import pandas as pd -import seaborn as sns -import matplotlib.pyplot as plt -import torch - -from sklearn.decomposition import PCA -from sklearn.manifold import TSNE -from bias_transfer.analysis.plot import save_plot, plot -from bias_transfer.tables.transfer import Checkpoint -from neuralpredictors.tracking import AdvancedMultipleObjectiveTracker as Tracker - - -class Analyzer: - def __init__(self): - self.data = {} - - def load_data(self, configs): - # Select data: - with tempfile.TemporaryDirectory() as temp_dir: - for description, config in configs.items(): - level = 0 - while True: - restriction = config.get_restrictions(level) - if not restriction: - break - restricted = Checkpoint() & restriction - if restricted: # could be empty if entry is not computed yet - fetch_res = restricted.fetch("state", "epoch", as_dict=True, download_path=temp_dir) - if description not in self.data: - self.data[description] = {} - for res in fetch_res: - data = self.data.get(description, {}).get(level, {}) - data[res["epoch"]] = torch.load(res["state"])["net"] - self.data[description][level] = data - level += 1 - - def _compute_pca(self, tensors): - pca = PCA(n_components=2) - pca.fit(tensors) - pca_result = pca.transform(tensors) - print( - "Explained variation per principal component: {}".format( - pca.explained_variance_ratio_ - ), - flush=True, - ) - return pca_result - - def _compute_tsne(self, tensors): - tsne = TSNE( - n_components=2, verbose=1, perplexity=40, n_iter=250, init="pca" - ) - return tsne.fit_transform(tensors) - - - def _flatten_state_dict(self, state): - parameters = [] - for param in state.values(): - parameters.append(torch.flatten(param).cpu().numpy()) - return np.concatenate(parameters) - - @plot - def plot_paths(self, fig, ax, level=0, method="pca"): - parameters = [] - labels = [] - for descr, states in self.data.items(): - states = states[level] - for epoch, state in states.items(): - parameters.append(self._flatten_state_dict(state)) - labels.append(f"{descr.name}, Seed {descr.seed}") - parameters = 
np.stack(parameters) - if method == "tsne": - result = self._compute_tsne(parameters) - else: - result = self._compute_pca(parameters) - sns.scatterplot(x=result[:,0], y=result[:,1], hue=labels, ax=ax) - diff --git a/bias_transfer/configs/dataset/__init__.py b/bias_transfer/configs/dataset/__init__.py deleted file mode 100644 index cf6ff9c..0000000 --- a/bias_transfer/configs/dataset/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .mnist import MNIST -from .mnist_ib import MNIST_IB -from .imagenet import ImageNet -from .tiny_imagenet import TinyImageNet -from bias_transfer.configs.dataset.mixins.transfer import Generated -from .regression import Regression -from .image import ImageDatasetConfig -from .base import DatasetConfig -from .mtl import MTLDatasetsConfig \ No newline at end of file diff --git a/bias_transfer/configs/dataset/image.py b/bias_transfer/configs/dataset/image.py deleted file mode 100644 index 83228d4..0000000 --- a/bias_transfer/configs/dataset/image.py +++ /dev/null @@ -1,98 +0,0 @@ -from typing import Dict, Tuple - -from bias_transfer.configs.dataset.base import DatasetConfig -from bias_transfer.tables.nnfabrik import Dataset - - -class ImageDatasetConfig(DatasetConfig): - config_name = "dataset" - table = Dataset() - fn = "bias_transfer.dataset.img_dataset_loader" - - data_mean_defaults = { - "CIFAR100": (0.5070751592371323, 0.48654887331495095, 0.4409178433670343,), - "CIFAR10": (0.49139968, 0.48215841, 0.44653091), - "SVHN": (0.4377, 0.4438, 0.4728), - "TinyImageNet_bw": (0.4519,), - "TinyImageNet": (0.4802, 0.4481, 0.3975,), - "ImageNet": (0.485, 0.456, 0.406), - "MNIST": (0.1307,), - "MNIST_color": (0.03685451, 0.0367535, 0.03952756), - "MNIST_color_easy": (0.03685451, 0.0367535, 0.03952756), - "MNIST_noise": (0.13405791,), - "MNIST_rotation": (0.0640235,), - "MNIST_translation": (0.06402363,), - "MNIST_addition": (0.06402363,), - "MNIST_clean": (0.06402363,), - "MNIST_clean_shuffle": (0.06402363,), - "FashionMNIST_color": (0.08239705, 0.09176614, 0.0904255,), - "FashionMNIST_color_shuffle": (0.08239705, 0.09176614, 0.0904255,), - "FashionMNIST_color_easy": (0.08239705, 0.09176614, 0.0904255,), - "FashionMNIST_noise": (0.19938468,), - "FashionMNIST_rotation": (0.14016011,), - "FashionMNIST_rotation_regression": (0.14016011,), - "FashionMNIST_translation": (0.1401599,), - "FashionMNIST_addition": (0.1401599,), - "FashionMNIST_clean": (0.1401599,), - "FashionMNIST_clean_shuffle": (0.1401599,), - } - data_std_defaults = { - "CIFAR100": (0.2673342858792401, 0.2564384629170883, 0.27615047132568404,), - "CIFAR10": (0.24703223, 0.24348513, 0.26158784), - "SVHN": (0.1980, 0.2010, 0.1970), - "TinyImageNet_bw": (0.2221,), - "TinyImageNet": (0.2302, 0.2265, 0.2262,), - "ImageNet": (0.229, 0.224, 0.225), - "MNIST": (0.3081,), - "MNIST_color": (0.17386045, 0.16883257, 0.1768625), - "MNIST_color_easy": (0.17386045, 0.16883257, 0.1768625), - "MNIST_noise": (0.22387815,), - "MNIST_rotation": (0.0640235,), - "MNIST_translation": (0.22534915,), - "MNIST_addition": (0.22534915,), - "MNIST_clean": (0.22534915,), - "MNIST_clean_shuffle": (0.22534915,), - "FashionMNIST_color": (0.25112887, 0.26145387, 0.26009334,), - "FashionMNIST_color_shuffle": (0.25112887, 0.26145387, 0.26009334,), - "FashionMNIST_color_easy": (0.25112887, 0.26145387, 0.26009334,), - "FashionMNIST_noise": (0.28845804,), - "FashionMNIST_rotation": (0.28369352,), - "FashionMNIST_rotation_regression": (0.28369352,), - "FashionMNIST_translation": (0.28550556,), - "FashionMNIST_addition": (0.28550556,), - 
"FashionMNIST_clean": (0.28550556,), - "FashionMNIST_clean_shuffle": (0.28550556,), - } - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.dataset_cls: str = "CIFAR10" - self.apply_augmentation: bool = True - self.apply_normalization: bool = True - self.apply_grayscale: bool = False - self.apply_noise: Dict = {} - self.convert_to_rgb: bool = False - self.input_size: int = 32 - self.add_corrupted_test: bool = False - self.add_stylized_test: bool = False - self.use_c_test_as_val: bool = False - self.show_sample: bool = False - self.filter_classes: Tuple = () # (start,end) - self.data_dir: str = "./data/image_classification/torchvision/" - self.num_workers: int = 1 - dataset_id = ( - f"{self.dataset_sub_cls}_{self.bias}" if self.bias else self.dataset_cls - ) - dataset_id += "_bw" if self.apply_grayscale else "" - self.train_data_mean: Tuple[float] = self.data_mean_defaults[dataset_id] - self.train_data_std: Tuple[float] = self.data_std_defaults[dataset_id] - - super().__init__(**kwargs) - - @property - def filters(self): - filters = [] - if self.filter_classes: - filters.append("ClassesFilter") - return filters \ No newline at end of file diff --git a/bias_transfer/configs/dataset/imagenet.py b/bias_transfer/configs/dataset/imagenet.py deleted file mode 100644 index e2aef11..0000000 --- a/bias_transfer/configs/dataset/imagenet.py +++ /dev/null @@ -1,13 +0,0 @@ -from bias_transfer.configs.dataset.image import ImageDatasetConfig - - -class ImageNet(ImageDatasetConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.dataset_cls: str = "ImageNet" - self.data_dir: str = "./data/image_classification/" - self.input_size: int = 224 - self.num_workers: int = 8 - self.valid_size: float = 0.0416 # To get ~50K (test set size) - - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/mixins/__init__.py b/bias_transfer/configs/dataset/mixins/__init__.py deleted file mode 100644 index 03019f9..0000000 --- a/bias_transfer/configs/dataset/mixins/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .transfer import Generated \ No newline at end of file diff --git a/bias_transfer/configs/dataset/mixins/transfer.py b/bias_transfer/configs/dataset/mixins/transfer.py deleted file mode 100644 index aae3bfe..0000000 --- a/bias_transfer/configs/dataset/mixins/transfer.py +++ /dev/null @@ -1,15 +0,0 @@ -from bias_transfer.configs.base import BaseConfig -from bias_transfer.tables.nnfabrik import Dataset - - -class Generated(BaseConfig): - config_name = "dataset" - table = Dataset() - fn = "bias_transfer.dataset.transferred_dataset_loader" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.train_on_reduced_data: bool = False - self.train_on_coreset: bool = False - self.load_coreset: bool = False - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/mnist.py b/bias_transfer/configs/dataset/mnist.py deleted file mode 100644 index e18c404..0000000 --- a/bias_transfer/configs/dataset/mnist.py +++ /dev/null @@ -1,9 +0,0 @@ -from bias_transfer.configs.dataset.image import ImageDatasetConfig - - -class MNIST(ImageDatasetConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.dataset_cls: str = "MNIST" - self.input_size: int = 28 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/mnist_ib.py b/bias_transfer/configs/dataset/mnist_ib.py deleted file mode 100644 index c67caca..0000000 --- a/bias_transfer/configs/dataset/mnist_ib.py +++ /dev/null @@ -1,15 +0,0 @@ -from 
bias_transfer.configs.dataset.image import ImageDatasetConfig - - -class MNIST_IB(ImageDatasetConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.dataset_cls = "MNIST-IB" - self.input_size: int = 40 if self.bias != "addition" else 80 - self.convert_to_rgb: bool = False - self.bias: str = "clean" - self.dataset_sub_cls: str = "FashionMNIST" # could also be MNIST - self.apply_data_normalization: bool = False - self.apply_data_augmentation: bool = False - self.add_corrupted_test: bool = False - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/mtl.py b/bias_transfer/configs/dataset/mtl.py deleted file mode 100644 index 98a3331..0000000 --- a/bias_transfer/configs/dataset/mtl.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Dict - -from bias_transfer.configs.dataset.base import DatasetConfig -from bias_transfer.tables.nnfabrik import Dataset - - -class MTLDatasetsConfig(DatasetConfig): - config_name = "dataset" - table = Dataset() - fn = "bias_transfer.dataset.mtl_datasets_loader" - - def __init__(self, sub_configs, **kwargs): - self.load_kwargs(**kwargs) - self.sub_configs = sub_configs - super().__init__(**kwargs) - - # super().__init__(**kwargs) - # self.neural_dataset_dict = kwargs.pop("neural_dataset_dict", {}) - # self.neural_dataset_config = NeuralDatasetConfig( - # **self.neural_dataset_dict - # ).to_dict() - # self.img_dataset_dict = kwargs.pop("img_dataset_dict", {}) - # self.img_dataset_config = ImageDatasetConfig(**self.img_dataset_dict).to_dict() - # - # self.update(**kwargs) - - def items(self): - return self.sub_configs.items() - - def values(self): - return self.sub_configs.values() - - def keys(self): - return self.sub_configs.keys() - - def __getitem__(self, item): - return self.sub_configs[item] - - @classmethod - def from_dict(cls, config_dict: Dict) -> "MTLDatasetsConfig": - """ - Constructs a `Config` from a Python dictionary of parameters. - - Args: - config_dict (:obj:`Dict[str, any]`): - Dictionary that will be used to instantiate the configuration object. Such a dictionary can be retrieved - from a pre-trained checkpoint by leveraging the :func:`~transformers.PretrainedConfig.get_config_dict` - method. - Returns: - :class:`MTLDatasetConfig`: An instance of a configuration object - """ - sub_configs = {} - for name, conf in config_dict.items(): - dataset_cls = next(iter(conf.keys())) - sub_configs[name] = globals()[dataset_cls].from_dict(conf[dataset_cls]) - return cls(sub_configs) - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. 
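The from_dict/to_dict pair of the deleted MTLDatasetsConfig round-trips each sub-config under its class name, i.e. {name: {ClassName: config_dict}}. A stand-alone sketch of that nesting, with a simplified ToyConfig and an explicit registry standing in for the repo's config classes and the globals() lookup used above:

class ToyConfig:
    # simplified stand-in for the repo's dataset config classes
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def to_dict(self):
        return dict(self.__dict__)

    @classmethod
    def from_dict(cls, d):
        return cls(**d)

REGISTRY = {"ToyConfig": ToyConfig}  # stands in for the globals() lookup

def mtl_from_dict(config_dict):
    sub_configs = {}
    for name, conf in config_dict.items():
        cls_name = next(iter(conf))  # the class name is the single key
        sub_configs[name] = REGISTRY[cls_name].from_dict(conf[cls_name])
    return sub_configs

def mtl_to_dict(sub_configs):
    return {name: {c.__class__.__name__: c.to_dict()} for name, c in sub_configs.items()}

nested = {"img": {"ToyConfig": {"dataset_cls": "TinyImageNet"}},
          "neural": {"ToyConfig": {"dataset": "CSRF19_V1"}}}
assert mtl_to_dict(mtl_from_dict(nested)) == nested  # round trip is lossless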
- - Returns: - :obj:`Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = {} - for name, conf in self.sub_configs.items(): - output[name] = {conf.__class__.__name__: conf.to_dict()} - return output \ No newline at end of file diff --git a/bias_transfer/configs/dataset/neural.py b/bias_transfer/configs/dataset/neural.py deleted file mode 100644 index e1e0cf6..0000000 --- a/bias_transfer/configs/dataset/neural.py +++ /dev/null @@ -1,19 +0,0 @@ -from bias_transfer.configs.dataset.base import DatasetConfig -from bias_transfer.tables.nnfabrik import Dataset - - -class NeuralDatasetConfig(DatasetConfig): - config_name = "dataset" - table = Dataset() - fn = "bias_transfer.dataset.neural_dataset_loader" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.train_frac = 0.8 - self.dataset = "CSRF19_V1" - self.data_dir = "./data/monkey/toliaslab/{}".format(self.dataset) - self.seed = 1000 - self.subsample = 1 - self.crop = 70 - self.time_bins_sum = 12 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/regression.py b/bias_transfer/configs/dataset/regression.py deleted file mode 100644 index b94265d..0000000 --- a/bias_transfer/configs/dataset/regression.py +++ /dev/null @@ -1,18 +0,0 @@ -from bias_transfer.configs.dataset.base import DatasetConfig -from bias_transfer.tables.nnfabrik import Dataset - - -class Regression(DatasetConfig): - config_name = "dataset" - table = Dataset() - fn = "bias_transfer.dataset.regression_dataset_loader" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.dataset_cls: str = "co2" - self.apply_normalization: bool = False - self.apply_noise: bool = False - self.input_size: int = 32 - self.num_workers: int = 0 - self.train_range: int = 10 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/dataset/tiny_imagenet.py b/bias_transfer/configs/dataset/tiny_imagenet.py deleted file mode 100644 index 061c1e9..0000000 --- a/bias_transfer/configs/dataset/tiny_imagenet.py +++ /dev/null @@ -1,12 +0,0 @@ -from bias_transfer.configs.dataset.image import ImageDatasetConfig - - -class TinyImageNet(ImageDatasetConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.dataset_cls: str = "TinyImageNet" - self.data_dir: str = "./data/image_classification/" - self.input_size: int = 64 - self.num_workers: int = 2 - self.valid_size: int = 0.1 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/__init__.py b/bias_transfer/configs/model/__init__.py deleted file mode 100644 index 9ce5c8f..0000000 --- a/bias_transfer/configs/model/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .regression import Regression -from .classification import Classification -from .imagenet import ImageNet, TinyImageNet -from .cifar import CIFAR10, CIFAR100 -from .mnist import MNIST, MNISTIB -from .svhn import SVHN -from .mtl import MTL diff --git a/bias_transfer/configs/model/cifar.py b/bias_transfer/configs/model/cifar.py deleted file mode 100644 index 1d1a802..0000000 --- a/bias_transfer/configs/model/cifar.py +++ /dev/null @@ -1,19 +0,0 @@ -from bias_transfer.configs.model.classification import Classification - - -class CIFAR100(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.input_channels: int = 3 - self.input_size: int = 32 - self.num_classes: int = 100 - super().__init__(**kwargs) - - -class CIFAR10(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.input_channels: int = 3 - 
self.input_size: int = 32 - self.num_classes: int = 10 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/classification.py b/bias_transfer/configs/model/classification.py deleted file mode 100644 index 6769324..0000000 --- a/bias_transfer/configs/model/classification.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Dict, Tuple - -from .base import ModelConfig - - -class Classification(ModelConfig): - fn = "bias_transfer.models.classification_model_builder" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.type: str = "resnet50" - self.conv_stem_kernel_size: int = 3 - self.conv_stem_padding: int = 1 - self.conv_stem_stride: int = 1 - self.core_stride: int = 1 - self.max_pool_after_stem: bool = False - self.advanced_init: bool = False - self.zero_init_residual: bool = False - self.adaptive_pooling: bool = False - self.avg_pool: bool = False - - # resnet specific - self.noise_adv_classification: bool = False - self.noise_adv_regression: bool = False - self.num_noise_readout_layers: int = 1 - self.noise_sigmoid_output: bool = self.noise_adv_classification - # vgg specific - self.pretrained: bool = False - self.pretrained_path: str = "" - self.readout_type: str = "dense" - self.add_buffer: Tuple = () - super().__init__(**kwargs) \ No newline at end of file diff --git a/bias_transfer/configs/model/imagenet.py b/bias_transfer/configs/model/imagenet.py deleted file mode 100644 index 80e890b..0000000 --- a/bias_transfer/configs/model/imagenet.py +++ /dev/null @@ -1,29 +0,0 @@ -from bias_transfer.configs.model.classification import Classification - - -class ImageNet(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.num_classes: int = 1000 - self.input_size: int = 224 - self.input_channels: int = 3 - self.conv_stem_kernel_size: int = 7 - self.conv_stem_padding: int = 3 - self.conv_stem_stride: int = 2 - self.max_pool_after_stem: bool = True - self.advanced_init: bool = True - self.zero_init_residual: bool = True - self.adaptive_pooling: bool = True - self.avg_pool: bool = True - super().__init__(**kwargs) - - -class TinyImageNet(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.num_classes = 200 - self.input_size = 64 - self.input_channels: int = 3 - self.core_stride = 2 - self.conv_stem_kernel_size = 5 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/mnist.py b/bias_transfer/configs/model/mnist.py deleted file mode 100644 index cd8304c..0000000 --- a/bias_transfer/configs/model/mnist.py +++ /dev/null @@ -1,24 +0,0 @@ -from bias_transfer.configs.model.classification import Classification - - -class MNIST(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.type: str = "lenet5" - self.num_classes: int = 10 - self.input_size: int = 28 - self.input_channels: int = 1 - self.comment = f"MNIST {self.type}" - super().__init__(**kwargs) - - -class MNISTIB(MNIST): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.bias: str = "clean" - self.type: str = "lenet300-100" if self.bias == "translation" else "lenet5" - self.num_classes: int = 1 if "regression" in self.bias else 10 - self.input_size: int = 80 if self.bias == "addition" else 40 - self.input_channels: int = 3 if "color" in self.bias else 1 - self.comment = f"MNIST-IB {self.bias} {self.type}" - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/mtl.py b/bias_transfer/configs/model/mtl.py deleted file mode 100644 index d6616bc..0000000 --- 
a/bias_transfer/configs/model/mtl.py +++ /dev/null @@ -1,31 +0,0 @@ -from .base import ModelConfig - - -class MTL(ModelConfig): - fn = "bias_transfer.models.mtl_builder" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.vgg_type = kwargs.pop("vgg_type", "vgg19_bn") - self.classification = kwargs.pop("classification", False) - self.classification_readout_type = kwargs.pop( - "classification_readout_type", None - ) - self.input_size = kwargs.pop("input_size", None) - self.num_classes = kwargs.pop("num_classes", 200) - self.pretrained = kwargs.pop("pretrained", True) - - self.v1_model_layer = kwargs.pop("v1_model_layer", 17) - self.neural_input_channels = kwargs.pop("neural_input_channels", 1) - self.v1_fine_tune = kwargs.pop("v1_fine_tune", False) - self.v1_init_mu_range = kwargs.pop("v1_init_mu_range", 0.3) - self.v1_init_sigma_range = kwargs.pop("v1_init_sigma_range", 0.6) - self.v1_readout_bias = kwargs.pop("v1_readout_bias", True) - self.v1_bias = kwargs.pop("v1_bias", True) - self.v1_final_batchnorm = kwargs.pop("v1_final_batchnorm", False) - self.v1_gamma_readout = kwargs.pop("v1_gamma_readout", 0.5) - self.v1_elu_offset = kwargs.pop("v1_elu_offset", -1) - self.classification_input_channels = kwargs.pop( - "classification_input_channels", 1 - ) - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/neural.py b/bias_transfer/configs/model/neural.py deleted file mode 100644 index f19cf9e..0000000 --- a/bias_transfer/configs/model/neural.py +++ /dev/null @@ -1,20 +0,0 @@ -from .base import ModelConfig - - -class Neural(ModelConfig): - fn = "bias_transfer.models.neural_cnn_builder" - - @baseline - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.readout_type = kwargs.pop("readout_type", "point") - if self.readout_type == "point": - self.hidden_dilation = kwargs.pop("hidden_dilation", 2) - self.se_reduction = kwargs.pop("se_reduction", 16) - self.input_kern = kwargs.pop("input_kern", 24) - self.hidden_kern = kwargs.pop("hidden_kern", 9) - self.depth_separable = kwargs.pop("depth_separable", True) - self.stack = kwargs.pop("stack", -1) - self.n_se_blocks = kwargs.pop("n_se_blocks", 2) - self.gamma_readout = kwargs.pop("gamma_readout", 0.5) - self.gamma_input = kwargs.pop("gamma_input", 10) \ No newline at end of file diff --git a/bias_transfer/configs/model/regression.py b/bias_transfer/configs/model/regression.py deleted file mode 100644 index 77ca298..0000000 --- a/bias_transfer/configs/model/regression.py +++ /dev/null @@ -1,15 +0,0 @@ -from .base import ModelConfig - - -class Regression(ModelConfig): - fn = "bias_transfer.models.regression_model_builder" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.type: str = "fc" - self.input_size: int = 1 - self.output_size: int = 1 - self.layer_size: int = 100 - self.num_layers: int = 4 - self.activation: str = "sigmoid" - super().__init__(**kwargs) diff --git a/bias_transfer/configs/model/svhn.py b/bias_transfer/configs/model/svhn.py deleted file mode 100644 index 3aa3df2..0000000 --- a/bias_transfer/configs/model/svhn.py +++ /dev/null @@ -1,10 +0,0 @@ -from bias_transfer.configs.model.classification import Classification - - -class SVHN(Classification): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - self.input_size: int = 32 - self.num_classes: int = 10 - self.input_channels: int = 3 - super().__init__(**kwargs) diff --git a/bias_transfer/configs/trainer/__init__.py b/bias_transfer/configs/trainer/__init__.py deleted file mode 100644 index 41ca10c..0000000 
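The deleted MTL and Neural model configs above rely on the kwargs.pop(option, default) idiom: every option gets an inline default and whatever remains is forwarded to the parent config. A small sketch of that pattern with hypothetical stand-in classes (not the repo's ModelConfig base):

class ToyBaseConfig:
    def __init__(self, **kwargs):
        self.extra = kwargs  # whatever was not popped by subclasses lands here

class ToyMTLConfig(ToyBaseConfig):
    def __init__(self, **kwargs):
        self.vgg_type = kwargs.pop("vgg_type", "vgg19_bn")
        self.num_classes = kwargs.pop("num_classes", 200)
        self.v1_model_layer = kwargs.pop("v1_model_layer", 17)
        super().__init__(**kwargs)

cfg = ToyMTLConfig(num_classes=100, comment="mtl run")
print(cfg.vgg_type, cfg.num_classes, cfg.extra)  # vgg19_bn 100 {'comment': 'mtl run'}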
--- a/bias_transfer/configs/trainer/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .regression import Regression -from .classification import Classification -from .base import TrainerConfig -from . import mixins \ No newline at end of file diff --git a/bias_transfer/configs/trainer/classification.py b/bias_transfer/configs/trainer/classification.py deleted file mode 100644 index 28da989..0000000 --- a/bias_transfer/configs/trainer/classification.py +++ /dev/null @@ -1,15 +0,0 @@ -from bias_transfer.configs.trainer.base import TrainerConfig - - -class Classification(TrainerConfig): - fn = "bias_transfer.trainer.img_classification" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.maximize: bool = True # if stop_function maximized or minimized - self.eval_with_bn_train: bool = False - - super(Classification, self).__init__(**kwargs) - - diff --git a/bias_transfer/configs/trainer/mixins/__init__.py b/bias_transfer/configs/trainer/mixins/__init__.py deleted file mode 100644 index d916310..0000000 --- a/bias_transfer/configs/trainer/mixins/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .lottery_ticket import LotteryTicketMixin -from .transfer import DataGenerationMixin, TransferMixin -from .noise import ( - NoiseAdversarialMixin, - RepresentationMatchingMixin, - RepresentationMonitorMixin, - NoiseAugmentationMixin, -) diff --git a/bias_transfer/configs/trainer/mixins/lottery_ticket.py b/bias_transfer/configs/trainer/mixins/lottery_ticket.py deleted file mode 100644 index c7b01f5..0000000 --- a/bias_transfer/configs/trainer/mixins/lottery_ticket.py +++ /dev/null @@ -1,17 +0,0 @@ -from typing import Dict - -from bias_transfer.configs.base import BaseConfig - - -class LotteryTicketMixin(BaseConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.lottery_ticket: Dict = {} - if self.lottery_ticket: - self.max_iter = self.lottery_ticket.get( - "rounds", 1 - ) * self.lottery_ticket.get("round_length", 100) - self.main_loop_modules.append("LotteryTicketPruning") - - super().__init__(**kwargs) diff --git a/bias_transfer/configs/trainer/mixins/transfer.py b/bias_transfer/configs/trainer/mixins/transfer.py deleted file mode 100644 index 5c5d17c..0000000 --- a/bias_transfer/configs/trainer/mixins/transfer.py +++ /dev/null @@ -1,69 +0,0 @@ -from typing import Dict, Tuple - -from bias_transfer.configs.base import BaseConfig - - -class DataGenerationMixin(BaseConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.data_transfer: bool = False - self.save_input: bool = False - self.save_representation: bool = False - self.compute_fisher: Dict = { - "DEFAULT EMPTY": True, # will turn into an empty dict - "num_samples": 1024, - "empirical": True, - } - self.compute_si_omega: Dict = { - "DEFAULT EMPTY": True, # will turn into an empty dict - "damping_factor": 0.0001, - } - self.compute_covariance: bool = False - self.extract_coreset: Dict = {} - self.reset_for_new_task: bool = False - - super().__init__(**kwargs) - - -class TransferMixin(BaseConfig): - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.data_transfer: bool = False - self.scale_loss_with_arctanh: bool = False - self.synaptic_intelligence_computation: bool = False - self.freeze = None - self.freeze_bn: bool = False - self.transfer_restriction: Tuple = () - self.transfer_after_train: bool = False - self.single_input_stream: bool = True - self.readout_name: str = "fc" - self.reset: Tuple = () - self.reset_linear_frequency = None - self.regularization: Dict = { - 
"DEFAULT EMPTY": True, # will turn into an empty dict - "regularizer": "L2SP/Mixup/RDL/KnowledgeDistillation", - "alpha": 1.0, - "decay_alpha": True, - } - - super().__init__(**kwargs) - - def conditional_assignment(self): - if ( - self.reset_linear_frequency - and not "RandomReadoutReset" in self.main_loop_modules - ): - self.main_loop_modules.append("RandomReadoutReset") - if ( - self.synaptic_intelligence_computation - and not "SynapticIntelligence" in self.main_loop_modules - ): - self.main_loop_modules.append("SynapticIntelligence") - if ( - self.regularization - and not self.regularization["regularizer"] in self.main_loop_modules - ): - self.main_loop_modules.append(self.regularization["regularizer"]) - super().conditional_assignment() diff --git a/bias_transfer/configs/trainer/regression.py b/bias_transfer/configs/trainer/regression.py deleted file mode 100644 index e9480ab..0000000 --- a/bias_transfer/configs/trainer/regression.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Dict - -from bias_transfer.configs.trainer.base import TrainerConfig -from bias_transfer.tables.nnfabrik import Trainer - - -class Regression(TrainerConfig): - config_name = "trainer" - table = Trainer() - fn = "bias_transfer.trainer.regression" - - def __init__(self, **kwargs): - self.load_kwargs(**kwargs) - - self.loss_functions: Dict = {"regression": "MSELoss"} - self.maximize: bool = False - self.noise_test: Dict = {} - self.apply_noise_to_validation: bool = False - self.show_epoch_progress: bool = False - - super().__init__(**kwargs) diff --git a/bias_transfer/dataset/MNIST_IB/__init__.py b/bias_transfer/dataset/MNIST_IB/__init__.py deleted file mode 100644 index 08f6d95..0000000 --- a/bias_transfer/dataset/MNIST_IB/__init__.py +++ /dev/null @@ -1,123 +0,0 @@ -import os -from pathlib import Path - -import torch -import numpy as np - -from torchvision.datasets import MNIST, FashionMNIST, EMNIST, KMNIST, QMNIST -from torchvision import transforms - -from nnfabrik.utility.nn_helpers import set_random_seed -from .addition import apply_additon -from .expansion import apply_expansion -from .noise import apply_gaussian_noise -from .color import apply_color, get_color_codes -from .translation import apply_translation -from .rotation import apply_rotation -from .shuffle import apply_label_shuffle - - -def generate_dataset(data_loader, transform_fs=(), options=()): - new_ds_source = [] - new_ds_target = [] - for source, target in data_loader: - source = source.detach().numpy() - target = target.detach().numpy() - for t, transform_f in enumerate(transform_fs): - if transform_f is None: - continue - source, target = transform_f(source, target, **options[t]) - new_ds_source.append(source) - new_ds_target.append(target) - new_ds_source = np.concatenate(new_ds_source) - new_ds_target = np.concatenate(new_ds_target) - return new_ds_source, new_ds_target - - -bias_dict = { - "color": ( - apply_color, - { - "cfg_means": get_color_codes(), - "cbg_means": get_color_codes(), - "bg": False, - "fg": True, - "color_variance": 0.02, - }, - ), - "color_easy": ( - apply_color, - { - "cfg_means": get_color_codes(), - "cbg_means": get_color_codes(), - "bg": False, - "fg": True, - "color_variance": 0.00, - }, - ), - "color_shuffle": ( - apply_color, - { - "cfg_means": get_color_codes(), - "cbg_means": get_color_codes(), - "bg": False, - "fg": True, - "color_variance": 0.02, - "shuffle": True, - }, - ), - "noise": (apply_gaussian_noise, {"severity": -1}), # random - "translation": (apply_translation, {"std": 5}), - "rotation": 
(apply_rotation, {}), - "rotation_regression": (apply_rotation, {"regression": True}), - "addition": (apply_additon, {}), - "clean": (None, {}), - "clean_shuffle": (apply_label_shuffle, {}), -} - - -def generate_and_save( - bias: str, - base_path: str = "/work/data/image_classification/torchvision/", - bias_options_: dict = None, - dataset: str = "MNIST", -): - set_random_seed(42) - write_path = os.path.join(base_path, f"{dataset}-IB") - Path(write_path).mkdir(parents=True, exist_ok=True) - if ( - os.path.isfile(os.path.join(write_path, f"{bias}_train_source.npy")) - and os.path.isfile(os.path.join(write_path, f"{bias}_train_target.npy")) - and os.path.isfile(os.path.join(write_path, f"{bias}_test_source.npy")) - and os.path.isfile(os.path.join(write_path, f"{bias}_test_target.npy")) - ): - return - apply_bias, bias_options = bias_dict[bias] - bias_options = bias_options_ if bias_options_ is not None else bias_options - transform = transforms.Compose([transforms.ToTensor(),]) - train = globals().get(dataset)( - root=base_path, train=True, download=True, transform=transform, - ) - test = globals().get(dataset)( - root=base_path, train=False, download=True, transform=transform, - ) - train_loader = torch.utils.data.DataLoader( - train, batch_size=64, shuffle=False, - ) - test_loader = torch.utils.data.DataLoader( - test, batch_size=64, shuffle=False, - ) - train_ds = generate_dataset( - data_loader=train_loader, - transform_fs=(apply_expansion, apply_bias), - options=({}, bias_options), - ) - test_ds = generate_dataset( - data_loader=test_loader, - transform_fs=(apply_expansion, apply_bias), - options=({}, bias_options), - ) - np.save(os.path.join(write_path, f"{bias}_train_source.npy"), train_ds[0]) - np.save(os.path.join(write_path, f"{bias}_train_target.npy"), train_ds[1]) - np.save(os.path.join(write_path, f"{bias}_test_source.npy"), test_ds[0]) - np.save(os.path.join(write_path, f"{bias}_test_target.npy"), test_ds[1]) diff --git a/bias_transfer/dataset/MNIST_IB/addition.py b/bias_transfer/dataset/MNIST_IB/addition.py deleted file mode 100644 index ffa6c86..0000000 --- a/bias_transfer/dataset/MNIST_IB/addition.py +++ /dev/null @@ -1,9 +0,0 @@ -import numpy as np - - -def apply_additon(source, target): - second_summand = np.arange(source.shape[0]) - np.random.shuffle(second_summand) - concat_source = np.concatenate([source, source[second_summand]], axis=3) - summed_targets = target + target[second_summand] - return concat_source, summed_targets \ No newline at end of file diff --git a/bias_transfer/dataset/MNIST_IB/color.py b/bias_transfer/dataset/MNIST_IB/color.py deleted file mode 100644 index 9c8b698..0000000 --- a/bias_transfer/dataset/MNIST_IB/color.py +++ /dev/null @@ -1,71 +0,0 @@ -import numpy as np - - -# code adapted from https://github.com/salesforce/corr_based_prediction/blob/master/gen_color_mnist.py -# procedure following https://arxiv.org/pdf/1812.10352.pdf -# they variaed color_variance between 0.05 and 0.02 (in 0.005 steps) - -class_color_means = [ - [60, 180, 75], # green - [255, 255, 25], # yellow - [0, 130, 200], # blue - [245, 130, 48], # orange - [70, 240, 240], # cyan - [240, 50, 230], # magenta - [230, 25, 75], # red - [0, 0, 128], # navy - [220, 190, 255], # lavender - [255, 250, 200], # beige -] -nb_classes = 10 - - -def get_color_codes(): - # C = np.random.rand(nb_classes,3) - C = np.asarray(class_color_means) - C = C / np.max(C, axis=1)[:, None] - return C - - -def get_std_color(means, targets, var): - mean = means[targets].reshape((-1)) - cov = var * 
np.eye(mean.shape[0]) - c = np.random.multivariate_normal(mean=mean, cov=cov) - c = c.reshape(targets.shape[0], 3, 1, 1) - return c - - -def apply_color( - x, - targets, - cfg_means=None, - cbg_means=None, - fg=True, - bg=False, - color_variance=0.0, - shuffle=False, -): - assert ( - len(x.shape) == 4 - ), "Something is wrong, size of input x should be 4 dimensional (B x C x H x W; perhaps number of channels is degenrate? If so, it should be 1)" - xs = x.shape - x = (((x * 255) > 10) * 255).astype(np.float) # thresholding to separate fg and bg - x_rgb = np.ones((xs[0], 3, xs[2], xs[3])).astype(np.float) - x_rgb = x_rgb * x - targets_ = np.copy(targets) - if shuffle: - np.random.shuffle(targets) # to generate cue-conflict by assigning wrong colors - if fg: - x_rgb_fg = 1.0 * x_rgb - x_rgb_fg *= get_std_color(cfg_means, targets, color_variance) - else: - x_rgb_fg = np.zeros_like(x_rgb) - if bg: - x_rgb_bg = 255 - x_rgb - x_rgb_bg *= get_std_color(cbg_means, targets, color_variance) - else: - x_rgb_bg = np.zeros_like(x_rgb) - x_rgb = x_rgb_fg + x_rgb_bg - x_rgb = np.clip(x_rgb, a_min=0.0, a_max=255.0) - color_data_x = x_rgb / 255.0 - return color_data_x, targets_ diff --git a/bias_transfer/dataset/MNIST_IB/expansion.py b/bias_transfer/dataset/MNIST_IB/expansion.py deleted file mode 100644 index 3cf89b8..0000000 --- a/bias_transfer/dataset/MNIST_IB/expansion.py +++ /dev/null @@ -1,8 +0,0 @@ -import numpy as np - - -def apply_expansion(source, target): - orig_shape = source.shape - expanded_batch = np.zeros((orig_shape[0], 1, 40, 40)) - expanded_batch[:, :, 6:-6, 6:-6] = source - return expanded_batch, target \ No newline at end of file diff --git a/bias_transfer/dataset/MNIST_IB/noise.py b/bias_transfer/dataset/MNIST_IB/noise.py deleted file mode 100644 index 76131ce..0000000 --- a/bias_transfer/dataset/MNIST_IB/noise.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np - - -def apply_gaussian_noise(batch, targets, severity=1): - if severity == -1: - severity = np.random.randint(1,6) - # adapted from https://github.com/google-research/mnist-c - c = [0.08, 0.12, 0.18, 0.26, 0.38][severity - 1] - return ( - np.clip(batch + np.random.normal(size=batch.shape, scale=c), 0, 1), - targets, - ) diff --git a/bias_transfer/dataset/MNIST_IB/plot.py b/bias_transfer/dataset/MNIST_IB/plot.py deleted file mode 100644 index 7926818..0000000 --- a/bias_transfer/dataset/MNIST_IB/plot.py +++ /dev/null @@ -1,21 +0,0 @@ -import matplotlib.pyplot as plt - - -def plot_batch(batch, targets, n_rows, n_cols, name="", file_type="png"): - batch = batch.transpose(0, 2, 3, 1) - fig, axs = plt.subplots(n_rows, n_cols) - if n_rows == 1: - axs = [axs] - for r in range(n_rows): - for c in range(n_cols): - axs[r][c].imshow(batch[r * n_cols + c].squeeze()) - axs[r][c].set_title(int(targets[r * n_cols + c])) - axs[r][c].set_axis_off() - plt.show() - if name: - fig.savefig( - name + "." 
+ file_type, - facecolor=fig.get_facecolor(), - edgecolor=fig.get_edgecolor(), - bbox_inches="tight", - ) diff --git a/bias_transfer/dataset/MNIST_IB/rotation.py b/bias_transfer/dataset/MNIST_IB/rotation.py deleted file mode 100644 index 502824e..0000000 --- a/bias_transfer/dataset/MNIST_IB/rotation.py +++ /dev/null @@ -1,11 +0,0 @@ -import numpy as np -from scipy import ndimage - - -def apply_rotation(source, target, regression=False): - angles = np.random.uniform(0, 360, source.shape[0]) - for i in range(source.shape[0]): - source[i] = ndimage.rotate(source[i], angles[i], reshape=False, axes=(1, 2)) - if regression: - target = angles - return source, target diff --git a/bias_transfer/dataset/MNIST_IB/run_generation.py b/bias_transfer/dataset/MNIST_IB/run_generation.py deleted file mode 100644 index ff97f8a..0000000 --- a/bias_transfer/dataset/MNIST_IB/run_generation.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import numpy as np -from . import generate_and_save - - -def main(dataset="FashionMNIST"): - for bias in ["clean", - "color", - "color_shuffle", - "translation", - "rotation", - "rotation_regression", - "noise", - # "addition" - ]: - generate_and_save( - bias, base_path="/work/data/image_classification/torchvision/",dataset=dataset - ) - train_tensor = np.load( - os.path.join( - f"/work/data/image_classification/torchvision/{dataset}-IB", - f"{bias}_train_source.npy", - ) - ) - mean = np.mean(train_tensor, axis=(0, 2, 3)) - std = np.std(train_tensor, axis=(0, 2, 3)) - print(f"Saved {dataset}-{bias} with mean {mean} and std {std}") - - -if __name__ == "__main__": - main() diff --git a/bias_transfer/dataset/MNIST_IB/shuffle.py b/bias_transfer/dataset/MNIST_IB/shuffle.py deleted file mode 100644 index 7cb8631..0000000 --- a/bias_transfer/dataset/MNIST_IB/shuffle.py +++ /dev/null @@ -1,6 +0,0 @@ -import numpy as np - - -def apply_label_shuffle(source, target): - np.random.shuffle(target) # to make this dataset random - return source, target diff --git a/bias_transfer/dataset/MNIST_IB/translation.py b/bias_transfer/dataset/MNIST_IB/translation.py deleted file mode 100644 index e906e28..0000000 --- a/bias_transfer/dataset/MNIST_IB/translation.py +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np - - -def apply_translation(source, target, std=5): - # 40x40 to follow https://www.cs.toronto.edu/~tijmen/affNIST/ for translation - offsets = np.clip( - np.random.normal(scale=std, size=source.shape[0] * 2), a_min=-6, a_max=6 - ) - offsets = offsets.astype(np.int) - x_offset, y_offset = offsets[: source.shape[0]], offsets[source.shape[0]:] - for b in range(source.shape[0]): - source[b, 0, :, :] = np.roll( - source[b, 0, :, :], (y_offset[b], x_offset[b]), axis=(0, 1) - ) - return source, target diff --git a/bias_transfer/dataset/__init__.py b/bias_transfer/dataset/__init__.py deleted file mode 100644 index 0949899..0000000 --- a/bias_transfer/dataset/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .img_dataset_loader import img_dataset_loader -from .neural_dataset_loader import neural_dataset_loader -from .mtl_datasets_loader import mtl_datasets_loader -from .regression_dataset_loader import regression_dataset_loader -from .transferred_dataset_loader import transferred_dataset_loader diff --git a/bias_transfer/dataset/img_dataset_loader.py b/bias_transfer/dataset/img_dataset_loader.py deleted file mode 100644 index a3446db..0000000 --- a/bias_transfer/dataset/img_dataset_loader.py +++ /dev/null @@ -1,434 +0,0 @@ -import os -import numpy as np -import torch -import torchvision -import 
torchvision.transforms as transforms -from torch.utils.data.dataset import ConcatDataset, Subset -from torch.utils.data.sampler import SubsetRandomSampler -from torchvision import datasets -from bias_transfer.configs.dataset import ImageDatasetConfig -from .MNIST_IB import generate_and_save -from .dataset_classes.pkl_dataset import PklDataset -from .dataset_classes.npy_dataset import NpyDataset -from .utils import ( - get_dataset, - create_ImageFolder_format, -) - -DATASET_URLS = { - "TinyImageNet": "http://cs231n.stanford.edu/tiny-imagenet-200.zip", - "CIFAR10-Semisupervised": "1LTw3Sb5QoiCCN-6Y5PEKkq9C9W60w-Hi", - "CIFAR10-C": "https://zenodo.org/record/2535967/files/CIFAR-10-C.tar", - "CIFAR100-C": "https://zenodo.org/record/3555552/files/CIFAR-100-C.tar", - "TinyImageNet-C": "https://zenodo.org/record/2536630/files/Tiny-ImageNet-C.tar", - "TinyImageNet-ST": "https://informatikunihamburgde-my.sharepoint.com/:u:/g/personal/shahd_safarani_informatik_uni-hamburg_de/EZhUKKVXTvRHlqi2HXHaIjEBLmAv4tQP8olvdGNRoWrPqA?e=8kSrHI&download=1", - "ImageNet": None, - "ImageNet-C": { - "blur": "https://zenodo.org/record/2235448/files/blur.tar", - "digital": "https://zenodo.org/record/2235448/files/digital.tar", - "extra": "https://zenodo.org/record/2235448/files/extra.tar", - "noise": "https://zenodo.org/record/2235448/files/noise.tar", - "weather": "https://zenodo.org/record/2235448/files/weather.tar", - }, -} - - -def img_dataset_loader(seed, **config): - """ - Utility function for loading and returning train and valid - multi-process iterators over the CIFAR-10 dataset. A sample - 9x9 grid of the images can be optionally displayed. - If using CUDA, num_workers should be set to 1 and pin_memory to True. - Params - ------ - - data_dir: path directory to the dataset. - - batch_size: how many samples per batch to load. - - augment: whether to apply the data augmentation scheme - mentioned in the paper. Only applied on the train split. - - seed: fix seed for reproducibility. - - valid_size: percentage split of the training set used for - the validation set. Should be a float in the range [0, 1]. - - shuffle: whether to shuffle the train/validation indices. - - show_sample: plot 9x9 sample grid of the dataset. - - num_workers: number of subprocesses to use when loading the dataset. - - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to - True if using GPU. - Returns - ------- - - train_loader: training set iterator. - - valid_loader: validation set iterator. - """ - config = ImageDatasetConfig.from_dict(config) - print("Loading dataset: {}".format(config.dataset_cls)) - torch.manual_seed(seed) - np.random.seed(seed) - - transform_test, transform_train, transform_val = get_transforms(config) - - error_msg = "[!] valid_size should be in the range [0, 1]." 
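The docstring above describes valid_size as the fraction of the training set held out for validation; further down, the deleted loader realizes this by shuffling the training indices once and handing the two halves to SubsetRandomSampler. A self-contained sketch of that split, with a random TensorDataset standing in for the torchvision dataset:

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

seed, valid_size, batch_size = 42, 0.1, 64
dataset = TensorDataset(torch.randn(1000, 3, 32, 32), torch.randint(0, 10, (1000,)))

indices = list(range(len(dataset)))
split = int(np.floor(valid_size * len(dataset)))
np.random.seed(seed)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_loader = DataLoader(dataset, batch_size=batch_size,
                          sampler=SubsetRandomSampler(train_idx), shuffle=False)
valid_loader = DataLoader(dataset, batch_size=batch_size,
                          sampler=SubsetRandomSampler(valid_idx), shuffle=False)
print(len(train_idx), len(valid_idx))  # 900 100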
- assert (config.valid_size >= 0) and (config.valid_size <= 1), error_msg - - ( - train_dataset, - valid_dataset, - test_dataset, - c_test_datasets, - st_test_dataset, - ) = get_datasets(config, transform_test, transform_train, transform_val) - - filters = [globals().get(f)(config, train_dataset) for f in config.filters] - datasets_ = [train_dataset, valid_dataset, test_dataset] - if config.add_corrupted_test: - for c_ds in c_test_datasets.values(): - datasets_ += list(c_ds.values()) - for ds in datasets_: - for filt in filters: - filt.apply(ds) - - data_loaders = get_data_loaders( - st_test_dataset, - c_test_datasets, - config, - seed, - test_dataset, - train_dataset, - valid_dataset, - ) - - return data_loaders - - -def get_transforms(config): - if config.dataset_cls == "ImageNet": - transform_train = [ - transforms.RandomResizedCrop(config.input_size) - if config.apply_augmentation - else None, - transforms.RandomHorizontalFlip() if config.apply_augmentation else None, - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - transform_val = [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - transform_test = [ # TODO: we don't need resizing + cropping for IN-C! - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - else: - transform_train = [ - transforms.ToPILImage() - if config.dataset_cls == "CIFAR10-Semisupervised" - or config.dataset_cls == "MNIST-IB" - else None, - transforms.RandomCrop(config.input_size, padding=4) - if config.apply_augmentation - else None, - transforms.RandomHorizontalFlip() if config.apply_augmentation else None, - transforms.RandomRotation(15) - if config.apply_augmentation and not "MNIST" in config.dataset_cls - else None, - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Lambda(lambda x: x.repeat(3, 1, 1)) - if config.convert_to_rgb - else None, - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - transform_val = [ - transforms.ToPILImage() - if config.dataset_cls == "CIFAR10-Semisupervised" - or config.dataset_cls == "MNIST-IB" - else None, - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Lambda(lambda x: x.repeat(3, 1, 1)) - if config.convert_to_rgb - else None, - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - transform_test = [ - transforms.ToPILImage() if config.dataset_cls == "MNIST-IB" else None, - transforms.Grayscale() if config.apply_grayscale else None, - transforms.ToTensor(), - transforms.Lambda(lambda x: x.repeat(3, 1, 1)) - if config.convert_to_rgb - else None, - transforms.Normalize(config.train_data_mean, config.train_data_std) - if config.apply_normalization - else None, - ] - transform_test = transforms.Compose( - list(filter(lambda x: x is not None, transform_test)) - ) - transform_val = transforms.Compose( - list(filter(lambda x: x is not 
None, transform_val)) - ) - transform_train = transforms.Compose( - list(filter(lambda x: x is not None, transform_train)) - ) - return transform_test, transform_train, transform_val - - -def get_datasets(config, transform_test, transform_train, transform_val): - if ( - config.dataset_cls in list(torchvision.datasets.__dict__.keys()) - and config.dataset_cls != "ImageNet" - ): - dataset_cls = eval("torchvision.datasets." + config.dataset_cls) - kwargs = { - "root": config.data_dir, - "transform": transform_train, - "download": True, - } - - if config.dataset_cls == "SVHN": - kwargs["split"] = "train" - else: - kwargs["train"] = True - train_dataset = dataset_cls(**kwargs) - - kwargs["transform"] = transform_val - valid_dataset = dataset_cls(**kwargs) - - kwargs["transform"] = transform_test - if config.dataset_cls == "SVHN": - kwargs["split"] = "test" - else: - kwargs["train"] = False - test_dataset = dataset_cls(**kwargs) - elif config.dataset_cls == "MNIST-IB": - dataset_dir = os.path.join(config.data_dir, config.dataset_sub_cls + "-IB") - generate_and_save( - config.bias, base_path=config.data_dir, dataset=config.dataset_sub_cls - ) - train_dataset = NpyDataset( - f"{config.bias}_train_source.npy", - f"{config.bias}_train_target.npy", - root=dataset_dir, - transform=transform_train, - target_type=torch.float32 if "regression" in config.bias else torch.long, - ) - valid_dataset = NpyDataset( - f"{config.bias}_train_source.npy", - f"{config.bias}_train_target.npy", - root=dataset_dir, - transform=transform_val, - target_type=torch.float32 if "regression" in config.bias else torch.long, - ) - test_dataset = NpyDataset( - f"{config.bias}_test_source.npy", - f"{config.bias}_test_target.npy", - root=dataset_dir, - transform=transform_test, - target_type=torch.float32 if "regression" in config.bias else torch.long, - ) - else: - dataset_dir = get_dataset( - DATASET_URLS[config.dataset_cls], - config.data_dir, - dataset_cls=config.dataset_cls, - ) - - train_dir = os.path.join(dataset_dir, "train") - if config.dataset_cls == "CIFAR10-Semisupervised": - train_dataset = PklDataset( - train_dir, transform=transform_train, root=config.data_dir - ) - valid_dataset = PklDataset( - train_dir, transform=transform_val, root=config.data_dir - ) - dataset_cls = torchvision.datasets.CIFAR10 - test_dataset = dataset_cls( - root=config.data_dir, - train=False, - transform=transform_test, - ) - else: - if config.dataset_cls != "ImageNet": - create_ImageFolder_format(dataset_dir) - val_dir = os.path.join(dataset_dir, "val", "images") - train_dataset = datasets.ImageFolder(train_dir, transform=transform_train) - valid_dataset = datasets.ImageFolder(train_dir, transform=transform_val) - test_dataset = datasets.ImageFolder(val_dir, transform=transform_test) - - st_test_dataset = None - if config.add_stylized_test: - st_dataset_dir = get_dataset( - DATASET_URLS[config.dataset_cls + "-ST"], - config.data_dir, - dataset_cls=config.dataset_cls + "-ST", - ) - st_test_dataset = datasets.ImageFolder(st_dataset_dir, transform=transform_test) - - c_test_datasets = None - if config.add_corrupted_test: - urls = DATASET_URLS[config.dataset_cls + "-C"] - if not isinstance(urls, dict): - urls = {"default": urls} - for key, url in urls.items(): - dataset_dir = get_dataset( - url, - config.data_dir, - dataset_cls=config.dataset_cls + "-C", - ) - - c_test_datasets = {} - for c_category in os.listdir(dataset_dir): - if config.dataset_cls in ("CIFAR10", "CIFAR100"): - if c_category == "labels.npy" or not c_category.endswith(".npy"): 
- continue - c_test_datasets[c_category[:-4]] = {} - for c_level in range(1, 6): - start = (c_level - 1) * 10000 - end = c_level * 10000 - c_test_datasets[c_category[:-4]][c_level] = NpyDataset( - samples=c_category, - targets="labels.npy", - root=dataset_dir, - start=start, - end=end, - transform=transform_test, - ) - else: - if not os.path.isdir(os.path.join(dataset_dir, c_category)): - continue - c_test_datasets[c_category] = {} - for c_level in os.listdir(os.path.join(dataset_dir, c_category)): - c_test_datasets[c_category][ - int(c_level) - ] = datasets.ImageFolder( - os.path.join(dataset_dir, c_category, c_level), - transform=transform_test, - ) - return train_dataset, valid_dataset, test_dataset, c_test_datasets, st_test_dataset - - -def get_data_loaders( - st_test_dataset, - c_test_datasets, - config, - seed, - test_dataset, - train_dataset, - valid_dataset, -): - num_train = len(train_dataset) - indices = list(range(num_train)) - if config.use_c_test_as_val: # Use valid_size of the c_test set for validation - train_sampler = SubsetRandomSampler(indices) - datasets = [] - val_indices = [] - start_idx = 0 - for c_category in c_test_datasets.keys(): - if c_category not in ( - "speckle_noise", - "gaussian_blur", - "spatter", - "saturate", - ): - continue - for dataset in c_test_datasets[c_category].values(): - num_val = len(dataset) - indices = list(range(start_idx, start_idx + num_val)) - split = int(np.floor(config.valid_size * num_val)) - if config.shuffle: - np.random.shuffle(indices) - val_indices += indices[:split] - datasets.append(dataset) - start_idx += num_val - valid_dataset = ConcatDataset(datasets) - valid_sampler = SubsetRandomSampler(val_indices) - else: # Use valid_size of the train set for validation - split = int(np.floor(config.valid_size * num_train)) - if config.shuffle: - np.random.seed(seed) - np.random.shuffle(indices) - train_idx, valid_idx = indices[split:], indices[:split] - if config.train_subset: - subset_split = int(np.floor(config.train_subset * len(train_idx))) - train_idx = train_idx[:subset_split] - if config.shuffle: - train_sampler = SubsetRandomSampler(train_idx) - valid_sampler = SubsetRandomSampler(valid_idx) - else: - train_dataset = Subset(train_dataset, train_idx) - valid_dataset = Subset(train_dataset, valid_idx) - train_sampler = None - valid_sampler = None - train_loader = torch.utils.data.DataLoader( - train_dataset, - batch_size=config.batch_size, - sampler=train_sampler, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=False, - ) - valid_loader = torch.utils.data.DataLoader( - valid_dataset, - batch_size=config.batch_size, - sampler=valid_sampler, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=False, - ) - test_loader = torch.utils.data.DataLoader( - test_dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=True, - ) - task_key = ( - "regression" - if config.bias is not None and "regression" in config.bias - else "img_classification" - ) - data_loaders = { - "train": {task_key: train_loader}, - "validation": {task_key: valid_loader}, - "test": {task_key: test_loader}, - } - - if config.add_stylized_test: - st_test_loader = torch.utils.data.DataLoader( - st_test_dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=False, - ) - data_loaders["st_test"] = st_test_loader - - if config.add_corrupted_test: - c_test_loaders = {} - for c_category in 
c_test_datasets.keys(): - c_test_loaders[c_category] = {} - for c_level, dataset in c_test_datasets[c_category].items(): - c_test_loaders[c_category][c_level] = torch.utils.data.DataLoader( - dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=True, - ) - data_loaders["c_test"] = {"img_classification": c_test_loaders} - return data_loaders diff --git a/bias_transfer/dataset/mtl_datasets_loader.py b/bias_transfer/dataset/mtl_datasets_loader.py deleted file mode 100644 index 10c34be..0000000 --- a/bias_transfer/dataset/mtl_datasets_loader.py +++ /dev/null @@ -1,54 +0,0 @@ -from bias_transfer.configs.dataset import MTLDatasetsConfig -from nnfabrik.builder import resolve_data -from .img_dataset_loader import img_dataset_loader -from .neural_dataset_loader import neural_dataset_loader - -# -# def mtl_datasets_loader(seed, **config): -# neural_dataset_config = config.pop("neural_dataset_config") -# img_dataset_config = config.pop("img_dataset_config") -# -# neural_dataset_config.pop("seed") -# -# neural_dataset_loaders = neural_dataset_loader(seed, **neural_dataset_config) -# img_dataset_loaders = img_dataset_loader(seed, **img_dataset_config) -# -# data_loaders = neural_dataset_loaders -# data_loaders["train"]["img_classification"] = img_dataset_loaders["train"][ -# "img_classification" -# ] -# data_loaders["validation"]["img_classification"] = img_dataset_loaders[ -# "validation" -# ]["img_classification"] -# data_loaders["test"]["img_classification"] = img_dataset_loaders["test"][ -# "img_classification" -# ] -# if "c_test" in img_dataset_loaders: -# data_loaders["c_test"] = img_dataset_loaders["c_test"] -# return data_loaders - -def update(to_update, new_entries, prefix=""): - for k,v in new_entries.items(): - if prefix: - k = prefix + "_" + k - to_update[k] = v - -def mtl_datasets_loader(seed, **config): - mtl_config = MTLDatasetsConfig.from_dict(config) - mtl_data_loaders = {"train": {}, "validation": {}, "test": {}} - for prefix, dataset_config in mtl_config.items(): - dataset_config.seed = seed - dataset_fn = resolve_data(dataset_config.fn) - data_loaders = dataset_fn(**dataset_config.to_dict()) - update(mtl_data_loaders["train"], data_loaders["train"], prefix) - update(mtl_data_loaders["validation"], data_loaders["validation"], prefix) - update(mtl_data_loaders["test"], data_loaders["test"], prefix) - if "c_test" in data_loaders: - if "c_test" not in mtl_data_loaders: - mtl_data_loaders["c_test"] = {} - update(mtl_data_loaders["c_test"], data_loaders["c_test"], prefix) - if "st_test" in data_loaders: - if "st_test" not in mtl_data_loaders: - mtl_data_loaders["st_test"] = {} - update(mtl_data_loaders["st_test"], data_loaders["st_test"], prefix) - return mtl_data_loaders diff --git a/bias_transfer/dataset/neural_dataset_loader.py b/bias_transfer/dataset/neural_dataset_loader.py deleted file mode 100644 index f6b8b1c..0000000 --- a/bias_transfer/dataset/neural_dataset_loader.py +++ /dev/null @@ -1,29 +0,0 @@ -import numpy as np -import torch - -from nnfabrik import builder -import os -from os import listdir -from os.path import isfile, join - - -def neural_dataset_loader(seed, **config): - config.pop("comment", None) - data_dir = config.pop("data_dir", None) - neuronal_data_path = os.path.join(data_dir, "neuronal_data/") - config["neuronal_data_files"] = [ - neuronal_data_path + f - for f in listdir(neuronal_data_path) - if isfile(join(neuronal_data_path, f)) - ] - config["image_cache_path"] = os.path.join(data_dir, 
"images/individual") - torch.manual_seed(seed) - np.random.seed(seed) - dataset_fn = "nnvision.datasets.monkey_static_loader" - data_loaders = builder.get_data(dataset_fn, config) - dataloaders = { - "train": data_loaders["train"], - "validation": {"neural": data_loaders["validation"]}, - "test": {"neural": data_loaders["test"]}, - } - return dataloaders diff --git a/bias_transfer/dataset/regression_dataset_loader.py b/bias_transfer/dataset/regression_dataset_loader.py deleted file mode 100644 index 5bdeb43..0000000 --- a/bias_transfer/dataset/regression_dataset_loader.py +++ /dev/null @@ -1,135 +0,0 @@ -import h5py -import numpy as np -import torch -import torch.utils.data as Data -from sklearn.datasets import fetch_openml - -from bias_transfer.configs.dataset import Regression - - -def load_mauna_loa_atmospheric_co2(): - ml_data = fetch_openml(data_id=41187) - months = [] - ppmv_sums = [] - counts = [] - - y = ml_data.data[:, 0] - m = ml_data.data[:, 1] - month_float = y + (m - 1) / 12 - ppmvs = ml_data.target - - for month, ppmv in zip(month_float, ppmvs): - if not months or month != months[-1]: - months.append(month) - ppmv_sums.append(ppmv) - counts.append(1) - else: - # aggregate monthly sum to produce average - ppmv_sums[-1] += ppmv - counts[-1] += 1 - - months = np.asarray(months).reshape(-1, 1) - avg_ppmvs = np.asarray(ppmv_sums) / counts - # normalize: - avg_ppmvs -= np.mean(avg_ppmvs) - avg_ppmvs /= np.std(avg_ppmvs) - X_plot = months - Y_plot = avg_ppmvs - X_train = np.concatenate((X_plot[:120], X_plot[150:300], X_plot[380:450])) - Y_train = np.concatenate((Y_plot[:120], Y_plot[150:300], Y_plot[380:450])) - - return X_plot, Y_plot, X_train, Y_train - - -def load_co2(): - f = h5py.File("co2_data.h5", "r") - data_train = np.concatenate((f["data"].value, f["label"].value), axis=1) - f.close() - X_train = data_train[:, 0].reshape(-1, 1) - Y_train = data_train[:, 1].reshape(-1) - - X_plot = np.concatenate((X_train, np.arange(1.73, 3.51, 0.01).reshape(-1, 1))) - Y_plot = np.concatenate((Y_train, np.zeros((int((3.51 - 1.73) // 0.01 + 1),)))) - X_train = np.concatenate((X_train[:120], X_train[150:])) - Y_train = np.concatenate((Y_train[:120], Y_train[150:])) - - return X_plot, Y_plot, X_train, Y_train - - -def load_sinusoid_data(noisy=False, train_range=10): - def f(x): - return (np.sin(x)).ravel() - - rng = np.random.RandomState(0) - X_plot = np.linspace(-10, 40, 1000).reshape(-1, 1) - X_train = np.sort(train_range * rng.rand(10 * train_range, 1), axis=0) - # X_train = np.concatenate((X_train, (np.sort(10 * rng.rand(100, 1) + 20, axis=0)))) - Y_train = f(X_train) - Y_plot = f(X_plot) - if noisy: - Y_train = Y_train + 1 * (0.5 - rng.rand(X_train.shape[0])) - return X_plot, Y_plot, X_train, Y_train - - -def regression_dataset_loader(seed, **config): - config = Regression.from_dict(config) - print("Loading dataset: {}".format(config.dataset_cls)) - torch.manual_seed(seed) - np.random.seed(seed) - - error_msg = "[!] valid_size should be in the range [0, 1]." 
- assert (config.valid_size >= 0) and (config.valid_size <= 1), error_msg - - if config.dataset_cls == "co2": - X_plot, Y_plot, X_train, Y_train = load_co2() - elif config.dataset_cls == "co2_original": - X_plot, Y_plot, X_train, Y_train = load_mauna_loa_atmospheric_co2() - else: - X_plot, Y_plot, X_train, Y_train = load_sinusoid_data( - noisy=config.noisy, train_range=config.train_range - ) - - train_len = X_train.shape[0] - valid_start = int(train_len * (1.0 - config.valid_size)) - train_dataset = Data.TensorDataset( - torch.tensor(X_train[:valid_start], dtype=torch.float), - torch.tensor(Y_train[:valid_start], dtype=torch.float), - ) - valid_dataset = Data.TensorDataset( - torch.tensor(X_train[valid_start:], dtype=torch.float), - torch.tensor(Y_train[valid_start:], dtype=torch.float), - ) - test_dataset = Data.TensorDataset( - torch.tensor(X_plot, dtype=torch.float), - torch.tensor(Y_plot, dtype=torch.float), - ) - - train_loader = torch.utils.data.DataLoader( - train_dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=True, - ) - valid_loader = torch.utils.data.DataLoader( - valid_dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=False, - ) - test_loader = torch.utils.data.DataLoader( - test_dataset, - batch_size=config.batch_size, - num_workers=config.num_workers, - pin_memory=config.pin_memory, - shuffle=False, - ) - - data_loaders = { - "train": {"regression": train_loader}, - "validation": {"regression": valid_loader}, - "test": {"regression": test_loader}, - } - - return data_loaders diff --git a/bias_transfer/dataset/transferred_dataset_loader.py b/bias_transfer/dataset/transferred_dataset_loader.py deleted file mode 100644 index 5eed0ab..0000000 --- a/bias_transfer/dataset/transferred_dataset_loader.py +++ /dev/null @@ -1,73 +0,0 @@ -import torch -from torch.utils.data import TensorDataset - -from bias_transfer.dataset import img_dataset_loader -from bias_transfer.dataset.dataset_classes.combined_dataset import ParallelDataset -from bias_transfer.dataset.dataset_classes.npy_dataset import NpyDataset - -def load_npy(postfix, data_key, transfer_data, data_loaders, main_data_loader): - transferred_dataset = NpyDataset( - samples=transfer_data["source" + postfix], - targets=transfer_data["target" + postfix], - ) - data_loaders["train"][data_key] = torch.utils.data.DataLoader( - dataset=transferred_dataset, - batch_size=main_data_loader.batch_size, - num_workers=main_data_loader.num_workers, - pin_memory=main_data_loader.pin_memory, - shuffle=True, - ) - - -def transferred_dataset_loader(seed, primary_dataset_fn=img_dataset_loader, **config): - transfer_data_file = config.pop("transfer_data") - transfer_data = {k: transfer_data_file[k] for k in transfer_data_file.files} - - data_loaders = primary_dataset_fn(seed, **config) - main_task = next(iter(data_loaders["train"].keys())) - main_data_loader = data_loaders["train"][main_task] - main_dataset = main_data_loader.dataset - if "covariance" in transfer_data: - data_loaders["covariance"] = transfer_data.pop("covariance") - - if "source_cs" in transfer_data: # we have a coreset - if config.get("train_on_coreset"): - load_npy("_cs", main_task, transfer_data, data_loaders, main_data_loader) - else: - if config.get("train_on_reduced_data"): - load_npy("", main_task, transfer_data, data_loaders, main_data_loader) - if config.get("load_coreset"): - load_npy("_cs", f"{main_task}_cs", transfer_data, data_loaders, 
main_data_loader) - else: - datasets = {} - for rep_name, rep_data in transfer_data.items(): - datasets[rep_name] = TensorDataset(torch.from_numpy(rep_data)) - if "source" in transfer_data: # we have input data - source_ds = datasets.pop("source") - transfer_dataset = ParallelDataset( - source_datasets={"img": source_ds}, target_datasets=datasets - ) - transfer_data_loader = torch.utils.data.DataLoader( - dataset=transfer_dataset, - batch_size=main_data_loader.batch_size, - sampler=main_data_loader.sampler, - num_workers=main_data_loader.num_workers, - pin_memory=main_data_loader.pin_memory, - shuffle=False, - ) - data_loaders["train"]["transfer"] = transfer_data_loader - else: # we don't have input data -> only targets that are presented in parallel to class-labels - datasets["class"] = main_dataset - combined_dataset = ParallelDataset( - source_datasets={"img": main_dataset}, target_datasets=datasets - ) - combined_data_loader = torch.utils.data.DataLoader( - dataset=combined_dataset, - batch_size=main_data_loader.batch_size, - sampler=main_data_loader.sampler, - num_workers=main_data_loader.num_workers, - pin_memory=main_data_loader.pin_memory, - shuffle=False, - ) - data_loaders["train"][main_task] = combined_data_loader - return data_loaders diff --git a/bias_transfer/gp/gp_regression.py b/bias_transfer/gp/gp_regression.py deleted file mode 100644 index 7ce3544..0000000 --- a/bias_transfer/gp/gp_regression.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np -from scipy.optimize import minimize -from functools import partial - - -def posterior_predictive(X, X_train, Y_train, kernel, **opts): - K = kernel(X_train, X_train, **opts) - K_s = kernel(X_train, X, **opts) - K_ss = kernel(X, X, **opts) - - K_inv = np.linalg.inv(K) - - mu_s = K_s.T @ K_inv @ Y_train - cov_s = K_ss - K_s.T @ K_inv @ K_s - - return mu_s, cov_s - - -def optimize_hyper_params(kernel,X_train, Y_train): - # -log liklihood - def nll_fn(x, y): - def step(theta): - K = kernel(x, x, sigma=theta[0], l=theta[1], sigma_f=theta[2], p=theta[3]) - return np.sum(np.log(np.diagonal(np.linalg.cholesky(K)))) + \ - 0.5 * y.T @ np.linalg.inv(K) @ y + \ - 0.5 * len(x) * np.log(2 * np.pi) - - return step - - # minimize -log liklihood - res = minimize(nll_fn(X_train, Y_train), [0.01, 1, 1, 5.0], - bounds=((1e-5, 1e1), (1e-5, None), (1e-5, None), (1e-2, 1e1)), - method='L-BFGS-B') - - sigma_opt, l_opt, sigma_f_opt, p_opt = res.x - fitted_kernel = partial(kernel, sigma_f=sigma_f_opt, l=l_opt, sigma=sigma_opt, p=p_opt) - return fitted_kernel - # mu_s, cov_s = posterior_predictive(X_plot, X_train, Y_train_noisy, l=l_opt, sigma_f=sigma_f_opt, p=p_opt, - # sigma=sigma_opt) \ No newline at end of file diff --git a/bias_transfer/gp/kernels.py b/bias_transfer/gp/kernels.py deleted file mode 100644 index b3d206d..0000000 --- a/bias_transfer/gp/kernels.py +++ /dev/null @@ -1,50 +0,0 @@ -from scipy.spatial.distance import cdist -import numpy as np -import torch -from .nn_kernel import compute_cov_matrix - - -def linear(x1, x2, sigma_b=1, sigma_v=1, c=0, **kwargs): - """ - Linear Kernel: $k(x_1,x_2) =\sigma_b^2 + \sigma_v^2 (x_1 - c)(x_2-c)$ - """ - return sigma_b ** 2 + sigma_v ** 2 * np.inner(x1 - c, x2 - c) - - -def rbf(x1, x2, l=1, sigma_f=1, **kwargs): - """ - RBF Kernel: $k(x_1,x_2) =\sigma^2 \exp\left( - \frac{||x_1-x_2||^2}{2l^2} \right)$ - """ - dists = cdist(x1 / l, x2 / l, metric="sqeuclidean") - return sigma_f ** 2 * np.exp(-0.5 * dists) - - -def periodic(x1, x2, l=1.0, sigma_f=1.0, p=5.0, **kwargs): - """ - Periodic Kernel: 
$k(x_1,x_2) =\sigma^2 \exp\left( - \frac{2\sin^2(\pi|x_1-x_2|/p)}{l^2} \right)$ - """ - dists = cdist(x1, x2, metric="euclidean") - return sigma_f ** 2 * np.exp(-2 * (np.sin(np.pi / p * dists) / l) ** 2) - - -def locally_periodic(x1, x2, l=1, sigma_f=0.5, p=2.0): - """ - Locally Periodic Kernel: $k(x_1,x_2) =\sigma^2 \exp\left( - \frac{2\sin^2(\pi|x_1-x_2|/p)}{l^2} \right) \exp\left(-\frac{||x_1-x_2||^2}{2l^2}\right)$ - """ - return periodic(x1, x2, l, sigma_f, p) * rbf(x1, x2, l, sigma_f) / sigma_f ** 2 - - -def white_noise(x1, x2, sigma=0.1, **kwargs): - """ - White Noise Kernel: $k(x_1,x_2) = \sigma^2 \cdot I_n$ - """ - if x1 is x2: - return sigma ** 2 * np.eye(len(x1)) - else: - return np.zeros((len(x1), len(x2))) - - -def add_white_noise(kernel): - return lambda x1, x2, sigma_noise=0.1, **opts: white_noise(x1, x2, sigma_noise) + kernel( - x1, x2, **opts - ) diff --git a/bias_transfer/gp/nn_kernel.py b/bias_transfer/gp/nn_kernel.py deleted file mode 100644 index 41d4eb4..0000000 --- a/bias_transfer/gp/nn_kernel.py +++ /dev/null @@ -1,171 +0,0 @@ -from functools import partial - -import numpy as np -import torch -from scipy.optimize import minimize -from torch.autograd import Variable -from tqdm import tqdm - - -def compute_cov_matrix(x1, x2, sigma=None): - x1_flat = x1.reshape((x1.shape[0], -1)) - centered1 = x1_flat # - x1_flat.mean(axis=1).reshape((-1, 1)) - x2_flat = x2.reshape((x2.shape[0], -1)) - centered2 = x2_flat # - x2_flat.mean(axis=1).reshape((-1, 1)) - if sigma is not None: - result = ( - centered1 - @ sigma - @ centered2.T - # / np.outer(np.linalg.norm(centered1, 2, axis=1), np.linalg.norm(centered2, 2, axis=1)) - ) # see https://de.mathworks.com/help/images/ref/corr2.html - else: - result = ( - centered1 - @ centered2.T - # / np.outer(np.linalg.norm(centered1, 2, axis=1), np.linalg.norm(centered2, 2, axis=1)) - ) # see https://de.mathworks.com/help/images/ref/corr2.html - return result - - -def nn_kernel(x1, x2, net, train_reps=None, weights=None, device="cpu", sigma=None): - def get_reps(x): - if np.count_nonzero(x) == 0: - phi = train_reps - else: - x = torch.tensor(x, dtype=torch.float).to(device) - phi = net[:-1](x).detach().cpu().numpy() - return phi - - phi1 = get_reps(x1) - phi2 = get_reps(x2) - RSM = compute_cov_matrix( - phi1, - phi2, - sigma=sigma - if np.count_nonzero(x1) != 0 and np.count_nonzero(x2) != 0 - else None, - ) # .cpu().numpy() - if np.count_nonzero(x2) == 0 and weights is not None: - RSM = RSM @ weights - elif np.count_nonzero(x1) == 0 and weights is not None: - RSM = weights @ RSM - return RSM - - -def optimize_noise(kernel, X_train, Y_train): - # -log liklihood - def nll_fn(x, y): - def step(theta): - K = kernel(x, x, sigma_noise=theta[0]) - return ( - np.sum(np.log(np.diagonal(np.linalg.cholesky(K)))) - + 0.5 * y.T @ np.linalg.inv(K) @ y - + 0.5 * len(x) * np.log(2 * np.pi) - ) - - return step - - # minimize -log liklihood - res = minimize( - nll_fn(X_train, Y_train), [0.01], bounds=((1e-5, 1e1),), method="L-BFGS-B" - ) - - sigma_noise_opt = res.x - fitted_kernel = partial(kernel, sigma_noise=sigma_noise_opt) - return fitted_kernel - - -def get_nn_eigen_kernel(net, device): - v = net[-1].weight.detach().cpu().numpy().T - # v -= np.mean(v) - sigma = v @ v.T - eig_vals, eig_vecs = np.linalg.eigh(sigma) - # eig_vals (n) with possibly complex entries - # eig_vecs (n x n) where [:,j] corresponds to eig_vals[j] - # sort: - eig_sorting = np.argsort(-eig_vals) - eig_vals = eig_vals[eig_sorting[:1]] - eig_vecs = eig_vecs[:, eig_sorting[:1]] - weights = 
np.diag(eig_vals) - kernel = partial( - nn_kernel, - net=net, - device=device, - train_reps=eig_vecs.T, - weights=weights, - sigma=sigma, - ) - base_point_preds = eig_vecs.T @ v - # base_points = inverse_computation(net, torch.tensor(eig_vecs.T, device=device)) - return kernel, base_point_preds, None - - -def inverse_computation(net, out_vecs): - print(net) - print(net[1:-1]) - print(net[:1]) - first_layer_out = net[1:-1](out_vecs, inverse=True).detach() - print("first_layer", first_layer_out) - x = Variable( - 100 * torch.randn(first_layer_out.shape[0], 1).cuda(), requires_grad=True - ) - params = net.parameters() - optim = torch.optim.Adam([x], 0.001) - for param in params: - param.requires_grad = False - if hasattr( - tqdm, "_instances" - ): # To have tqdm output without line-breaks between steps - tqdm._instances.clear() - net.train() - t = tqdm(range(100)) - for batch in t: - y = net[:1](x) - loss = torch.mean((first_layer_out - y) ** 2) - optim.zero_grad() - loss.backward() - optim.step() - t.set_postfix( - loss=loss.item(), - eig_vec_0=first_layer_out[0][:4].cpu().numpy(), - phi_0=y[0][:4].detach().cpu().numpy(), - ) - net.eval() - return x.detach().cpu().numpy() - - -def optimize_base_points(net): - v = net[-1].weight.detach().T - sigma = v @ v.T - eig_vals, eig_vecs = torch.eig(sigma, eigenvectors=True) - # eig_vals (n x 2) with entries (real,imaginary) - # eig_vecs (n x n) where [:,j] corresponds to eig_vals[j] - eig_vecs = eig_vecs.T - x = Variable(15 * torch.randn(eig_vecs.shape[0], 1).cuda(), requires_grad=True) - params = net.parameters() - optim = torch.optim.Adam([x], 0.001) - for param in params: - param.requires_grad = False - if hasattr( - tqdm, "_instances" - ): # To have tqdm output without line-breaks between steps - tqdm._instances.clear() - t = tqdm(range(100)) - net.train() - for batch in t: - y = net[:-1](x) - loss = torch.mean((eig_vecs - y) ** 2) - optim.zero_grad() - loss.backward() - optim.step() - net.eval() - phi = net[:-1](x) - t.set_postfix( - loss=loss.item(), - eig_vec_0=eig_vecs[0][:4].cpu().numpy(), - phi_0=phi[0][:4].detach().cpu().numpy(), - ) - net.train() - net.eval() - return x.detach().cpu().numpy(), eig_vals.cpu().numpy() diff --git a/bias_transfer/gp/utils.py b/bias_transfer/gp/utils.py deleted file mode 100644 index 6a25e88..0000000 --- a/bias_transfer/gp/utils.py +++ /dev/null @@ -1,35 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt - -def plot_gp(mu, cov, X, samples=[], Y=None, X_train=None, Y_train=None, save=""): - if Y is not None: - plt.plot(X, Y, color='orange', lw=2, label='True') - if X_train is not None and Y_train is not None: - plt.plot(X_train, Y_train, color='red', label="Traning data") - X = X.reshape(-1) - mu = mu.reshape(-1) - - # cov *= 100000 - # gp_samples = np.random.multivariate_normal(mu, cov, size=1000) - # uncertainty = 2 * np.std(gp_samples, axis=0) - # 95% confidence interval - uncertainty = 1.96 * np.sqrt(np.abs(np.diag(cov))) - - plt.fill_between(X, mu + uncertainty, mu - uncertainty, alpha=0.4) - plt.plot(X, mu, label='Mean') - - for i, sample in enumerate(samples): - plt.plot(X, sample, lw=1, ls='--', label='sample_{}'.format(i)) - - plt.legend() - if save: - fig = plt.gcf() - fig.savefig(save, dpi=200) - -def plot_kernel(kernel, x): - K_plot = kernel(x,x) - plt.imshow(K_plot) - # if np.count_nonzero(x) > 0: - # _ = plt.xticks(np.arange(0,x.shape[0], 15),x[::15,0].astype(np.int)) - # _ = plt.yticks(np.arange(0,x.shape[0], 15),x[::15,0].astype(np.int)) - plt.colorbar() diff --git 
a/bias_transfer/models/__init__.py b/bias_transfer/models/__init__.py deleted file mode 100644 index afe8b24..0000000 --- a/bias_transfer/models/__init__.py +++ /dev/null @@ -1,150 +0,0 @@ -import torch -import numpy as np - -from bias_transfer.configs.model import ( - Classification, - MTL, - Regression, -) -from bias_transfer.models.resnet import resnet_builder -from bias_transfer.models.wrappers.noise_adv import NoiseAdvWrapper -from bias_transfer.models.utils import get_model_parameters -from bias_transfer.models.vgg import vgg_builder -from torch.hub import load_state_dict_from_url - -from nnfabrik.utility.nn_helpers import load_state_dict -from nnvision.models.models import se_core_gauss_readout, se_core_point_readout -from .lenet import lenet_builder -from .lenet_bayesian import lenet_builder as bayes_builder -from .lenet_frcl import lenet_builder as frcl_builder -from .mlp import MLP -from .wrappers import * - - -def neural_cnn_builder(data_loaders, seed: int = 1000, **config): - config.pop("comment", None) - readout_type = config.pop("readout_type", None) - if readout_type == "point": - model = se_core_point_readout(dataloaders=data_loaders, seed=seed, **config) - elif readout_type == "gauss": - model = se_core_gauss_readout(dataloaders=data_loaders, seed=seed, **config) - print("Model with {} parameters.".format(get_model_parameters(model))) - return model - - -def mtl_builder(data_loaders, seed: int = 1000, **config): - config = MTL.from_dict(config) - torch.manual_seed(seed) - np.random.seed(seed) - - from .mtl_vgg import MTL_VGG - - model = MTL_VGG( - data_loaders, - vgg_type=config.vgg_type, - classification=config.classification, - classification_readout_type=config.classification_readout_type, - input_size=config.input_size, - num_classes=config.num_classes, - pretrained=config.pretrained, - v1_model_layer=config.v1_model_layer, - neural_input_channels=config.neural_input_channels, - classification_input_channels=config.classification_input_channels, - v1_fine_tune=config.v1_fine_tune, - v1_init_mu_range=config.v1_init_mu_range, - v1_init_sigma_range=config.v1_init_sigma_range, - v1_readout_bias=config.v1_readout_bias, - v1_bias=config.v1_bias, - v1_gamma_readout=config.v1_gamma_readout, - v1_elu_offset=config.v1_elu_offset, - v1_final_batchnorm=config.v1_final_batchnorm, - ) - - print("Model with {} parameters.".format(get_model_parameters(model))) - return model - - -def classification_model_builder(data_loader, seed: int, **config): - config = Classification.from_dict(config) - torch.manual_seed(seed) - np.random.seed(seed) - if "vgg" in config.type: - model = vgg_builder(seed, config) - from torchvision.models.vgg import model_urls - elif "resnet" in config.type: - model = resnet_builder(seed, config) - from torchvision.models.resnet import model_urls - elif "lenet" in config.type: - if "bayes" in config.type: - model = bayes_builder(seed, config) - elif "frcl" in config.type: - model = frcl_builder(seed, config) - else: - model = lenet_builder(seed, config) - else: - raise Exception("Unknown type {}".format(config.type)) - - if config.pretrained: - print("Downloading pretrained model:", flush=True) - url = ( - model_urls[config.type] - if not config.pretrained_url - else config.pretrained_url - ) - state_dict = load_state_dict_from_url(url, progress=True) - try: - load_state_dict(model, state_dict) - except: - load_state_dict(model, state_dict["model_state_dict"]) - - # Add wrappers - if config.get_intermediate_rep: - model = IntermediateLayerGetter( - model, 
return_layers=config.get_intermediate_rep, keep_output=True - ) - if config.noise_adv_regression or config.noise_adv_classification: - assert not config.self_attention - model = NoiseAdvWrapper( - model, - input_size=model.fc.in_features - if "resnet" in config.type - else model.n_features, - hidden_size=model.fc.in_features if "resnet" in config.type else 4096, - classification=config.noise_adv_classification, - num_noise_readout_layers=config.num_noise_readout_layers, - sigmoid_output=config.noise_sigmoid_output, - ) - print("Model with {} parameters.".format(get_model_parameters(model))) - if config.add_buffer: - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - for b in config.add_buffer: - model.register_buffer( - f"{n}_{b}", p.detach().clone().zero_(), - ) - return model - - -def regression_model_builder(data_loader, seed: int, **config): - config = Regression.from_dict(config) - torch.manual_seed(seed) - np.random.seed(seed) - - model = MLP( - input_size=config.input_size, - num_layers=config.num_layers, - layer_size=config.layer_size, - output_size=config.output_size, - activation=config.activation, - dropout=config.dropout, - ) - - # Add wrappers - if config.get_intermediate_rep: - model = IntermediateLayerGetter( - model, return_layers=config.get_intermediate_rep, keep_output=True - ) - - print("Model with {} parameters.".format(get_model_parameters(model))) - return model diff --git a/bias_transfer/models/attention.py b/bias_transfer/models/attention.py deleted file mode 100644 index 1b6ffb7..0000000 --- a/bias_transfer/models/attention.py +++ /dev/null @@ -1,216 +0,0 @@ -""" -Implementation copied from https://github.com/leaderj1001/Stand-Alone-Self-Attention -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.nn.init as init - -import math - - -class AttentionConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - groups=1, - bias=False, - ): - super(AttentionConv, self).__init__() - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.groups = groups - - assert ( - self.out_channels % self.groups == 0 - ), "out_channels should be divided by groups. 
(example: out_channels: 40, groups: 4)" - - self.rel_h = nn.Parameter( - torch.randn(out_channels // 2, 1, 1, kernel_size, 1), requires_grad=True - ) - self.rel_w = nn.Parameter( - torch.randn(out_channels // 2, 1, 1, 1, kernel_size), requires_grad=True - ) - - self.key_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - self.query_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - self.value_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - - self.reset_parameters() - - def forward(self, x): - batch, channels, height, width = x.size() - - padded_x = F.pad(x, [self.padding, self.padding, self.padding, self.padding]) - q_out = self.query_conv(x) - k_out = self.key_conv(padded_x) - v_out = self.value_conv(padded_x) - - k_out = k_out.unfold(2, self.kernel_size, self.stride).unfold( - 3, self.kernel_size, self.stride - ) - v_out = v_out.unfold(2, self.kernel_size, self.stride).unfold( - 3, self.kernel_size, self.stride - ) - - k_out_h, k_out_w = k_out.split(self.out_channels // 2, dim=1) - k_out = torch.cat((k_out_h + self.rel_h, k_out_w + self.rel_w), dim=1) - - k_out = k_out.contiguous().view( - batch, self.groups, self.out_channels // self.groups, height, width, -1 - ) - v_out = v_out.contiguous().view( - batch, self.groups, self.out_channels // self.groups, height, width, -1 - ) - - q_out = q_out.view( - batch, self.groups, self.out_channels // self.groups, height, width, 1 - ) - - out = q_out * k_out - out = F.softmax(out, dim=-1) - out = torch.einsum("bnchwk,bnchwk -> bnchw", out, v_out).view( - batch, -1, height, width - ) - - return out - - def reset_parameters(self): - init.kaiming_normal_(self.key_conv.weight, mode="fan_out", nonlinearity="relu") - init.kaiming_normal_( - self.value_conv.weight, mode="fan_out", nonlinearity="relu" - ) - init.kaiming_normal_( - self.query_conv.weight, mode="fan_out", nonlinearity="relu" - ) - - init.normal_(self.rel_h, 0, 1) - init.normal_(self.rel_w, 0, 1) - - -class AttentionStem(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - groups=1, - m=4, - bias=False, - ): - super(AttentionStem, self).__init__() - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.groups = groups - self.m = m - - assert ( - self.out_channels % self.groups == 0 - ), "out_channels should be divided by groups. 
(example: out_channels: 40, groups: 4)" - - self.emb_a = nn.Parameter( - torch.randn(out_channels // groups, kernel_size), requires_grad=True - ) - self.emb_b = nn.Parameter( - torch.randn(out_channels // groups, kernel_size), requires_grad=True - ) - self.emb_mix = nn.Parameter( - torch.randn(m, out_channels // groups), requires_grad=True - ) - - self.key_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - self.query_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - self.value_conv = nn.ModuleList( - [ - nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) - for _ in range(m) - ] - ) - - self.reset_parameters() - - def forward(self, x): - batch, channels, height, width = x.size() - - padded_x = F.pad(x, [self.padding, self.padding, self.padding, self.padding]) - - q_out = self.query_conv(x) - k_out = self.key_conv(padded_x) - v_out = torch.stack( - [self.value_conv[_](padded_x) for _ in range(self.m)], dim=0 - ) - - k_out = k_out.unfold(2, self.kernel_size, self.stride).unfold( - 3, self.kernel_size, self.stride - ) - v_out = v_out.unfold(3, self.kernel_size, self.stride).unfold( - 4, self.kernel_size, self.stride - ) - - k_out = k_out[:, :, :height, :width, :, :] - v_out = v_out[:, :, :, :height, :width, :, :] - - emb_logit_a = torch.einsum("mc,ca->ma", self.emb_mix, self.emb_a) - emb_logit_b = torch.einsum("mc,cb->mb", self.emb_mix, self.emb_b) - emb = emb_logit_a.unsqueeze(2) + emb_logit_b.unsqueeze(1) - emb = F.softmax(emb.view(self.m, -1), dim=0).view( - self.m, 1, 1, 1, 1, self.kernel_size, self.kernel_size - ) - - v_out = emb * v_out - - k_out = k_out.contiguous().view( - batch, self.groups, self.out_channels // self.groups, height, width, -1 - ) - v_out = v_out.contiguous().view( - self.m, - batch, - self.groups, - self.out_channels // self.groups, - height, - width, - -1, - ) - v_out = torch.sum(v_out, dim=0).view( - batch, self.groups, self.out_channels // self.groups, height, width, -1 - ) - - q_out = q_out.view( - batch, self.groups, self.out_channels // self.groups, height, width, 1 - ) - - out = q_out * k_out - out = F.softmax(out, dim=-1) - out = torch.einsum("bnchwk,bnchwk->bnchw", out, v_out).view( - batch, -1, height, width - ) - - return out - - def reset_parameters(self): - init.kaiming_normal_(self.key_conv.weight, mode="fan_out", nonlinearity="relu") - init.kaiming_normal_( - self.query_conv.weight, mode="fan_out", nonlinearity="relu" - ) - for _ in self.value_conv: - init.kaiming_normal_(_.weight, mode="fan_out", nonlinearity="relu") - - init.normal_(self.emb_a, 0, 1) - init.normal_(self.emb_b, 0, 1) - init.normal_(self.emb_mix, 0, 1) - - -# temp = torch.randn((2, 3, 32, 32)) -# conv = AttentionConv(3, 16, kernel_size=3, padding=1) -# print(conv(temp).size()) diff --git a/bias_transfer/models/lenet_bayesian.py b/bias_transfer/models/lenet_bayesian.py deleted file mode 100644 index a40c729..0000000 --- a/bias_transfer/models/lenet_bayesian.py +++ /dev/null @@ -1,193 +0,0 @@ -import math -from typing import OrderedDict, Union, Dict - -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np - -from bias_transfer.models.utils import concatenate_flattened - - -class BayesLinear(nn.Module): - def __init__( - self, - in_features: int, - out_features: int, - initial_posterior_var: float = 1e-3, - bias: bool = True, - ): - super(BayesLinear, self).__init__() - self.in_features = in_features - self.out_features = out_features - self.initial_posterior_var = initial_posterior_var - ( - 
self.w_prior_mean, - self.w_prior_log_var, - self.w_posterior_mean, - self.w_posterior_log_var, - ) = self.create_parameter("weight", (out_features, in_features)) - if bias: - ( - self.b_prior_mean, - self.b_prior_log_var, - self.b_posterior_mean, - self.b_posterior_log_var, - ) = self.create_parameter("bias", (out_features,)) - else: - self.register_parameter("b_posterior_mean", None) - self.register_parameter("b_posterior_log_var", None) - self.reset_parameters() - - def create_parameter(self, name, dims): - prior_mean = torch.zeros(*dims) - prior_log_var = torch.zeros(*dims) - posterior_mean = nn.Parameter(torch.Tensor(*dims), requires_grad=True) - posterior_log_var = nn.Parameter(torch.Tensor(*dims), requires_grad=True) - # Finally, we register the prior and the posterior with the nn.Module. - # The prior values are registered as buffers, which indicates to PyTorch - # that they represent persistent state which should not be updated by - # the optimizer. The posteriors are registered as parameters, which on - # the other hand are to be modified by the optimizer. - self.register_buffer(f"{name}", prior_mean) # to load with the right name - self.register_buffer(f"prior_{name}_log_var", prior_log_var) - - return prior_mean, prior_log_var, posterior_mean, posterior_log_var - - def reset_for_new_task(self): - """ - Called after completion of a task, to reset state for the next task - """ - # Set the value of the prior to be the current value of the posterior - self.w_prior_mean.data.copy_(self.w_posterior_mean.data) - self.b_prior_mean.data.copy_(self.b_posterior_mean.data) - self.w_prior_log_var.data.copy_(self.w_posterior_log_var.data) - self.b_prior_log_var.data.copy_(self.b_posterior_log_var.data) - - def reset_parameters(self): - # Initialise the posterior means with a normal distribution. Note that - # prior to training we will run a procedure to optimise these values to - # point-estimates of the parameters for the first task. - torch.nn.init.normal_(self.w_posterior_mean, mean=0, std=0.1) - # Initialise the posterior variances with the given constant value. - torch.nn.init.constant_( - self.w_posterior_log_var, math.log(self.initial_posterior_var) - ) - if self.bias is not None: - torch.nn.init.normal_(self.b_posterior_mean, mean=0, std=0.1) - # Initialise the posterior variances with the given constant value. 
- torch.nn.init.constant_( - self.b_posterior_log_var, math.log(self.initial_posterior_var) - ) - - @staticmethod - def _sample_parameters(w_mean, b_mean, w_log_var, b_log_var): - # sample weights and biases from normal distributions - w_epsilon = torch.randn_like(w_mean) - b_epsilon = torch.randn_like(b_mean) - sampled_weight = w_mean + w_epsilon * torch.exp(0.5 * w_log_var) - sampled_bias = b_mean + b_epsilon * torch.exp(0.5 * b_log_var) - return sampled_weight, sampled_bias - - def forward(self, input): - sampled_weight, sampled_bias = self._sample_parameters( - self.w_posterior_mean, - self.b_posterior_mean, - self.w_posterior_log_var, - self.b_posterior_log_var, - ) - return F.linear(input, sampled_weight, sampled_bias) - - -class LeNet300100(nn.Module): - def __init__( - self, - num_classes: int = 10, - input_size: int = 28, - input_channels: int = 1, - dropout: float = 0.0, - ): - super(LeNet300100, self).__init__() - self.fc1 = BayesLinear(input_size * input_size * input_channels, 300) - self.fc2 = BayesLinear(300, 100) - self.fc3 = BayesLinear(100, num_classes) - self.dropout = nn.Dropout(p=dropout) if dropout else None - self.flat_input_size = input_size * input_size * input_channels - - def forward(self, x, num_samples=1): - x = x.view(x.size(0), self.flat_input_size) - y = [] - for s in range(num_samples): - z = F.relu(self.fc1(x)) - z = self.dropout(z) if self.dropout else z - z = F.relu(self.fc2(z)) - z = self.dropout(z) if self.dropout else z - y.append(self.fc3(z)) - return torch.cat(y) - - def get_parameters(self, name): - if "prior" in name: - return concatenate_flattened( - [ - self.fc1.__getattribute__(f"w_{name}"), - self.fc2.__getattribute__(f"w_{name}"), - self.fc3.__getattribute__(f"w_{name}"), - self.fc1.__getattribute__(f"b_{name}"), - self.fc2.__getattribute__(f"b_{name}"), - self.fc3.__getattribute__(f"b_{name}"), - ] - ) - else: - return concatenate_flattened( - [ - self.fc1._parameters.get(f"w_{name}"), - self.fc2._parameters.get(f"w_{name}"), - self.fc3._parameters.get(f"w_{name}"), - self.fc1._parameters.get(f"b_{name}"), - self.fc2._parameters.get(f"b_{name}"), - self.fc3._parameters.get(f"b_{name}"), - ] - ) - - def to(self, *args, **kwargs): - """ - Our prior tensors are registered as buffers but the way we access them - indirectly (through tuple attributes on the model) is causing problems - because when we use `.to()` to move the model to a new device, the prior - tensors get moved (because they're registered as buffers) but the - references in the tuples don't get updated to point to the new moved - tensors. This has no effect when running just on a cpu but breaks the - model when trying to run on a gpu. There are a million nicer ways of - working around this problem, but for now the easiest thing is to do - this: override the `.to()` method and manually update our references to - prior tensors. 
- """ - self = super().to(*args, **kwargs) - for fc in [self.fc1, self.fc2, self.fc3]: - fc.w_prior_mean = fc.w_prior_mean.to(*args, **kwargs) - fc.w_prior_log_var = fc.w_prior_log_var.to(*args, **kwargs) - fc.b_prior_mean = fc.b_prior_mean.to(*args, **kwargs) - fc.b_prior_log_var = fc.b_prior_log_var.to(*args, **kwargs) - return self - - def reset_for_new_task(self): - for fc in [self.fc1, self.fc2, self.fc3]: - fc.reset_for_new_task() - - -def lenet_builder(seed: int, config): - if "5" in config.type: - lenet = LeNet5 - elif "300-100" in config.type: - lenet = LeNet300100 - - torch.manual_seed(seed) - np.random.seed(seed) - torch.cuda.manual_seed(seed) - model = lenet( - num_classes=config.num_classes, - input_size=config.input_size, - input_channels=config.input_channels, - dropout=config.dropout, - ) - return model diff --git a/bias_transfer/models/lenet_frcl.py b/bias_transfer/models/lenet_frcl.py deleted file mode 100644 index 0eee0f1..0000000 --- a/bias_transfer/models/lenet_frcl.py +++ /dev/null @@ -1,295 +0,0 @@ -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -from torch.distributions.multivariate_normal import MultivariateNormal - - -class FRCL(nn.Module): - def __init__( - self, - input_size, - input_channels, - h_dim, - coreset_size, - num_classes: int = 10, - dropout: float = 0.0, - sigma_prior=1, - init_mu_std=1.0, - ): - """ - Adapted from the implementation of https://github.com/AndreevP/FRCL - Args: - input_size: - input_channels: - h_dim: - coreset_size: - num_classes: - dropout: - sigma_prior: - init_mu_std: - """ - super(FRCL, self).__init__() - self.num_classes = num_classes - self.dropout = nn.Dropout(p=dropout) if dropout else None - - self.sigma_prior = sigma_prior - self.w_prior = MultivariateNormal( - torch.zeros(h_dim), covariance_matrix=sigma_prior * torch.eye(h_dim), - ) - self.pred_func = nn.Softmax(dim=-1) - self.init_mu_std = init_mu_std - - self.L = nn.ParameterList( - [ - nn.Parameter(torch.zeros(h_dim, h_dim), requires_grad=True) - for _ in range(num_classes) - ] - ) - self.mu = nn.ParameterList( - [ - nn.Parameter(torch.zeros(h_dim), requires_grad=True,) - for _ in range(num_classes) - ] - ) - self.mu_prev, self.cov_prev = [], [] - for i in range(num_classes): - self.register_buffer(f"mu_prev_{i}", torch.zeros(coreset_size)) - self.register_buffer( - f"cov_prev_{i}", torch.zeros(coreset_size, coreset_size) - ) - self.register_buffer( - "coreset", torch.zeros(coreset_size, input_channels, input_size, input_size) - ) - self.register_buffer( - "coreset_prev", - torch.zeros(coreset_size, input_channels, input_size, input_size), - ) - self.device = self.coreset.device - self.reset_parameters() - - def reset_parameters(self): - for i in range(self.num_classes): - torch.nn.init.eye_(self.L[i]) - torch.nn.init.normal_(self.mu[i], mean=0, std=self.init_mu_std) - # TODO reset the rest? 
- - def to(self, *args, **kwargs): - self = super().to(*args, **kwargs) - for i in range(self.num_classes): - self._buffers[f"mu_prev_{i}"] = self._buffers[f"mu_prev_{i}"].to( - *args, **kwargs - ) - self._buffers[f"cov_prev_{i}"] = self._buffers[f"cov_prev_{i}"].to( - *args, **kwargs - ) - self.w_prior = MultivariateNormal( - self.w_prior.mean.to(*args, **kwargs), - covariance_matrix=self.w_prior.covariance_matrix.to(*args, **kwargs), - ) - self.coreset = self.coreset.to(*args, **kwargs) - self.coreset_prev = self.coreset_prev.to(*args, **kwargs) - self.device = self.coreset.device - return self - - def reset_for_new_task(self): - """ - Called after completion of a task, to reset state for the next task - """ - self.coreset_prev = self.coreset - phi_z = self.core_forward(self.coreset_prev) - for i in range(self.num_classes): - mu, cov = self._get_inducing_distribution(phi_z, i) - self._buffers[f"mu_prev_{i}"] = mu - self._buffers[f"cov_prev_{i}"] = cov - - @property - def prev(self): - try: - return self._prev - except AttributeError: - self._prev = torch.any(self.coreset_prev != 0) - return self._prev - - def forward(self, x, num_samples=8): - phi = self.core_forward(x) - if self.training: - return self._train_forward(phi, num_samples) - else: - return self._eval_forward(phi, num_samples) - - def core_forward(self, x): - raise NotImplementedError() - - def _train_forward(self, phi, num_samples): - """ - Return -ELBO - N_k = len(dataset), required for unbiased estimate through minibatch - """ - mu = self.mu - cov = [self.L[i] @ self.L[i].T for i in range(len(self.L))] - means = torch.stack([phi @ mu[i] for i in range(len(mu))], dim=1) - # variances = torch.cat([((phi @ cov[i]) * phi).sum(-1) for i in range(len(cov))], axis = 0) - variances = torch.stack( - [torch.diagonal(phi @ cov[i] @ phi.T, 0) for i in range(len(cov))], dim=1 - ) - samples = torch.cat( - [ - means - + torch.sqrt(variances + 1e-6) - * torch.randn(means.shape).to(self.device) - for i in range(num_samples) - ] - ) - return samples - - def _get_inducing_distribution(self, phi_z, i): - mu_u = phi_z @ self.mu[i] - L_u = phi_z @ self.L[i] - cov_u = L_u @ L_u.T - cov_u = cov_u + torch.eye(cov_u.shape[0]).to(self.device) * 1e-4 - return mu_u, cov_u - - def _get_predictive(self, phi_x): - """ Computes predictive distribution according to section 2.5 - x - batch of data - k - index of task - Return predictive distribution q_\theta(f) - """ - phi_z = self.core_forward(self.coreset) - k_xx = phi_x @ phi_x.T * self.sigma_prior - k_xz = phi_x @ phi_z.T * self.sigma_prior - k_zz = phi_z @ phi_z.T * self.sigma_prior - k_zz_ = torch.inverse(k_zz + torch.eye(phi_z.shape[0]).to(self.device) * 1e-3) - - mu_u, cov_u = ( - [None for _ in range(self.num_classes)], - [None for _ in range(self.num_classes)], - ) - for i in range(self.num_classes): - mu_u[i], cov_u[i] = self._get_inducing_distribution(phi_z, i) - - mu = [phi_x @ phi_z.T @ k_zz_ @ mu_u[i] for i in range(self.num_classes)] - sigma = [ - k_xx - + ( - k_xz - @ k_zz_ - @ (cov_u[i] - k_zz + torch.eye(k_zz.shape[0]).to(self.device) * 1e-4) - @ k_zz_ - @ k_xz.T - ) - for i in range(self.num_classes) - ] - sigma = [ - sigma[i] * torch.eye(sigma[i].shape[0]).to(self.device) - + torch.eye(sigma[i].shape[0]).to(self.device) * 1e-6 - for i in range(self.num_classes) - ] - # print([s.min() for s in sigma]) - sigma = [ - torch.clamp(sigma[i], min=0, max=10000.0) - + torch.eye(sigma[i].shape[0]).to(self.device) * 1e-6 - for i in range(self.num_classes) - ] - # we are interested only - # in 
diagonal part for inference ? - return [ - MultivariateNormal(loc=mu[i], covariance_matrix=sigma[i]) - for i in range(self.num_classes) - ] - - def _eval_forward(self, phi, num_samples): - """ - Compute p(y) by MC estimate from q_\theta(f)? - """ - distr = self._get_predictive(phi) - # TODO: speedup possible if you precompute distr before an eval epoch (i.e. not recompute for each batch) - predicted = [] - for _ in range(num_samples): - sample = [distr[i].sample() for i in range(self.num_classes)] - predicted.append(self.pred_func(torch.stack(sample, dim=1))) - return torch.cat(predicted) - - -class LeNet5( - FRCL -): # adapted from https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html - def __init__(self, input_size: int = 28, input_channels: int = 1, *args, **kwargs): - super(LeNet5, self).__init__( - input_size=input_size, - input_channels=input_channels, - h_dim=84, - *args, - **kwargs, - ) - conv_out_size = int( - ((((input_size - 3) + 1) / 2 - 3) + 1) / 2 - ) # [(W-K+2P)/S]+1 / MP - self.flat_feature_size = (conv_out_size ** 2) * 16 - # 1 input image channel, 6 output channels, 3x3 square convolution - # kernel - self.conv1 = nn.Conv2d(input_channels, 6, 3) - self.conv2 = nn.Conv2d(6, 16, 3) - # an affine operation: y = Wx + b - self.fc1 = nn.Linear(self.flat_feature_size, 120) - self.fc2 = nn.Linear(120, 84) - - def core_forward(self, x): - x = F.relu(self.conv1(x)) - x = self.dropout(x) if self.dropout else x - # Max pooling over a (2, 2) window - x = F.max_pool2d(x, (2, 2)) - x = F.relu(self.conv2(x)) - x = self.dropout(x) if self.dropout else x - # If the size is a square you can only specify a single number - x = F.max_pool2d(x, 2) - x = x.view(-1, self.flat_feature_size) - x = F.relu(self.fc1(x)) - x = self.dropout(x) if self.dropout else x - x = F.relu(self.fc2(x)) - x = self.dropout(x) if self.dropout else x - return x - - -class LeNet300100(FRCL): - def __init__(self, input_size: int = 28, input_channels: int = 1, *args, **kwargs): - super(LeNet300100, self).__init__( - input_size=input_size, - input_channels=input_channels, - h_dim=100, - *args, - **kwargs, - ) - self.fc1 = nn.Linear(input_size * input_size * input_channels, 300) - self.fc2 = nn.Linear(300, 100) - self.flat_input_size = input_size * input_size * input_channels - - def core_forward(self, x): - x = x.view(x.size(0), self.flat_input_size) - x = F.relu(self.fc1(x)) - x = self.dropout(x) if self.dropout else x - x = F.relu(self.fc2(x)) - x = self.dropout(x) if self.dropout else x - return x - - -def lenet_builder(seed: int, config): - if "5" in config.type: - lenet = LeNet5 - elif "300-100" in config.type: - lenet = LeNet300100 - - torch.manual_seed(seed) - np.random.seed(seed) - torch.cuda.manual_seed(seed) - model = lenet( - num_classes=config.num_classes, - input_size=config.input_size, - input_channels=config.input_channels, - dropout=config.dropout, - coreset_size=config.coreset_size, - ) - return model diff --git a/bias_transfer/models/mtl_vgg.py b/bias_transfer/models/mtl_vgg.py deleted file mode 100644 index b9453b0..0000000 --- a/bias_transfer/models/mtl_vgg.py +++ /dev/null @@ -1,296 +0,0 @@ -import torchvision -import torch.nn as nn -from torch.autograd import Variable -from neuralpredictors.layers.cores import Core2d -import torch -from nnfabrik.utility.nn_helpers import get_dims_for_loader_dict -import numpy as np -from torch.nn import functional as F -from neuralpredictors.layers.legacy import Gaussian2d -from neuralpredictors.training import eval_state -from .vgg import 
create_vgg_readout - -VGG_TYPES = { - "vgg11": torchvision.models.vgg11, - "vgg11_bn": torchvision.models.vgg11_bn, - "vgg13": torchvision.models.vgg13, - "vgg13_bn": torchvision.models.vgg13_bn, - "vgg16": torchvision.models.vgg16, - "vgg16_bn": torchvision.models.vgg16_bn, - "vgg19_bn": torchvision.models.vgg19_bn, - "vgg19": torchvision.models.vgg19, -} - - -def get_module_output(model, input_shape): - """ - Gets the output dimensions of the convolutional core - by passing an input image through all convolutional layers - :param core: convolutional core of the DNN, which final dimensions - need to be passed on to the readout layer - :param input_shape: the dimensions of the input - :return: output dimensions of the core - """ - initial_device = "cuda" if next(iter(model.parameters())).is_cuda else "cpu" - device = "cuda" if torch.cuda.is_available() else "cpu" - with eval_state(model): - with torch.no_grad(): - input = torch.zeros(1, *input_shape[1:]).to(device) - output = model.to(device)(input) - model.to(initial_device) - - return output[0].shape - - -class MultipleGaussian2d(torch.nn.ModuleDict): - def __init__( - self, - in_shapes, - n_neurons_dict, - init_mu_range, - init_sigma_range, - bias, - gamma_readout, - ): - # super init to get the _module attribute - super(MultipleGaussian2d, self).__init__() - for k in n_neurons_dict: - in_shape = in_shapes[k] - n_neurons = n_neurons_dict[k] - self.add_module( - k, - Gaussian2d( - in_shape=in_shape, - outdims=n_neurons, - init_mu_range=init_mu_range, - init_sigma_range=init_sigma_range, - bias=bias, - ), - ) - self.gamma_readout = gamma_readout - - def forward(self, *args, data_key=None, **kwargs): - if data_key is None and len(self) == 1: - data_key = list(self.keys())[0] - return self[data_key](*args, **kwargs) - - def regularizer(self, data_key): - return self[data_key].feature_l1(average=False) * self.gamma_readout - - -class MTL_VGG_Core(Core2d, nn.Module): - def __init__( - self, - classification=True, - vgg_type="vgg19_bn", - pretrained=True, - v1_model_layer=17, - neural_input_channels=1, - classification_input_channels=1, - v1_fine_tune=False, - momentum=0.1, - v1_bias=True, - v1_final_batchnorm=False, - **kwargs - ): - - super(MTL_VGG_Core, self).__init__() - self.v1_model_layer = v1_model_layer - self.neural_input_channels, self.classification_input_channels = ( - neural_input_channels, - classification_input_channels, - ) - self.v1_final_batchnorm = v1_final_batchnorm - self.classification = classification - - # load convolutional part of vgg - assert vgg_type in VGG_TYPES, "Unknown vgg_type '{}'".format(vgg_type) - vgg_loader = VGG_TYPES[vgg_type] - vgg = vgg_loader(pretrained=pretrained) - - self.shared_block = nn.Sequential( - *list(vgg.features.children())[:v1_model_layer] - ) - - # Remove the bias of the last conv layer if not bias: - if not v1_bias: - if "bias" in self.shared_block[-1]._parameters: - zeros = torch.zeros_like(self.shared_block[-1].bias) - self.shared_block[-1].bias.data = zeros - - # Fix pretrained parameters during training parameters - if not v1_fine_tune: - for param in self.shared_block.parameters(): - param.requires_grad = False - - if v1_final_batchnorm: - self.v1_extra = nn.Sequential() - self.v1_extra.add_module( - "OutBatchNorm", nn.BatchNorm2d(self.outchannels, momentum=momentum) - ) - self.v1_extra.add_module("OutNonlin", nn.ReLU(inplace=True)) - - if classification: - self.unshared_block = nn.Sequential( - *list(vgg.features.children())[v1_model_layer:] - ) - - def forward(self, x, 
classification=False): - if (classification and self.classification_input_channels == 1) or ( - not classification and self.neural_input_channels == 1 - ): - x = x.expand(-1, 3, -1, -1) - v1_core_out = shared_core_out = self.shared_block(x) - if self.v1_final_batchnorm: - v1_core_out = self.v1_extra(shared_core_out) - if classification: - core_out = self.unshared_block(shared_core_out) - return v1_core_out, core_out - return v1_core_out, None - - @property - def outchannels(self): - """ - Returns: dimensions of the output, after a forward pass through the model - """ - found_out_channels = False - i = 1 - while not found_out_channels: - if "out_channels" in self.shared_block[-i].__dict__: - found_out_channels = True - else: - i = i + 1 - return self.shared_block[-i].out_channels - - -class MTL_VGG(nn.Module): - def __init__( - self, - dataloaders, - vgg_type="vgg19_bn", - classification=False, - classification_readout_type=None, - input_size=None, - num_classes=200, - pretrained=True, - v1_model_layer=17, - neural_input_channels=1, - classification_input_channels=1, - v1_fine_tune=False, - v1_init_mu_range=0.4, - v1_init_sigma_range=0.6, - v1_readout_bias=True, - v1_bias=True, - v1_final_batchnorm=False, - v1_gamma_readout=0.002, - v1_elu_offset=-1, - **kwargs - ): - - super(MTL_VGG, self).__init__() - self.classification_readout_type = classification_readout_type - self.input_size = input_size - self.num_classes = num_classes - self.v1_elu_offset = v1_elu_offset - self.neural_input_channels = neural_input_channels - self.classification_input_channels = classification_input_channels - - # for neural dataloaders - if classification: - neural_train_dataloaders = { - k: loader - for k, loader in dataloaders["train"].items() - if k != "img_classification" - } - elif "train" in dataloaders.keys(): - neural_train_dataloaders = dataloaders["train"] - else: - neural_train_dataloaders = dataloaders - - session_shape_dict = get_dims_for_loader_dict(neural_train_dataloaders) - in_name, out_name = next( - iter(list(neural_train_dataloaders.values())[0]) - )._fields - self.neural_input_channels = [ - v[in_name][1] for v in session_shape_dict.values() - ] - assert ( - np.unique(self.neural_input_channels).size == 1 - ), "all input channels must be of equal size" - - self.mtl_vgg_core = MTL_VGG_Core( - vgg_type=vgg_type, - classification=classification, - pretrained=pretrained, - v1_model_layer=v1_model_layer, - v1_fine_tune=v1_fine_tune, - neural_input_channels=self.neural_input_channels[0], - classification_input_channels=self.classification_input_channels, - v1_final_batchnorm=v1_final_batchnorm, - v1_bias=v1_bias, - ) - - n_neurons_dict = {k: v[out_name][1] for k, v in session_shape_dict.items()} - in_shapes_dict = {k: v[in_name] for k, v in session_shape_dict.items()} - in_shapes = {} - for k in n_neurons_dict: - in_shapes[k] = get_module_output(self.mtl_vgg_core, in_shapes_dict[k])[1:] - - self.v1_readout = MultipleGaussian2d( - in_shapes=in_shapes, - n_neurons_dict=n_neurons_dict, - init_mu_range=v1_init_mu_range, - bias=v1_readout_bias, - init_sigma_range=v1_init_sigma_range, - gamma_readout=v1_gamma_readout, - ) - if v1_readout_bias: - for key, value in neural_train_dataloaders.items(): - _, targets = next(iter(value)) - self.v1_readout[key].bias.data = targets.mean(0) - - if classification: - # init fully connected part of vgg - test_input = Variable(torch.zeros(1, 3, input_size, input_size)) - _, test_out = self.mtl_vgg_core(test_input, classification=True) - self.n_features = test_out.size(1) 
* test_out.size(2) * test_out.size(3) - self.classification_readout = create_vgg_readout( - classification_readout_type, - n_features=self.n_features, - num_classes=num_classes, - ) - self._initialize_weights_classification_readout() - - def forward(self, x, data_key=None, classification=False): - shared_core_out, core_out = self.mtl_vgg_core(x, classification) - if classification: - if self.classification_readout_type == "dense": - core_out = core_out.view(core_out.size(0), -1) - classification_out = self.classification_readout(core_out) - return classification_out - v1_out = self.v1_readout(shared_core_out, data_key=data_key) - v1_out = F.elu(v1_out + self.v1_elu_offset) + 1 - return v1_out - - def regularizer(self, data_key=None): - return self.v1_readout.regularizer(data_key=data_key) - - def freeze(self, selection=("v1",)): - if selection is True or "v1" in selection: - for param in self.mtl_vgg_core.shared_block.parameters(): - param.requires_grad = False - - - def _initialize_weights_classification_readout(self): - if self.mtl_vgg_core.classification: - for m in self.classification_readout: - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - if m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.constant_(m.bias, 0) diff --git a/bias_transfer/models/resnet_self_attention.py b/bias_transfer/models/resnet_self_attention.py deleted file mode 100644 index 82fae76..0000000 --- a/bias_transfer/models/resnet_self_attention.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Implementation adapted from https://github.com/leaderj1001/Stand-Alone-Self-Attention -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .attention import AttentionConv, AttentionStem - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_channels, out_channels, stride=1, groups=1, base_width=64): - super(Bottleneck, self).__init__() - self.stride = stride - width = int(out_channels * (base_width / 64.0)) * groups - - self.conv1 = nn.Sequential( - nn.Conv2d(in_channels, width, kernel_size=1, bias=False), - nn.BatchNorm2d(width), - nn.ReLU(), - ) - self.conv2 = nn.Sequential( - AttentionConv(width, width, kernel_size=7, padding=3, groups=8), - nn.BatchNorm2d(width), - nn.ReLU(), - ) - self.conv3 = nn.Sequential( - nn.Conv2d(width, self.expansion * out_channels, kernel_size=1, bias=False), - nn.BatchNorm2d(self.expansion * out_channels), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_channels != self.expansion * out_channels: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_channels, - self.expansion * out_channels, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(self.expansion * out_channels), - ) - - def forward(self, x): - out = self.conv1(x) - out = self.conv2(out) - out = self.conv3(out) - if self.stride >= 2: - out = F.avg_pool2d(out, (self.stride, self.stride)) - - out += self.shortcut(x) - out = F.relu(out) - - return out - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=1000, stem=False): - super(ResNet, self).__init__() - print("RESNET SELF-ATTENTION!!!") - self.in_places = 64 - - if stem: - self.init = nn.Sequential( - # CIFAR10 - AttentionStem( - in_channels=3, - out_channels=64, - kernel_size=4, - stride=1, - padding=2, - groups=1, - ), - nn.BatchNorm2d(64), - 
nn.ReLU(), - # For ImageNet - # AttentionStem(in_channels=3, out_channels=64, kernel_size=4, stride=1, padding=2, groups=1), - # nn.BatchNorm2d(64), - # nn.ReLU(), - # nn.MaxPool2d(4, 4) - ) - else: - self.init = nn.Sequential( - # CIFAR10 - nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(), - # For ImageNet - # nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False), - # nn.BatchNorm2d(64), - # nn.ReLU(), - # nn.MaxPool2d(kernel_size=3, stride=2, padding=1), - ) - - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.readout = nn.Linear(512 * block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - layers = [] - for stride in strides: - layers.append(block(self.in_places, planes, stride)) - self.in_places = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = self.init(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - core_out = out.view(out.size(0), -1) - out = self.readout(core_out) - - return {"logits": out, "conv_rep": core_out} diff --git a/bias_transfer/models/wrappers/noise_adv.py b/bias_transfer/models/wrappers/noise_adv.py deleted file mode 100644 index 577898d..0000000 --- a/bias_transfer/models/wrappers/noise_adv.py +++ /dev/null @@ -1,51 +0,0 @@ -import torch.nn as nn -from torch.autograd import Function - - -# Used the implementation from https://github.com/CuthbertCai/pytorch_DANN -class GradReverse(Function): - @staticmethod - def forward(ctx, x, lambda_p): - ctx.constant = lambda_p - return x.view_as(x) - - @staticmethod - def backward(ctx, grad_output): - grad_output = grad_output.neg() * ctx.constant - return grad_output, None - - -def grad_reverse(x, lambda_p): - return GradReverse.apply(x, lambda_p) - - -class NoiseAdvWrapper(nn.Module): - def __init__( - self, - model, - input_size, - hidden_size, - classification: bool = False, - num_noise_readout_layers: int = 1, - sigmoid_output: bool = False, - ): - super().__init__() - self.model = model - - noise_readout_layers = [] - for i in range(0, num_noise_readout_layers): - in_size = input_size if i == 0 else hidden_size - out_size = 1 if i == num_noise_readout_layers - 1 else hidden_size - noise_readout_layers.append(nn.Linear(in_size, out_size)) - if i < num_noise_readout_layers - 1: - noise_readout_layers.append(nn.ReLU()) - self.noise_readout = nn.Sequential(*noise_readout_layers) - self.nonlinearity = nn.Sigmoid() if classification or sigmoid_output else nn.ReLU() - - def forward(self, x, seed: int = None, noise_lambda=None): - extra_output, out = self.model(x) - core_out = extra_output["core"] - noise_out = self.noise_readout(grad_reverse(core_out, noise_lambda)) - noise_out = self.nonlinearity(noise_out) - extra_output["noise_pred"] = noise_out - return extra_output, out diff --git a/bias_transfer/run_tests.sh b/bias_transfer/run_tests.sh deleted file mode 100755 index 63afdfe..0000000 --- a/bias_transfer/run_tests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -pip install -e ../../ml-utils -pip install -e ../../nnfabrik -pip install -e ../../nnvision -pip install -e ../../bias_transfer - -python -m unittest tests diff 
--git a/bias_transfer/tables/evaluated_model.py b/bias_transfer/tables/evaluated_model.py deleted file mode 100644 index 4dd1a9f..0000000 --- a/bias_transfer/tables/evaluated_model.py +++ /dev/null @@ -1,95 +0,0 @@ -from .nnfabrik import * -from nnfabrik.templates.trained_model import * -from .trained_model import TrainedModel - - -@schema -class EvaluatedModel(TrainedModelBase): - table_comment = "Custom evaluation for trained models" - - definition = """ - -> TrainedModel - --- - ->[nullable] Fabrikant - score: float # loss - output: longblob # trainer object's output - evaluatedmodel_ts=CURRENT_TIMESTAMP: timestamp # UTZ timestamp at time of insertion - """ - ModelStorage = None - - def get_full_config(self, key=None, include_state_dict=True, include_trainer=True): - """ - Returns the full configuration dictionary needed to build all components of the network - training including dataset, model and trainer. The returned dictionary is designed to be - passed (with dictionary expansion) into the get_all_parts function provided in builder.py. - - Args: - key - specific key against which to retrieve all configuration. The key must restrict all component - tables into a single entry. If None, will assume that this table is already restricted and - will obtain an existing single entry. - include_state_dict (bool) : If True, and if key refers to a model already trained with a corresponding entry in self.ModelStorage, - the state_dict of the trained model is retrieved and returned - include_trainer (bool): If False, then trainer configuration is skipped. Usually desirable when you want to simply retrieve trained model. - """ - if key is None: - key = self.fetch1("KEY") - - model_fn, model_config = (self.model_table & key).fn_config - dataset_fn, dataset_config = (self.dataset_table & key).fn_config - - ret = dict( - model_fn=model_fn, - model_config=model_config, - dataset_fn=dataset_fn, - dataset_config=dataset_config, - ) - - if include_trainer: - trainer_fn, trainer_config = (self.trainer_table & key).fn_config - ret["trainer_fn"] = trainer_fn - ret["trainer_config"] = trainer_config - - # if trained model exist and include_state_dict is True - if include_state_dict and (TrainedModel.ModelStorage & key): - with tempfile.TemporaryDirectory() as temp_dir: - state_dict_path = (TrainedModel.ModelStorage & key).fetch1( - "model_state", download_path=temp_dir - ) - ret["state_dict"] = torch.load(state_dict_path) - - return ret - - def make(self, key): - """ - Given key specifying configuration for dataloaders, model and trainer, - trains the model and saves the trained model. 
- """ - # lookup the fabrikant corresponding to the current DJ user - fabrikant_name = Fabrikant.get_current_user() - seed = (Seed & key).fetch1("seed") - - # load everything - dataloaders, model, trainer = self.load_model( - key, include_trainer=True, include_state_dict=True, seed=seed - ) - # model = ((TrainedModel() & key).ModelStorage()).fetch1("model_state") - - # define callback with pinging - def call_back(**kwargs): - self.connection.ping() - self.call_back(**kwargs) - - # model training - score, output, model_state = trainer( - model, dataloaders, seed=seed, uid=key, cb=call_back, eval_only=True - ) - - with tempfile.TemporaryDirectory() as temp_dir: - filename = make_hash(key) + ".pth.tar" - filepath = os.path.join(temp_dir, filename) - torch.save(model_state, filepath) - - key["score"] = score - key["output"] = output - key["fabrikant_name"] = fabrikant_name - self.insert1(key) diff --git a/bias_transfer/tests/__init__.py b/bias_transfer/tests/__init__.py deleted file mode 100644 index b2909c1..0000000 --- a/bias_transfer/tests/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .test_training import * -from .test_transfer import * -from .test_model import * -from .test_dataset import * -from .test_dataset_filter import * -from .test_lottery_ticket_pruning import * diff --git a/bias_transfer/tests/_base.py b/bias_transfer/tests/_base.py deleted file mode 100644 index 90e49cf..0000000 --- a/bias_transfer/tests/_base.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import unittest -import copy - -import numpy as np -import torch -from torch.utils.data import SubsetRandomSampler -from torch.utils.data.sampler import SequentialSampler - -import nnfabrik as nnf -from bias_transfer.configs import dataset, model, trainer -from bias_transfer.models.utils import weight_reset - - -class BaseTest(unittest.TestCase): - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal CIFAR10", - dataset_cls="CIFAR10", - apply_data_normalization=False, - apply_data_augmentation=False, - add_corrupted_test=False, - valid_size=0.95, - ) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10 ResNet18", dataset_cls="CIFAR10", type="resnet18", - # advanced_init=True, zero_init_residual=True - ) - seed = 42 - - @classmethod - def run_training(cls, trainer_conf): - uid = "test1" - path = "./checkpoint/ckpt.{}.pth".format(nnf.utility.dj_helpers.make_hash(uid)) - if os.path.exists(path): - os.remove(path) - torch.manual_seed(cls.seed) - np.random.seed(cls.seed) - torch.cuda.manual_seed(cls.seed) - cls.model = copy.deepcopy(cls.start_model) - - trainer_fn = nnf.builder.get_trainer(trainer_conf.fn, trainer_conf.to_dict()) - - def call_back(**kwargs): - pass - - # model training - score, output, model_state = trainer_fn( - model=cls.model, - dataloaders=cls.data_loaders, - seed=cls.seed, - uid=uid, - cb=call_back, - ) - return score - - @classmethod - def get_parts(cls, dataset_conf, model_conf, seed): - os.chdir("/work/") - cls.data_loaders, cls.model = nnf.builder.get_all_parts( - dataset_fn=dataset_conf.fn, - dataset_config=dataset_conf.to_dict(), - model_fn=model_conf.fn, - model_config=model_conf.to_dict(), - seed=seed, - trainer_fn=None, - trainer_config=None, - ) - cls.data_loaders["validation"] = cls.data_loaders["train"] - cls.data_loaders["test"] = cls.data_loaders["train"] - if "c_test" in cls.data_loaders: - category_1 = list(cls.data_loaders["c_test"].keys())[0] - cls.data_loaders["c_test"] = { - category_1: {1: cls.data_loaders["c_test"][category_1][1]} - } - cls.start_model = 
copy.deepcopy(cls.model) - - @classmethod - def setUpClass(cls): # called once before all methods of the class - cls.get_parts(cls.dataset_conf, cls.model_conf, cls.seed) diff --git a/bias_transfer/tests/_main_loop_module.py b/bias_transfer/tests/_main_loop_module.py deleted file mode 100644 index 7e83c4b..0000000 --- a/bias_transfer/tests/_main_loop_module.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np -import torch -from torch import optim, nn -from torch.backends import cudnn as cudnn -from tqdm import tqdm - -from bias_transfer.tests._base import BaseTest -from bias_transfer.trainer.utils import move_data - - -class MainLoopModuleTest(BaseTest): - def pre_epoch_test(self, model, epoch): - pass - - def pre_forward_test(self, model, inputs, shared_memory): - pass - - def post_forward_test(self, outputs, loss, targets, module_losses, **kwargs): - pass - - def post_backward_test(self, model): - pass - - def main_loop( - self, model, data_loader, module, config, device, epoch: int = 0 - ): - optimizer = getattr(optim, config.optimizer)( - model.parameters(), **config.optimizer_options - ) - n_iterations = len(data_loader) - torch.manual_seed(self.seed) - np.random.seed(self.seed) - if device == "cuda": - cudnn.benchmark = False - cudnn.deterministic = True - torch.cuda.manual_seed(self.seed) - criterion = getattr(nn, config.loss_functions["img_classification"])() - model.train() - epoch_loss, correct, total, module_losses, collected_outputs = 0, 0, 0, {}, [] - if hasattr( - tqdm, "_instances" - ): # To have tqdm output without line-breaks between steps - tqdm._instances.clear() - with torch.enable_grad(): - with tqdm( - enumerate(data_loader), - total=n_iterations, - desc="{} Epoch {}".format("Train", epoch), - ) as t: - - module.pre_epoch(model, True, epoch) - self.pre_epoch_test(model, epoch) - - optimizer.zero_grad() - - for batch_idx, batch_data in t: - # Pre-Forward - loss = torch.zeros(1, device=device) - inputs, targets, data_key, batch_dict = move_data( - batch_data, device, False - ) - shared_memory = {} # e.g. 
to remember where which noise was applied - model, inputs = module.pre_forward(model, inputs, shared_memory, True) - self.pre_forward_test(model, inputs, shared_memory) - # Forward - outputs = model(inputs) - # Post-Forward - outputs, loss, targets = module.post_forward(outputs, loss, targets, module_losses, True, - **shared_memory) - self.post_forward_test( - outputs, loss, targets, module_losses, **shared_memory - ) - loss += criterion(outputs, targets) - epoch_loss += loss.item() - - # Book-keeping - def average_loss(loss_): - return loss_ / (batch_idx + 1) - - _, predicted = outputs.max(1) - total += targets.size(0) - correct += predicted.eq(targets).sum().item() - eval = 100.0 * correct / total - - t.set_postfix( - eval=eval, - loss=average_loss(epoch_loss), - **{k: average_loss(l) for k, l in module_losses.items()} - ) - - # Backward - loss.backward() - module.post_backward(model) - self.post_backward_test(model) - - optimizer.step() - optimizer.zero_grad() - - return ( - eval, - average_loss(epoch_loss), - {k: average_loss(l) for k, l in module_losses.items()}, - ) diff --git a/bias_transfer/tests/test_dataset.py b/bias_transfer/tests/test_dataset.py deleted file mode 100644 index d1ec804..0000000 --- a/bias_transfer/tests/test_dataset.py +++ /dev/null @@ -1,231 +0,0 @@ -import unittest -import os -from bias_transfer.configs import trainer, model, dataset -from bias_transfer.tests._base import BaseTest -import nnfabrik as nnf - - -class DatasetTest(BaseTest): - def test_cifar100(self): - print("===================================================", flush=True) - print("================TEST CIFAR100 Training=============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR100", dataset_cls="CIFAR100", type="resnet18", - ) - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal CIFAR100", - dataset_cls="CIFAR100", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - valid_size=0.95, - ) - self.get_parts(dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="CIFAR100 Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 9.92, places=1) - - def test_tiny_imagenet(self): - print("===================================================", flush=True) - print("==============TEST Tiny-ImageNet Training==========", flush=True) - model_conf = model.ClassificationModelConfig( - comment="TinyImageNet", dataset_cls="TinyImageNet", type="resnet18", - ) - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal TinyImageNet", - dataset_cls="TinyImageNet", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - valid_size=0.95, - ) - self.get_parts(dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="TinyImageNet Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 5.52, places=1) - - def test_imagenet(self): - print("===================================================", 
flush=True) - print("=================TEST ImageNet Training============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="ImageNet", dataset_cls="ImageNet", type="resnet18", - ) - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal ImageNet", - dataset_cls="ImageNet", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - batch_size=70, - valid_size=0.995, - ) - self.get_parts(dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="ImageNet Training Test", - max_iter=1, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 0.9990633780830471, places=1) - - def test_imagenet_pretrained(self): - print("===================================================", flush=True) - print("================TEST ImageNet Pretrained===========", flush=True) - model_conf = model.ClassificationModelConfig( - comment="ImageNet", dataset_cls="ImageNet", type="resnet50", pretrained=True - ) - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal ImageNet", - dataset_cls="ImageNet", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=False, - valid_size=0.01, - ) - self.data_loaders, self.model = nnf.builder.get_all_parts( - dataset_fn=dataset_conf.fn, - dataset_config=dataset_conf.to_dict(), - model_fn=model_conf.fn, - model_config=model_conf.to_dict(), - seed=self.seed, - trainer_fn=None, - trainer_config=None, - ) - if "c_test" in self.data_loaders: - category_1 = list(self.data_loaders["c_test"].keys)[0] - self.data_loaders["c_test"] = { - category_1: {1: self.data_loaders["c_test"][category_1][1]} - } - trainer_conf = trainer.TrainerConfig( - comment="ImageNet Training Test", - max_iter=0, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - uid = "test1" - path = "./checkpoint/ckpt.{}.pth".format(nnf.utility.dj_helpers.make_hash(uid)) - if os.path.exists(path): - os.remove(path) - - trainer_fn = nnf.builder.get_trainer(trainer_conf.fn, trainer_conf.to_dict()) - - def call_back(**kwargs): - pass - - # model training - score, output, model_state = trainer_fn( - model=self.model, - dataloaders=self.data_loaders, - seed=self.seed, - uid=uid, - cb=call_back, - ) - self.assertAlmostEqual(score, 76.1, places=1) - - def test_imagenet_pretrained_vgg(self): - print("===================================================", flush=True) - print("=============TEST ImageNet Pretrained (VGG)========", flush=True) - model_conf = model.ClassificationModelConfig( - comment="ImageNet", dataset_cls="ImageNet", type="vgg19_bn", pretrained=True - ) - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal ImageNet", - dataset_cls="ImageNet", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=False, - valid_size=0.01, - ) - self.data_loaders, self.model = nnf.builder.get_all_parts( - dataset_fn=dataset_conf.fn, - dataset_config=dataset_conf.to_dict(), - model_fn=model_conf.fn, - model_config=model_conf.to_dict(), - seed=self.seed, - trainer_fn=None, - trainer_config=None, - ) - if "c_test" in self.data_loaders: - category_1 = 
list(self.data_loaders["c_test"].keys)[0] - self.data_loaders["c_test"] = { - category_1: {1: self.data_loaders["c_test"][category_1][1]} - } - trainer_conf = trainer.TrainerConfig( - comment="ImageNet Training Test", - max_iter=0, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - uid = "test1" - path = "./checkpoint/ckpt.{}.pth".format(nnf.utility.dj_helpers.make_hash(uid)) - if os.path.exists(path): - os.remove(path) - - trainer_fn = nnf.builder.get_trainer(trainer_conf.fn, trainer_conf.to_dict()) - - def call_back(**kwargs): - pass - - # model training - score, output, model_state = trainer_fn( - model=self.model, - dataloaders=self.data_loaders, - seed=self.seed, - uid=uid, - cb=call_back, - ) - self.assertAlmostEqual(score, 74.24, places=1) - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/tests/test_dataset_filter.py b/bias_transfer/tests/test_dataset_filter.py deleted file mode 100644 index a28c5f3..0000000 --- a/bias_transfer/tests/test_dataset_filter.py +++ /dev/null @@ -1,71 +0,0 @@ -import unittest -from bias_transfer.configs import dataset -from bias_transfer.tests._base import BaseTest -import nnfabrik as nnf - - -class DatasetFilterTest(BaseTest): - def test_cifar100(self): - print("===================================================", flush=True) - print("=================TEST CIFAR100 Filter==============", flush=True) - start = 10 - end = 90 - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal CIFAR100", - dataset_cls="CIFAR100", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - valid_size=0.00, - filter_classes=(start, end), - seed=42, - ) - data_loaders = nnf.builder.get_data(dataset_conf.fn, dataset_conf.to_dict()) - self.assertEqual(len(data_loaders["train"]["img_classification"].dataset), (end-start) * 500) - self.assertEqual(len(data_loaders["test"]["img_classification"].dataset), (end-start) * 100) - self.assertEqual(len(data_loaders["c_test"]["frost"][1].dataset), (end-start) * 100) - - - def test_cifar10(self): - print("===================================================", flush=True) - print("=================TEST CIFAR10 Filter===============", flush=True) - start = 2 - end = 10 - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal CIFAR10", - dataset_cls="CIFAR10", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - valid_size=0.00, - filter_classes=(start, end), - seed=42, - ) - data_loaders = nnf.builder.get_data(dataset_conf.fn, dataset_conf.to_dict()) - self.assertEqual(len(data_loaders["train"]["img_classification"].dataset), (end-start) * 5000) - self.assertEqual(len(data_loaders["test"]["img_classification"].dataset), (end-start) * 1000) - self.assertEqual(len(data_loaders["c_test"]["speckle_noise"][5].dataset), (end-start) * 1000) - - - def test_tiny_imagenet(self): - print("===================================================", flush=True) - print("===============TEST TinyImageNet Filter============", flush=True) - start = 0 - end = 150 - dataset_conf = dataset.ImageDatasetConfig( - comment="Minimal TinyImageNet", - dataset_cls="TinyImageNet", - apply_data_normalization=True, - apply_data_augmentation=True, - add_corrupted_test=True, - valid_size=0.00, - filter_classes=(start, end), - seed=42, - ) - data_loaders = 
nnf.builder.get_data(dataset_conf.fn, dataset_conf.to_dict()) - self.assertEqual(len(data_loaders["train"]["img_classification"].dataset), (end-start) * 500) - self.assertEqual(len(data_loaders["test"]["img_classification"].dataset), (end-start) * 50) - self.assertEqual(len(data_loaders["c_test"]["snow"][5].dataset), (end-start) * 50) - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/tests/test_lottery_ticket_pruning.py b/bias_transfer/tests/test_lottery_ticket_pruning.py deleted file mode 100644 index 9a4e6d8..0000000 --- a/bias_transfer/tests/test_lottery_ticket_pruning.py +++ /dev/null @@ -1,115 +0,0 @@ -import unittest -import torch - -from bias_transfer.configs import trainer -from bias_transfer.tests._main_loop_module import MainLoopModuleTest -from bias_transfer.trainer.main_loop_modules import LotteryTicketPruning -from bias_transfer.models.utils import weight_reset - - -class LotteryTicketPruningTest(MainLoopModuleTest): - def pre_epoch_test(self, model, epoch): - mask_sum = sum([torch.sum(m).cpu().detach().item() for m in self.module.mask]) - p = (1 - (self.percent / 100)) ** (1 / self.rounds) - self.assertAlmostEqual( - mask_sum, self.total_parameters * p, places=-1 - ) # remaining parameters - - def post_backward_test(self, model): - step = 0 - for name, p in model.named_parameters(): - if "weight" in name: - grad_tensor = p.grad.data - grad_masked = (grad_tensor == 0).int() - self.assertTrue( - torch.all((grad_masked + self.module.mask[step]) > 0).cpu().item() - ) - step += 1 - - # def test_module(self): - # print("===================================================", flush=True) - # print("=====TEST the individual module components=========", flush=True) - # self.rounds = 1 - # self.percent = 80 - # trainer_conf = trainer.TrainerConfig( - # comment="Minimal Training Test", - # max_iter=3, - # verbose=False, - # noise_test={"noise_snr": [], "noise_std": [],}, - # restore_best=False, - # lr_milestones=(1, 2), - # adaptive_lr=False, - # patience=1000, - # lottery_ticket={ - # "rounds": self.rounds, - # "round_length": 1, - # "percent_to_prune": self.percent, - # "pruning": True, - # "reinit": False, - # "global_pruning": True, - # }, - # ) - # self.model.apply(weight_reset) - # device = "cuda" if torch.cuda.is_available() else "cpu" - # self.model.to(device) - # self.module = LotteryTicketPruning( - # self.model, trainer_conf, device, self.data_loaders["train"], self.seed - # ) - # mask_sum = sum([torch.sum(m) for m in self.module.mask]) # should all be one - # self.total_parameters = 0 - # for name, param in self.model.named_parameters(): - # if "weight" in name: - # size = 1 - # for l in list(param.size()): - # size *= l - # self.total_parameters += size - # self.assertEqual(mask_sum, self.total_parameters) - # self.main_loop( - # self.model, - # self.data_loaders["train"], - # self.module, - # trainer_conf, - # device=device, - # epoch=2, - # ) - - def test_training(self): - print("===================================================", flush=True) - print("===========TEST complete training==================", flush=True) - rounds = 2 - round_length = 3 - percent = 69 - trainer_conf = trainer.TrainerConfig( - comment="Minimal Training Test", - max_iter=3, - verbose=False, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=None, - adaptive_lr=False, - patience=1000, - lottery_ticket={ - "rounds": rounds, - "round_length": round_length, - "percent_to_prune": percent, - "pruning": True, - "reinit": False, - 
"global_pruning": True, - }, - ) - score = self.run_training(trainer_conf) - zero_parameters, total_parameters = 0, 0 - for name, param in self.model.named_parameters(): - if "weight" in name and "fc" not in name: - zero_parameters += torch.sum((param.data == 0).int()).item() - size = 1 - for l in list(param.size()): - size *= l - total_parameters += size - self.assertAlmostEqual( - zero_parameters, total_parameters * (percent / 100), places=-2 - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/tests/test_model.py b/bias_transfer/tests/test_model.py deleted file mode 100644 index e46db1e..0000000 --- a/bias_transfer/tests/test_model.py +++ /dev/null @@ -1,187 +0,0 @@ -import unittest -from bias_transfer.configs import trainer, model, dataset -from bias_transfer.tests._base import BaseTest - - -class ModelTest(BaseTest): - def test_noise_adv_training(self): - print("===================================================", flush=True) - print("============TEST noise-adversarial training========", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="resnet18", - noise_adv_regression=True, - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="Noise Adversarial Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_adv_regression=True, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 18.04, places=1) - self.setUpClass() - - def test_noise_adv_training_vgg(self): - print("===================================================", flush=True) - print("========TEST noise-adversarial training (VGG)======", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="vgg19_bn", - noise_adv_regression=True, - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="Noise Adversarial Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_adv_regression=True, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 16.92, places=1) - self.setUpClass() - - def test_representation_matching(self): - print("===================================================", flush=True) - print("=======TEST representation matching training ======", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="resnet18", - representation_matching=True, - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="Representation Matching Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - representation_matching={ - "representation": "core", - 
"criterion": "cosine", - "second_noise_std": {(0, 1.0): 1.0}, - "lambda": 1.0, - }, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 37.4, places=1) - self.setUpClass() - - def test_representation_matching_vgg(self): - print("===================================================", flush=True) - print("====TEST representation matching training (VGG)====", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="vgg19_bn", - representation_matching=True, - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="Representation Matching Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - representation_matching={ - "representation": "core", - "criterion": "cosine", - "second_noise_std": {(0, 1.0): 1.0}, - "lambda": 1.0, - }, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 17.44, places=1) - self.setUpClass() - - def test_resnet_50(self): - print("===================================================", flush=True) - print("================TEST ResNet50 Training=============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", dataset_cls="CIFAR10", type="resnet50", - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="ResNet50 Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 12.88, places=1) - # reset: - self.setUpClass() - - def test_vgg_19(self): - print("===================================================", flush=True) - print("==================TEST VGG19 Training==============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", dataset_cls="CIFAR10", type="vgg19_bn", - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - trainer_conf = trainer.TrainerConfig( - comment="VGG19 Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 17.64, places=1) - # reset: - self.setUpClass() - - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/tests/test_training.py b/bias_transfer/tests/test_training.py deleted file mode 100644 index 619e307..0000000 --- a/bias_transfer/tests/test_training.py +++ /dev/null @@ -1,125 +0,0 @@ -import unittest -import torch -import os -import copy -import numpy as np -import nnfabrik as nnf -from bias_transfer.configs import trainer -from bias_transfer.models.utils import weight_reset -from bias_transfer.tests._base import BaseTest - - -class TrainingTest(BaseTest): - def test_training_adaptive_lr_schedule(self): - print("===================================================", flush=True) - print("=========TEST adaptive_lr training=================", 
flush=True) - trainer_conf = trainer.TrainerConfig( - comment="Adaptive LR Training Test", - max_iter=3, - verbose=False, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=None, - adaptive_lr=True, - early_stop=True, - patience=2, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 65.08, places=1) - - def test_training_fixed_lr_schedule(self): - print("===================================================", flush=True) - print("===========TEST fixed_lr training==================", flush=True) - trainer_conf = trainer.TrainerConfig( - comment="Fixed LR Training Test", - max_iter=3, - verbose=False, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1, 2), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 79.72, places=1) - - def test_training_noise_augment_std(self): - print("===================================================", flush=True) - print("==========TEST noise-augmented training STD =======", flush=True) - trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 46.76, places=1) - - def test_training_noise_augment_snr(self): - print("===================================================", flush=True) - print("===========TEST noise-augmented training SNR=======", flush=True) - trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr={1.0: 0.25, 1.5: 0.25, -1: 0.5}, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(trainer_conf) - self.assertAlmostEqual(score, 40.16, places=1) - - def test_freeze_params(self): - print("===================================================", flush=True) - print("=============TEST freeze params====================", flush=True) - trainer_conf = trainer.TrainerConfig( - comment="Fixed LR Training Test", - max_iter=2, - verbose=False, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - freeze=("readout",), - patience=1000, - ) - torch.manual_seed(self.seed) - np.random.seed(self.seed) - torch.cuda.manual_seed(self.seed) - self.model.apply(weight_reset) - readout_weight_before = torch.clone( - dict(self.model.named_parameters())["fc.weight"].data - ).cpu() - readout_bias_before = torch.clone( - dict(self.model.named_parameters())["fc.bias"].data - ).cpu() - _ = self.run_training(trainer_conf) - readout_weight_after = dict(self.model.named_parameters())[ - "fc.weight" - ].data.cpu() - readout_bias_after = dict(self.model.named_parameters())["fc.bias"].data.cpu() - self.assertTrue( - torch.all(torch.eq(readout_weight_before, readout_weight_after)) - ) - self.assertTrue(torch.all(torch.eq(readout_bias_before, readout_bias_after))) - self.setUpClass() - - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/tests/test_transfer.py 
b/bias_transfer/tests/test_transfer.py deleted file mode 100644 index 356be5d..0000000 --- a/bias_transfer/tests/test_transfer.py +++ /dev/null @@ -1,350 +0,0 @@ -import unittest -import os -import torch -import numpy as np -import nnfabrik as nnf -from bias_transfer.configs import trainer, model -from bias_transfer.models.utils import weight_reset -from bias_transfer.tests._base import BaseTest - - -class TransferTest(BaseTest): - def test_transfer_training(self): - self.setUpClass() - print("===================================================", flush=True) - print("============TEST transfer training=================", flush=True) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 46.76, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - transfer_trainer_conf = trainer.TrainerConfig( - comment="Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - freeze=("core",), - reset_linear=True, - ) - score = self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 38.64, places=1) - - def test_transfer_rep_match_training(self): - self.setUpClass() - print("===================================================", flush=True) - print("========TEST transfer rep match training===========", flush=True) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="resnet18", - representation_matching=True, - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - representation_matching={ - "representation": "core", - "criterion": "cosine", - "second_noise_std": {(0, 1.0): 1.0}, - "lambda": 1.0, - }, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 37.4, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - self.get_parts(self.dataset_conf, self.model_conf, self.seed) - transfer_trainer_conf = trainer.TrainerConfig( - comment="Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - 
noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - freeze=("core",), - reset_linear=True, - ) - score = self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 35.48, places=1) - self.setUpClass() - - def test_transfer_to_noise_aug_training(self): - self.setUpClass() - print("===================================================", flush=True) - print("=======TEST transfer to noise aug training=========", flush=True) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Clean Training Test", - max_iter=3, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 66.36, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - transfer_trainer_conf = trainer.TrainerConfig( - comment="Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - freeze=("core",), - reset_linear=True, - ) - score = self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 69.36, places=1) - - def test_rdm_transfer_training(self): - print("===================================================", flush=True) - print("===========TEST RDM transfer training==============", flush=True) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 46.76, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="resnet18", - rdm_prediction=True - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - transfer_trainer_conf = trainer.TrainerConfig( - comment="RDM Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - # freeze=("core",), - # reset_linear=True, - rdm_transfer=True, - rdm_prediction={"lambda": 1.0}, - ) - score = 
self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 27.24, places=1) - # reset model - self.setUpClass() - - def test_transfer_training_vgg(self): - print("===================================================", flush=True) - print("=========TEST transfer training (VGG)==============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="vgg19_bn", - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - early_stop=False, - lr_milestones=(1,), - adaptive_lr=False, - patience=1000, - readout_name="classifier" - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 17.92, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - transfer_trainer_conf = trainer.TrainerConfig( - comment="Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - freeze=("core",), - reset_linear=True, - readout_name="classifier" - ) - score = self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 29.2, places=1) - self.setUpClass() - - def test_rdm_transfer_training_vgg(self): - print("===================================================", flush=True) - print("=======TEST RDM transfer training (VGG)============", flush=True) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="vgg19_bn", - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - pretrained_path = "./checkpoint/ckpt.{}.pth".format( - nnf.utility.dj_helpers.make_hash("test1") - ) - transfer_path = "./checkpoint/ckpt.to_transfer.pth" - if os.path.exists(transfer_path): - os.remove(transfer_path) - pretrain_trainer_conf = trainer.TrainerConfig( - comment="Noise Augmented Training Test", - max_iter=2, - verbose=False, - add_noise=True, - noise_snr=None, - noise_std={0.08: 0.1, 0.12: 0.1, 0.18: 0.1, 0.26: 0.1, 0.38: 0.1, -1: 0.5,}, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - adaptive_lr=False, - early_stop=False, - patience=1000, - readout_name="classifier" - ) - score = self.run_training(pretrain_trainer_conf) - self.assertAlmostEqual(score, 17.92, places=1) - state_dict = torch.load(pretrained_path) - torch.save(state_dict["net"],transfer_path) - model_conf = model.ClassificationModelConfig( - comment="CIFAR10", - dataset_cls="CIFAR10", - type="vgg19_bn", - rdm_prediction=True - ) - self.get_parts(self.dataset_conf, model_conf, self.seed) - transfer_trainer_conf = trainer.TrainerConfig( - comment="RDM Transfer Training Test", - max_iter=2, - verbose=False, - add_noise=False, - noise_snr=None, - noise_std=None, - noise_test={"noise_snr": [], "noise_std": [],}, - restore_best=False, - lr_milestones=(1,), - 
adaptive_lr=False, - early_stop=False, - patience=1000, - transfer_from_path=transfer_path, - rdm_transfer=True, - rdm_prediction={"lambda": 1.0}, - readout_name="classifier" - ) - score = self.run_training(transfer_trainer_conf) - self.assertAlmostEqual(score, 20.52, places=1) - # reset model - self.setUpClass() - -if __name__ == "__main__": - unittest.main() diff --git a/bias_transfer/trainer/__init__.py b/bias_transfer/trainer/__init__.py deleted file mode 100644 index 62206d4..0000000 --- a/bias_transfer/trainer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .img_classification_trainer import trainer as img_classification -from .neural_trainer import trainer as neural -from .regression_trainer import trainer as regression -from bias_transfer.trainer.transfer import trainer as transfer -from bias_transfer.trainer.transfer import regression_trainer as regression_transfer diff --git a/bias_transfer/trainer/img_classification_trainer.py b/bias_transfer/trainer/img_classification_trainer.py deleted file mode 100644 index 3f41084..0000000 --- a/bias_transfer/trainer/img_classification_trainer.py +++ /dev/null @@ -1,219 +0,0 @@ -from functools import partial - -from bias_transfer.trainer.utils.checkpointing import ( - RemoteCheckpointing, - LocalCheckpointing, -) -from bias_transfer.trainer.trainer import Trainer -from bias_transfer.trainer.utils import get_subdict, stringify -from bias_transfer.trainer.utils.loss import * -from neuralpredictors.tracking import AdvancedMultipleObjectiveTracker - -from torch import nn, optim - - -def trainer(model, dataloaders, seed, uid, cb, eval_only=False, **kwargs): - t = ImgClassificationTrainer(dataloaders, model, seed, uid, cb, **kwargs) - return t.train() - - -class ImgClassificationTrainer(Trainer): - checkpointing_cls = LocalCheckpointing - - @property - def tracker(self): - try: - return self._tracker - except AttributeError: - objectives = { - "LR": 0, - "Training": { - "img_classification": {"loss": 0, "accuracy": 0, "normalization": 0} - }, - "Validation": { - "img_classification": { - "loss": 0, - "accuracy": 0, - "normalization": 0, - }, - "patience": 0, - }, - } - self._tracker = AdvancedMultipleObjectiveTracker( - main_objective=("img_classification", "accuracy"), **objectives - ) - return self._tracker - - def get_training_controls(self): - criterion, stop_closure = {}, {} - for k in self.task_keys: - if k == "transfer" or k not in self.config.loss_functions: - continue # no validation on this data and training is handled in mainloop modules - criterion[k] = ( - globals().get(self.config.loss_functions[k]) - or getattr(nn, self.config.loss_functions[k]) - )() - - stop_closure[k] = partial( - self.main_loop, - data_loader=get_subdict(self.data_loaders["validation"], [k]), - mode="Validation", - epoch=0, - cycler_args={}, - cycler="LongCycler", - ) - optimizer = getattr(optim, self.config.optimizer)( - self.model.parameters(), **self.config.optimizer_options - ) - return optimizer, stop_closure, criterion - - def move_data(self, batch_data): - data_key, inputs, targets = batch_data[0], batch_data[1][0], batch_data[1][1] - - # targets - if isinstance(targets, dict): - targets = {k: t.to(self.device) for k, t in targets.items()} - if len(targets) == 1 and data_key != "transfer": - targets = next(iter(targets.values())) - else: - targets = targets.to(self.device) - - # inputs - if ( - isinstance(inputs, dict) and len(inputs) == 1 - ): # TODO add support for multiple inputs - inputs = next(iter(inputs.values())) - inputs = inputs.to(self.device, 
dtype=torch.float) - - return inputs, targets, data_key, None - - def compute_loss( - self, - mode, - task_key, - loss, - outputs, - targets, - ): - if task_key != "transfer" and task_key in self.config.loss_functions: - if not ( - self.config.regularization - and self.config.regularization.get("regularizer") == "Mixup" - ): # otherwise this is done in the mainloop-module - loss += self.criterion[task_key](outputs, targets) - _, predicted = outputs.max(1) - self.tracker.log_objective( - 100 * predicted.eq(targets).sum().item(), - keys=(mode, task_key, "accuracy"), - ) - batch_size = targets.size(0) - self.tracker.log_objective( - batch_size, - keys=(mode, task_key, "normalization"), - ) - self.tracker.log_objective( - loss.item() * batch_size, - keys=(mode, task_key, "loss"), - ) - return loss - - def test_final_model(self, epoch, bn_train=""): - deactivate_options = { - "noise_snr": None, - "noise_std": None, - "rep_matching": False, - "rep_monitoring": False, - "noise_adv": False, - } - if not bn_train and self.config.eval_with_bn_train: - self.test_final_model(epoch, bn_train=" BN=Train") - # test the final model with noise on the dev-set - # test the final model on the test set - for k in self.task_keys: - if k == "transfer": - continue - if "rep_matching" not in k and self.config.noise_test: - for n_type, n_vals in self.config.noise_test.items(): - for val in n_vals: - val_str = stringify(val) - mode = "Noise {} {}".format(n_type, val_str) + bn_train - objectives = { - mode: { - k: { - "accuracy": 0, - "loss": 0, - "normalization": 0, - } - } - } - self.tracker.add_objectives(objectives, init_epoch=True) - - module_options = deactivate_options.copy() - module_options[n_type] = val - self.main_loop( - epoch=epoch, - data_loader=get_subdict( - self.data_loaders["validation"], [k] - ), - mode=mode, - cycler_args={}, - cycler="LongCycler", - module_options=module_options, - ) - - objectives = { - "Test" - + bn_train: { - k: { - "accuracy": 0, - "loss": 0, - "normalization": 0, - } - } - } - self.tracker.add_objectives(objectives, init_epoch=True) - test_result = self.main_loop( - epoch=epoch, - data_loader=get_subdict(self.data_loaders["test"], [k]), - mode="Test" + bn_train, - cycler_args={}, - cycler="LongCycler", - module_options=deactivate_options, - ) - if "c_test" in self.data_loaders: - for k in self.task_keys: - if "rep_matching" not in k: - for c_category in list(self.data_loaders["c_test"][k].keys()): - for c_level, data_loader in self.data_loaders["c_test"][k][ - c_category - ].items(): - - objectives = { - c_category - + bn_train: { - str(c_level): { - "accuracy": 0, - "loss": 0, - "normalization": 0, - } - } - } - self.tracker.add_objectives(objectives, init_epoch=True) - self.main_loop( - epoch=epoch, - data_loader={str(c_level): data_loader}, - mode=c_category + bn_train, - cycler_args={}, - cycler="LongCycler", - module_options=deactivate_options, - ) - if "st_test" in self.data_loaders: - self.main_loop( - epoch=epoch, - data_loader={"img_classification": self.data_loaders["st_test"]}, - mode="Test-ST" + bn_train, - cycler_args={}, - cycler="LongCycler", - module_options=deactivate_options, - ) - return test_result diff --git a/bias_transfer/trainer/main_loop_modules/__init__.py b/bias_transfer/trainer/main_loop_modules/__init__.py deleted file mode 100644 index e9e43d4..0000000 --- a/bias_transfer/trainer/main_loop_modules/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from .noise_adv_training import NoiseAdvTraining -from .noise_augmentation import NoiseAugmentation -from 
.random_readout_reset import RandomReadoutReset -from .representation_matching import RepresentationMatching -from .representation_monitor import RepresentationMonitor -from .lottery_ticket_pruning import LotteryTicketPruning -from .model_wrapper import ModelWrapper -from .synaptic_intelligence import SynapticIntelligence -from .fisher_estimation import FisherEstimation -from .parameter_regularization.param_distance import ParamDistance -from .parameter_regularization.mixup import Mixup -from .parameter_regularization.vcl import VCL -from .representation_regularization.rdl import RDL -from .representation_regularization.knowledge_distillation import KnowledgeDistillation -from .function_regularization.frcl import FRCL -from .function_regularization.fromp import FROMP diff --git a/bias_transfer/trainer/main_loop_modules/fisher_estimation.py b/bias_transfer/trainer/main_loop_modules/fisher_estimation.py deleted file mode 100644 index a17137d..0000000 --- a/bias_transfer/trainer/main_loop_modules/fisher_estimation.py +++ /dev/null @@ -1,62 +0,0 @@ -from .main_loop_module import MainLoopModule -import torch.nn.functional as F - - -class FisherEstimation(MainLoopModule): - """ - Implementation adapted from https://github.com/GMvandeVen/continual-learning/blob/master/continual_learner.py - """ - - def __init__(self, trainer): - super().__init__(trainer) - self.num_samples = self.config.compute_fisher.get("num_samples", 128) - self.empirical = self.config.compute_fisher.get("empirical", False) - self.est_fisher_info = {} - - def pre_epoch(self, model, mode, **options): - super().pre_epoch(model, mode, **options) - # Prepare to store estimated Fisher Information matrix - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - self.est_fisher_info[n] = p.detach().clone().zero_() - - def post_forward(self, outputs, loss, targets, **shared_memory): - model = self.trainer.model - if self.empirical: - # use provided label to calculate loglikelihood --> "empirical Fisher": - label = targets - else: - # use predicted label to calculate loglikelihood: - label = outputs.max(1)[1] - # calculate negative log-likelihood - loss = self.trainer.criterion[self.task_key](outputs, label) - # loss = F.nll_loss(F.log_softmax(outputs, dim=1), label) - - # Calculate gradient of negative loglikelihood - model.zero_grad() - loss.backward() - - # Square gradients and keep running sum - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - if p.grad is not None: - self.est_fisher_info[n] += p.grad.detach() ** 2 - - return outputs, loss, targets - - def post_epoch(self, model): - # Normalize by sample size used for estimation - est_fisher_info = { - n: p / self.num_samples for n, p in self.est_fisher_info.items() - } - - # Store new values in the network - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - # precision (approximated by diagonal Fisher Information matrix) - model.register_buffer( - f"{n}_importance", est_fisher_info[n], - ) diff --git a/bias_transfer/trainer/main_loop_modules/function_regularization/__init__.py b/bias_transfer/trainer/main_loop_modules/function_regularization/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bias_transfer/trainer/main_loop_modules/function_regularization/frcl.py b/bias_transfer/trainer/main_loop_modules/function_regularization/frcl.py deleted file mode 100644 index b9b3648..0000000 --- 
a/bias_transfer/trainer/main_loop_modules/function_regularization/frcl.py +++ /dev/null @@ -1,80 +0,0 @@ -from functools import partial - -import torch - -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule - - -class FRCL(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - self.eps = self.config.regularization.get("eps", 1e-8) - self.num_samples = self.config.regularization.get("num_samples", 9) - self.train_len = len( - self.trainer.data_loaders["train"]["img_classification"].dataset - ) - - def pre_forward(self, model, inputs, task_key, shared_memory): - super().pre_forward(model, inputs, task_key, shared_memory) - model_ = partial(model, num_samples=self.num_samples) - return model_, inputs - - def post_forward(self, outputs, loss, targets, **shared_memory): - if self.train_mode: - loss += self._calculate_kl_term() / self.train_len - targets = targets.repeat(self.num_samples).view(-1) - return outputs, loss, targets - - @staticmethod - def kl(m1, S1, m2, S2): - S2 = S2 + torch.eye(S2.shape[0]).to(S2) * 1e-3 - S1 = S1 + torch.eye(S1.shape[0]).to(S1) * 1e-3 - S2_ = torch.inverse(S2) - return 0.5 * ( - torch.trace(S2_ @ S1) - + (m2 - m1).T @ S2_ @ (m2 - m1) - - S1.shape[0] - + torch.logdet(S2) - - torch.logdet(S1) - ) - - def _calculate_kl_term(self): - model = self.trainer.model - kls = 0 - for i in range(model.num_classes): - # kls -= kl_divergence(self.w_distr[i], self.w_prior) - kls -= self.kl( - model.mu[i], - model.L[i] @ model.L[i].T, - model.w_prior.mean, - model.w_prior.covariance_matrix, - ) - # curr_task_kls = -kls.item() - - if model.prev: - out_dim = model.num_classes # model.prev_num_classes - phi_i = model.core_forward(model.coreset_prev) - cov_i = phi_i @ phi_i.T + torch.eye(phi_i.shape[0]).to(self.device) * 1e-6 - # p_u = MultivariateNormal(torch.zeros(cov_i.shape[0]).to(self.device), - # covariance_matrix=cov_i * self.sigma_prior) - # kls -= sum([kl_divergence(self.prev_tasks_distr[i][j], p_u) for j in range(self.out_dim)]) - prev_kls = sum( - [ - self.kl( - model._buffers[f"mu_prev_{j}"], - model._buffers[f"cov_prev_{j}"], - torch.zeros(cov_i.shape[0]).to(self.device), - cov_i * model.sigma_prior, - ) - for j in range(out_dim) - ] - ) - # if state is not None: - # state.kls.append(prev_kls.item()) - kls -= prev_kls - - # if state is not None: - # state.kls.append(curr_task_kls) - # state.kls_div_nk = kls.item() / N_k - # Sum KL over all parameters - return -kls diff --git a/bias_transfer/trainer/main_loop_modules/function_regularization/fromp.py b/bias_transfer/trainer/main_loop_modules/function_regularization/fromp.py deleted file mode 100644 index 4e88885..0000000 --- a/bias_transfer/trainer/main_loop_modules/function_regularization/fromp.py +++ /dev/null @@ -1,274 +0,0 @@ -from torch import nn -import torch -import torch.nn.functional as F - -from bias_transfer.trainer.main_loop_modules.function_regularization.fromp_utils import ( - update_input, - logistic_hessian, - full_softmax_hessian, - parameters_to_matrix, - parameter_grads_to_vector, - vector_to_parameter_grads, -) -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule -from neuralpredictors.training import eval_state - - -class FROMP(MainLoopModule): - """ - Adapted from https://github.com/team-approx-bayes/fromp - """ - - def __init__(self, trainer): - super().__init__(trainer) - self.prior_prec = self.config.regularization.get("prior_prec") - self.grad_clip_norm = self.config.regularization.get("grad_clip_norm") - 
self.alpha = self.config.regularization.get("alpha") - if self.prior_prec < 0.0: - raise ValueError(f"invalid prior precision: {self.prior_prec}") - if (self.grad_clip_norm is not None) and (not self.grad_clip_norm >= 0.0): - raise ValueError(f"invalid gradient clip norm: {self.grad_clip_norm}") - if self.alpha < 0.0: - raise ValueError(f"invalid alpha: {self.alpha}") - self.covariance = torch.tensor( - self.trainer.data_loaders.pop("covariance"), device=self.device - ) - main_task = next(iter(self.trainer.data_loaders["train"].keys())) - self.memorable_points_prev = ( - self.trainer.data_loaders["train"].pop(f"{main_task}_cs").dataset.samples - ) - self.model = self.trainer.model - self.optimizer = self.trainer.optimizer - self.train_modules = [] - self.set_train_modules(self.model, self.train_modules) - - self.init_task(self.config.regularization.get("eps", 1e-5)) - - def init_task(self, eps): - """ - Calculate values (memorable_logits, hkh_l) for regularisation term (all but the first task) - - """ - self.kernel_inv_prev_mem_prev_model = [] - covariance = 1.0 / (self.covariance + self.prior_prec) - - with eval_state(self.model): - memorable_data_prev = self.memorable_points_prev.to(self.device) - self.optimizer.zero_grad() - logits_prev_mem = self.model.forward(memorable_data_prev) - - num_classes = logits_prev_mem.shape[-1] - if num_classes == 1: - preds_prev_mem = torch.sigmoid(logits_prev_mem) - else: - preds_prev_mem = torch.softmax(logits_prev_mem, dim=-1) - self.preds_prev_mem_prev_model = preds_prev_mem.detach() - - # Calculate kernel = J \Sigma J^T for all memory points, and store via cholesky decomposition - intermediate_outputs = [] - for module in self.train_modules: - intermediate_outputs.append(module.output) - for class_id in range(num_classes): - loss_for_class = preds_prev_mem[:, class_id].sum() - retain_graph = ( - True if class_id < num_classes - 1 else None - ) # only clean up the graph after the last class - grad = self.calculate_grad( - loss_for_class, - intermediate_outputs, - self.train_modules, - retain_graph=retain_graph, - ) - kernel = ( - torch.einsum("ij,j,pj->ip", grad, covariance, grad) - + torch.eye(grad.shape[0], dtype=grad.dtype, device=grad.device) * eps - ) - self.kernel_inv_prev_mem_prev_model.append( - torch.cholesky_inverse(torch.cholesky(kernel)) - ) - - @classmethod - def set_train_modules(cls, module, train_modules): - """ - For calculating Jacobians in PyTorch - """ - if len(list(module.children())) == 0: - if len(list(module.parameters())) != 0: - train_modules.append(module) - module.register_forward_hook(update_input) - else: - for child in list(module.children()): - cls.set_train_modules(child, train_modules) - - @classmethod - def calculate_grad( - cls, loss, intermediate_outputs, train_modules, retain_graph=None - ): - """ - Calculate the gradient (part of calculating Jacobian) of the parameters lc wrt loss - """ - linear_grad = torch.autograd.grad( - loss, intermediate_outputs, retain_graph=retain_graph - ) - grad = [] - for i, module in enumerate(train_modules): - g = linear_grad[i] - a = module.input.clone().detach() - m = a.shape[0] - - if isinstance(module, nn.Linear): - grad.append(torch.einsum("ij,ik->ijk", g, a)) - if module.bias is not None: - grad.append(g) - - if isinstance(module, nn.Conv2d): - a = F.unfold( - a, - kernel_size=module.kernel_size, - dilation=module.dilation, - padding=module.padding, - stride=module.stride, - ) - _, k, hw = a.shape - _, c, _, _ = g.shape - g = g.view(m, c, -1) - 
grad.append(torch.einsum("ijl,ikl->ijk", g, a)) - if module.bias is not None: - a = torch.ones((m, 1, hw), device=a.device) - grad.append(torch.einsum("ijl,ikl->ijk", g, a)) - - if isinstance(module, nn.BatchNorm1d): - grad.append(torch.mul(g, a)) - if module.bias is not None: - grad.append(g) - - if isinstance(module, nn.BatchNorm2d): - grad.append(torch.einsum("ijkl->ij", torch.mul(g, a))) - if module.bias is not None: - grad.append(torch.einsum("ijkl->ij", g)) - - grad_m = parameters_to_matrix(grad) - return grad_m.detach() - - @classmethod - def calculate_jacobian(cls, output, intermediate_outputs, train_modules): - """ - Calculate the Jacobian matrix - """ - if output.dim() > 2: - raise ValueError("the dimension of output must be smaller than 3.") - else: # output.dim() == 2: - num_classes = output.shape[1] - grad = [] - for i in range(num_classes): - retain_graph = None if i == num_classes - 1 else True - loss = output[:, i].sum() - g = cls.calculate_grad( - loss, - intermediate_outputs, - train_modules=train_modules, - retain_graph=retain_graph, - ) - grad.append(g) - result = torch.zeros( - (grad[0].shape[0], grad[0].shape[1], num_classes), - dtype=grad[0].dtype, - device=grad[0].device, - ) - for i in range(num_classes): - result[:, :, i] = grad[i] - return result - - @classmethod - def compute_covariance(cls, data, model): - """ - After training on a new task, update the coviarance matrix estimate - """ - train_modules = [] - cls.set_train_modules(model, train_modules) - - logits = model.forward(data) - - intermediate_outputs = [] - for module in train_modules: - intermediate_outputs.append(module.output) - - jacobian = cls.calculate_jacobian(logits, intermediate_outputs, train_modules) - if logits.shape[-1] == 1: - hessian = logistic_hessian(logits).detach() - hessian = hessian[:, :, None] - else: - hessian = full_softmax_hessian(logits).detach() - return torch.einsum("ijd,idp,ijp->j", jacobian, hessian, jacobian) - - def post_backward(self, model): - parameters = self.model.parameters() - grad = parameter_grads_to_vector(parameters).detach() - grad *= 1 / self.alpha - - grad_func_reg = torch.zeros_like( - grad - ) # The gradient corresponding to memorable points - # compute predictions of memorable points (from previous task) - with eval_state(self.model): - memorable_data_prev = self.memorable_points_prev.to(self.device) - self.optimizer.zero_grad() - logits_prev_mem = self.model.forward(memorable_data_prev) - - num_classes = logits_prev_mem.shape[-1] - if num_classes == 1: - preds_prev_mem = torch.sigmoid(logits_prev_mem) - else: - preds_prev_mem = torch.softmax(logits_prev_mem, dim=-1) - - # collect all intermediate outputs: - intermediate_outputs = [] - for module in self.train_modules: - intermediate_outputs.append(module.output) - - # compute function loss for each output class: - for class_id in range(num_classes): - # \Lambda * Jacobian - loss_for_class = preds_prev_mem[:, class_id].sum() - retain_graph = ( - True if class_id < num_classes - 1 else None - ) # only clean up the graph after the last class - jacobian_t = self.calculate_grad( - loss_for_class, - intermediate_outputs, - self.train_modules, - retain_graph=retain_graph, - ) - - # m_t - m_{t-1} - delta_preds = ( - preds_prev_mem[:, class_id].detach() - - self.preds_prev_mem_prev_model[:, class_id] - ) - - # K_{t-1}^{-1} - kernel_inv_prev = self.kernel_inv_prev_mem_prev_model[class_id] - - # Uncomment the following line for L2 variants of algorithms - # kernel_inv_t = torch.eye(kernel_inv_t.shape[0], 
device=kernel_inv_t.device) - - # Calculate K_{t-1}^{-1} (m_t - m_{t-1}) - kinvf_t = torch.squeeze( - torch.matmul(kernel_inv_prev, delta_preds[:, None]), dim=-1 - ) - - grad_func_reg += torch.einsum("ij,i->j", jacobian_t, kinvf_t) - - grad += grad_func_reg - - # Do gradient norm clipping - if self.grad_clip_norm is not None: - grad_norm = torch.norm(grad) - grad_norm = ( - 1.0 - if grad_norm < self.grad_clip_norm - else grad_norm / self.grad_clip_norm - ) - grad /= grad_norm - - vector_to_parameter_grads(grad, parameters) diff --git a/bias_transfer/trainer/main_loop_modules/function_regularization/fromp_utils.py b/bias_transfer/trainer/main_loop_modules/function_regularization/fromp_utils.py deleted file mode 100644 index 89238d0..0000000 --- a/bias_transfer/trainer/main_loop_modules/function_regularization/fromp_utils.py +++ /dev/null @@ -1,109 +0,0 @@ -import torch -from torch.nn import functional as F - - -def update_input(self, input, output): - """ - Used to register forward hook - Args: - self: - input: - output: - - Returns: - - """ - self.input = input[0].data - self.output = output - - -def logistic_hessian(f): - """ - We only calculate the diagonal elements of the hessian - """ - f = f[:, :] - pi = torch.sigmoid(f) - return pi * (1 - pi) - - -def softmax_hessian(f): - s = F.softmax(f, dim=-1) - return s - s * s - - -def full_softmax_hessian(f): - """ - Calculate the full softmax hessian - """ - s = F.softmax(f, dim=-1) - e = torch.eye(s.shape[-1], dtype=s.dtype, device=s.device) - return s[:, :, None] * e[None, :, :] - s[:, :, None] * s[:, None, :] - - -def _check_param_device(param, old_param_device): - if old_param_device is None: - old_param_device = param.get_device() if param.is_cuda else -1 - else: - warn = False - if param.is_cuda: # check if in same gpu - warn = param.get_device() != old_param_device - else: # check if in cpu - warn = old_param_device != -1 - if warn: - raise TypeError( - "found two parameters on different devices, " - "this is currently not supported." - ) - return old_param_device - - -def parameters_to_matrix(parameters): - param_device = None - mat = [] - for param in parameters: - param_device = _check_param_device(param, param_device) - m = param.shape[0] - mat.append(param.view(m, -1)) - return torch.cat(mat, dim=-1) - - -def parameter_grads_to_vector(parameters): - param_device = None - vec = [] - for param in parameters: - param_device = _check_param_device(param, param_device) - if param.grad is None: - raise ValueError("gradient not available") - vec.append(param.grad.data.view(-1)) - return torch.cat(vec, dim=-1) - - -def vector_to_parameter_grads(vec, parameters): - r"""Convert one vector to the parameters - - Arguments: - vec (Tensor): a single vector represents the parameters of a model. - parameters (Iterable[Tensor]): an iterator of Tensors that are the - parameters of a model. 
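For readers skimming the deleted FROMP module above, a minimal sketch of the kernel step it performs for the memorable points: K = J Σ Jᵀ + εI, followed by a Cholesky-based inverse. The function name and the `grads`/`sigma_diag` inputs are illustrative, not part of the repository; it only restates what `init_task` computes, using the maintained `torch.linalg` API.

```python
import torch


def memorable_kernel_inverse(grads, sigma_diag, eps=1e-5):
    """Build K = J Sigma J^T + eps*I from per-example gradients and invert it.

    `grads` (N x P) and `sigma_diag` (P,) are assumed inputs; the deleted module
    obtains them from calculate_grad and the stored covariance, respectively.
    """
    kernel = torch.einsum("ij,j,pj->ip", grads, sigma_diag, grads)
    kernel = kernel + eps * torch.eye(
        kernel.shape[0], dtype=grads.dtype, device=grads.device
    )
    # torch.cholesky is deprecated; torch.linalg.cholesky is the current API.
    return torch.cholesky_inverse(torch.linalg.cholesky(kernel))


k_inv = memorable_kernel_inverse(torch.randn(8, 20), torch.rand(20))
```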
- """ - # Ensure vec of type Tensor - if not isinstance(vec, torch.Tensor): - raise TypeError( - "expected torch.Tensor, but got: {}".format(torch.typename(vec)) - ) - # Flag for the device where the parameter is located - param_device = None - - # Pointer for slicing the vector for each parameter - pointer = 0 - for param in parameters: - # Ensure the parameters are located in the same device - param_device = _check_param_device(param, param_device) - - # The length of the parameter - num_param = param.numel() - # Slice the vector, reshape it, and replace the old data of the parameter - param.grad = vec[pointer : pointer + num_param].view_as(param).grad - - # Increment the pointer - pointer += num_param diff --git a/bias_transfer/trainer/main_loop_modules/lottery_ticket_pruning.py b/bias_transfer/trainer/main_loop_modules/lottery_ticket_pruning.py deleted file mode 100644 index b8ec4c2..0000000 --- a/bias_transfer/trainer/main_loop_modules/lottery_ticket_pruning.py +++ /dev/null @@ -1,172 +0,0 @@ -import collections - -import numpy as np -import torch -import copy - -from torch import optim - -from bias_transfer.models.utils import weight_reset -from .main_loop_module import MainLoopModule - -EPS = 1e-6 - - -class LotteryTicketPruning(MainLoopModule): - """ - Based on the implementation from https://github.com/rahulvigneswaran/Lottery-Ticket-Hypothesis-in-Pytorch - (therefore indirectly from https://github.com/ktkth5/lottery-ticket-hyopothesis) - """ - - def __init__(self, trainer): - super().__init__(trainer) - if self.config.lottery_ticket.get("pruning", True): - n_epochs = self.config.max_iter - n_rounds = self.config.lottery_ticket.get("rounds", 1) - percent_to_prune = self.config.lottery_ticket.get("percent_to_prune", 80) - self.percent_per_round = ( - 1 - (1 - percent_to_prune / 100) ** (1 / n_rounds) - ) * 100 - self.reset_epochs = [ - r * self.config.lottery_ticket.get("round_length", 100) - for r in range(1, n_rounds + 1) - ] - print("Percent to prune per round:", self.percent_per_round, flush=True) - print("Reset before epochs:", list(self.reset_epochs), flush=True) - - # create initial (empty mask): - self.mask = self.make_empty_mask(self.trainer.model) - - # save initial state_dict to reset to this point later: - if not self.config.lottery_ticket.get("reinit"): - self.initial_state_dict = copy.deepcopy(self.trainer.model.state_dict()) - self.initial_optim_state_dict = None - self.initial_scheduler_state_dict = None - self.initial_w_scheduler_state_dict = None - - def pre_epoch( - self, model, mode, **options - ): - super().pre_epoch(model, mode, **options) - optimizer = self.trainer.optimizer - lr_scheduler = self.trainer.lr_scheduler - if self.config.lottery_ticket.get("pruning", True): - if not self.initial_optim_state_dict and optimizer is not None: - self.initial_optim_state_dict = copy.deepcopy(optimizer.state_dict()) - if not self.initial_scheduler_state_dict and lr_scheduler is not None: - self.initial_scheduler_state_dict = copy.deepcopy( - lr_scheduler.state_dict() - ) - if ( - hasattr(lr_scheduler, "warmup_scheduler") - and lr_scheduler.warmup_scheduler - ): # for warmup - self.initial_w_scheduler_state_dict = copy.deepcopy( - lr_scheduler.warmup_scheduler.state_dict() - ) - if self.tracker.epoch in self.reset_epochs and self.train_mode: - # Prune the network, i.e. 
update the mask - self.prune_by_percentile(model, self.percent_per_round) - print("Reset init in Epoch ", self.epoch, flush=True) - self.reset_initialization( - model, self.config.lottery_ticket.get("reinit") - ) - # Reset lr and scheduler: - if ( - hasattr(lr_scheduler, "warmup_scheduler") - and lr_scheduler.warmup_scheduler - ): # for warmup - lr_scheduler.warmup_scheduler.load_state_dict( - copy.deepcopy(self.initial_w_scheduler_state_dict) - ) - lr_scheduler.warmup_scheduler.last_step = -1 - optimizer.load_state_dict(copy.deepcopy(self.initial_optim_state_dict)) - optimizer._step_count = 0 - lr_scheduler.load_state_dict( - copy.deepcopy(self.initial_scheduler_state_dict) - ) - lr_scheduler._step_count = 0 - lr_scheduler.last_epoch = 0 - - def post_backward(self, model): - # Freezing Pruned weights by making their gradients Zero - for name, p in model.named_parameters(): - if "weight" in name and self.config.readout_name not in name: - tensor = torch.abs(p.data) - grad_tensor = p.grad.data - p.grad.data = torch.where( - tensor < EPS, torch.zeros_like(grad_tensor), grad_tensor - ) - - def prune_by_percentile(self, model, percent): - # Calculate percentile value - if self.config.lottery_ticket.get("global_pruning"): - alive_tensors = [] - step = 0 - for name, param in model.named_parameters(): - if ( - "weight" in name and self.config.readout_name not in name - ): # We do not prune bias term - alive_tensors.append( - param.data[torch.nonzero(self.mask[step], as_tuple=True)] - ) # flattened array of nonzero values - step += 1 - alive = torch.cat(alive_tensors) - percentile_value = np.percentile(torch.abs(alive).cpu().numpy(), percent) - - step = 0 - for name, param in model.named_parameters(): - if ( - "weight" in name and self.config.readout_name not in name - ): # We do not prune bias term - if not self.config.lottery_ticket.get("global_pruning"): - # print(nonzero) - alive = param.data[ - torch.nonzero(self.mask[step], as_tuple=True) - ] # flattened array of nonzero values - abs_alive = torch.abs(alive).cpu().numpy() - percentile_value = np.percentile(abs_alive, percent) - - # Convert Tensors to numpy and calculate - new_mask = torch.where( - torch.abs(param.data) - < torch.tensor(percentile_value, device=param.data.device), - torch.zeros_like(self.mask[step]), - self.mask[step], - ) - - # Apply new weight and mask - param.data = param.data * new_mask - self.mask[step] = new_mask - step += 1 - - def make_empty_mask(self, model): - """ - Function to make an empty mask of the same size as the model - :param model: - :return: mask - """ - step = 0 - for name, param in model.named_parameters(): - if "weight" in name and self.config.readout_name not in name: - step = step + 1 - mask = [None] * step - step = 0 - for name, param in model.named_parameters(): - if "weight" in name and self.config.readout_name not in name: - tensor = param.data - mask[step] = torch.ones_like(tensor, device=tensor.device) - step = step + 1 - return mask - - def reset_initialization(self, model, reinit=False): - if reinit: - model.apply(weight_reset) # new random init - step = 0 - for name, param in model.named_parameters(): - init = param.data if reinit else self.initial_state_dict[name] - if "weight" in name and self.config.readout_name not in name: - param.data = self.mask[step] * init - step = step + 1 - elif "bias" in name or "weight" in name: - param.data = init diff --git a/bias_transfer/trainer/main_loop_modules/noise_adv_training.py b/bias_transfer/trainer/main_loop_modules/noise_adv_training.py deleted 
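A compact sketch of the magnitude-based pruning step that `prune_by_percentile` above performs per layer; `prune_by_magnitude` and its tensor arguments are illustrative names, and `torch.quantile` stands in for the `np.percentile` call used in the deleted code.

```python
import torch


def prune_by_magnitude(weight, mask, percent):
    """Zero out the smallest `percent` of the still-unpruned weights and update the mask."""
    alive = weight[mask.bool()].abs()
    threshold = torch.quantile(alive, percent / 100.0)
    new_mask = torch.where(weight.abs() < threshold, torch.zeros_like(mask), mask)
    return weight * new_mask, new_mask


w, m = torch.randn(64, 64), torch.ones(64, 64)
w, m = prune_by_magnitude(w, m, percent=20.0)  # prune 20% of the remaining weights
```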
file mode 100644 index e55da8c..0000000 --- a/bias_transfer/trainer/main_loop_modules/noise_adv_training.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np -import torch -from torch import nn -from functools import partial - -from neuralpredictors.training import LongCycler -from .main_loop_module import MainLoopModule - - -class NoiseAdvTraining(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - self.progress = 0.0 - if isinstance(self.train_loader, LongCycler): - train_loader = self.train_loader.loaders - self.step_size = 1 / ( - self.config.max_iter * len(self.train_loader["img_classification"]) - ) - if self.config.noise_adv_regression: - self.criterion = nn.MSELoss() - else: # config.noise_adv_classification - self.criterion = nn.BCELoss() - objectives = { - "Training": {"NoiseAdvTraining": {"loss": 0, "normalization": 0}}, - "Validation": {"NoiseAdvTraining": {"loss": 0, "normalization": 0}}, - "Test": {"NoiseAdvTraining": {"loss": 0, "normalization": 0}}, - } - self.tracker.add_objectives(objectives) - - def pre_forward(self, model, inputs, task_key, shared_memory): - super().pre_forward(model, inputs, task_key, shared_memory) - noise_adv_lambda = ( - 2.0 / (1.0 + np.exp(-self.config.noise_adv_gamma * self.progress)) - 1 - ) - if self.train_mode: - self.progress += self.step_size - return partial(model, noise_lambda=noise_adv_lambda), inputs - - def post_forward(self, outputs, loss, targets, **shared_memory): - if not self.options.get("noise_adv",True): - return outputs, loss, targets - applied_std = shared_memory["applied_std"] - num_inputs = (applied_std != 0).sum().item() - extra_outputs = outputs[0] - if applied_std is None: - applied_std = torch.zeros_like( - extra_outputs["noise_pred"], device=self.device - ) - if self.config.noise_adv_classification: - applied_std = ( - (applied_std > 0.0).type(torch.FloatTensor).to(device=self.device) - ) - noise_loss = self.criterion(extra_outputs["noise_pred"], applied_std) - self.tracker.log_objective( - noise_loss.item() * num_inputs, (self.mode, "NoiseAdvTraining", "loss") - ) - self.tracker.log_objective( - num_inputs, (self.mode, "NoiseAdvTraining", "normalization"), - ) - loss += self.config.noise_adv_loss_factor * noise_loss - return outputs, loss, targets diff --git a/bias_transfer/trainer/main_loop_modules/parameter_regularization/__init__.py b/bias_transfer/trainer/main_loop_modules/parameter_regularization/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bias_transfer/trainer/main_loop_modules/parameter_regularization/mixup.py b/bias_transfer/trainer/main_loop_modules/parameter_regularization/mixup.py deleted file mode 100644 index 4386204..0000000 --- a/bias_transfer/trainer/main_loop_modules/parameter_regularization/mixup.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import torch -import numpy as np - -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule - - -class Mixup(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - - def mixup_data(self, x): - """ - Returns mixed inputs, and saves index and lambdas - Adapted from https://github.com/facebookresearch/mixup-cifar10/blob/master/train.py - """ - alpha = self.config.regularization.get("alpha",1.0) - if alpha > 0: - self.lam = np.random.beta(alpha, alpha) - else: - self.lam = 1 - - batch_size = x.size()[0] - self.index = torch.randperm(batch_size).to(self.device) - mixed_x = self.lam * x + (1 - self.lam) * x[self.index, :] - return mixed_x - - def pre_forward(self, 
model, inputs, task_key, shared_memory): - model, inputs = super().pre_forward( - model, inputs, task_key, shared_memory - ) - if self.train_mode: - inputs = self.mixup_data(inputs) - else: - self.lam = 1.0 - self.index = torch.arange(inputs.size()[0]) - return model, inputs - - def post_forward(self, outputs, loss, targets, **shared_memory): - if self.train_mode: - loss += (1 - self.lam) * self.trainer.criterion["img_classification"]( - outputs, targets[self.index] - ) - loss += self.lam * self.trainer.criterion["img_classification"]( - outputs, targets - ) - _, predicted = outputs.max(1) - correct = 100 * ( - self.lam * predicted.eq(targets).sum().item() - + (1 - self.lam) * predicted.eq(targets[self.index]).sum().item() - ) - self.tracker.log_objective( - correct, keys=(self.mode, self.task_key, "accuracy"), - ) - return outputs, loss, targets diff --git a/bias_transfer/trainer/main_loop_modules/parameter_regularization/param_distance.py b/bias_transfer/trainer/main_loop_modules/parameter_regularization/param_distance.py deleted file mode 100644 index 763b593..0000000 --- a/bias_transfer/trainer/main_loop_modules/parameter_regularization/param_distance.py +++ /dev/null @@ -1,53 +0,0 @@ -import copy -import os -from collections import OrderedDict - -import torch - -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule - - -class ParamDistance(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - self.sp_state_dict = OrderedDict() - state_dict = self.trainer.model.state_dict() - for k, v in state_dict.items(): - if isinstance(v, torch.Tensor): - self.sp_state_dict[k] = v.clone() - else: - self.sp_state_dict[k] = copy.deepcopy(v) - self.warned = False - self.alpha = self.config.regularization.get("alpha", 1.0) - self.ignore_layers = self.config.regularization.get("ignore_layers", ()) - objectives = { # TODO: make adaptable to other tasks! 
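The deleted Mixup module above combines two cross-entropy terms weighted by a Beta-sampled lambda; the following sketch condenses that logic into a single hypothetical `mixup_step` helper (all names are illustrative).

```python
import numpy as np
import torch
import torch.nn.functional as F


def mixup_step(model, x, y, alpha=1.0):
    """Mix inputs with a Beta(alpha, alpha) lambda and combine both label losses."""
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    outputs = model(mixed_x)
    return lam * F.cross_entropy(outputs, y) + (1 - lam) * F.cross_entropy(outputs, y[index])
```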
- "Training": {"img_classification": {"P-Dist": 0}}, - "Validation": {"img_classification": {"P-Dist": 0}}, - "Test": {"img_classification": {"P-Dist": 0}}, - } - self.tracker.add_objectives(objectives, init_epoch=True) - - def post_forward(self, outputs, loss, targets, **shared_memory): - model = self.trainer.model - if self.train_mode: - reg_loss = torch.zeros(1, dtype=torch.float32, device=self.trainer.device) - for n, param in model.named_parameters(): - if n not in self.sp_state_dict: - if not self.warned: - print(f"skipping {n}") - self.warned = True - continue - for l in self.ignore_layers: - if l in n: - continue - n_ = n.replace(".", "__") - importance = getattr(model, f"{n_}_importance", 1.0) - distance = (importance * (param - self.sp_state_dict[n]) ** 2).sum() - reg_loss = reg_loss + distance - loss += self.alpha * reg_loss - self.tracker.log_objective( - loss.item(), (self.mode, self.task_key, "P-Dist") - ) - return outputs, loss, targets - else: - return outputs, loss, targets diff --git a/bias_transfer/trainer/main_loop_modules/parameter_regularization/vcl.py b/bias_transfer/trainer/main_loop_modules/parameter_regularization/vcl.py deleted file mode 100644 index 5002a85..0000000 --- a/bias_transfer/trainer/main_loop_modules/parameter_regularization/vcl.py +++ /dev/null @@ -1,55 +0,0 @@ -import copy -import os -from collections import OrderedDict -from functools import partial - -import torch - -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule - - -class VCL(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - self.eps = self.config.regularization.get("eps", 1e-8) - self.num_samples = self.config.regularization.get("num_samples", 10) - self.train_len = len( - self.trainer.data_loaders["train"]["img_classification"].dataset - ) - - def pre_forward(self, model, inputs, task_key, shared_memory): - super().pre_forward(model, inputs, task_key, shared_memory) - model_ = partial(model, num_samples=self.num_samples) - return model_, inputs - - def post_forward(self, outputs, loss, targets, **shared_memory): - loss += self._calculate_kl_term() / self.train_len - targets = targets.repeat(self.num_samples).view(-1) - return outputs, loss, targets - - def _calculate_kl_term(self): - """ - Calculates and returns the KL divergence of the new posterior and the previous - iteration's posterior. See equation L3, slide 14. 
- """ - model = self.trainer.model - # Prior - prior_means = model.get_parameters("prior_mean") - prior_log_vars = model.get_parameters("prior_log_var") - prior_vars = torch.exp(prior_log_vars) - - # Posterior - posterior_means = model.get_parameters("posterior_mean") - posterior_log_vars = model.get_parameters("posterior_log_var") - posterior_vars = torch.exp(posterior_log_vars) - - # Calculate KL for individual normal distributions over parameters - kl_elementwise = ( - posterior_vars / (prior_vars + self.eps) - + torch.pow(prior_means - posterior_means, 2) / (prior_vars + self.eps) - - 1 - + (prior_log_vars - posterior_log_vars) - ) - - # Sum KL over all parameters - return 0.5 * kl_elementwise.sum() diff --git a/bias_transfer/trainer/main_loop_modules/random_readout_reset.py b/bias_transfer/trainer/main_loop_modules/random_readout_reset.py deleted file mode 100644 index 14ebe6f..0000000 --- a/bias_transfer/trainer/main_loop_modules/random_readout_reset.py +++ /dev/null @@ -1,28 +0,0 @@ -import numpy as np -import torch -from torch import nn -from functools import partial - -from .main_loop_module import MainLoopModule - - -class RandomReadoutReset(MainLoopModule): - def __init__(self, trainer): - super().__init__(trainer) - self.batch_progress = 0 - self.epoch_progress = 0 - - def pre_epoch(self, model, mode, **options): - super(RandomReadoutReset, self).pre_epoch(model, mode, **options) - if self.train_mode and self.config.reset_linear_frequency.get("epoch"): - if self.epoch_progress % self.config.reset_linear_frequency["epoch"] == 0: - model.module.linear_readout.reset_parameters() - self.epoch_progress += 1 - - def pre_forward(self, model, inputs, task_key, shared_memory): - super().pre_forward(model, inputs, task_key, shared_memory) - if self.train_mode and self.config.reset_linear_frequency.get("batch"): - if self.batch_progress % self.config.reset_linear_frequency["batch"] == 0: - model.module.linear_readout.reset_parameters() - self.batch_progress += 1 - return model, inputs diff --git a/bias_transfer/trainer/main_loop_modules/representation_regularization/__init__.py b/bias_transfer/trainer/main_loop_modules/representation_regularization/__init__.py deleted file mode 100644 index cbed237..0000000 --- a/bias_transfer/trainer/main_loop_modules/representation_regularization/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -from torch import nn -import torch - -from bias_transfer.trainer.main_loop_modules.main_loop_module import MainLoopModule - - -class RepresentationRegularization(MainLoopModule): - def __init__(self, trainer, name="RDL"): - super().__init__(trainer) - objectives = { # TODO: make adaptable to other tasks! 
- "Training": {"img_classification": {name: 0}}, - "Validation": {"img_classification": {name: 0}}, - "Test": {"img_classification": {name: 0}}, - } - self.tracker.add_objectives(objectives, init_epoch=True) - self.name = name - self.alpha_0 = self.config.regularization.get("alpha", 1.0) - self.alpha = 0.0 - - def pre_epoch(self, model, mode, **options): - super().pre_epoch(model, mode, **options) - if self.config.regularization.get("decay_alpha"): - self.alpha = self.alpha_0 * (1 - (self.epoch / self.config.max_iter)) - else: - self.alpha = self.alpha_0 - - def rep_distance(self, output, target): - raise NotImplementedError() - - def post_forward(self, outputs, loss, targets, **shared_memory): - extra_outputs = outputs[0] - if self.train_mode and ( - self.task_key == "transfer" or self.config.single_input_stream - ): - pred_loss = torch.zeros(1, device=self.device) - for key in targets.keys(): - if key == "class": - continue - pred_loss += self.rep_distance(extra_outputs[key], targets[key]) - loss += self.alpha * pred_loss - self.tracker.log_objective( - pred_loss.item(), (self.mode, "img_classification", self.name) - ) - return outputs, loss, targets.get("class", next(iter(targets.values()))) - else: - return outputs, loss, targets diff --git a/bias_transfer/trainer/main_loop_modules/representation_regularization/knowledge_distillation.py b/bias_transfer/trainer/main_loop_modules/representation_regularization/knowledge_distillation.py deleted file mode 100644 index 64fe603..0000000 --- a/bias_transfer/trainer/main_loop_modules/representation_regularization/knowledge_distillation.py +++ /dev/null @@ -1,19 +0,0 @@ -from torch import nn -import torch -import torch.nn.functional as F - - -from . import RepresentationRegularization - - -class KnowledgeDistillation(RepresentationRegularization): - def __init__(self, trainer): - super().__init__(trainer, name="KD") - self.criterion = nn.KLDivLoss(reduction="batchmean") - self.T = self.config.regularization.get("softmax_temp", 1.0) - - def rep_distance(self, output, target): - kd_loss = self.criterion( - F.log_softmax(output / self.T, dim=1), F.softmax(target / self.T, dim=1) - ) - return kd_loss * self.T * self.T diff --git a/bias_transfer/trainer/main_loop_modules/representation_regularization/rdl.py b/bias_transfer/trainer/main_loop_modules/representation_regularization/rdl.py deleted file mode 100644 index 8bd499a..0000000 --- a/bias_transfer/trainer/main_loop_modules/representation_regularization/rdl.py +++ /dev/null @@ -1,120 +0,0 @@ -from torch import nn -import torch - -from . 
import RepresentationRegularization -from ...utils import arctanh - - -class RDL(RepresentationRegularization): - @staticmethod - def centering(K): - n = K.shape[0] - unit = torch.ones([n, n], device=K.device) - I = torch.eye(n, device=K.device) - H = I - unit / n - - return torch.mm( - torch.mm(H, K), H - ) # HKH are the same with KH, KH is the first centering, H(KH) do the second time, results are the sme with one time centering - # return np.dot(H, K) # KH - - @staticmethod - def rbf(X, sigma=None): - GX = torch.dot(X, X.T) - KX = torch.diag(GX) - GX + (torch.diag(GX) - GX).T - if sigma is None: - mdist = torch.median(KX[KX != 0]) - sigma = math.sqrt(mdist) - KX *= -0.5 / (sigma * sigma) - KX = torch.exp(KX) - return KX - - @staticmethod - def kernel_HSIC(X, Y, sigma): - return torch.sum( - RDL.centering(RDL.rbf(X, sigma)) * RDL.centering(RDL.rbf(Y, sigma)) - ) - - @staticmethod - def linear_HSIC(X, Y): - L_X = torch.mm(X, X.T) - L_Y = torch.mm(Y, Y.T) - return torch.sum(RDL.centering(L_X) * RDL.centering(L_Y)) - - @staticmethod - def linear_CKA(X, Y): - hsic = RDL.linear_HSIC(X, Y) - var1 = torch.sqrt(RDL.linear_HSIC(X, X)) - var2 = torch.sqrt(RDL.linear_HSIC(Y, Y)) - - return hsic / (var1 * var2) - - @staticmethod - def kernel_CKA(X, Y, sigma=None): - hsic = RDL.kernel_HSIC(X, Y, sigma) - var1 = torch.sqrt(RDL.kernel_HSIC(X, X, sigma)) - var2 = torch.sqrt(RDL.kernel_HSIC(Y, Y, sigma)) - - return hsic / (var1 * var2) - - @staticmethod - def compute_mse_matrix(x, y=None): - """ - see: https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065 - Input: x is a Nxd matrix - y is an optional Mxd matirx - Output: dist is a NxM matrix where dist[i,j] is the square norm between x[i,:] and y[j,:] - if y is not given then use 'y=x'. - i.e. dist[i,j] = ||x[i,:]-y[j,:]||^2 - """ - x_norm = (x ** 2).sum(1).view(-1, 1) - if y is not None: - y_norm = (y ** 2).sum(1).view(1, -1) - else: - y = x - y_norm = x_norm.view(1, -1) - - dist = x_norm + y_norm - 2.0 * torch.mm(x, torch.transpose(y, 0, 1)) - return dist - - @staticmethod - def compute_rdm(x, dist_measure="corr"): - x_flat = x.flatten(1, -1) - centered = x_flat - x_flat.mean(dim=0).view( - 1, -1 - ) # centered by mean over images - if dist_measure == "corr": - result = (centered @ centered.transpose(0, 1)) / torch.ger( - torch.norm(centered, 2, dim=1), torch.norm(centered, 2, dim=1) - ) # see https://de.mathworks.com/help/images/ref/corr2.html - else: - result = RDL.compute_mse_matrix(centered) - return result - - @staticmethod - def rdm_comparison(x, y, criterion, dist_measure="corr", use_arctanh=False): - rdm_x = RDL.compute_rdm(x, dist_measure).flatten() - rdm_y = RDL.compute_rdm(y, dist_measure).flatten() - rdm_x = rdm_x.triu(diagonal=1) - rdm_y = rdm_y.triu(diagonal=1) - if use_arctanh: - rdm_x = arctanh(rdm_x) - rdm_y = arctanh(rdm_y) - return criterion(rdm_x, rdm_y) - - def __init__(self, trainer): - super().__init__(trainer, name="RDL") - self.criterion = nn.MSELoss() - self.dist_measure = self.config.regularization.get("dist_measure") - - def rep_distance(self, output, target): - if self.dist_measure == "CKA": - return RDL.linear_CKA(output, target) - else: - return RDL.rdm_comparison( - output, - target, - self.criterion, - self.dist_measure, - self.config.regularization.get("use_arctanh"), - ) diff --git a/bias_transfer/trainer/main_loop_modules/synaptic_intelligence.py b/bias_transfer/trainer/main_loop_modules/synaptic_intelligence.py deleted file mode 100644 index 3f4010e..0000000 --- 
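The deleted RDL module's `linear_CKA` boils down to centred Gram matrices and an HSIC normalisation; a compact standalone sketch with illustrative names:

```python
import torch


def linear_cka(x, y):
    """Centred linear CKA between two (N x D) representation matrices."""

    def centred_gram(z):
        gram = z @ z.t()
        n = gram.shape[0]
        h = torch.eye(n, device=z.device) - torch.ones(n, n, device=z.device) / n
        return h @ gram @ h

    gx, gy = centred_gram(x), centred_gram(y)
    hsic = (gx * gy).sum()
    return hsic / (gx.pow(2).sum().sqrt() * gy.pow(2).sum().sqrt())


similarity = linear_cka(torch.randn(32, 128), torch.randn(32, 64))
```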
a/bias_transfer/trainer/main_loop_modules/synaptic_intelligence.py +++ /dev/null @@ -1,48 +0,0 @@ -from .main_loop_module import MainLoopModule - - -class SynapticIntelligence(MainLoopModule): - """ - Implementation adapted from https://github.com/GMvandeVen/continual-learning/blob/master/continual_learner.py - """ - - def __init__(self, trainer): - super().__init__(trainer) - # Register starting param-values - model = trainer.model - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - model.register_buffer(f"{n}_SI_prev_task", p.data.clone()) - # Prepare to store running importance estimates and param-values before update ("Synaptic Intelligence") - self.params = {n: p for n, p in model.named_parameters() if p.requires_grad} - self.w = {} - self.old_params = {} - for n, p in model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - self.w[n] = p.data.clone().zero_() - self.old_params[n] = p.data.clone() - - def pre_forward(self, model, inputs, task_key, shared_memory): - super().pre_forward(model, inputs, task_key, shared_memory) - # Save current parameters - for n, p in self.params.items(): - n = n.replace(".", "__") - self.old_params[n] = p.clone().detach() - return model, inputs - - def post_optimizer(self, model): - # Accumulate the w - for n, p in self.params.items(): - n = n.replace(".", "__") - delta = p.detach() - self.old_params[n] - if ( - p.grad is not None - ): # In multi-head network, some head could have no grad (lazy) since no loss go through it. - self.w[n] -= p.grad * delta # w[n] is >=0 - - def post_epoch(self, model): - # Store to be used in final steps - for n, w in self.w.items(): - model.register_buffer(f"{n}_SI_omega", w) diff --git a/bias_transfer/trainer/neural_trainer.py b/bias_transfer/trainer/neural_trainer.py deleted file mode 100644 index d5682f2..0000000 --- a/bias_transfer/trainer/neural_trainer.py +++ /dev/null @@ -1,196 +0,0 @@ -from functools import partial - -import numpy as np -from torch import nn - -from bias_transfer.trainer.trainer import Trainer -from bias_transfer.trainer.utils import NBLossWrapper, get_subdict -from neuralpredictors import measures as mlmeasures -from nnvision.utility import measures -from nnvision.utility.measures import get_poisson_loss - - -def trainer(model, dataloaders, seed, uid, cb, eval_only=False, **kwargs): - t = NeuralTrainer(dataloaders, model, seed, uid, **kwargs) - return t.train(cb) - - -class NeuralTrainer(Trainer): - def get_tracker(self): - if self.config.track_training: - tracker_dict = dict( - correlation=partial( - get_correlations(), - self.model, - self.dataloaders["validation"], - device=self.device, - per_neuron=False, - ), - poisson_loss=partial( - get_poisson_loss(), - self.model, - self.dataloaders["validation"], - device=self.device, - per_neuron=False, - avg=False, - ), - ) - if hasattr(self.model, "tracked_values"): - tracker_dict.update(self.model.tracked_values) - tracker = MultipleObjectiveTracker(**tracker_dict) - else: - tracker = None - return tracker - - def get_training_controls(self): - self.criterion, self.stop_closure = {}, {} - for k in self.val_keys: - if "img_classification" not in k: - pass - if self.config.loss_weighing: - self.criterion[k] = NBLossWrapper().to(self.device) - else: - self.criterion[k] = getattr( - mlmeasures, self.config.loss_functions[k] - )(avg=self.config.avg_loss) - self.stop_closure[k] = {} - self.stop_closure[k]["eval"] = partial( - getattr(measures, "get_correlations"), - 
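`SynapticIntelligence.post_optimizer` above accumulates the path-integral importance `w` after each optimiser step; a sketch of that update with hypothetical argument names (`w`, `params`, `old_params` are dictionaries keyed by parameter name):

```python
import torch


def accumulate_si_importance(w, params, old_params):
    """After an optimiser step, add -grad * (p_new - p_old) to the running importance."""
    with torch.no_grad():
        for name, param in params.items():
            if param.grad is not None:
                w[name] -= param.grad * (param - old_params[name])
```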
dataloaders=dataloaders["validation"][k], - device=self.device, - per_neuron=False, - avg=True, - ) - self.stop_closure[k]["loss"] = partial( - get_poisson_loss, - dataloaders=dataloaders["validation"][k], - device=self.device, - per_neuron=False, - avg=False, - ) - - params = list(self.model.parameters()) - if self.config.loss_weighing: - for _, loss_object in self.criterion.items(): - params += list(loss_object.parameters()) - self.optimizer = getattr(optim, self.config.optimizer)( - params, **self.config.optimizer_options - ) - - def compute_loss( - self, - average_loss, - correct, - data_key, - loss, - outputs, - targets, - task_dict, - total, - total_loss, - total_loss_weight, - ): - if "img_classification" not in data_key: - loss += neural_full_objective( - self.model, - outputs, - data_loader, - self.criterion["neural"], - self.scale_loss, - data_key, - inputs, - targets, - ) - total["neural"] += get_correlations( - self.model, - batch_dict, - device=self.device, - as_dict=False, - per_neuron=False, - ) - task_dict["neural"]["eval"] = average_loss(total["neural"]) - total_loss["neural"] += loss.item() - task_dict["neural"]["epoch_loss"] = average_loss(total_loss["neural"]) - if self.config.loss_weighing: - total_loss_weight["neural"] += np.exp( - self.criterion["neural"].log_w.item() - ) - task_dict["neural"]["loss_weight"] = average_loss( - total_loss_weight["neural"] - ) - return loss - - def test_neural_model(model, data_loader, device, epoch, eval_type="Validation"): - loss = get_poisson_loss( - model, data_loader, device, as_dict=False, per_neuron=False - ) - eval = get_correlations( - model, data_loader, device=device, as_dict=False, per_neuron=False - ) - results = {"neural": {"eval": eval, "loss": loss}} - print( - "Neural {} Epoch {}: eval={}, loss={}".format( - eval_type, epoch, results["neural"]["eval"], results["neural"]["loss"] - ) - ) - return results - - def test_final_model( - self, - best_epoch, - best_eval, - config, - criterion, - dataloaders, - device, - epoch, - model, - seed, - test_n_iterations, - val_keys, - val_n_iterations, - ): - # test the final model with noise on the dev-set - # test the final model on the test set - test_results_dict, dev_final_results_dict = {}, {} - for k in self.val_keys: - if "img_classification" not in k: - dev_final_results = test_neural_model( - model, - data_loader=dataloaders["validation"][k], - device=device, - epoch=epoch, - eval_type="Validation", - ) - test_results = test_neural_model( - model, - data_loader=dataloaders["test"][k], - device=device, - epoch=epoch, - eval_type="Test", - ) - dev_final_results_dict.update(dev_final_results) - test_results_dict.update(test_results) - final_results = { - "test_results": test_results_dict, - "dev_eval": best_eval, - "epoch": best_epoch, - "dev_final_results": dev_final_results_dict, - } - return final_results, test_results_dict - - -def neural_full_objective( - model, outputs, dataloader, criterion, scale_loss, data_key, inputs, targets -): - - loss = criterion(outputs, targets) - loss_scale = ( - np.sqrt(len(dataloader[data_key].dataset) / inputs.shape[0]) - if scale_loss - else 1.0 - ) - loss *= loss_scale - if scale_loss: - loss += model.regularizer(data_key) - return loss diff --git a/bias_transfer/trainer/regression_trainer.py b/bias_transfer/trainer/regression_trainer.py deleted file mode 100644 index 82e2daa..0000000 --- a/bias_transfer/trainer/regression_trainer.py +++ /dev/null @@ -1,66 +0,0 @@ -from bias_transfer.trainer.utils.checkpointing import RemoteCheckpointing 
-from bias_transfer.trainer.img_classification_trainer import ImgClassificationTrainer -from bias_transfer.trainer.utils import get_subdict, arctanh -from neuralpredictors.tracking import AdvancedMultipleObjectiveTracker - - -def trainer(model, dataloaders, seed, uid, cb, eval_only=False, **kwargs): - t = RegressionTrainer(dataloaders, model, seed, uid, cb, **kwargs) - return t.train() - - -class RegressionTrainer(ImgClassificationTrainer): - checkpointing_cls = RemoteCheckpointing - - @property - def tracker(self): - try: - return self._tracker - except AttributeError: - objectives = { - "LR": 0, - "Training": {"regression": {"loss": 0, "normalization": 0}}, - "Validation": { - "regression": {"loss": 0, "normalization": 0}, - "patience": 0, - }, - } - self._tracker = AdvancedMultipleObjectiveTracker( - main_objective=("regression", "loss"), **objectives - ) - return self._tracker - - def compute_loss( - self, mode, task_key, loss, outputs, targets, - ): - reg_loss = self.criterion["regression"](outputs.reshape((-1,)), targets) - if self.config.scale_loss_with_arctanh: - reg_loss = arctanh(reg_loss) - - loss += reg_loss - _, predicted = outputs.max(1) - batch_size = targets.size(0) - self.tracker.log_objective( - batch_size, keys=(mode, task_key, "normalization"), - ) - self.tracker.log_objective( - loss.item() * batch_size, keys=(mode, task_key, "loss"), - ) - return loss - - def test_final_model(self, epoch, bn_train=""): - if not bn_train and self.config.eval_with_bn_train: - self.test_final_model(epoch, bn_train=" BN=Train") - # test the final model on the test set - for k in self.task_keys: - objectives = {"Test" + bn_train: {k: {"loss": 0, "normalization": 0,}}} - self.tracker.add_objectives(objectives, init_epoch=True) - test_result = self.main_loop( - epoch=epoch, - data_loader=get_subdict(self.data_loaders["test"], [k]), - mode="Test" + bn_train, - cycler_args={}, - cycler="LongCycler", - module_options={}, - ) - return test_result diff --git a/bias_transfer/trainer/transfer/__init__.py b/bias_transfer/trainer/transfer/__init__.py deleted file mode 100644 index 9ce68a0..0000000 --- a/bias_transfer/trainer/transfer/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .data_generator import * \ No newline at end of file diff --git a/bias_transfer/trainer/transfer/coreset_extraction.py b/bias_transfer/trainer/transfer/coreset_extraction.py deleted file mode 100644 index 13e93e8..0000000 --- a/bias_transfer/trainer/transfer/coreset_extraction.py +++ /dev/null @@ -1,233 +0,0 @@ -from copy import copy - -import torch -import numpy as np -from tqdm import tqdm - -from bias_transfer.dataset.dataset_classes.npy_dataset import NpyDataset -from bias_transfer.trainer.main_loop_modules.function_regularization.fromp_utils import ( - logistic_hessian, - softmax_hessian, -) - - -def extract_coreset( - data_loader, - method, - size, - model, - seed, - device, - initial_method="", - remove_from_data=True, - save_trainset=False, - **kwargs -): - print(f"Extracting Coreset using {method}") - collected_inputs = [] - collected_labels = [] - for src, trg in data_loader: - collected_inputs.append(src) - collected_labels.append(trg) - inputs = torch.cat(collected_inputs).numpy() - labels = torch.cat(collected_labels).numpy() - indices = list(range(len(inputs))) - if "k-center" in (method, initial_method): - coreset_idx, remain_idx = k_center(inputs, indices, size) - elif "fromp" in (method, initial_method): - coreset_idx, remain_idx = select_memorable_points( - inputs, labels, model, size, device, **kwargs - ) - 
elif "random_class_balanced" in (method, initial_method): - coreset_idx, remain_idx = random_class_balanced(labels, indices, seed, size) - else: # "random": - coreset_idx, remain_idx = random(indices, seed, size) - if method == "frcl": # needs an initial extraction run - coreset_idx, remain_idx = find_best_inducing_points( - inputs, model, size, coreset_idx, remain_idx, device, **kwargs - ) - if save_trainset: - if not remove_from_data: - remain_idx = list(range(len(inputs))) - return { - "source": inputs[remain_idx], - "source_cs": inputs[coreset_idx], - "target": labels[remain_idx], - "target_cs": labels[coreset_idx], - } - else: - return { - "source_cs": inputs[coreset_idx], - "target_cs": labels[coreset_idx], - } - - -def random(indices, seed, size): - np.random.seed(seed) - np.random.shuffle(indices) - coreset_idx, remain_idx = indices[:size], indices[size:] - return coreset_idx, remain_idx - - -def random_class_balanced(labels, indices, seed, size): - np.random.seed(seed) - np.random.shuffle(indices) - num_classes = max(labels) + 1 - size_per_class = size / num_classes - labels_selected = {l: 0 for l in range(num_classes)} - coreset_idx = [] - remain_idx = [] - for i, idx in enumerate(indices): - if len(coreset_idx) >= size: - remain_idx += indices[i:] - break - if labels_selected[labels[idx]] >= size_per_class: - remain_idx.append(idx) - continue - labels_selected[labels[idx]] += 1 - coreset_idx.append(labels[idx]) - return coreset_idx, remain_idx - - -def k_center(dataset, indices, size): - def update_distance(dists, x_train, current_id): - for i in range(x_train.shape[0]): - current_dist = np.linalg.norm(x_train[i, :] - x_train[current_id, :]) - dists[i] = np.minimum(current_dist, dists[i]) - return dists - - dists = np.full(dataset.shape[0], np.inf) - current_id = 0 - coreset_idx = [] - remain_idx = indices - for _ in range(size): - dists = update_distance(dists, dataset, current_id) - coreset_idx.append(current_id) - remain_idx.remove(current_id) - current_id = np.argmax(dists) - return coreset_idx, remain_idx - - -def calculate_induce_quality_statistic(idx, dataset, model, device): - """ - Calculates trace statistic of inducing quality - (up to multiplication by prior variance) - """ - statistic = 0 - - full_dataset_loader = torch.utils.data.DataLoader( - NpyDataset(samples=dataset, targets=dataset), - batch_size=500, - shuffle=False, - ) - model.eval() - with torch.no_grad(): - phi_z = model.core_forward(torch.tensor(dataset[idx]).to(device)) - k_zz = phi_z @ phi_z.T - inv_k_zz = torch.inverse(k_zz + torch.eye(k_zz.shape[0]).to(device) * 1e-3) - for x_batch, _ in full_dataset_loader: - phi_x = model.core_forward(x_batch.to(device)) - k_xz = phi_x @ phi_z.T - k_xx = phi_x @ phi_x.T - statistic += torch.trace(k_xx - k_xz @ inv_k_zz @ k_xz.T).cpu() - return statistic - - -def find_best_inducing_points( - dataset, - model, - size, - coreset_idx, - remain_idx, - device, - max_iter=300, - early_stop_num_iter=80, - verbose=True, -): - - """Sequentially adds a new point instead of a random one in - the initial set of inducing points, if the value of the statstic - above lessens, and does not do anything otherwise. 
- - start_inducing_set: list of points to start from - - max_iter: maximum number of tries to add a point - """ - score = calculate_induce_quality_statistic(coreset_idx, dataset, model, device) - new_point_counter = 0 - early_stop_counter = 0 - for i in range(max_iter): - add_point = np.random.randint(0, len(remain_idx)) - remove_point = np.random.randint(0, size) - coreset_idx_new = copy(coreset_idx) - coreset_idx_new[remove_point] = remain_idx[add_point] - score_new = calculate_induce_quality_statistic( - coreset_idx_new, dataset, model, device - ) - if score_new < score: - remain_idx[add_point] = coreset_idx[remove_point] - score, coreset_idx = score_new, coreset_idx_new - new_point_counter += 1 - early_stop_counter = 0 - else: - early_stop_counter += 1 - if verbose and i % 10 == 0: - print("Iteration {} out of {} is in progress".format(i, max_iter)) - print("Current best statistic is ", round(score.item(), 3)) - print("New points added ", new_point_counter, "\n") - if early_stop_counter == early_stop_num_iter: - print("Early stop activated!") - break - return coreset_idx, remain_idx - - -def select_memorable_points( - inputs, - labels, - model, - size, - device, - descending=True, -): - """ - Select memorable points ordered by their lambda values (descending=True picks most important points) - Adapted from - """ - batch_size = 500 - dataloader = torch.utils.data.DataLoader( - NpyDataset(samples=inputs, targets=labels), - batch_size=batch_size, - shuffle=False, - ) - num_classes = max(labels) + 1 - num_points_per_class = int(size / num_classes) - scores = {class_id: [] for class_id in range(num_classes)} - idx = {class_id: [] for class_id in range(num_classes)} - indices = torch.tensor(list(range(inputs.shape[0]))) - # collect scores - for i, (data, target) in tqdm(enumerate(dataloader)): - batch_start = i * batch_size - batch_end = min((i + 1) * batch_size, inputs.shape[0]) - data = data.to(device) - f = model.forward(data) - if f.shape[-1] > 1: - lamb = softmax_hessian(f) - lamb = torch.sum(lamb, dim=-1) - else: - lamb = logistic_hessian(f) - lamb = torch.squeeze(lamb, dim=-1) - lamb = lamb.detach().cpu() # hessian serves as a proxy for noise precision - for class_id in range(num_classes): - idx[class_id].append(indices[batch_start:batch_end][target == class_id]) - scores[class_id].append(lamb[target == class_id]) - - # sort by scores - coreset_idx = [] - remain_idx = [] - for class_id in range(num_classes): - idx[class_id] = torch.cat(idx[class_id], dim=0) - scores[class_id] = torch.cat(scores[class_id], dim=0) - _, indices = scores[class_id].sort(descending=descending) - - coreset_idx.append(idx[class_id][indices[:num_points_per_class]]) - remain_idx.append(idx[class_id][indices[num_points_per_class:]]) - - return torch.cat(coreset_idx), torch.cat(remain_idx) diff --git a/bias_transfer/trainer/transfer/data_generator.py b/bias_transfer/trainer/transfer/data_generator.py deleted file mode 100644 index f1dd430..0000000 --- a/bias_transfer/trainer/transfer/data_generator.py +++ /dev/null @@ -1,170 +0,0 @@ -from copy import copy - -from torch.utils.data.sampler import SubsetRandomSampler -from tqdm import tqdm -import torch -import numpy as np -import matplotlib.pyplot as plt - -from bias_transfer.dataset.dataset_classes.npy_dataset import NpyDataset -from bias_transfer.trainer.img_classification_trainer import ImgClassificationTrainer -from bias_transfer.trainer.main_loop_modules.fisher_estimation import FisherEstimation -from 
bias_transfer.trainer.main_loop_modules.function_regularization.fromp import FROMP -from bias_transfer.trainer.regression_trainer import RegressionTrainer -from bias_transfer.trainer.trainer import Trainer -from bias_transfer.trainer.transfer.coreset_extraction import extract_coreset - - -class DataGenerator(Trainer): - def __init__(self, dataloaders, model, seed, uid, cb, **kwargs): - super().__init__(dataloaders, model, seed, uid, cb, **kwargs) - self.main_task = list(self.task_keys)[0] - - def train(self): - self.tracker.start_epoch() - if hasattr(tqdm, "_instances"): - tqdm._instances.clear() - - if self.config.save_representation: - train = self.generate_rep_dataset(data="train") - elif self.config.extract_coreset: - save_in_model = self.config.extract_coreset.pop("save_in_model", False) - train = extract_coreset( - data_loader=self.data_loaders["train"][self.main_task], - model=self.model, - seed=self.seed, - device=self.device, - **self.config.extract_coreset, - ) - if f"{self.main_task}_cs" in self.data_loaders["train"]: # update coreset - cs = self.data_loaders["train"][f"{self.main_task}_cs"].dataset - train["source_cs"] = np.concatenate([train["source_cs"], cs.samples]) - train["target_cs"] = np.concatenate([train["target_cs"], cs.targets]) - if save_in_model: - self.model.coreset = torch.tensor(train["source_cs"]).to(self.device) - else: - train = {} - - if self.config.compute_fisher: - self.estimate_fisher(data="train") - elif self.config.compute_covariance: - train["covariance"] = self.compute_covariance( - data="train", - batch_size=self.config.compute_covariance.get("batch_size", 32), - ) - elif self.config.compute_si_omega: - self.compute_omega() - - if self.config.reset_for_new_task: - self.model.reset_for_new_task() - return 0.0, {}, self.model.state_dict(), train - - def generate_rep_dataset(self, data): - _, collected_outputs = self.main_loop( - data_loader=self.data_loaders[data], - epoch=0, - mode="Validation", - return_outputs=True, - ) - outputs = {} - for rep_name in collected_outputs[0].keys(): - outputs[rep_name] = torch.cat( - [batch_output[rep_name] for batch_output in collected_outputs] - ).numpy() - if self.config.save_input: - collected_inputs = [] - data_loader = next(iter(self.data_loaders[data].values())) - for src, _ in data_loader: - collected_inputs.append(src) - outputs["source"] = torch.cat(collected_inputs).numpy() - return outputs - - def estimate_fisher(self, data): - task_key = next(iter(self.data_loaders[data].keys())) - data_loader = self.data_loaders[data][task_key] - indices = list(range(len(data_loader.dataset))) - np.random.seed(self.seed) - np.random.shuffle(indices) - indices = indices[: self.config.compute_fisher.get("num_samples", 128)] - sampler = SubsetRandomSampler(indices) - data_loader = torch.utils.data.DataLoader( - data_loader.dataset, - batch_size=1, - sampler=sampler, - num_workers=data_loader.num_workers, - pin_memory=data_loader.pin_memory, - shuffle=False, - ) - objectives = { - "Generation": {task_key: {"loss": 0, "accuracy": 0, "normalization": 0}}, - } - self.tracker.add_objectives(objectives, init_epoch=True) - self.main_loop_modules.append(FisherEstimation(trainer=self)) - self.main_loop( - data_loader={task_key: data_loader}, - epoch=0, - mode="Generation", - return_outputs=False, - ) - - def compute_covariance(self, data, batch_size=32): - task_key = next(iter(self.data_loaders[data].keys())) - data_loader = self.data_loaders[data][task_key] - np.random.seed(self.seed) - data_loader = torch.utils.data.DataLoader( - 
data_loader.dataset, - batch_size=batch_size, - num_workers=data_loader.num_workers, - pin_memory=data_loader.pin_memory, - shuffle=False, - ) - self.model.eval() - covariance = 0 - # self.state['fisher'] = torch.zeros_like(self.state['mu']) - for data, label in tqdm(data_loader): - data = data.to(self.device) - self.optimizer.zero_grad() - covariance += FROMP.compute_covariance(data, self.model).cpu() - return covariance - - def compute_omega(self): - print("Compute Synaptic Intelligence Omega") - damping_factor = self.config.compute_si_omega.get("damping_factor", 0.0001) - # Loop over all parameters - for n, p in self.model.named_parameters(): - if p.requires_grad: - n = n.replace(".", "__") - - # Find/calculate new values for quadratic penalty on parameters - p_prev = getattr( - self.model, f"{n}_SI_prev_task" - ) # initial param values - omega = getattr(self.model, f"{n}_SI_omega") - p_current = p.detach().clone() - p_change = p_current - p_prev - omega_new = omega / (p_change ** 2 + damping_factor) - - # Store these new values in the model - self.model.register_buffer(f"{n}_importance", omega_new) - delattr(self.model, f"{n}_SI_omega") - delattr(self.model, f"{n}_SI_prev_task") - - -class TransferDataGeneratorClassificiation(ImgClassificationTrainer, DataGenerator): - pass - - -class TransferDataGeneratorRegression(RegressionTrainer, DataGenerator): - pass - - -def trainer(model, dataloaders, seed, uid, cb, eval_only=False, **kwargs): - t = TransferDataGeneratorClassificiation( - dataloaders, model, seed, uid, cb, **kwargs - ) - return t.train() - - -def regression_trainer(model, dataloaders, seed, uid, cb, eval_only=False, **kwargs): - t = TransferDataGeneratorRegression(dataloaders, model, seed, uid, cb, **kwargs) - return t.train() diff --git a/bias_transfer/analysis/__init__.py b/nntransfer/__init__.py similarity index 100% rename from bias_transfer/analysis/__init__.py rename to nntransfer/__init__.py diff --git a/bias_transfer/analysis/representation/__init__.py b/nntransfer/analysis/__init__.py similarity index 100% rename from bias_transfer/analysis/representation/__init__.py rename to nntransfer/analysis/__init__.py diff --git a/bias_transfer/analysis/plot.py b/nntransfer/analysis/plot.py similarity index 100% rename from bias_transfer/analysis/plot.py rename to nntransfer/analysis/plot.py diff --git a/bias_transfer/analysis/results/__init__.py b/nntransfer/analysis/results/__init__.py similarity index 100% rename from bias_transfer/analysis/results/__init__.py rename to nntransfer/analysis/results/__init__.py diff --git a/bias_transfer/analysis/results/base.py b/nntransfer/analysis/results/base.py similarity index 96% rename from bias_transfer/analysis/results/base.py rename to nntransfer/analysis/results/base.py index 9bbf049..7008f73 100644 --- a/bias_transfer/analysis/results/base.py +++ b/nntransfer/analysis/results/base.py @@ -2,8 +2,8 @@ import seaborn as sns import matplotlib.pyplot as plt -from bias_transfer.analysis.plot import plot -from bias_transfer.tables.transfer import TransferredTrainedModel +from nntransfer.analysis.plot import plot +from nntransfer.tables.transfer import TransferredTrainedModel from neuralpredictors.tracking import AdvancedMultipleObjectiveTracker as Tracker diff --git a/bias_transfer/analysis/results/noise_transfer.py b/nntransfer/analysis/results/noise_transfer.py similarity index 100% rename from bias_transfer/analysis/results/noise_transfer.py rename to nntransfer/analysis/results/noise_transfer.py diff --git 
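`compute_omega` above converts the accumulated Synaptic Intelligence statistic into per-parameter importances; the formula in isolation, with illustrative names:

```python
import torch


def si_omega(accumulated_w, p_current, p_prev, damping=1e-4):
    """Per-parameter importance: omega = w / ((p_current - p_prev)**2 + damping)."""
    return accumulated_w / ((p_current - p_prev) ** 2 + damping)


omega = si_omega(torch.rand(10), torch.randn(10), torch.randn(10))
```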
a/bias_transfer/analysis/train_path/__init__.py b/nntransfer/configs/__init__.py similarity index 100% rename from bias_transfer/analysis/train_path/__init__.py rename to nntransfer/configs/__init__.py diff --git a/bias_transfer/configs/base.py b/nntransfer/configs/base.py similarity index 100% rename from bias_transfer/configs/base.py rename to nntransfer/configs/base.py diff --git a/nntransfer/configs/dataset/__init__.py b/nntransfer/configs/dataset/__init__.py new file mode 100644 index 0000000..85c1058 --- /dev/null +++ b/nntransfer/configs/dataset/__init__.py @@ -0,0 +1,2 @@ +from .image import ImageDatasetConfig +from .base import DatasetConfig diff --git a/bias_transfer/configs/dataset/base.py b/nntransfer/configs/dataset/base.py similarity index 87% rename from bias_transfer/configs/dataset/base.py rename to nntransfer/configs/dataset/base.py index 3b4be58..5c78897 100644 --- a/bias_transfer/configs/dataset/base.py +++ b/nntransfer/configs/dataset/base.py @@ -1,4 +1,4 @@ -from bias_transfer.configs.base import BaseConfig +from nntransfer.configs.base import BaseConfig class DatasetConfig(BaseConfig): diff --git a/nntransfer/configs/dataset/image.py b/nntransfer/configs/dataset/image.py new file mode 100644 index 0000000..4b44820 --- /dev/null +++ b/nntransfer/configs/dataset/image.py @@ -0,0 +1,64 @@ +from typing import Dict, Tuple + +from nntransfer.configs.dataset.base import DatasetConfig +from nntransfer.tables.nnfabrik import Dataset + + +class ImageDatasetConfig(DatasetConfig): + config_name = "dataset" + table = Dataset() + fn = "bias_transfer.dataset.torchvision_dataset_loader" + + data_mean_defaults = { + "CIFAR100": ( + 0.5070751592371323, + 0.48654887331495095, + 0.4409178433670343, + ), + "CIFAR10": (0.49139968, 0.48215841, 0.44653091), + "SVHN": (0.4377, 0.4438, 0.4728), + "MNIST": (0.1307,), + } + data_std_defaults = { + "CIFAR100": ( + 0.2673342858792401, + 0.2564384629170883, + 0.27615047132568404, + ), + "CIFAR10": (0.24703223, 0.24348513, 0.26158784), + "SVHN": (0.1980, 0.2010, 0.1970), + "MNIST": (0.3081,), + } + + def __init__(self, **kwargs): + self.load_kwargs(**kwargs) + + self.dataset_cls: str = "CIFAR10" + self.apply_augmentation: bool = True + self.apply_normalization: bool = True + self.apply_grayscale: bool = False + self.apply_noise: Dict = {} + self.convert_to_rgb: bool = False + self.input_size: int = 32 + self.add_corrupted_test: bool = False + self.add_stylized_test: bool = False + self.use_c_test_as_val: bool = False + self.show_sample: bool = False + self.filter_classes: Tuple = () # (start,end) + self.data_dir: str = "./data/image_classification/torchvision/" + self.num_workers: int = 1 + dataset_id = ( + f"{self.dataset_sub_cls}_{self.bias}" if self.bias else self.dataset_cls + ) + dataset_id += "_bw" if self.apply_grayscale else "" + self.train_data_mean: Tuple[float] = self.data_mean_defaults[dataset_id] + self.train_data_std: Tuple[float] = self.data_std_defaults[dataset_id] + + super().__init__(**kwargs) + + @property + def filters(self): + filters = [] + if self.filter_classes: + filters.append("ClassesFilter") + return filters diff --git a/bias_transfer/configs/experiment.py b/nntransfer/configs/experiment.py similarity index 98% rename from bias_transfer/configs/experiment.py rename to nntransfer/configs/experiment.py index b8f59bf..67146b8 100644 --- a/bias_transfer/configs/experiment.py +++ b/nntransfer/configs/experiment.py @@ -1,7 +1,7 @@ from typing import Dict from .base import BaseConfig -from bias_transfer.tables.nnfabrik import * 
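The per-dataset mean/std defaults in `ImageDatasetConfig` above are typically consumed by normalisation transforms. A sketch of how they might be wired up for CIFAR10; the augmentation choice (random crop plus horizontal flip) is an assumption, since `get_transforms` is left abstract in the loader below:

```python
import torchvision.transforms as transforms

# CIFAR10 statistics copied from the config defaults above.
mean = (0.49139968, 0.48215841, 0.44653091)
std = (0.24703223, 0.24348513, 0.26158784)

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
```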
+from nntransfer.tables.nnfabrik import *
 
 
 class Experiment(BaseConfig):
diff --git a/bias_transfer/configs/__init__.py b/nntransfer/configs/model/__init__.py
similarity index 100%
rename from bias_transfer/configs/__init__.py
rename to nntransfer/configs/model/__init__.py
diff --git a/bias_transfer/configs/model/base.py b/nntransfer/configs/model/base.py
similarity index 75%
rename from bias_transfer/configs/model/base.py
rename to nntransfer/configs/model/base.py
index db8b33b..6e953ec 100644
--- a/bias_transfer/configs/model/base.py
+++ b/nntransfer/configs/model/base.py
@@ -1,7 +1,7 @@
 from typing import Dict
 
-from bias_transfer.configs.base import BaseConfig
-from bias_transfer.tables.nnfabrik import Model
+from nntransfer.configs.base import BaseConfig
+from nntransfer.tables.nnfabrik import Model
 
 
 class ModelConfig(BaseConfig):
diff --git a/nntransfer/configs/trainer/__init__.py b/nntransfer/configs/trainer/__init__.py
new file mode 100644
index 0000000..d6109b6
--- /dev/null
+++ b/nntransfer/configs/trainer/__init__.py
@@ -0,0 +1,2 @@
+from .base import TrainerConfig
+from . import mixins
\ No newline at end of file
diff --git a/bias_transfer/configs/trainer/base.py b/nntransfer/configs/trainer/base.py
similarity index 95%
rename from bias_transfer/configs/trainer/base.py
rename to nntransfer/configs/trainer/base.py
index 1bfc55d..6e2e76c 100644
--- a/bias_transfer/configs/trainer/base.py
+++ b/nntransfer/configs/trainer/base.py
@@ -1,7 +1,7 @@
 from typing import Dict, Tuple
 
-from bias_transfer.configs.base import BaseConfig
-from bias_transfer.tables.nnfabrik import *
+from nntransfer.configs.base import BaseConfig
+from nntransfer.tables.nnfabrik import *
 
 
 class TrainerConfig(BaseConfig):
diff --git a/nntransfer/configs/trainer/mixins/__init__.py b/nntransfer/configs/trainer/mixins/__init__.py
new file mode 100644
index 0000000..31a7673
--- /dev/null
+++ b/nntransfer/configs/trainer/mixins/__init__.py
@@ -0,0 +1,5 @@
+from .noise import (
+    RepresentationMatchingMixin,
+    RepresentationMonitorMixin,
+    NoiseAugmentationMixin,
+)
diff --git a/bias_transfer/configs/trainer/mixins/noise.py b/nntransfer/configs/trainer/mixins/noise.py
similarity index 79%
rename from bias_transfer/configs/trainer/mixins/noise.py
rename to nntransfer/configs/trainer/mixins/noise.py
index 95ac107..5ebe756 100644
--- a/bias_transfer/configs/trainer/mixins/noise.py
+++ b/nntransfer/configs/trainer/mixins/noise.py
@@ -1,6 +1,6 @@
 from typing import Dict
 
-from bias_transfer.configs.base import BaseConfig
+from nntransfer.configs.base import BaseConfig
 
 
 class NoiseAugmentationMixin(BaseConfig):
@@ -52,25 +52,6 @@     def conditional_assignment(self):
         super().conditional_assignment()
 
 
-class NoiseAdversarialMixin(BaseConfig):
-    def __init__(self, **kwargs):
-        self.load_kwargs(**kwargs)
-
-        self.noise_adv_classification: bool = False
-        self.noise_adv_regression: bool = False
-        self.noise_adv_loss_factor: float = 1.0
-        self.noise_adv_gamma: float = 10.0
-
-        super().__init__(**kwargs)
-
-    def conditional_assignment(self):
-        if (
-            self.noise_adv_classification or self.noise_adv_regression
-        ) and not "NoiseAdvTraining" in self.main_loop_modules:
-            self.main_loop_modules.append("NoiseAdvTraining")
-        super().conditional_assignment()
-
-
 class RepresentationMatchingMixin(BaseConfig):
     def __init__(self, **kwargs):
         self.load_kwargs(**kwargs)
diff --git a/bias_transfer/configs/transfer_experiment.py b/nntransfer/configs/transfer_experiment.py
similarity index 100%
rename from bias_transfer/configs/transfer_experiment.py
rename to nntransfer/configs/transfer_experiment.py
diff --git a/bias_transfer/dataset/dataset_classes/__init__.py b/nntransfer/dataset/__init__.py
similarity index 100%
rename from bias_transfer/dataset/dataset_classes/__init__.py
rename to nntransfer/dataset/__init__.py
diff --git a/bias_transfer/gp/__init__.py b/nntransfer/dataset/dataset_classes/__init__.py
similarity index 100%
rename from bias_transfer/gp/__init__.py
rename to nntransfer/dataset/dataset_classes/__init__.py
diff --git a/bias_transfer/dataset/dataset_classes/combined_dataset.py b/nntransfer/dataset/dataset_classes/combined_dataset.py
similarity index 100%
rename from bias_transfer/dataset/dataset_classes/combined_dataset.py
rename to nntransfer/dataset/dataset_classes/combined_dataset.py
diff --git a/bias_transfer/dataset/dataset_classes/npy_dataset.py b/nntransfer/dataset/dataset_classes/npy_dataset.py
similarity index 100%
rename from bias_transfer/dataset/dataset_classes/npy_dataset.py
rename to nntransfer/dataset/dataset_classes/npy_dataset.py
diff --git a/bias_transfer/dataset/dataset_classes/pkl_dataset.py b/nntransfer/dataset/dataset_classes/pkl_dataset.py
similarity index 100%
rename from bias_transfer/dataset/dataset_classes/pkl_dataset.py
rename to nntransfer/dataset/dataset_classes/pkl_dataset.py
diff --git a/bias_transfer/dataset/dataset_filters/__init__.py b/nntransfer/dataset/dataset_filters/__init__.py
similarity index 100%
rename from bias_transfer/dataset/dataset_filters/__init__.py
rename to nntransfer/dataset/dataset_filters/__init__.py
diff --git a/bias_transfer/dataset/dataset_filters/classes_filter.py b/nntransfer/dataset/dataset_filters/classes_filter.py
similarity index 100%
rename from bias_transfer/dataset/dataset_filters/classes_filter.py
rename to nntransfer/dataset/dataset_filters/classes_filter.py
diff --git a/bias_transfer/dataset/dataset_filters/dataset_filter.py b/nntransfer/dataset/dataset_filters/dataset_filter.py
similarity index 100%
rename from bias_transfer/dataset/dataset_filters/dataset_filter.py
rename to nntransfer/dataset/dataset_filters/dataset_filter.py
diff --git a/nntransfer/dataset/img_dataset_loader.py b/nntransfer/dataset/img_dataset_loader.py
new file mode 100644
index 0000000..cbec3e1
--- /dev/null
+++ b/nntransfer/dataset/img_dataset_loader.py
@@ -0,0 +1,295 @@
+import os
+import numpy as np
+import torch
+import torchvision
+import torchvision.transforms as transforms
+from torch.utils.data.dataset import ConcatDataset, Subset
+from torch.utils.data.sampler import SubsetRandomSampler
+from torchvision import datasets
+from nntransfer.configs.dataset.image import ImageDatasetConfig
+from nntransfer.dataset.dataset_classes.npy_dataset import NpyDataset
+from nntransfer.dataset.utils import get_dataset
+
+DATASET_URLS = {
+    "CIFAR10-C": "https://zenodo.org/record/2535967/files/CIFAR-10-C.tar",
+    "CIFAR100-C": "https://zenodo.org/record/3555552/files/CIFAR-100-C.tar",
+    "TinyImageNet-C": "https://zenodo.org/record/2536630/files/Tiny-ImageNet-C.tar",
+    "TinyImageNet-ST": "https://informatikunihamburgde-my.sharepoint.com/:u:/g/personal/shahd_safarani_informatik_uni-hamburg_de/EZhUKKVXTvRHlqi2HXHaIjEBLmAv4tQP8olvdGNRoWrPqA?e=8kSrHI&download=1",
+    "ImageNet-C": {
+        "blur": "https://zenodo.org/record/2235448/files/blur.tar",
+        "digital": "https://zenodo.org/record/2235448/files/digital.tar",
+        "extra": "https://zenodo.org/record/2235448/files/extra.tar",
+        "noise": "https://zenodo.org/record/2235448/files/noise.tar",
+        "weather": "https://zenodo.org/record/2235448/files/weather.tar",
+    },
+}
+
+
+class ImageDatasetLoader:
+    def __call__(self, seed, **config):
+        """
+        Utility function for loading and returning train and valid
+        multi-process iterators over the configured image dataset
+        (CIFAR, TinyImageNet or ImageNet variants).
+        If using CUDA, num_workers should be set to 1 and pin_memory to True.
+        Params
+        ------
+        - data_dir: path directory to the dataset.
+        - batch_size: how many samples per batch to load.
+        - augment: whether to apply the data augmentation scheme
+          mentioned in the paper. Only applied on the train split.
+        - seed: fix seed for reproducibility.
+        - valid_size: percentage split of the training set used for
+          the validation set. Should be a float in the range [0, 1].
+        - shuffle: whether to shuffle the train/validation indices.
+        - num_workers: number of subprocesses to use when loading the dataset.
+        - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
+          True if using GPU.
+        Returns
+        -------
+        - data_loaders: nested dict of train/validation/test iterators
+          (plus optional corrupted and stylized test iterators).
+        """
+        config = ImageDatasetConfig.from_dict(config)
+        print("Loading dataset: {}".format(config.dataset_cls))
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+
+        transform_test, transform_train, transform_val = self.get_transforms(config)
+
+        error_msg = "[!] valid_size should be in the range [0, 1]."
+        assert (config.valid_size >= 0) and (config.valid_size <= 1), error_msg
+
+        (
+            train_dataset,
+            valid_dataset,
+            test_dataset,
+            c_test_datasets,
+            st_test_dataset,
+        ) = self.get_datasets(config, transform_test, transform_train, transform_val)
+
+        filters = [globals().get(f)(config, train_dataset) for f in config.filters]
+        datasets_ = [train_dataset, valid_dataset, test_dataset]
+        if config.add_corrupted_test:
+            for c_ds in c_test_datasets.values():
+                datasets_ += list(c_ds.values())
+        for ds in datasets_:
+            for filt in filters:
+                filt.apply(ds)
+
+        data_loaders = self.get_data_loaders(
+            st_test_dataset,
+            c_test_datasets,
+            config,
+            seed,
+            test_dataset,
+            train_dataset,
+            valid_dataset,
+        )
+
+        return data_loaders
+
+    def get_transforms(self, config):
+        """
+
+        Args:
+            config:
+
+        Returns:
+            transform_test,
+            transform_train,
+            transform_val
+        """
+        raise NotImplementedError()
+
+    def get_datasets(self, config, transform_test, transform_train, transform_val):
+        """
+
+        Args:
+            config:
+            transform_test:
+            transform_train:
+            transform_val:
+
+        Returns:
+            train_dataset,
+            valid_dataset,
+            test_dataset,
+            c_test_datasets,
+            st_test_dataset,
+        """
+        raise NotImplementedError()
+
+    def add_corrupted_test(self, config, transform_test):
+        c_test_datasets = None
+        if config.add_corrupted_test:
+            urls = DATASET_URLS[config.dataset_cls + "-C"]
+            if not isinstance(urls, dict):
+                urls = {"default": urls}
+            for key, url in urls.items():
+                dataset_dir = get_dataset(
+                    url,
+                    config.data_dir,
+                    dataset_cls=config.dataset_cls + "-C",
+                )
+
+                c_test_datasets = {}
+                for c_category in os.listdir(dataset_dir):
+                    if config.dataset_cls in ("CIFAR10", "CIFAR100"):
+                        if c_category == "labels.npy" or not c_category.endswith(
+                            ".npy"
+                        ):
+                            continue
+                        c_test_datasets[c_category[:-4]] = {}
+                        for c_level in range(1, 6):
+                            start = (c_level - 1) * 10000
+                            end = c_level * 10000
+                            c_test_datasets[c_category[:-4]][c_level] = NpyDataset(
+                                samples=c_category,
+                                targets="labels.npy",
+                                root=dataset_dir,
+                                start=start,
+                                end=end,
+                                transform=transform_test,
+                            )
+                    else:
+                        if not os.path.isdir(os.path.join(dataset_dir, c_category)):
+                            continue
+                        c_test_datasets[c_category] = {}
+                        for c_level in os.listdir(
+                            os.path.join(dataset_dir, c_category)
+                        ):
+                            c_test_datasets[c_category][
+                                int(c_level)
+                            ] = datasets.ImageFolder(
+                                os.path.join(dataset_dir, c_category, c_level),
+                                transform=transform_test,
+                            )
+        return c_test_datasets
+
+    def add_stylized_test(self, config, transform_test):
+        st_test_dataset = None
+        if config.add_stylized_test:
+            st_dataset_dir = get_dataset(
+                DATASET_URLS[config.dataset_cls + "-ST"],
+                config.data_dir,
+                dataset_cls=config.dataset_cls + "-ST",
+            )
+            st_test_dataset = datasets.ImageFolder(
+                st_dataset_dir, transform=transform_test
+            )
+        return st_test_dataset
+
+    def get_data_loaders(
+        self,
+        st_test_dataset,
+        c_test_datasets,
+        config,
+        seed,
+        test_dataset,
+        train_dataset,
+        valid_dataset,
+    ):
+        num_train = len(train_dataset)
+        indices = list(range(num_train))
+        if config.use_c_test_as_val:  # Use valid_size of the c_test set for validation
+            train_sampler = SubsetRandomSampler(indices)
+            datasets = []
+            val_indices = []
+            start_idx = 0
+            for c_category in c_test_datasets.keys():
+                if c_category not in (
+                    "speckle_noise",
+                    "gaussian_blur",
+                    "spatter",
+                    "saturate",
+                ):
+                    continue
+                for dataset in c_test_datasets[c_category].values():
+                    num_val = len(dataset)
+                    indices = list(range(start_idx, start_idx + num_val))
+                    split = int(np.floor(config.valid_size * num_val))
+                    if config.shuffle:
+                        np.random.shuffle(indices)
+                    val_indices += indices[:split]
+                    datasets.append(dataset)
+                    start_idx += num_val
+            valid_dataset = ConcatDataset(datasets)
+            valid_sampler = SubsetRandomSampler(val_indices)
+        else:  # Use valid_size of the train set for validation
+            split = int(np.floor(config.valid_size * num_train))
+            if config.shuffle:
+                np.random.seed(seed)
+                np.random.shuffle(indices)
+            train_idx, valid_idx = indices[split:], indices[:split]
+            if config.train_subset:
+                subset_split = int(np.floor(config.train_subset * len(train_idx)))
+                train_idx = train_idx[:subset_split]
+            if config.shuffle:
+                train_sampler = SubsetRandomSampler(train_idx)
+                valid_sampler = SubsetRandomSampler(valid_idx)
+            else:
+                valid_dataset = Subset(train_dataset, valid_idx)
+                train_dataset = Subset(train_dataset, train_idx)
+                train_sampler = None
+                valid_sampler = None
+        train_loader = torch.utils.data.DataLoader(
+            train_dataset,
+            batch_size=config.batch_size,
+            sampler=train_sampler,
+            num_workers=config.num_workers,
+            pin_memory=config.pin_memory,
+            shuffle=False,
+        )
+        valid_loader = torch.utils.data.DataLoader(
+            valid_dataset,
+            batch_size=config.batch_size,
+            sampler=valid_sampler,
+            num_workers=config.num_workers,
+            pin_memory=config.pin_memory,
+            shuffle=False,
+        )
+        test_loader = torch.utils.data.DataLoader(
+            test_dataset,
+            batch_size=config.batch_size,
+            num_workers=config.num_workers,
+            pin_memory=config.pin_memory,
+            shuffle=True,
+        )
+        task_key = (
+            "regression"
+            if config.bias is not None and "regression" in config.bias
+            else "img_classification"
+        )
+        data_loaders = {
+            "train": {task_key: train_loader},
+            "validation": {task_key: valid_loader},
+            "test": {task_key: test_loader},
+        }
+
+        if config.add_stylized_test:
+            st_test_loader = torch.utils.data.DataLoader(
+                st_test_dataset,
+                batch_size=config.batch_size,
+                num_workers=config.num_workers,
+                pin_memory=config.pin_memory,
+                shuffle=False,
+            )
+            data_loaders["st_test"] = st_test_loader
+
+        if config.add_corrupted_test:
+            c_test_loaders = {}
+            for c_category in c_test_datasets.keys():
+                c_test_loaders[c_category] = {}
+                for c_level, dataset in c_test_datasets[c_category].items():
+                    c_test_loaders[c_category][c_level] = torch.utils.data.DataLoader(
+                        dataset,
+                        batch_size=config.batch_size,
+                        num_workers=config.num_workers,
+                        pin_memory=config.pin_memory,
+                        shuffle=True,
+                    )
+            data_loaders["c_test"] = {"img_classification": c_test_loaders}
+        return data_loaders
+
+
diff --git a/bias_transfer/dataset/scripts/download_imagenet.sh b/nntransfer/dataset/scripts/download_imagenet.sh
similarity index 100%
rename from bias_transfer/dataset/scripts/download_imagenet.sh
rename to nntransfer/dataset/scripts/download_imagenet.sh
diff --git a/bias_transfer/dataset/utils.py b/nntransfer/dataset/utils.py
similarity index 100%
rename from bias_transfer/dataset/utils.py
rename to nntransfer/dataset/utils.py
diff --git a/nntransfer/models/__init__.py b/nntransfer/models/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/nntransfer/models/__init__.py
@@ -0,0 +1 @@
+
diff --git a/bias_transfer/models/lenet.py b/nntransfer/models/lenet.py
similarity index 100%
rename from bias_transfer/models/lenet.py
rename to nntransfer/models/lenet.py
diff --git a/bias_transfer/models/mlp.py b/nntransfer/models/mlp.py
similarity index 100%
rename from bias_transfer/models/mlp.py
rename to nntransfer/models/mlp.py
diff --git a/bias_transfer/models/resnet.py b/nntransfer/models/resnet.py
similarity index 100%
rename from bias_transfer/models/resnet.py
rename to nntransfer/models/resnet.py
diff --git a/bias_transfer/models/utils.py b/nntransfer/models/utils.py
similarity index 98%
rename from bias_transfer/models/utils.py
rename to nntransfer/models/utils.py
index de3010d..47a82b5 100644
--- a/bias_transfer/models/utils.py
+++ b/nntransfer/models/utils.py
@@ -3,7 +3,7 @@ import torch
 from torch import nn
 from torchvision.models.resnet import Bottleneck, BasicBlock
 
-from bias_transfer.models.resnet import ResNet
+from nntransfer.models.resnet import ResNet
 
 
 def reset_params(model, reset=None):
diff --git a/bias_transfer/models/vgg.py b/nntransfer/models/vgg.py
similarity index 100%
rename from bias_transfer/models/vgg.py
rename to nntransfer/models/vgg.py
diff --git a/bias_transfer/models/wrappers/__init__.py b/nntransfer/models/wrappers/__init__.py
similarity index 61%
rename from bias_transfer/models/wrappers/__init__.py
rename to nntransfer/models/wrappers/__init__.py
index e91f18e..d110c22 100644
--- a/bias_transfer/models/wrappers/__init__.py
+++ b/nntransfer/models/wrappers/__init__.py
@@ -1,2 +1 @@
-from .noise_adv import NoiseAdvWrapper
 from .intermediate_layer_getter import IntermediateLayerGetter
\ No newline at end of file
diff --git a/bias_transfer/models/wrappers/intermediate_layer_getter.py b/nntransfer/models/wrappers/intermediate_layer_getter.py
similarity index 100%
rename from bias_transfer/models/wrappers/intermediate_layer_getter.py
rename to nntransfer/models/wrappers/intermediate_layer_getter.py
diff --git a/bias_transfer/tables/__init__.py b/nntransfer/tables/__init__.py
similarity index 100%
rename from bias_transfer/tables/__init__.py
rename to nntransfer/tables/__init__.py
diff --git a/bias_transfer/tables/nnfabrik.py b/nntransfer/tables/nnfabrik.py
similarity index 100%
rename from bias_transfer/tables/nnfabrik.py
rename to nntransfer/tables/nnfabrik.py
diff --git a/bias_transfer/tables/trained_model.py b/nntransfer/tables/trained_model.py
similarity index 100%
rename from bias_transfer/tables/trained_model.py
rename to nntransfer/tables/trained_model.py
diff --git a/bias_transfer/tables/transfer.py b/nntransfer/tables/transfer.py
similarity index 100%
rename from bias_transfer/tables/transfer.py
rename to nntransfer/tables/transfer.py
diff --git a/nntransfer/trainer/__init__.py b/nntransfer/trainer/__init__.py
new file mode 100644
index 0000000..0378809
--- /dev/null
+++ b/nntransfer/trainer/__init__.py
@@ -0,0 +1,2 @@
+from bias_transfer.trainer.transfer import trainer as transfer
+from bias_transfer.trainer.transfer import regression_trainer as regression_transfer
diff --git a/nntransfer/trainer/main_loop_modules/__init__.py b/nntransfer/trainer/main_loop_modules/__init__.py
new file mode 100644
index 0000000..b3c0b60
--- /dev/null
+++ b/nntransfer/trainer/main_loop_modules/__init__.py
@@ -0,0 +1,4 @@
+from .noise_augmentation import NoiseAugmentation
+from .representation_matching import RepresentationMatching
+from .representation_monitor import RepresentationMonitor
+from .model_wrapper import ModelWrapper
diff --git a/bias_transfer/trainer/main_loop_modules/main_loop_module.py b/nntransfer/trainer/main_loop_modules/main_loop_module.py
similarity index 100%
rename from bias_transfer/trainer/main_loop_modules/main_loop_module.py
rename to nntransfer/trainer/main_loop_modules/main_loop_module.py
diff --git a/bias_transfer/trainer/main_loop_modules/model_wrapper.py b/nntransfer/trainer/main_loop_modules/model_wrapper.py
similarity index 100%
rename from bias_transfer/trainer/main_loop_modules/model_wrapper.py
rename to nntransfer/trainer/main_loop_modules/model_wrapper.py
diff --git a/bias_transfer/trainer/main_loop_modules/noise_augmentation.py b/nntransfer/trainer/main_loop_modules/noise_augmentation.py
similarity index 100%
rename from bias_transfer/trainer/main_loop_modules/noise_augmentation.py
rename to nntransfer/trainer/main_loop_modules/noise_augmentation.py
diff --git a/bias_transfer/trainer/main_loop_modules/representation_matching.py b/nntransfer/trainer/main_loop_modules/representation_matching.py
similarity index 100%
rename from bias_transfer/trainer/main_loop_modules/representation_matching.py
rename to nntransfer/trainer/main_loop_modules/representation_matching.py
diff --git a/bias_transfer/trainer/main_loop_modules/representation_monitor.py b/nntransfer/trainer/main_loop_modules/representation_monitor.py
similarity index 100%
rename from bias_transfer/trainer/main_loop_modules/representation_monitor.py
rename to nntransfer/trainer/main_loop_modules/representation_monitor.py
diff --git a/bias_transfer/trainer/trainer.py b/nntransfer/trainer/trainer.py
similarity index 95%
rename from bias_transfer/trainer/trainer.py
rename to nntransfer/trainer/trainer.py
index ba1fd0f..e422097 100644
--- a/bias_transfer/trainer/trainer.py
+++ b/nntransfer/trainer/trainer.py
@@ -4,22 +4,24 @@ from tqdm import tqdm
 
 import torch
 from torch import optim, nn
+
 import nnfabrik as nnf
 from neuralpredictors.training import copy_state
+from nnfabrik.utility.nn_helpers import load_state_dict
 
-from bias_transfer.models.utils import (
+from nntransfer.models.utils import (
     freeze_params,
     set_bn_to_eval,
     weight_reset,
     reset_params,
 )
-from bias_transfer.trainer.utils import SchedulerWrapper
-from bias_transfer.configs.trainer import TrainerConfig
-from nnfabrik.utility.nn_helpers import load_state_dict
-from bias_transfer.trainer.utils.checkpointing import LocalCheckpointing, RemoteCheckpointing
-from bias_transfer.trainer.main_loop_modules import *
-from bias_transfer.trainer.utils import LongCycler, MTL_Cycler
-from bias_transfer.trainer.utils.early_stopping import early_stopping
+from nntransfer.configs.trainer import TrainerConfig
+
+from .main_loop_modules import *
+from .utils import SchedulerWrapper
+from .utils.checkpointing import LocalCheckpointing, RemoteCheckpointing
+from .utils import LongCycler, MTL_Cycler
+from .utils.early_stopping import early_stopping
 
 
 class Trainer:
diff --git a/bias_transfer/trainer/utils/__init__.py b/nntransfer/trainer/utils/__init__.py
similarity index 100%
rename from bias_transfer/trainer/utils/__init__.py
rename to nntransfer/trainer/utils/__init__.py
diff --git a/bias_transfer/trainer/utils/checkpointing.py b/nntransfer/trainer/utils/checkpointing.py
similarity index 100%
rename from bias_transfer/trainer/utils/checkpointing.py
rename to nntransfer/trainer/utils/checkpointing.py
diff --git a/bias_transfer/trainer/utils/early_stopping.py b/nntransfer/trainer/utils/early_stopping.py
similarity index 98%
rename from bias_transfer/trainer/utils/early_stopping.py
rename to nntransfer/trainer/utils/early_stopping.py
index d57dd53..cef66ee 100644
--- a/bias_transfer/trainer/utils/early_stopping.py
+++ b/nntransfer/trainer/utils/early_stopping.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from bias_transfer.trainer.utils import StopClosureWrapper
+from nntransfer.trainer.utils import StopClosureWrapper
 
 
 def early_stopping(
diff --git a/bias_transfer/trainer/utils/loss.py b/nntransfer/trainer/utils/loss.py
similarity index 100%
rename from bias_transfer/trainer/utils/loss.py
rename to nntransfer/trainer/utils/loss.py
diff --git a/bias_transfer/trainer/utils/warmup.py b/nntransfer/trainer/utils/warmup.py
similarity index 100%
rename from bias_transfer/trainer/utils/warmup.py
rename to nntransfer/trainer/utils/warmup.py
diff --git a/setup.py b/setup.py
index 679e306..d39eb01 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,11 @@
 from setuptools import setup
 
 setup(
-    name='bias_transfer',
-    version='0.1dev',
-    description='Experiments about inductive bias transfer',
-    author='Arne Nix',
-    author_email='arnenix@googlemail.com',
-    packages=['bias_transfer'], #same as name
-    install_requires=[], #external packages as dependencies
-)
\ No newline at end of file
+    name="nntransfer",
+    version="0.1dev",
+    description="Framework for transfer experiments",
+    author="Arne Nix",
+    author_email="arnenix@googlemail.com",
+    packages=["nntransfer"],  # same as name
+    install_requires=[],  # external packages as dependencies
+)
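
The trainer-config mixins kept by this change (NoiseAugmentationMixin, RepresentationMatchingMixin, RepresentationMonitorMixin) all subclass BaseConfig and are meant to be stacked on top of TrainerConfig; the removed NoiseAdversarialMixin followed the same pattern and registered its main-loop module from conditional_assignment(). The sketch below shows one plausible composition under that assumption; the composed class name and the choice of exactly these two mixins are illustrative and not part of the diff.

from nntransfer.configs.trainer import TrainerConfig
from nntransfer.configs.trainer.mixins import (
    NoiseAugmentationMixin,
    RepresentationMatchingMixin,
)


# Hypothetical composed config: each mixin's conditional_assignment() can append
# the main-loop module it needs (the removed NoiseAdversarialMixin, for example,
# appended "NoiseAdvTraining" to main_loop_modules).
class NoisyRepresentationMatchingTrainerConfig(
    NoiseAugmentationMixin, RepresentationMatchingMixin, TrainerConfig
):
    pass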
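
ImageDatasetLoader in the new nntransfer/dataset/img_dataset_loader.py is abstract: get_transforms() and get_datasets() raise NotImplementedError and are meant to be filled in per dataset, while add_corrupted_test()/add_stylized_test() and get_data_loaders() are inherited. A minimal sketch of a concrete CIFAR-10 subclass follows; the class name, the transform choices, and the download=True flags are assumptions for illustration, and only the hook signatures and the five-tuple return value come from the diff.

import torchvision
import torchvision.transforms as transforms

from nntransfer.dataset.img_dataset_loader import ImageDatasetLoader


class CIFAR10DatasetLoader(ImageDatasetLoader):  # hypothetical subclass
    def get_transforms(self, config):
        # Shared normalization; train-time augmentation kept deliberately small.
        normalize = transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616)
        )
        transform_train = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]
        )
        transform_val = transforms.Compose([transforms.ToTensor(), normalize])
        transform_test = transform_val
        return transform_test, transform_train, transform_val

    def get_datasets(self, config, transform_test, transform_train, transform_val):
        train_dataset = torchvision.datasets.CIFAR10(
            root=config.data_dir, train=True, download=True, transform=transform_train
        )
        # The base class splits train/validation itself, so the validation set is
        # the same underlying data wrapped with validation-time transforms.
        valid_dataset = torchvision.datasets.CIFAR10(
            root=config.data_dir, train=True, download=True, transform=transform_val
        )
        test_dataset = torchvision.datasets.CIFAR10(
            root=config.data_dir, train=False, download=True, transform=transform_test
        )
        # Corrupted/stylized test sets are fetched by the inherited helpers, which
        # check config.add_corrupted_test / config.add_stylized_test internally.
        c_test_datasets = self.add_corrupted_test(config, transform_test)
        st_test_dataset = self.add_stylized_test(config, transform_test)
        return (
            train_dataset,
            valid_dataset,
            test_dataset,
            c_test_datasets,
            st_test_dataset,
        )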
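
The dictionary returned by get_data_loaders() is nested: "train", "validation" and "test" map a task key ("img_classification" or "regression", depending on config.bias) to a DataLoader, an optional "st_test" entry holds the stylized test loader, and "c_test" maps corruption category and severity level to loaders. The helper below is an illustrative consumer of that structure (the evaluate_fn callback is an assumption, not part of the codebase).

def evaluate_all_splits(data_loaders, evaluate_fn):
    """Run `evaluate_fn(loader)` on every split contained in the nested dict."""
    results = {}
    for split in ("train", "validation", "test"):
        for task_key, loader in data_loaders[split].items():
            results[(split, task_key)] = evaluate_fn(loader)
    # Corrupted test sets are nested one level deeper: category -> severity level.
    c_test = data_loaders.get("c_test", {}).get("img_classification", {})
    for category, levels in c_test.items():
        for level, loader in levels.items():
            results[("c_test", category, level)] = evaluate_fn(loader)
    if "st_test" in data_loaders:
        results[("st_test",)] = evaluate_fn(data_loaders["st_test"])
    return results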
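
Since setup.py now installs a package named nntransfer instead of bias_transfer, downstream code has to switch its imports to the new module paths. The lines below mirror import statements that appear verbatim in the moved modules of this diff.

# old (pre-rename)
# from bias_transfer.configs.base import BaseConfig
# from bias_transfer.configs.trainer import TrainerConfig
# from bias_transfer.models.resnet import ResNet

# new (post-rename)
from nntransfer.configs.base import BaseConfig
from nntransfer.configs.trainer import TrainerConfig
from nntransfer.models.resnet import ResNet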