From a0b555387272d9bd02774fb88683dc38d268f77b Mon Sep 17 00:00:00 2001
From: KonstantinWilleke
Date: Wed, 1 Apr 2020 23:55:08 +0200
Subject: [PATCH 1/2] bugfix testset sampler.

---
 nnvision/datasets/monkey_loaders.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/nnvision/datasets/monkey_loaders.py b/nnvision/datasets/monkey_loaders.py
index fb198df..c81a93f 100644
--- a/nnvision/datasets/monkey_loaders.py
+++ b/nnvision/datasets/monkey_loaders.py
@@ -40,6 +40,7 @@ def __contains__(self, key):
         return key in self.cache
 
     def __getitem__(self, item):
+        item = item.tolist() if isinstance(item, Iterable) else item
         return [self[i] for i in item] if isinstance(item, Iterable) else self.update(item)
 
     def update(self, key):
@@ -123,12 +124,13 @@ def get_cached_loader(image_ids, responses, batch_size, shuffle=True, image_cach
     image_ids = torch.tensor(image_ids.astype(np.int32))
     responses = torch.tensor(responses).to(torch.float)
     dataset = CachedTensorDataset(image_ids, responses, image_cache=image_cache)
-    sampler = RepeatsBatchSampler(torch.tensor(repeat_condition.astype(np.int32))) if repeat_condition is not None else None
+    sampler = RepeatsBatchSampler(repeat_condition) if repeat_condition is not None else None
 
-    return utils.DataLoader(dataset,
-                            batch_size=batch_size,
-                            shuffle=shuffle,
-                            batch_sampler=sampler)
+    dataloader = utils.DataLoader(dataset, batch_sampler=sampler) if batch_size is None else utils.DataLoader(dataset,
+                                                                                                              batch_size=batch_size,
+                                                                                                              shuffle=shuffle,
+                                                                                                              )
+    return dataloader
 
 def monkey_static_loader(dataset,
                          neuronal_data_files,
@@ -256,7 +258,7 @@ def monkey_static_loader(dataset,
         train_loader = get_cached_loader(training_image_ids, responses_train, batch_size=batch_size, image_cache=cache)
         val_loader = get_cached_loader(validation_image_ids, responses_val, batch_size=batch_size, image_cache=cache)
 
-        test_loader = get_cached_loader(testing_image_ids, responses_test, batch_size=1, shuffle=False,
+        test_loader = get_cached_loader(testing_image_ids, responses_test, batch_size=None, shuffle=None,
                                         image_cache=cache, repeat_condition=testing_image_ids)
 
         dataloaders["train"][data_key] = train_loader
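[Note on PATCH 1/2] torch.utils.data.DataLoader rejects batch_sampler combined with a non-default batch_size or shuffle, so the test loader is now built with batch_sampler alone while the train/validation loaders keep batch_size and shuffle. The sketch below only illustrates the batching behaviour the fixed test loader relies on; GroupByConditionSampler is a hypothetical stand-in for neuralpredictors' RepeatsBatchSampler, not the library's code.

    import numpy as np
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    class GroupByConditionSampler:
        """Hypothetical stand-in for RepeatsBatchSampler: yields one batch of
        dataset indices per unique condition value (e.g. per test image id)."""
        def __init__(self, conditions):
            conditions = np.asarray(conditions)
            self.batches = [np.where(conditions == c)[0].tolist()
                            for c in np.unique(conditions)]

        def __iter__(self):
            return iter(self.batches)

        def __len__(self):
            return len(self.batches)

    # Toy usage: three images shown twice each -> three batches of two trials.
    image_ids = np.array([7, 7, 3, 3, 5, 5])
    data = TensorDataset(torch.arange(6.0))
    loader = DataLoader(data, batch_sampler=GroupByConditionSampler(image_ids))
    # Passing e.g. batch_size=64 or shuffle=True alongside batch_sampler raises
    # a ValueError, which is why get_cached_loader is now called with
    # batch_size=None, shuffle=None for the test set.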
From 02ae10da0da5e74d0b6abfe3697034eeaf601de9 Mon Sep 17 00:00:00 2001
From: KonstantinWilleke
Date: Thu, 2 Apr 2020 00:29:14 +0200
Subject: [PATCH 2/2] Refactor measures to make poisson loss usable as an
 early stop measure.

---
 nnvision/training/trainers.py | 10 +++++-----
 nnvision/utility/measures.py  | 26 +++++++++++++++-----------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/nnvision/training/trainers.py b/nnvision/training/trainers.py
index 6f4048b..c411e1c 100644
--- a/nnvision/training/trainers.py
+++ b/nnvision/training/trainers.py
@@ -76,7 +76,7 @@ def full_objective(model, dataloader, data_key, *args):
     model.train()
     criterion = getattr(mlmeasures, loss_function)(avg=avg_loss)
 
-    stop_closure = partial(getattr(measures, stop_function), dataloaders=dataloaders["validation"], device=device, avg=True, as_dict=False)
+    stop_closure = partial(getattr(measures, stop_function), dataloaders=dataloaders["validation"], device=device, per_neuron=False, avg=True)
 
     n_iterations = len(LongCycler(dataloaders["train"]))
 
@@ -90,8 +90,8 @@ def full_objective(model, dataloader, data_key, *args):
     optim_step_count = len(dataloaders["train"].keys()) if loss_accum_batch_n is None else loss_accum_batch_n
 
     if track_training:
-        tracker_dict = dict(correlation=partial(get_correlations(), model, dataloaders["validation"], device=device, avg=True),
-                            poisson_loss=partial(get_poisson_loss(), model, dataloaders["validation"], device=device, avg=True))
+        tracker_dict = dict(correlation=partial(get_correlations(), model, dataloaders["validation"], device=device, per_neuron=False),
+                            poisson_loss=partial(get_poisson_loss(), model, dataloaders["validation"], device=device, per_neuron=False, avg=False))
         if hasattr(model, 'tracked_values'):
             tracker_dict.update(model.tracked_values)
         tracker = MultipleObjectiveTracker(**tracker_dict)
@@ -130,8 +130,8 @@ def full_objective(model, dataloader, data_key, *args):
     tracker.finalize() if track_training else None
 
     # Compute avg validation and test correlation
-    validation_correlation = get_correlations(model, dataloaders["validation"], device=device, as_dict=False, avg=False)
-    test_correlation = get_correlations(model, dataloaders["test"], device=device, as_dict=False, avg=False)
+    validation_correlation = get_correlations(model, dataloaders["validation"], device=device, as_dict=False, per_neuron=False)
+    test_correlation = get_correlations(model, dataloaders["test"], device=device, as_dict=False, per_neuron=False)
 
     # return the whole tracker output as a dict
     output = {k: v for k, v in tracker.log.items()} if track_training else {}
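[Note on PATCH 2/2, trainers.py] stop_closure is handed to the early-stopping machinery, which expects a single comparable scalar per evaluation; per_neuron=False guarantees that for both correlation and Poisson-loss stop functions. Below is a minimal sketch of that contract only: the loop, train_one_epoch, and the function name are illustrative stand-ins, not the implementation this repo actually uses (which comes from neuralpredictors/mlutils).

    def train_with_early_stopping(model, train_one_epoch, stop_closure,
                                  patience=5, maximize=True):
        # stop_closure(model) must return one scalar, hence per_neuron=False
        # in the partial built above. maximize=True fits a correlation-based
        # stop function; use maximize=False for a loss like get_poisson_loss.
        best, bad_epochs = None, 0
        while bad_epochs < patience:
            train_one_epoch(model)
            score = float(stop_closure(model))
            if best is None or (score > best if maximize else score < best):
                best, bad_epochs = score, 0
            else:
                bad_epochs += 1
        return best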
diff --git a/nnvision/utility/measures.py b/nnvision/utility/measures.py
index bef5f5a..7d6a791 100644
--- a/nnvision/utility/measures.py
+++ b/nnvision/utility/measures.py
@@ -7,7 +7,7 @@
 import contextlib
 
 
-def model_predictions(loader, model, data_key, device='cpu'):
+def model_predictions(dataloader, model, data_key, device='cpu'):
     """
     computes model predictions for a given dataloader and a model
     Returns:
@@ -16,7 +16,7 @@
     """
 
     target, output = torch.empty(0), torch.empty(0)
-    for images, responses in loader:
+    for images, responses in dataloader:
         if len(images.shape) == 5:
             images = images.squeeze(dim=0)
             responses = responses.squeeze(dim=0)
@@ -27,11 +27,11 @@
     return target.numpy(), output.numpy()
 
 
-def get_correlations(model, dataloaders, device='cpu', as_dict=False, avg=False):
+def get_correlations(model, dataloaders, device='cpu', as_dict=False, per_neuron=True, **kwargs):
     correlations = {}
     with eval_state(model) if not isinstance(model, types.FunctionType) else contextlib.nullcontext():
         for k, v in dataloaders.items():
-            target, output = model_predictions(loader=v, model=model, data_key=k, device=device)
+            target, output = model_predictions(dataloader=v, model=model, data_key=k, device=device)
             correlations[k] = corr(target, output, axis=0)
 
             if np.any(np.isnan(correlations[k])):
@@ -39,20 +39,24 @@ def get_correlations(model, dataloaders, device='cpu', as_dict=False, avg=False)
             correlations[k][np.isnan(correlations[k])] = 0
 
     if not as_dict:
-        correlations = np.mean(np.hstack([v for v in correlations.values()])) if avg else np.hstack([v for v in correlations.values()])
+        correlations = np.hstack([v for v in correlations.values()]) if per_neuron else np.mean(np.hstack([v for v in correlations.values()]))
     return correlations
 
 
-def get_poisson_loss(model, dataloaders, device='cpu', as_dict=False, avg=True, eps=1e-12):
+def get_poisson_loss(model, dataloaders, device='cpu', as_dict=False, avg=False, per_neuron=False, eps=1e-12):
     poisson_loss = {}
     with eval_state(model) if not isinstance(model, types.FunctionType) else contextlib.nullcontext():
         for k, v in dataloaders.items():
-            target, output = model_predictions(loader=v, model=model, data_key=k, device=device)
-            poisson_loss[k] = output - target * np.log(output + eps)
-    if not as_dict:
-        return np.mean(np.hstack([v for v in poisson_loss.values()])) if avg else np.hstack([v for v in poisson_loss.values()])
-    else:
+            target, output = model_predictions(dataloader=v, model=model, data_key=k, device=device)
+            loss = output - target * np.log(output + eps)
+            poisson_loss[k] = np.mean(loss, axis=0) if avg else np.sum(loss, axis=0)
+    if as_dict:
         return poisson_loss
+    else:
+        if per_neuron:
+            return np.hstack([v for v in poisson_loss.values()])
+        else:
+            return np.mean(np.hstack([v for v in poisson_loss.values()])) if avg else np.sum(np.hstack([v for v in poisson_loss.values()]))
 
 
 def get_repeats(dataloader, min_repeats=2):
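[Note on PATCH 2/2, measures.py] The refactored flags compose as follows: as_dict=True returns a {data_key: per-neuron array} dict; otherwise per_neuron=True stacks neurons across data keys into one flat array, and per_neuron=False reduces to a single scalar (for get_poisson_loss, a mean when avg=True and a sum when avg=False). A usage sketch, assuming a trained model and a dataloaders dict as produced by monkey_static_loader (both placeholders here):

    from nnvision.utility.measures import get_correlations, get_poisson_loss

    # per-neuron validation correlations, stacked across data keys
    val_corr = get_correlations(model, dataloaders["validation"],
                                device="cpu", as_dict=False, per_neuron=True)
    # one scalar: mean correlation over all neurons
    avg_corr = get_correlations(model, dataloaders["validation"],
                                device="cpu", as_dict=False, per_neuron=False)
    # one scalar summed Poisson loss, directly usable as an early-stop measure
    stop_loss = get_poisson_loss(model, dataloaders["validation"],
                                 device="cpu", as_dict=False,
                                 per_neuron=False, avg=False)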