From 129d8c5f1c1daa0cbea61ffb5678f3af0c1f9184 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Fri, 8 Nov 2024 14:29:35 +0100
Subject: [PATCH 01/19] Remove unused NANException

---
 experiments/misc.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/experiments/misc.py b/experiments/misc.py
index 9295f1f..2e7b76c 100644
--- a/experiments/misc.py
+++ b/experiments/misc.py
@@ -2,10 +2,6 @@
 import torch
 
 
-class NaNError(BaseException):
-    """Exception to be raise when the training encounters a NaN in loss or model weights."""
-
-
 def get_device() -> torch.device:
     """Gets CUDA if available, CPU else."""
     return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

From 1e51fc6f0877965d19a86cc21ae661b7bc5fdb9d Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Sun, 10 Nov 2024 19:23:15 +0100
Subject: [PATCH 02/19] Add missing bracket in logs

---
 experiments/base_experiment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 41a735f..b368a66 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -61,7 +61,7 @@ def run_mlflow(self):
         experiment_id, run_name = self._init()
         git_hash = os.popen("git rev-parse HEAD").read().strip()
         LOGGER.info(
-            f"### Starting experiment {self.cfg.exp_name}/{run_name} (mlflowid={experiment_id}) (jobid={self.cfg.jobid}) (git_hash={git_hash} ###"
+            f"### Starting experiment {self.cfg.exp_name}/{run_name} (mlflowid={experiment_id}) (jobid={self.cfg.jobid}) (git_hash={git_hash}) ###"
         )
         if self.cfg.use_mlflow:
             with mlflow.start_run(experiment_id=experiment_id, run_name=run_name):

From cef1b4c6fdb75cec462d2482f5f94bc73a98c09b Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Mon, 18 Nov 2024 23:29:39 +0100
Subject: [PATCH 03/19] Remove schedulefree optimizer (didn't help), take Lion optimizer from 'pytorch_optimizer' instead of official repo for convenience (using the same seed, both implementations give the same loss up to machine precision)

---
 experiments/base_experiment.py | 12 ++----------
 requirements.txt               |  3 +--
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index b368a66..7f31a1d 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -10,6 +10,7 @@
 from hydra.utils import instantiate
 import mlflow
 from torch_ema import ExponentialMovingAverage
+import pytorch_optimizer
 
 import gatr.primitives.attention
 import gatr.layers.linear
@@ -23,9 +24,6 @@
 
 from gatr.layers import MLPConfig, SelfAttentionConfig
 
-from lion_pytorch import Lion
-import schedulefree
-
 cs = ConfigStore.instance()
 cs.store(name="base_attention", node=SelfAttentionConfig)
 cs.store(name="base_mlp", node=MLPConfig)
@@ -388,13 +386,7 @@ def _init_optimizer(self, param_groups=None):
                 weight_decay=self.cfg.training.weight_decay,
             )
         elif self.cfg.training.optimizer == "Lion":
-            self.optimizer = Lion(
-                param_groups,
-                betas=self.cfg.training.betas,
-                weight_decay=self.cfg.training.weight_decay,
-            )
-        elif self.cfg.training.optimizer == "ScheduleFree":
-            self.optimizer = schedulefree.AdamWScheduleFree(
+            self.optimizer = pytorch_optimizer.Lion(
                 param_groups,
                 betas=self.cfg.training.betas,
                 weight_decay=self.cfg.training.weight_decay,
diff --git a/requirements.txt b/requirements.txt
index f1f6a38..4c02115 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,8 +17,7 @@ git+https://github.com/dgasmith/opt_einsum.git@1a984b7b75f3e532e7129f6aa13f7ddc3
 torch==2.2.0
 torch_geometric==2.4.0
 torch-ema
-lion-pytorch==0.1.4
-schedulefree==1.2.5
+git+https://github.com/kozistr/pytorch_optimizer # most recent pytorch_optimizer contains ADOPT; replace this with simply 'pytorch_optimizer>=3.2.0' after the next release
 
 ## Experiment management
 hydra-core

From 38a04cbcaf2511aa2fef68510b91001406cb6f48 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Mon, 18 Nov 2024 23:30:40 +0100
Subject: [PATCH 04/19] Add ADOPT optimizer (recent hype)

---
 experiments/base_experiment.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 7f31a1d..99fb36c 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -391,6 +391,12 @@ def _init_optimizer(self, param_groups=None):
                 betas=self.cfg.training.betas,
                 weight_decay=self.cfg.training.weight_decay,
             )
+        elif self.cfg.training.optimizer == "ADOPT":
+            self.optimizer = pytorch_optimizer.ADOPT(
+                param_groups,
+                betas=self.cfg.training.betas,
+                weight_decay=self.cfg.training.weight_decay,
+            )
         else:
             raise ValueError(f"Optimizer {self.cfg.training.optimizer} not implemented")
         LOGGER.debug(
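The two optimizer branches introduced in patches 03 and 04 can be exercised in isolation with a minimal sketch like the following. This is not code from the repository; it assumes pytorch_optimizer>=3.2.0 (as the requirements comment anticipates), and the betas/weight_decay values are placeholders rather than the project's config defaults.

import torch
import pytorch_optimizer

model = torch.nn.Linear(4, 1)
optimizer_name = "ADOPT"  # or "Lion", mirroring cfg.training.optimizer

if optimizer_name == "Lion":
    optimizer = pytorch_optimizer.Lion(
        model.parameters(), betas=(0.9, 0.99), weight_decay=0.0
    )
elif optimizer_name == "ADOPT":
    optimizer = pytorch_optimizer.ADOPT(
        model.parameters(), betas=(0.9, 0.9999), weight_decay=0.0
    )
else:
    raise ValueError(f"Optimizer {optimizer_name} not implemented")

# one dummy training step to check the optimizer actually runs
loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()
optimizer.step()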
From f6843175e96d5ffbc58ccc3f4a0fed88c56012eb Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Tue, 19 Nov 2024 14:44:08 +0100
Subject: [PATCH 05/19] Make the code work with training.clip_grad_norm=None (turns off gradient clipping)

---
 experiments/base_experiment.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 99fb36c..6abe39b 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -576,15 +576,18 @@ def _step(self, data, step):
                 self.cfg.training.clip_grad_value,
             )
         # rescale gradients such that their norm matches a given number
-        grad_norm = (
-            torch.nn.utils.clip_grad_norm_(
-                self.model.parameters(),
-                self.cfg.training.clip_grad_norm,
-                error_if_nonfinite=False,
+        if self.cfg.training.clip_grad_norm is not None:
+            grad_norm = (
+                torch.nn.utils.clip_grad_norm_(
+                    self.model.parameters(),
+                    self.cfg.training.clip_grad_norm,
+                    error_if_nonfinite=False,
+                )
+                .cpu()
+                .item()
             )
-            .cpu()
-            .item()
-        )
+        else:
+            grad_norm = 0.0  # meaningless placeholder
         if step > MIN_STEP_SKIP and self.cfg.training.max_grad_norm is not None:
             if grad_norm > self.cfg.training.max_grad_norm:
                 LOGGER.warning(

From f409c16495700db8ed96f89f9244ff75750cbcf1 Mon Sep 17 00:00:00 2001
From: victor breso
Date: Tue, 19 Nov 2024 15:53:31 +0100
Subject: [PATCH 06/19] Updated config files to match main, removed all mentions of ScheduleFree and updated requirements to prevent package conflicts.

---
 config_paper/default_tagging.yaml | 4 ++++
 experiments/base_experiment.py    | 4 ----
 requirements.txt                  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/config_paper/default_tagging.yaml b/config_paper/default_tagging.yaml
index 43d32f2..9e7a846 100644
--- a/config_paper/default_tagging.yaml
+++ b/config_paper/default_tagging.yaml
@@ -20,9 +20,13 @@ data:
   train: true
 
 training:
   iterations: 2e5
+  optimizer: Lion
   batchsize: 128
   lr: 3e-4
   validate_every_n_steps: 5000
+  weight_decay: 0 # top tagging from scratch: 0.2; JetClass pretraining/training: 0; top finetuning: 0.01
+  scheduler: CosineAnnealingLR
+  force_xformers: true
 
diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 6abe39b..8509f3d 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -499,8 +499,6 @@ def cycle(iterable):
         for step in range(self.cfg.training.iterations):
             # training
             self.model.train()
-            if self.cfg.training.optimizer == "ScheduleFree":
-                self.optimizer.train()
             data = next(iterator)
             self._step(data, step)
 
@@ -632,8 +630,6 @@ def _validate(self, step):
         metrics = self._init_metrics()
 
         self.model.eval()
-        if self.cfg.training.optimizer == "ScheduleFree":
-            self.optimizer.eval()
         with torch.no_grad():
             for data in self.val_loader:
                 # use EMA for validation if available
diff --git a/requirements.txt b/requirements.txt
index 4c02115..e0879ea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ black==22.3.0
 ## Standard science stack
 h5py
 matplotlib
-numpy<1.25
+numpy==1.25
 scipy
 
 ## Standard utils

From cba67782054076f90a3a6c9110c3b394740f2769 Mon Sep 17 00:00:00 2001
From: victor breso
Date: Tue, 19 Nov 2024 16:59:54 +0100
Subject: [PATCH 07/19] Expanded the range of allowed numpy versions in the requirements

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e0879ea..9018471 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ black==22.3.0
 ## Standard science stack
 h5py
 matplotlib
-numpy==1.25
+nnumpy>=1.25.0,<=1.26.4
 scipy
 
 ## Standard utils

From 096299862fcc5decfd9bd64931ef080af988592f Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Wed, 20 Nov 2024 11:22:41 +0100
Subject: [PATCH 08/19] Speed up embedding code: Avoid for loop over batch indices

---
 experiments/tagging/embedding.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/experiments/tagging/embedding.py b/experiments/tagging/embedding.py
index 20a80c1..ba59482 100644
--- a/experiments/tagging/embedding.py
+++ b/experiments/tagging/embedding.py
@@ -92,15 +92,10 @@ def embed_tagging_data_into_ga(fourmomenta, scalars, ptr, cfg_data):
     n_spurions = spurions.shape[0]
     if cfg_data.beam_token:
         # prepend spurions to the token list (within each block)
-        spurion_idxs = torch.cat(
-            [
-                torch.arange(
-                    ptr_start + i * n_spurions,
-                    ptr_start + (i + 1) * n_spurions,
-                )
-                for i, ptr_start in enumerate(ptr[:-1])
-            ]
-        )
+        spurion_idxs = torch.stack(
+            [ptr[:-1] + i for i in range(n_spurions)], dim=0
+        ) + n_spurions * torch.arange(batchsize, device=ptr.device)
+        spurion_idxs = spurion_idxs.permute(1, 0).flatten()
         insert_spurion = torch.zeros(
             multivectors.shape[0] + n_spurions * batchsize,
             dtype=torch.bool,
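The vectorized index construction above can be checked against the loop it replaces with a short standalone snippet. This is a sanity check sketched here for illustration only (not part of the repository's tests); `ptr` mimics the usual CSR-style event boundaries from the patch.

import torch

ptr = torch.tensor([0, 3, 7, 12])  # event boundaries, batchsize = 3
batchsize = len(ptr) - 1
n_spurions = 2

# old version: python loop over events
loop_idxs = torch.cat(
    [
        torch.arange(ptr_start + i * n_spurions, ptr_start + (i + 1) * n_spurions)
        for i, ptr_start in enumerate(ptr[:-1])
    ]
)

# new version: one batched expression, as in the patch above
vec_idxs = torch.stack(
    [ptr[:-1] + i for i in range(n_spurions)], dim=0
) + n_spurions * torch.arange(batchsize, device=ptr.device)
vec_idxs = vec_idxs.permute(1, 0).flatten()

assert torch.equal(loop_idxs, vec_idxs)  # both give [0, 1, 5, 6, 11, 12]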
From 1d4dd0e5cb3977822c242daaab36181f76868314 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Wed, 20 Nov 2024 11:27:42 +0100
Subject: [PATCH 09/19] Speed up embedding code part 2: Also kill the 2nd for loop (very similar to the 1st)

---
 experiments/tagging/embedding.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/experiments/tagging/embedding.py b/experiments/tagging/embedding.py
index ba59482..b2677d9 100644
--- a/experiments/tagging/embedding.py
+++ b/experiments/tagging/embedding.py
@@ -129,15 +129,10 @@ def embed_tagging_data_into_ga(fourmomenta, scalars, ptr, cfg_data):
     if cfg_data.include_global_token:
         # prepend global tokens to the token list
         num_global_tokens = cfg_data.num_global_tokens
-        global_idxs = torch.cat(
-            [
-                torch.arange(
-                    ptr_start + i * num_global_tokens,
-                    ptr_start + (i + 1) * num_global_tokens,
-                )
-                for i, ptr_start in enumerate(ptr[:-1])
-            ]
-        )
+        global_idxs = torch.stack(
+            [ptr[:-1] + i for i in range(num_global_tokens)], dim=0
+        ) + num_global_tokens * torch.arange(batchsize, device=ptr.device)
+        global_idxs = global_idxs.permute(1, 0).flatten()
         is_global = torch.zeros(
             multivectors.shape[0] + batchsize * num_global_tokens,
             dtype=torch.bool,

From 6c69fcfc055438fc5e45aca16db725ddf219f6d2 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Wed, 20 Nov 2024 11:42:08 +0100
Subject: [PATCH 10/19] Speed up embedding code part 3: Change the if conditions in case we have no global tokens or no spurions - the previous code crashed in this case

---
 experiments/tagging/embedding.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/experiments/tagging/embedding.py b/experiments/tagging/embedding.py
index b2677d9..da2e54d 100644
--- a/experiments/tagging/embedding.py
+++ b/experiments/tagging/embedding.py
@@ -90,7 +90,7 @@ def embed_tagging_data_into_ga(fourmomenta, scalars, ptr, cfg_data):
         fourmomenta.dtype,
     )
     n_spurions = spurions.shape[0]
-    if cfg_data.beam_token:
+    if cfg_data.beam_token and n_spurions > 0:
         # prepend spurions to the token list (within each block)
         spurion_idxs = torch.stack(
             [ptr[:-1] + i for i in range(n_spurions)], dim=0
@@ -126,9 +126,9 @@ def embed_tagging_data_into_ga(fourmomenta, scalars, ptr, cfg_data):
     multivectors = torch.cat((multivectors, spurions), dim=-2)
 
     # global tokens
-    if cfg_data.include_global_token:
+    num_global_tokens = cfg_data.num_global_tokens
+    if cfg_data.include_global_token and num_global_tokens > 0:
         # prepend global tokens to the token list
-        num_global_tokens = cfg_data.num_global_tokens
         global_idxs = torch.stack(
             [ptr[:-1] + i for i in range(num_global_tokens)], dim=0
         ) + num_global_tokens * torch.arange(batchsize, device=ptr.device)
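The guards added above matter because torch.stack refuses an empty tensor list, so n_spurions == 0 or num_global_tokens == 0 would crash the vectorized construction from the previous two patches. A minimal illustration (not repository code):

import torch

try:
    # what the list comprehension degenerates to when n_spurions == 0
    torch.stack([], dim=0)
except RuntimeError as err:
    print(f"torch.stack on an empty list fails: {err}")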
From 42d8a062ddcad1e8995ffb4bed469c01716c3e26 Mon Sep 17 00:00:00 2001
From: vbpla
Date: Wed, 20 Nov 2024 13:09:14 +0100
Subject: [PATCH 11/19] Commentary on the numpy range expansion and opt_einsum latest version update in the requirements

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 9018471..a0fcdfd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ black==22.3.0
 ## Standard science stack
 h5py
 matplotlib
-nnumpy>=1.25.0,<=1.26.4
+numpy>=1.25.0,<=1.26.4 # numpy>=1.25.0 is needed for pytorch-optimizer, numpy<=1.26.4 is needed for numba
 scipy
 
 ## Standard utils
@@ -13,7 +13,7 @@ tqdm
 
 ## Deep learning
 einops
-git+https://github.com/dgasmith/opt_einsum.git@1a984b7b75f3e532e7129f6aa13f7ddc3da66e10 # Un-released latest master (pinned for repro); the latest released version 3.3.0 has bugs in shape-based contraction path computations (used for einsum)
+opt_einsum
 torch==2.2.0
 torch_geometric==2.4.0
 torch-ema

From df3f7533e8ce59d30cef19f12c67bdcbeb3a7a24 Mon Sep 17 00:00:00 2001
From: vbpla
Date: Fri, 22 Nov 2024 17:50:28 +0100
Subject: [PATCH 12/19] Added a disclaimer about compatibility issues of xformers on Mac

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a0fcdfd..1731df7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,7 +27,7 @@ optuna
 ## Project-specific: general
 clifford==1.4.0
 numba==0.58.1 # otherwise has conflicts
-xformers==0.0.24
+xformers==0.0.24 # there exists known compatibility issues with xformers on Mac systems
 torchdiffeq # for CFM sampling
 
 #weaver packages

From e8ab29cdd9a6793f0aafe55fd9752feda671332c Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Sat, 23 Nov 2024 12:03:02 +0100
Subject: [PATCH 13/19] Remove mentions of equivariant join from mlp documentation

---
 gatr/layers/mlp/geometric_bilinears.py | 2 +-
 gatr/layers/mlp/mlp.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gatr/layers/mlp/geometric_bilinears.py b/gatr/layers/mlp/geometric_bilinears.py
index aebeeff..061f53f 100644
--- a/gatr/layers/mlp/geometric_bilinears.py
+++ b/gatr/layers/mlp/geometric_bilinears.py
@@ -18,7 +18,7 @@ class GeometricBilinear(nn.Module):
     """Geometric bilinear layer.
 
     Pin-equivariant map between multivector tensors that constructs new geometric features via
-    geometric products and the equivariant join (based on a reference vector).
+    geometric products.
 
     Parameters
     ----------
diff --git a/gatr/layers/mlp/mlp.py b/gatr/layers/mlp/mlp.py
index 7cf7be1..fbfed72 100644
--- a/gatr/layers/mlp/mlp.py
+++ b/gatr/layers/mlp/mlp.py
@@ -18,7 +18,7 @@ class GeoMLP(nn.Module):
     """Geometric MLP.
 
     This is a core component of GATr's transformer blocks. It is similar to a regular MLP, except
-    that it uses geometric bilinears (GP and equivariant join) in place of the first linear layer.
+    that it uses geometric bilinears (the geometric product) in place of the first linear layer.
 
     Assumes input has shape `(..., channels[0], 16)`, output has shape `(..., channels[-1], 16)`,
     will create hidden layers with shape `(..., channel, 16)` for each additional entry in

From 4dd7d25d598fbb1e19b7c31bc7e851b0cb44c140 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Mon, 25 Nov 2024 15:54:24 +0100
Subject: [PATCH 14/19] Extend requirements to make data/collect_data.py work

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 1731df7..d62e9aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,12 +4,14 @@ black==22.3.0
 
 ## Standard science stack
 h5py
+hdf5plugin
 matplotlib
 numpy>=1.25.0,<=1.26.4 # numpy>=1.25.0 is needed for pytorch-optimizer, numpy<=1.26.4 is needed for numba
 scipy
 
 ## Standard utils
 tqdm
+wget
 
 ## Deep learning
 einops

From 16e2aedecdc9a986489fa97c7c589e1efe395fba Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Mon, 25 Nov 2024 16:08:30 +0100
Subject: [PATCH 15/19] Fix inconsistencies in tests: remove gated_gelu_divide from tests; make tests_regression work without the BaseWrapper (which was removed a long time ago)

---
 tests/gatr/primitives/test_nonlinearities.py  | 15 +------
 .../regression_datasets/particle_mass.py      | 36 ++++++++++++---
 .../regression_datasets/top_reconstruction.py | 44 ++++++++++++++-----
 tests_regression/test_regression.py           |  3 +-
 4 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/tests/gatr/primitives/test_nonlinearities.py b/tests/gatr/primitives/test_nonlinearities.py
index bb1e61c..2b41bf6 100644
--- a/tests/gatr/primitives/test_nonlinearities.py
+++ b/tests/gatr/primitives/test_nonlinearities.py
@@ -4,13 +4,11 @@
 import torch
 
 from gatr.primitives import gated_relu, gated_sigmoid
-from gatr.primitives.nonlinearities import gated_gelu, gated_gelu_divide
+from gatr.primitives.nonlinearities import gated_gelu
 from tests.helpers import BATCH_DIMS, TOLERANCES, check_pin_equivariance
 
 
-@pytest.mark.parametrize(
-    "fn", [gated_relu, gated_gelu, gated_sigmoid, gated_gelu_divide]
-)
+@pytest.mark.parametrize("fn", [gated_relu, gated_gelu, gated_sigmoid])
 @pytest.mark.parametrize("batch_dims", BATCH_DIMS)
 def test_gated_nonlin_equivariance(fn, batch_dims):
     """Tests an identity map for equivariance (testing the test)."""
@@ -18,12 +16,3 @@ def test_gated_nonlin_equivariance(fn, batch_dims):
     check_pin_equivariance(
         fn, 1, fn_kwargs=dict(gates=gates), batch_dims=batch_dims, **TOLERANCES
     )
-
-
-def test_gelu_division():
-    """Test that gated_gelu_divide() is equal to gelu(x)/x."""
-    values = torch.randn(2, 3, 4)
-    gates = torch.randn(2, 3, 4)
-    y1 = gated_gelu(values, gates) / gates
-    y2 = gated_gelu_divide(values, gates)
-    torch.testing.assert_close(y1, y2)
diff --git a/tests_regression/regression_datasets/particle_mass.py b/tests_regression/regression_datasets/particle_mass.py
index 4f5b9db..082b37f 100644
--- a/tests_regression/regression_datasets/particle_mass.py
+++ b/tests_regression/regression_datasets/particle_mass.py
@@ -1,6 +1,5 @@
 import torch
 
-from experiments.base_wrapper import BaseWrapper
 from gatr.interface import embed_vector, extract_scalar
 from tests_regression.regression_datasets.constants import DATASET_SIZE, DEVICE
 
@@ -31,7 +30,7 @@ def __getitem__(self, idx):
         return self.particle[idx], self.mass[idx]
 
 
-class ParticleMassWrapper(BaseWrapper):
+class ParticleMassWrapper(torch.nn.Module):
     """Wrapper around GATr networks for PointsDistanceDataset."""
 
     mv_in_channels = 1
@@ -42,7 +41,34 @@ class ParticleMassWrapper(BaseWrapper):
     raw_out_channels = 1
 
     def __init__(self, net):
-        super().__init__(net, scalars=True, return_other=False)
+        super().__init__()
+        self.net = net
+
+    def forward(self, inputs: torch.Tensor):
+        """Wrapped forward pass pass.
+
+        Parses inputs into GA + scalar representation, calls the forward pass of the wrapped net,
+        and extracts the outputs from the GA + scalar representation again.
+
+        Parameters
+        ----------
+        inputs : torch.Tensor
+            Raw inputs, as given by dataset.
+
+        Returns
+        -------
+        outputs : torch.Tensor
+            Raw outputs, as expected in dataset.
+        """
+
+        multivector, scalars = self.embed_into_ga(inputs)
+        multivector_outputs, scalar_outputs = self.net(
+            multivector,
+            scalars=scalars,
+        )
+        outputs = self.extract_from_ga(multivector_outputs, scalar_outputs)
+
+        return outputs
 
     def embed_into_ga(self, inputs):
         """Embeds raw inputs into the geometric algebra (+ scalar) representation.
@@ -89,8 +115,6 @@ def extract_from_ga(self, multivector, scalars):
         -------
         outputs : torch.Tensor
             Raw outputs, as expected in dataset.
-        other : torch.Tensor
-            Additional output data, e.g. required for regularization.
         """
 
         _, num_objects, num_channels, num_ga_components = multivector.shape
@@ -99,4 +123,4 @@ def extract_from_ga(self, multivector, scalars):
         assert num_ga_components == 16
 
         norm = extract_scalar(multivector[:, :, 0, :])  # (batchsize, 1, 1)
-        return norm, None
+        return norm
diff --git a/tests_regression/regression_datasets/top_reconstruction.py b/tests_regression/regression_datasets/top_reconstruction.py
index 25f7072..4c88cb9 100644
--- a/tests_regression/regression_datasets/top_reconstruction.py
+++ b/tests_regression/regression_datasets/top_reconstruction.py
@@ -1,11 +1,8 @@
 import torch
 
-from experiments.base_wrapper import BaseWrapper
 from gatr.interface import embed_vector, extract_vector
 from tests_regression.regression_datasets.constants import DATASET_SIZE, DEVICE
 
-import matplotlib.pyplot as plt  # testing
-
 
 class TopReconstructionDataset(torch.utils.data.Dataset):
     """Toy dataset for reconstruction the mass of a top quark and W boson from
@@ -169,18 +166,43 @@ def __getitem__(self, idx):
         return self.event[idx], self.reco[idx]
 
 
-class TopReconstructionWrapper(BaseWrapper):
+class TopReconstructionWrapper(torch.nn.Module):
     """Wrapper around GATr networks for TopReconstructionDataset."""
 
     mv_in_channels = 1
     mv_out_channels = 2
     s_in_channels = 1
     s_out_channels = 1
-    # raw_in_channels = 4
-    # raw_out_channels = 1
 
     def __init__(self, net):
-        super().__init__(net, scalars=True, return_other=False)
+        super().__init__()
+        self.net = net
+
+    def forward(self, inputs: torch.Tensor):
+        """Wrapped forward pass pass.
+
+        Parses inputs into GA + scalar representation, calls the forward pass of the wrapped net,
+        and extracts the outputs from the GA + scalar representation again.
+
+        Parameters
+        ----------
+        inputs : torch.Tensor
+            Raw inputs, as given by dataset.
+
+        Returns
+        -------
+        outputs : torch.Tensor
+            Raw outputs, as expected in dataset.
+        """
+
+        multivector, scalars = self.embed_into_ga(inputs)
+        multivector_outputs, scalar_outputs = self.net(
+            multivector,
+            scalars=scalars,
+        )
+        outputs = self.extract_from_ga(multivector_outputs, scalar_outputs)
+
+        return outputs
 
     def embed_into_ga(self, inputs):
         """Embeds raw inputs into the geometric algebra (+ scalar) representation.
@@ -207,8 +229,8 @@ def embed_into_ga(self, inputs):
         multivector = multivector.unsqueeze(2)  # (batchsize, 3, 1, 16)
 
         scalars = torch.zeros(
-            (batchsize, 1, 1), device=inputs.device
-        )  # (batchsize, 1, 1)
+            (batchsize, 3, 1), device=inputs.device
+        )  # (batchsize, 3, 1)
         return multivector, scalars
 
     def extract_from_ga(self, multivector, scalars):
@@ -227,8 +249,6 @@ def extract_from_ga(self, multivector, scalars):
         -------
         outputs : torch.Tensor
             Raw outputs, as expected in dataset.
-        other : torch.Tensor
-            Additional output data, e.g. required for regularization.
         """
 
         _, num_objects, num_channels, num_ga_components = multivector.shape
@@ -241,4 +261,4 @@ def extract_from_ga(self, multivector, scalars):
         reco = torch.stack((pt, pW), dim=1)
         reco = reco[:, :, 0, :]  # pick first output channel
         # reco = reco.mean(dim=2)  # average over output channels (much worse, probably because mean breaks symmetry)
-        return reco, None
+        return reco
diff --git a/tests_regression/test_regression.py b/tests_regression/test_regression.py
index 1cbceec..b244d84 100644
--- a/tests_regression/test_regression.py
+++ b/tests_regression/test_regression.py
@@ -43,7 +43,8 @@ def gatr_factory(wrapper_class):
 @pytest.mark.parametrize("model_factory", [gatr_factory], ids=["GATr"])
 @pytest.mark.parametrize(
     "data,wrapper_class",
-    [  # (ParticleMassDataset(), ParticleMassWrapper),
+    [
+        (ParticleMassDataset(), ParticleMassWrapper),
         (TopReconstructionDataset(), TopReconstructionWrapper),
     ],
 )

From 03a322461ef89263f5defdb58e51d8d8e85ae5a6 Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Wed, 27 Nov 2024 09:45:09 +0100
Subject: [PATCH 16/19] For amplitudes DSI, define inv_mean and inv_std with register_buffer to make them part of the state_dict

---
 experiments/baselines/dsi.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/experiments/baselines/dsi.py b/experiments/baselines/dsi.py
index 5dcd178..4af951b 100644
--- a/experiments/baselines/dsi.py
+++ b/experiments/baselines/dsi.py
@@ -115,7 +115,8 @@ def __init__(
         # (could evaluate them pre training,
         # but we have large batchsizes
         # so no big difference expected)
-        self.inv_mean, self.inv_std = None, None
+        self.register_buffer("inv_mean", None)
+        self.register_buffer("inv_std", None)
 
     def _compute_invariants(self, particles):
         invariants = compute_invariants(particles)
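register_buffer with a None value is legal, but such a buffer is skipped by state_dict, which is why the next patch replaces the None placeholders with real tensors plus an inv_inited flag. A small illustration of that behaviour (my own sketch, not repository code):

import torch

class NoneBuffer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("inv_mean", None)  # registered, but not saved

class TensorBuffer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("inv_mean", torch.zeros(1, 1, 3))  # saved and loadable

print(list(NoneBuffer().state_dict().keys()))    # [] -> checkpoint silently misses the statistics
print(list(TensorBuffer().state_dict().keys()))  # ['inv_mean']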
From 9a7cacbea5275ff73fafcad29ae6102eae317acb Mon Sep 17 00:00:00 2001
From: Jonas Spinner
Date: Wed, 27 Nov 2024 10:03:16 +0100
Subject: [PATCH 17/19] Fix the last fix - setting buffers to None does not work

---
 experiments/baselines/dsi.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/experiments/baselines/dsi.py b/experiments/baselines/dsi.py
index 4af951b..48732cc 100644
--- a/experiments/baselines/dsi.py
+++ b/experiments/baselines/dsi.py
@@ -115,17 +115,18 @@ def __init__(
         # (could evaluate them pre training,
         # but we have large batchsizes
         # so no big difference expected)
-        self.register_buffer("inv_mean", None)
-        self.register_buffer("inv_std", None)
+        n_invariants = n * (n + 1) // 2 if self.use_invariants else 0
+        self.register_buffer("inv_inited", torch.tensor(False, dtype=torch.bool))
+        self.register_buffer("inv_mean", torch.zeros(1, 1, n_invariants))
+        self.register_buffer("inv_std", torch.ones(1, 1, n_invariants))
 
     def _compute_invariants(self, particles):
         invariants = compute_invariants(particles)
 
         # standardize
-        if self.inv_mean is None or self.inv_std is None:
+        if not self.inv_inited:
             self.inv_mean = invariants.mean(dim=-2, keepdim=True)
-            self.inv_std = invariants.std(dim=-2, keepdim=True)
-            self.inv_std = self.inv_std.clamp(min=1e-5)
+            self.inv_std = invariants.std(dim=-2, keepdim=True).clamp(min=1e-5)
         invariants = (invariants - self.inv_mean) / self.inv_std
         return invariants

From a559a363ee308562ad9a7475c4e2e058032fc3d5 Mon Sep 17 00:00:00 2001
From: vbpla
Date: Wed, 27 Nov 2024 15:55:32 +0100
Subject: [PATCH 18/19] Fixed a bug on the amplitude plot script and made the model store the grad norm even if we don't have clip_grad_norm

---
 experiments/amplitudes/plots.py | 2 +-
 experiments/base_experiment.py  | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/experiments/amplitudes/plots.py b/experiments/amplitudes/plots.py
index 8a82c40..c30a336 100644
--- a/experiments/amplitudes/plots.py
+++ b/experiments/amplitudes/plots.py
@@ -37,7 +37,7 @@ def plot_mixer(cfg, plot_path, title, plot_dict):
     for idataset, dataset in enumerate(cfg.data.dataset):
         data = [
             np.log(plot_dict["results_test"][dataset]["raw"]["truth"]),
-            np.log(plot_dict["results_test"][dataset]["raw"]["truth"]),
+            np.log(plot_dict["results_train"][dataset]["raw"]["truth"]),
             np.log(plot_dict["results_test"][dataset]["raw"]["prediction"]),
         ]
         plot_histograms(
diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 8509f3d..6e14be6 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -585,7 +585,14 @@ def _step(self, data, step):
                 .item()
             )
         else:
-            grad_norm = 0.0  # meaningless placeholder
+            grad_norm =grad_norm = (
+                torch.nn.utils.clip_grad_norm_(
+                    self.model.parameters(),
+                    float('inf'),
+                    error_if_nonfinite=False,
+                )
+                .cpu()
+                .item()
         if step > MIN_STEP_SKIP and self.cfg.training.max_grad_norm is not None:
             if grad_norm > self.cfg.training.max_grad_norm:
                 LOGGER.warning(

From d5c0bc751c48e141a801553e40b82d97467aea1f Mon Sep 17 00:00:00 2001
From: vbpla
Date: Thu, 28 Nov 2024 15:59:37 +0100
Subject: [PATCH 19/19] Shortened the clip_grad_norm calculation accounting for all options

---
 experiments/base_experiment.py | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/experiments/base_experiment.py b/experiments/base_experiment.py
index 6e14be6..1091651 100644
--- a/experiments/base_experiment.py
+++ b/experiments/base_experiment.py
@@ -574,25 +574,15 @@ def _step(self, data, step):
                 self.cfg.training.clip_grad_value,
             )
         # rescale gradients such that their norm matches a given number
-        if self.cfg.training.clip_grad_norm is not None:
-            grad_norm = (
-                torch.nn.utils.clip_grad_norm_(
-                    self.model.parameters(),
-                    self.cfg.training.clip_grad_norm,
-                    error_if_nonfinite=False,
-                )
-                .cpu()
-                .item()
+        grad_norm = (
+            torch.nn.utils.clip_grad_norm_(
+                self.model.parameters(),
+                self.cfg.training.clip_grad_norm if self.cfg.training.clip_grad_norm is not None else float('inf'),
+                error_if_nonfinite=False,
             )
-        else:
-            grad_norm =grad_norm = (
-                torch.nn.utils.clip_grad_norm_(
-                    self.model.parameters(),
-                    float('inf'),
-                    error_if_nonfinite=False,
-                )
-                .cpu()
-                .item()
+            .cpu()
+            .item()
+        )
         if step > MIN_STEP_SKIP and self.cfg.training.max_grad_norm is not None:
             if grad_norm > self.cfg.training.max_grad_norm:
                 LOGGER.warning(
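The consolidated expression works because clip_grad_norm_ with max_norm=float('inf') clamps the clip coefficient to 1, so it returns the total gradient norm while leaving the gradients untouched. A quick check of that behaviour (illustration only, not repository code):

import torch

model = torch.nn.Linear(4, 1)
model(torch.randn(8, 4)).pow(2).mean().backward()

before = [p.grad.clone() for p in model.parameters()]
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), float("inf"))
after = [p.grad for p in model.parameters()]

assert all(torch.equal(b, a) for b, a in zip(before, after))  # gradients unchanged
print("unclipped gradient norm:", total_norm.item())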