From 7b742a067873b9cf466e48104d9eb286c44a3859 Mon Sep 17 00:00:00 2001 From: Kye Gomez Date: Sun, 1 Sep 2024 17:26:50 -0400 Subject: [PATCH] [CLEANUP] Rename input_dim to dim across modules, docs, and tests --- README.md | 12 ++++---- docs/zeta/nn/modules/hebbian.md | 16 +++++----- docs/zeta/nn/modules/mmfusionffn.md | 22 ++++++------- docs/zeta/nn/modules/postnorm.md | 20 ++++++------ docs/zeta/nn/modules/vittransformerblock.md | 8 ++--- docs/zeta/rl/dpo.md | 8 ++--- docs/zeta/utils/save_load.md | 4 +-- pyproject.toml | 2 +- tests/nn/attentions/test_xc_attention.py | 8 ++--- tests/nn/embeddings/test_patch_embedding.py | 4 +-- tests/nn/modules/test_alr_block.py | 8 ++--- tests/nn/modules/test_cross_attn_images.py | 34 ++++++++++----------- tests/nn/modules/test_full_feedforward.py | 2 +- tests/nn/modules/test_hebbian.py | 10 +++--- tests/nn/modules/test_image_projector.py | 4 +-- tests/nn/modules/test_mm_adapter.py | 2 +- zeta/nn/modules/audio_to_text.py | 2 +- zeta/nn/modules/cog_vlm_two_adapter.py | 16 +++++----- zeta/nn/modules/crome_adapter.py | 14 ++++----- zeta/nn/modules/fusion_ffn.py | 8 ++--- zeta/nn/modules/hebbian.py | 14 ++++----- zeta/nn/modules/mm_ops.py | 8 ++--- zeta/nn/modules/multi_input_multi_output.py | 18 +++++------ zeta/nn/modules/omnimodal_fusion.py | 4 +-- zeta/nn/modules/simple_lstm.py | 8 ++--- zeta/nn/modules/simple_rnn.py | 2 +- zeta/rl/__init__.py | 5 +-- 27 files changed, 132 insertions(+), 131 deletions(-) diff --git a/README.md b/README.md index a35874fc..ed686b14 100644 --- a/README.md +++ b/README.md @@ -485,24 +485,24 @@ from zeta.rl import DPO # Define a simple policy model class PolicyModel(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.fc = nn.Linear(input_dim, output_dim) + self.fc = nn.Linear(dim, output_dim) def forward(self, x): return self.fc(x) -input_dim = 10 +dim = 10 output_dim = 5 -policy_model = PolicyModel(input_dim, output_dim) +policy_model = PolicyModel(dim, output_dim) # Initialize DPO with the policy model dpo_model = DPO(model=policy_model, beta=0.1) # Sample preferred and unpreferred sequences -preferred_seq = torch.randint(0, output_dim, (3, input_dim)) -unpreferred_seq = torch.randint(0, output_dim, (3, input_dim)) +preferred_seq = torch.randint(0, output_dim, (3, dim)) +unpreferred_seq = torch.randint(0, output_dim, (3, dim)) # Compute loss loss = dpo_model(preferred_seq, unpreferred_seq) diff --git a/docs/zeta/nn/modules/hebbian.md b/docs/zeta/nn/modules/hebbian.md index e98194cc..4b075d87 100644 --- a/docs/zeta/nn/modules/hebbian.md +++ b/docs/zeta/nn/modules/hebbian.md @@ -34,7 +34,7 @@ class BasicHebbianGRUModel(nn.Module): A basic Hebbian learning model combined with a GRU for text-based tasks. Parameters: - - input_dim (int): Dimension of the input features. + - dim (int): Dimension of the input features. - hidden_dim (int): Dimension of the hidden state in the GRU. - output_dim (int): Dimension of the output features. """ @@ -42,7 +42,7 @@ class BasicHebbianGRUModel(nn.Module): The `BasicHebbianGRUModel` class has the following attributes and methods: -- `input_dim` (int): Dimension of the input features. +- `dim` (int): Dimension of the input features. - `hidden_dim` (int): Dimension of the hidden state in the GRU. - `output_dim` (int): Dimension of the output features. @@ -53,10 +53,10 @@ The `BasicHebbianGRUModel` class has the following attributes and methods: To create an instance of the `BasicHebbianGRUModel`, you need to specify the dimensions of input, hidden state, and output features.
Here's how you can initialize the model: ```python -input_dim = 512  # Dimension of the input features +dim = 512  # Dimension of the input features hidden_dim = 256  # Dimension of the hidden state in the GRU output_dim = 128  # Dimension of the output features -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) ``` --- ## Forward Pass The forward pass of the model processes input data through several stages: @@ -73,7 +73,7 @@ The forward pass of the model processes input data through several stages: Here's how to perform a forward pass: ```python -# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, input_dim) +# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, dim) output = model(input_tensor) ``` @@ -84,16 +84,16 @@ output = model(input_tensor) ### Example 1: Model Initialization ```python -input_dim = 512 +dim = 512 hidden_dim = 256 output_dim = 128 -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) ``` ### Example 2: Forward Pass ```python -# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, input_dim) +# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, dim) output = model(input_tensor) ``` diff --git a/docs/zeta/nn/modules/mmfusionffn.md b/docs/zeta/nn/modules/mmfusionffn.md index de9f19f5..1cde915d 100644 --- a/docs/zeta/nn/modules/mmfusionffn.md +++ b/docs/zeta/nn/modules/mmfusionffn.md @@ -4,12 +4,12 @@ The `MMFusionFFN` module represents a positionwise feedforward layer and is used in the context of multi-modal image and text processing. #### Class Definition -- `MMFusionFFN(input_dim, hidden_dim, dropout=0.0)` +- `MMFusionFFN(dim, hidden_dim, output_dim, dropout=0.1)` #### Args | Name | Type | Description | Default | |--------------|-------|---------------------------------------|-----------| -| input_dim | int | Input dimension | - | +| dim | int | Input dimension | - | | hidden_dim | int | Hidden dimension | - | | output_dim | int | Output dimension | - | | dropout | float | Dropout probability. | 0.1 | @@ -32,34 +32,34 @@ from torch import nn from zeta.nn import MMFusionFFN # Define the input and hidden dimensions -input_dim = 512 +dim = 512 hidden_dim = 1024 output_dim = 512 dropout = 0.1 # Create an instance of MMFusionFFN -ffn = MMFusionFFN(input_dim, hidden_dim, output_dim, dropout) +ffn = MMFusionFFN(dim, hidden_dim, output_dim, dropout) # Example 1 - Forward pass with random input data input_data = torch.randn( - 5, 32, input_dim -) # Random input data of shape (5, 32, input_dim) + 5, 32, dim +) # Random input data of shape (5, 32, dim) output = ffn(input_data) print(output.shape) # Output tensor shape # Example 2 - Create an instance with default dropout -ffn_default_dropout = MMFusionFFN(input_dim, hidden_dim, output_dim) +ffn_default_dropout = MMFusionFFN(dim, hidden_dim, output_dim) # Example 3 - Forward pass with another input data input_data2 = torch.randn( - 8, 16, input_dim -) # Random input data of shape (8, 16, input_dim) + 8, 16, dim +) # Random input data of shape (8, 16, dim) output2 = ffn_default_dropout(input_data2) print(output2.shape) # Output tensor shape ``` #### Additional Information and Tips - The `MMFusionFFN` module is commonly used in multimodal machine learning applications to process multi-dimensional input data from different modalities, such as image and text. -- The most important parameters to consider when creating an instance of `MMFusionFFN` are `input_dim` and `hidden_dim`.
These parameters can be adjusted based on the specifics of the input data and the desired level of transformation. +- The most important parameters to consider when creating an instance of `MMFusionFFN` are `dim` and `hidden_dim`. These parameters can be adjusted based on the specifics of the input data and the desired level of transformation. - The `dropout` parameter controls the probability of an element to be zeroed in the forward pass, which can help prevent overfitting. #### References and Resources @@ -68,4 +68,4 @@ print(output2.shape) # Output tensor shape This comprehensive documentation provides a detailed overview of the `MMFusionFFN` module, including its purpose, architecture, usage examples, and additional information. Developers can now use this documentation to effectively utilize the module in their applications. -The examples illustrate how to create instances of `MMFusionFFN`, perform forward passes, and handle different input shapes, providing a practical guide for utilizing the module. Additionally, important attributes, such as `input_dim`, `hidden_dim`, and `dropout`, are explained in the class definition table for easy reference and understanding. +The examples illustrate how to create instances of `MMFusionFFN`, perform forward passes, and handle different input shapes, providing a practical guide for utilizing the module. Additionally, important attributes, such as `dim`, `hidden_dim`, and `dropout`, are explained in the class definition table for easy reference and understanding. diff --git a/docs/zeta/nn/modules/postnorm.md b/docs/zeta/nn/modules/postnorm.md index 8c74b0af..ead2c35e 100644 --- a/docs/zeta/nn/modules/postnorm.md +++ b/docs/zeta/nn/modules/postnorm.md @@ -27,10 +27,10 @@ from zeta.nn import PostNorm # Define a simple model class SimpleModel(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): super().__init__() - self.hidden_layer = nn.Linear(input_dim, hidden_dim) + self.hidden_layer = nn.Linear(dim, hidden_dim) self.postnorm_layer = PostNorm(hidden_dim, nn.Linear(hidden_dim, output_dim)) def forward(self, x): @@ -41,9 +41,9 @@ class SimpleModel(nn.Module): # Usage: -input_dim, hidden_dim, output_dim = 10, 20, 2 -model = SimpleModel(input_dim, hidden_dim, output_dim) -inputs = torch.randn(64, input_dim) +dim, hidden_dim, output_dim = 10, 20, 2 +model = SimpleModel(dim, hidden_dim, output_dim) +inputs = torch.randn(64, dim) outputs = model(inputs) print(f"Input Shape: {inputs.shape}\nOutput Shape: {outputs.shape}") @@ -60,9 +60,9 @@ from zeta.nn import PostNorm # Define a model architecture for image data class ImageModel(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): super().__init__() - self.fc1 = nn.Linear(input_dim, hidden_dim) + self.fc1 = nn.Linear(dim, hidden_dim) self.fc2 = nn.Linear(hidden_dim, output_dim) self.postnorm = PostNorm(output_dim, nn.ReLU()) @@ -73,9 +73,9 @@ class ImageModel(nn.Module): # Usage: -input_dim, hidden_dim, output_dim = 784, 256, 10 # Applicable for MNIST data -model = ImageModel(input_dim, hidden_dim, output_dim) -inputs = torch.randn(64, input_dim) +dim, hidden_dim, output_dim = 784, 256, 10 # Applicable for MNIST data +model = ImageModel(dim, hidden_dim, output_dim) +inputs = torch.randn(64, dim) outputs = model(inputs) print(f"Input Shape: {inputs.shape}\nOutput Shape: {outputs.shape}") diff --git a/docs/zeta/nn/modules/vittransformerblock.md 
b/docs/zeta/nn/modules/vittransformerblock.md index cffaa4db..2ab6faa1 100644 --- a/docs/zeta/nn/modules/vittransformerblock.md +++ b/docs/zeta/nn/modules/vittransformerblock.md @@ -22,7 +22,7 @@ Parameters: import torch import torch.nn as nn -input_dim = 256 +dim = 256 num_heads = 3 dim_head = 64 feedforward_dim = 512 @@ -30,7 +30,7 @@ expansion_factor = 3 dropout_rate = 0.1 transformer_block = VitTransformerBlock( - input_dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate + dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate ) input_tensor = torch.randn( 1, 3, 256, 512 @@ -38,14 +38,14 @@ input_tensor = torch.randn( output = transformer_block(input_tensor) # Usage example 2: -input_dim = 256 +dim = 256 num_heads = 4 dim_head = 64 feedforward_dim = 512 expansion_factor = 3 dropout_rate = 0.1 transformer_block = VitTransformerBlock( - input_dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate + dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate ) input_tensor = torch.randn( 1, 4, 64, 256 diff --git a/docs/zeta/rl/dpo.md b/docs/zeta/rl/dpo.md index 5867b89d..6279c6e3 100644 --- a/docs/zeta/rl/dpo.md +++ b/docs/zeta/rl/dpo.md @@ -43,17 +43,17 @@ from zeta.rl import DPO # Define a simple policy model class PolicyModel(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.fc = nn.Linear(input_dim, output_dim) + self.fc = nn.Linear(dim, output_dim) def forward(self, x): return self.fc(x) -input_dim = 10 +dim = 10 output_dim = 5 -policy_model = PolicyModel(input_dim, output_dim) +policy_model = PolicyModel(dim, output_dim) # Initialize DPO with the policy model dpo_model = DPO(model=policy_model, beta=0.1) diff --git a/docs/zeta/utils/save_load.md b/docs/zeta/utils/save_load.md index 07c303ac..e18babbf 100644 --- a/docs/zeta/utils/save_load.md +++ b/docs/zeta/utils/save_load.md @@ -67,9 +67,9 @@ from zeta.utils import save_load @save_load() class MyModel(Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.layer = Linear(input_dim, output_dim) + self.layer = Linear(dim, output_dim) def forward(self, x): return self.layer(x) diff --git a/pyproject.toml b/pyproject.toml index 7640601c..08490ce1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zetascale" -version = "2.7.0" +version = "2.7.1" description = "Rapidly Build, Optimize, and Train SOTA AI Models" authors = ["Zeta Team "] license = "MIT" diff --git a/tests/nn/attentions/test_xc_attention.py b/tests/nn/attentions/test_xc_attention.py index fdfc1615..9954e08c 100644 --- a/tests/nn/attentions/test_xc_attention.py +++ b/tests/nn/attentions/test_xc_attention.py @@ -61,11 +61,11 @@ def test_xc_attention_with_different_heads(): ) -def test_xc_attention_with_different_input_dims(): +def test_xc_attention_with_different_dims(): """Test case to check if XCAttention handles different input dimensions correctly.""" - input_dims = [128, 256, 512] + dims = [128, 256, 512] - for dim in input_dims: + for dim in dims: model = XCAttention(dim=dim, cond_dim=64, heads=8) assert isinstance(model, XCAttention) assert model.to_qkv[0].in_features == dim @@ -81,7 +81,7 @@ def test_xc_attention_with_different_cond_dims(): assert model.film[0].in_features == cond_dim * 2 -def test_xc_attention_negative_input_dim(): +def test_xc_attention_negative_dim(): """Test case to check if XCAttention handles negative 
input dimensions correctly.""" with pytest.raises(ValueError): XCAttention(dim=-256, cond_dim=64, heads=8) diff --git a/tests/nn/embeddings/test_patch_embedding.py b/tests/nn/embeddings/test_patch_embedding.py index bf78cccb..7f7fdbb0 100644 --- a/tests/nn/embeddings/test_patch_embedding.py +++ b/tests/nn/embeddings/test_patch_embedding.py @@ -52,7 +52,7 @@ def test_embedding_layers(): # Test case for different input dimensions -def test_different_input_dimensions(): +def test_different_dimensions(): dim_in = 3 dim_out = 4 seq_len = 5 @@ -63,7 +63,7 @@ def test_different_input_dimensions(): # Test case for large input dimensions -def test_large_input_dimensions(): +def test_large_dimensions(): dim_in = 256 dim_out = 512 seq_len = 16 diff --git a/tests/nn/modules/test_alr_block.py b/tests/nn/modules/test_alr_block.py index bc25b373..c8dcffc3 100644 --- a/tests/nn/modules/test_alr_block.py +++ b/tests/nn/modules/test_alr_block.py @@ -44,16 +44,16 @@ def test_alrblock_forward(sample_input, alrblock_model): # Parameterized testing for various input dimensions and dropout rates @pytest.mark.parametrize( - "input_dim, hidden_dim, dropout", + "dim, hidden_dim, dropout", [ (256, 1024, 0.2), (512, 2048, 0.0), (128, 512, 0.3), ], ) -def test_feedforward_parameterized(input_dim, hidden_dim, dropout): - model = FeedForward(input_dim, hidden_dim, dropout) - input_tensor = torch.randn(1, 1024, input_dim) +def test_feedforward_parameterized(dim, hidden_dim, dropout): + model = FeedForward(dim, hidden_dim, dropout) + input_tensor = torch.randn(1, 1024, dim) output = model(input_tensor) assert output.shape == input_tensor.shape diff --git a/tests/nn/modules/test_cross_attn_images.py b/tests/nn/modules/test_cross_attn_images.py index 219b5523..1f20bb2b 100644 --- a/tests/nn/modules/test_cross_attn_images.py +++ b/tests/nn/modules/test_cross_attn_images.py @@ -12,62 +12,62 @@ def cross_attention_module(): def test_forward_pass(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_conditional_layer_norm(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.qk = True # Enable conditional layer normalization output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_mask(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) mask = torch.randint(0, 2, (seq_len, seq_len), dtype=torch.bool) cross_attention_module.mask = mask output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_dropout(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, 
input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.dropout = nn.Dropout(0.5) # Set dropout rate to 50% output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_gradcheck(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim, requires_grad=True) + input_tensor = torch.randn(1, seq_len, dim, requires_grad=True) context_tensor = torch.randn(1, seq_len, context_dim, requires_grad=True) assert gradcheck( @@ -78,16 +78,16 @@ def test_gradcheck(cross_attention_module): def test_attention_strategy_average(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.attention_strategy = "average" output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, input_dim) + assert output.shape == (1, dim) if __name__ == "__main__": diff --git a/tests/nn/modules/test_full_feedforward.py b/tests/nn/modules/test_full_feedforward.py index 9d23b9c7..65993c87 100644 --- a/tests/nn/modules/test_full_feedforward.py +++ b/tests/nn/modules/test_full_feedforward.py @@ -76,7 +76,7 @@ def test_feed_forward_swish(feed_forwardim): assert output.shape == (1, 2048) -def test_feed_forward_input_dim_mismatch(): +def test_feed_forward_dim_mismatch(): with pytest.raises(ValueError): FeedForward(768, 1024, 0.1)(torch.randn(1, 512)) diff --git a/tests/nn/modules/test_hebbian.py b/tests/nn/modules/test_hebbian.py index 5d9e76be..6b149f2a 100644 --- a/tests/nn/modules/test_hebbian.py +++ b/tests/nn/modules/test_hebbian.py @@ -9,10 +9,10 @@ # Fixture for creating an instance of the model @pytest.fixture def model_instance(): - input_dim = 512 + dim = 512 hidden_dim = 256 output_dim = 128 - model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) + model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) return model @@ -25,8 +25,8 @@ def test_model_instantiation(model_instance): def test_forward_pass(model_instance): batch_size = 32 seqlen = 10 - input_dim = 512 - input_tensor = torch.randn(batch_size, seqlen, input_dim) + dim = 512 + input_tensor = torch.randn(batch_size, seqlen, dim) output = model_instance(input_tensor) assert output.shape == (batch_size, seqlen, model_instance.output_dim) @@ -39,7 +39,7 @@ def test_weights_initialization(model_instance): # Test case for input dimension matching -def test_input_dimension_matching(model_instance): +def test_dimension_matching(model_instance): input_tensor = torch.randn(16, 20, 512) with pytest.raises(RuntimeError): _ = model_instance(input_tensor) diff --git a/tests/nn/modules/test_image_projector.py b/tests/nn/modules/test_image_projector.py index fcd0a5ac..0bfc0aaa 100644 --- a/tests/nn/modules/test_image_projector.py +++ b/tests/nn/modules/test_image_projector.py @@ -169,9 +169,9 @@ def test_patch_projector_invalid_patch_size(): # Test case for custom projection function def test_patch_projector_custom_projection(sample_input_tensor): class CustomProjection(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.proj = nn.Linear(input_dim, output_dim) + self.proj = nn.Linear(dim, output_dim) def 
forward(self, x): return self.proj(x) diff --git a/tests/nn/modules/test_mm_adapter.py b/tests/nn/modules/test_mm_adapter.py index 7fef674c..221566dd 100644 --- a/tests/nn/modules/test_mm_adapter.py +++ b/tests/nn/modules/test_mm_adapter.py @@ -17,7 +17,7 @@ def test_creation(mm_adapter): # Example of a parameterized test with different input dimensions @pytest.mark.parametrize("dim", [256, 512, 1024]) -def test_input_dimensions(dim): +def test_dimensions(dim): mm_adapter = MultiModalAdapterDenseNetwork(dim=dim) assert mm_adapter.dim == dim diff --git a/zeta/nn/modules/audio_to_text.py b/zeta/nn/modules/audio_to_text.py index 92165f4d..6be316bd 100644 --- a/zeta/nn/modules/audio_to_text.py +++ b/zeta/nn/modules/audio_to_text.py @@ -7,7 +7,7 @@ def audio_to_text(x: Tensor, seqlen: int, dim: int, norm: bool = True): Reshapes and projects the audio input tensor to text representation. Args: - x (Tensor): Input audio tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): Input audio tensor of shape (batch_size, sequence_length, dim). seqlen (int): Length of the output sequence. dim (int): Dimension of the projected audio tensor. norm (bool, optional): Whether to apply layer normalization. Defaults to True. diff --git a/zeta/nn/modules/cog_vlm_two_adapter.py b/zeta/nn/modules/cog_vlm_two_adapter.py index 151a937a..4a8a2792 100644 --- a/zeta/nn/modules/cog_vlm_two_adapter.py +++ b/zeta/nn/modules/cog_vlm_two_adapter.py @@ -10,17 +10,17 @@ class CogVLMTwoAdapter(nn.Module): with linguistic representations using a 1D convolutional layer followed by a SwiGLU module. """ - def __init__(self, input_dim: int): + def __init__(self, dim: int): """ Initialize the CogVLMTwoAdapter module. Args: - input_dim (int): The dimension of the input features. + dim (int): The dimension of the input features. """ super(CogVLMTwoAdapter, self).__init__() self.conv = nn.Conv1d( - in_channels=input_dim, - out_channels=input_dim, + in_channels=dim, + out_channels=dim, kernel_size=2, stride=2, ) @@ -31,18 +31,18 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Forward pass of the CogVLMTwoAdapter module. Args: - x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, dim). Returns: torch.Tensor: The output tensor after applying the 1D convolution and SwiGLU module. 
""" - # Rearrange input tensor to match the expected input shape for Conv1d (batch, input_dim, sequence_length) + # Rearrange input tensor to match the expected input shape for Conv1d (batch, dim, sequence_length) x = rearrange(x, "b s d -> b d s") # Apply the convolution x = self.conv(x) - # Rearrange back to (batch, sequence_length, input_dim) + # Rearrange back to (batch, sequence_length, dim) x = rearrange(x, "b d s -> b s d") # Apply SwiGLU module @@ -55,5 +55,5 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # if __name__ == "__main__": # # Example input (batch, sequence_length, dimension) # x = torch.randn(2, 4, 3) # Adjust these dimensions as needed -# model = CogVLMTwoAdapter(input_dim=3) +# model = CogVLMTwoAdapter(dim=3) # print(model(x).shape) diff --git a/zeta/nn/modules/crome_adapter.py b/zeta/nn/modules/crome_adapter.py index 01c5fe87..239bd1a6 100644 --- a/zeta/nn/modules/crome_adapter.py +++ b/zeta/nn/modules/crome_adapter.py @@ -4,22 +4,22 @@ class CROMEAdapter(nn.Module): - def __init__(self, input_dim: int, bottleneck_dim: int): + def __init__(self, dim: int, bottleneck_dim: int): """ Initialize the CROMEAdapter module. Args: - input_dim (int): The dimension of the input features. + dim (int): The dimension of the input features. bottleneck_dim (int): The dimension of the bottleneck layer. """ super(CROMEAdapter, self).__init__() - self.Wd_text = nn.Linear(input_dim, bottleneck_dim) - self.Wg_text = nn.Linear(input_dim, bottleneck_dim) - self.Wd_image = nn.Linear(input_dim, bottleneck_dim) - self.Wg_image = nn.Linear(input_dim, bottleneck_dim) + self.Wd_text = nn.Linear(dim, bottleneck_dim) + self.Wg_text = nn.Linear(dim, bottleneck_dim) + self.Wd_image = nn.Linear(dim, bottleneck_dim) + self.Wg_image = nn.Linear(dim, bottleneck_dim) - self.Wu = nn.Linear(bottleneck_dim, input_dim) + self.Wu = nn.Linear(bottleneck_dim, dim) self.silu = nn.SiLU() diff --git a/zeta/nn/modules/fusion_ffn.py b/zeta/nn/modules/fusion_ffn.py index c206b1a7..04d01d5d 100644 --- a/zeta/nn/modules/fusion_ffn.py +++ b/zeta/nn/modules/fusion_ffn.py @@ -6,22 +6,22 @@ class MMFusionFFN(nn.Module): r"""Positionwise feed forward layer. Args: - input_dim (int): input dimension. + dim (int): input dimension. hidden_dim (int): hidden dimension. dropout (float, optional): dropout probability. (Default: 0.0) """ def __init__( self, - input_dim: int, + dim: int, hidden_dim: int, output_dim: int, dropout: float = 0.1, ) -> None: super().__init__() self.net = nn.Sequential( - nn.LayerNorm(input_dim), - nn.Linear(input_dim, hidden_dim, bias=True), + nn.LayerNorm(dim), + nn.Linear(dim, hidden_dim, bias=True), nn.SiLU(), nn.Dropout(dropout), nn.Linear(hidden_dim, output_dim, bias=True), diff --git a/zeta/nn/modules/hebbian.py b/zeta/nn/modules/hebbian.py index 1e98e4c7..03bfe5b6 100644 --- a/zeta/nn/modules/hebbian.py +++ b/zeta/nn/modules/hebbian.py @@ -12,7 +12,7 @@ class BasicHebbianGRUModel(nn.Module): introducing non-linearity. Parameters: - - input_dim: Dimension of the input features. + - dim: Dimension of the input features. - hidden_dim: Dimension of the hidden state in the GRU. - output_dim: Dimension of the output features. @@ -20,17 +20,17 @@ class BasicHebbianGRUModel(nn.Module): GRU, and finally applies a ReLU activation. """ - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): """ Initializes the Basic Hebbian GRU model. Args: - - input_dim: Dimension of the input features. + - dim: Dimension of the input features. 
- hidden_dim: Dimension of the hidden state in the GRU. - output_dim: Dimension of the output features. """ super().__init__() - self.weights = nn.Parameter(torch.randn(input_dim, hidden_dim)) + self.weights = nn.Parameter(torch.randn(dim, hidden_dim)) self.gru = nn.GRU(hidden_dim, hidden_dim, batch_first=True) self.fc = nn.Linear(hidden_dim, output_dim) @@ -39,7 +39,7 @@ def forward(self, x): Forward pass of the model. Args: - - x: Input tensor of shape (B, Seqlen, input_dim) + - x: Input tensor of shape (B, Seqlen, dim) Returns: - Output tensor of shape (B, Seqlen, output_dim) @@ -59,10 +59,10 @@ def forward(self, x): # # Example usage -input_dim = 512 # Dimension of the input features +dim = 512 # Dimension of the input features hidden_dim = 256 # Dimension of the hidden state in the GRU output_dim = 128 # Dimension of the output features -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) x = torch.randn(1, 512, 512) output = model(x) diff --git a/zeta/nn/modules/mm_ops.py b/zeta/nn/modules/mm_ops.py index 97ed4217..52c6e616 100644 --- a/zeta/nn/modules/mm_ops.py +++ b/zeta/nn/modules/mm_ops.py @@ -9,13 +9,13 @@ def threed_to_text( Converts a 3D tensor to text representation. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). max_seq_len (int): The maximum sequence length of the output tensor. dim (int): The dimension of the intermediate tensor. flatten (bool, optional): Whether to flatten the intermediate tensor. Defaults to False. Returns: - Tensor: The output tensor of shape (batch_size, max_seq_len, input_dim). + Tensor: The output tensor of shape (batch_size, max_seq_len, dim). """ b, s, d = x.shape @@ -29,11 +29,11 @@ def threed_to_text( def text_to_twod(x: Tensor, dim: int): """ - Converts a 3D tensor of shape (batch_size, sequence_length, input_dim) to a 2D tensor of shape (batch_size, dim) + Converts a 3D tensor of shape (batch_size, sequence_length, dim) to a 2D tensor of shape (batch_size, dim) by averaging the sequence dimension and applying a linear transformation. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). dim (int): The output dimension. Returns: diff --git a/zeta/nn/modules/multi_input_multi_output.py b/zeta/nn/modules/multi_input_multi_output.py index 34d1b312..4333e12e 100644 --- a/zeta/nn/modules/multi_input_multi_output.py +++ b/zeta/nn/modules/multi_input_multi_output.py @@ -154,7 +154,7 @@ class DynamicOutputDecoder(nn.Module): Decoder module for dynamic output. Args: - input_dim (int): The input dimension. + dim (int): The input dimension. robot_count (int): The number of robots. Attributes: @@ -162,10 +162,10 @@ class DynamicOutputDecoder(nn.Module): """ - def __init__(self, input_dim, robot_count): + def __init__(self, dim, robot_count): super().__init__() self.decoders = nn.ModuleList( - [nn.Linear(input_dim, input_dim) for _ in range(robot_count)] + [nn.Linear(dim, dim) for _ in range(robot_count)] ) def forward(self, x): @@ -188,7 +188,7 @@ class DynamicInputChannels(nn.Module): Args: num_robots (int): The number of robots. - input_dim (int): The input dimension. + dim (int): The input dimension. output_dim (int): The output dimension. 
Attributes: @@ -199,10 +199,10 @@ class DynamicInputChannels(nn.Module): """ - def __init__(self, num_robots, input_dim, output_dim): + def __init__(self, num_robots, dim, output_dim): super().__init__() self.layers = nn.ModuleList( - [nn.Linear(input_dim, output_dim) for _ in range(num_robots)] + [nn.Linear(dim, output_dim) for _ in range(num_robots)] ) def forward(self, x): @@ -216,7 +216,7 @@ class OutputDecoders(nn.Module): Args: num_robots (int): The number of robots. - input_dim (int): The input dimension. + dim (int): The input dimension. output_dim (int): The output dimension. Attributes: @@ -227,10 +227,10 @@ class OutputDecoders(nn.Module): """ - def __init__(self, num_robots, input_dim, output_dim): + def __init__(self, num_robots, dim, output_dim): super().__init__() self.decoders = nn.ModuleList( - [nn.Linear(input_dim, output_dim) for _ in range(num_robots)] + [nn.Linear(dim, output_dim) for _ in range(num_robots)] ) def forward(self, x): diff --git a/zeta/nn/modules/omnimodal_fusion.py b/zeta/nn/modules/omnimodal_fusion.py index a6e35a9b..c32d59aa 100644 --- a/zeta/nn/modules/omnimodal_fusion.py +++ b/zeta/nn/modules/omnimodal_fusion.py @@ -31,12 +31,12 @@ def __init__( def forward(self, *modalities: torch.Tensor) -> torch.Tensor: # Dynamically add encoders for new modalities while len(self.modality_encoders) < len(modalities): - input_dim = modalities[ + dim = modalities[ len(self.modality_encoders) ].nelement() // modalities[len(self.modality_encoders)].size( 0 ) # Compute flattened input dimension - self.modality_encoders.append(nn.Linear(input_dim, self.fusion_dim)) + self.modality_encoders.append(nn.Linear(dim, self.fusion_dim)) embeddings = [] for i, modality in enumerate(modalities): diff --git a/zeta/nn/modules/simple_lstm.py b/zeta/nn/modules/simple_lstm.py index 7d6e5e0e..a4365e99 100644 --- a/zeta/nn/modules/simple_lstm.py +++ b/zeta/nn/modules/simple_lstm.py @@ -33,7 +33,7 @@ def forward(self, x: Tensor, h: Tensor, c: Tensor) -> Tensor: Forward pass of the Simple LSTM cell. Args: - x (Tensor): The input tensor of shape (batch_size, input_dim). + x (Tensor): The input tensor of shape (batch_size, dim). h (Tensor): The previous hidden state tensor of shape (batch_size, hidden_dim). c (Tensor): The previous cell state tensor of shape (batch_size, hidden_dim). @@ -148,12 +148,12 @@ def forward(self, x: Tensor) -> Tensor: # if __name__ == "__main__": # batch_size = 32 # seq_length = 10 -# input_dim = 50 +# dim = 50 # hidden_dim = 100 # num_layers = 2 # output_dim = 30 -# model = SimpleLSTM(input_dim, hidden_dim, num_layers, output_dim) -# inputs = torch.randn(batch_size, seq_length, input_dim) +# model = SimpleLSTM(dim, hidden_dim, num_layers, output_dim) +# inputs = torch.randn(batch_size, seq_length, dim) # outputs = model(inputs) # print(outputs) # Expected output shape: (batch_size, seq_length, output_dim) diff --git a/zeta/nn/modules/simple_rnn.py b/zeta/nn/modules/simple_rnn.py index c6da2de6..17d0f139 100644 --- a/zeta/nn/modules/simple_rnn.py +++ b/zeta/nn/modules/simple_rnn.py @@ -30,7 +30,7 @@ def forward(self, x: Tensor) -> Tensor: Forward pass of the simple RNN module. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). Returns: Tensor: The output tensor of shape (batch_size, sequence_length, hidden_dim). 
diff --git a/zeta/rl/__init__.py b/zeta/rl/__init__.py index a6877adc..54ac1b1f 100644 --- a/zeta/rl/__init__.py +++ b/zeta/rl/__init__.py @@ -7,10 +7,11 @@ ) from zeta.rl.hindsight_replay import HindsightExperienceReplay from zeta.rl.language_reward import LanguageReward -from zeta.rl.rewardim import RewardModel + +# from zeta.rl.rewardim import RewardModel __all__ = [ - "RewardModel", + # "RewardModel", "ActorCritic", "ppo", "HindsightExperienceReplay",
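For downstream users, the practical API change in this patch is the rename of the `input_dim` constructor argument to `dim` across the modules above, along with the `RewardModel` export being commented out of `zeta.rl`. Positional call sites keep working; keyword call sites need a one-token update. Below is a minimal before/after sketch using `BasicHebbianGRUModel`, with the import path and tensor shapes taken from the diff itself (`zeta/nn/modules/hebbian.py` and `tests/nn/modules/test_hebbian.py`); it is illustrative only and assumes `zetascale` 2.7.1 as patched here:

```python
import torch

from zeta.nn.modules.hebbian import BasicHebbianGRUModel

# Positional construction is unaffected by the input_dim -> dim rename.
model = BasicHebbianGRUModel(512, 256, 128)

# Keyword construction must be updated for 2.7.1:
# 2.7.0 and earlier: BasicHebbianGRUModel(input_dim=512, hidden_dim=256, output_dim=128)
model = BasicHebbianGRUModel(dim=512, hidden_dim=256, output_dim=128)

# Forward pass maps (batch, seq_len, dim) -> (batch, seq_len, output_dim),
# matching the shapes asserted in tests/nn/modules/test_hebbian.py above.
x = torch.randn(2, 10, 512)
out = model(x)
print(out.shape)  # torch.Size([2, 10, 128])
```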