From 7b742a067873b9cf466e48104d9eb286c44a3859 Mon Sep 17 00:00:00 2001 From: Kye Gomez Date: Sun, 1 Sep 2024 17:26:50 -0400 Subject: [PATCH] [CLEANUP] Rename input_dim to dim across modules, docs, and tests --- README.md | 12 ++++---- docs/zeta/nn/modules/hebbian.md | 16 +++++----- docs/zeta/nn/modules/mmfusionffn.md | 22 ++++++------- docs/zeta/nn/modules/postnorm.md | 20 ++++++------ docs/zeta/nn/modules/vittransformerblock.md | 8 ++--- docs/zeta/rl/dpo.md | 8 ++--- docs/zeta/utils/save_load.md | 4 +-- pyproject.toml | 2 +- tests/nn/attentions/test_xc_attention.py | 8 ++--- tests/nn/embeddings/test_patch_embedding.py | 4 +-- tests/nn/modules/test_alr_block.py | 8 ++--- tests/nn/modules/test_cross_attn_images.py | 34 ++++++++++----------- tests/nn/modules/test_full_feedforward.py | 2 +- tests/nn/modules/test_hebbian.py | 10 +++--- tests/nn/modules/test_image_projector.py | 4 +-- tests/nn/modules/test_mm_adapter.py | 2 +- zeta/nn/modules/audio_to_text.py | 2 +- zeta/nn/modules/cog_vlm_two_adapter.py | 16 +++++----- zeta/nn/modules/crome_adapter.py | 14 ++++----- zeta/nn/modules/fusion_ffn.py | 8 ++--- zeta/nn/modules/hebbian.py | 14 ++++----- zeta/nn/modules/mm_ops.py | 8 ++--- zeta/nn/modules/multi_input_multi_output.py | 18 +++++------ zeta/nn/modules/omnimodal_fusion.py | 4 +-- zeta/nn/modules/simple_lstm.py | 8 ++--- zeta/nn/modules/simple_rnn.py | 2 +- zeta/rl/__init__.py | 5 +-- 27 files changed, 132 insertions(+), 131 deletions(-) diff --git a/README.md b/README.md index a35874fc..ed686b14 100644 --- a/README.md +++ b/README.md @@ -485,24 +485,24 @@ from zeta.rl import DPO # Define a simple policy model class PolicyModel(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.fc = nn.Linear(input_dim, output_dim) + self.fc = nn.Linear(dim, output_dim) def forward(self, x): return self.fc(x) -input_dim = 10 +dim = 10 output_dim = 5 -policy_model = PolicyModel(input_dim, output_dim) +policy_model = PolicyModel(dim, output_dim) # Initialize DPO with the policy model dpo_model = DPO(model=policy_model, beta=0.1) # Sample preferred and unpreferred sequences -preferred_seq = torch.randint(0, output_dim, (3, input_dim)) -unpreferred_seq = torch.randint(0, output_dim, (3, input_dim)) +preferred_seq = torch.randint(0, output_dim, (3, dim)) +unpreferred_seq = torch.randint(0, output_dim, (3, dim)) # Compute loss loss = dpo_model(preferred_seq, unpreferred_seq) diff --git a/docs/zeta/nn/modules/hebbian.md b/docs/zeta/nn/modules/hebbian.md index e98194cc..4b075d87 100644 --- a/docs/zeta/nn/modules/hebbian.md +++ b/docs/zeta/nn/modules/hebbian.md @@ -34,7 +34,7 @@ class BasicHebbianGRUModel(nn.Module): A basic Hebbian learning model combined with a GRU for text-based tasks. Parameters: - - input_dim (int): Dimension of the input features. + - dim (int): Dimension of the input features. - hidden_dim (int): Dimension of the hidden state in the GRU. - output_dim (int): Dimension of the output features. """ @@ -42,7 +42,7 @@ class BasicHebbianGRUModel(nn.Module): The `BasicHebbianGRUModel` class has the following attributes and methods: -- `input_dim` (int): Dimension of the input features. +- `dim` (int): Dimension of the input features. - `hidden_dim` (int): Dimension of the hidden state in the GRU. - `output_dim` (int): Dimension of the output features. @@ -53,10 +53,10 @@ The `BasicHebbianGRUModel` class has the following attributes and methods: To create an instance of the `BasicHebbianGRUModel`, you need to specify the dimensions of input, hidden state, and output features.
Here's how you can initialize the model: ```python -input_dim = 512  # Dimension of the input features +dim = 512  # Dimension of the input features hidden_dim = 256  # Dimension of the hidden state in the GRU output_dim = 128  # Dimension of the output features -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) ``` --- ## Forward Pass The forward pass of the model processes input data through several stages: @@ -73,7 +73,7 @@ The forward pass of the model processes input data through several stages: Here's how to perform a forward pass: ```python -# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, input_dim) +# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, dim) output = model(input_tensor) ``` @@ -84,16 +84,16 @@ output = model(input_tensor) ### Example 1: Model Initialization ```python -input_dim = 512 +dim = 512 hidden_dim = 256 output_dim = 128 -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) ``` ### Example 2: Forward Pass ```python -# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, input_dim) +# Assuming input_tensor is a 3D tensor of shape (B, Seqlen, dim) output = model(input_tensor) ``` diff --git a/docs/zeta/nn/modules/mmfusionffn.md b/docs/zeta/nn/modules/mmfusionffn.md index de9f19f5..1cde915d 100644 --- a/docs/zeta/nn/modules/mmfusionffn.md +++ b/docs/zeta/nn/modules/mmfusionffn.md @@ -4,12 +4,12 @@ The `MMFusionFFN` module represents a positionwise feedforward layer and is used in the context of multi-modal image and text processing. #### Class Definition -- `MMFusionFFN(input_dim, hidden_dim, dropout=0.0)` +- `MMFusionFFN(dim, hidden_dim, output_dim, dropout=0.1)` #### Args | Name | Type | Description | Default | |--------------|-------|---------------------------------------|-----------| -| input_dim | int | Input dimension | - | +| dim | int | Input dimension | - | | hidden_dim | int | Hidden dimension | - | | output_dim | int | Output dimension | - | | dropout | float | Dropout probability. | 0.1 | @@ -32,34 +32,34 @@ from torch import nn from zeta.nn import MMFusionFFN # Define the input and hidden dimensions -input_dim = 512 +dim = 512 hidden_dim = 1024 output_dim = 512 dropout = 0.1 # Create an instance of MMFusionFFN -ffn = MMFusionFFN(input_dim, hidden_dim, output_dim, dropout) +ffn = MMFusionFFN(dim, hidden_dim, output_dim, dropout) # Example 1 - Forward pass with random input data input_data = torch.randn( - 5, 32, input_dim -) # Random input data of shape (5, 32, input_dim) + 5, 32, dim +) # Random input data of shape (5, 32, dim) output = ffn(input_data) print(output.shape) # Output tensor shape # Example 2 - Create an instance with default dropout -ffn_default_dropout = MMFusionFFN(input_dim, hidden_dim, output_dim) +ffn_default_dropout = MMFusionFFN(dim, hidden_dim, output_dim) # Example 3 - Forward pass with another input data input_data2 = torch.randn( - 8, 16, input_dim -) # Random input data of shape (8, 16, input_dim) + 8, 16, dim +) # Random input data of shape (8, 16, dim) output2 = ffn_default_dropout(input_data2) print(output2.shape) # Output tensor shape ``` #### Additional Information and Tips - The `MMFusionFFN` module is commonly used in multimodal machine learning applications to process multi-dimensional input data from different modalities, such as image and text. -- The most important parameters to consider when creating an instance of `MMFusionFFN` are `input_dim` and `hidden_dim`.
These parameters can be adjusted based on the specifics of the input data and the desired level of transformation. +- The most important parameters to consider when creating an instance of `MMFusionFFN` are `dim` and `hidden_dim`. These parameters can be adjusted based on the specifics of the input data and the desired level of transformation. - The `dropout` parameter controls the probability of an element to be zeroed in the forward pass, which can help prevent overfitting. #### References and Resources @@ -68,4 +68,4 @@ print(output2.shape) # Output tensor shape This comprehensive documentation provides a detailed overview of the `MMFusionFFN` module, including its purpose, architecture, usage examples, and additional information. Developers can now use this documentation to effectively utilize the module in their applications. -The examples illustrate how to create instances of `MMFusionFFN`, perform forward passes, and handle different input shapes, providing a practical guide for utilizing the module. Additionally, important attributes, such as `input_dim`, `hidden_dim`, and `dropout`, are explained in the class definition table for easy reference and understanding. +The examples illustrate how to create instances of `MMFusionFFN`, perform forward passes, and handle different input shapes, providing a practical guide for utilizing the module. Additionally, important attributes, such as `dim`, `hidden_dim`, and `dropout`, are explained in the class definition table for easy reference and understanding. diff --git a/docs/zeta/nn/modules/postnorm.md b/docs/zeta/nn/modules/postnorm.md index 8c74b0af..ead2c35e 100644 --- a/docs/zeta/nn/modules/postnorm.md +++ b/docs/zeta/nn/modules/postnorm.md @@ -27,10 +27,10 @@ from zeta.nn import PostNorm # Define a simple model class SimpleModel(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): super().__init__() - self.hidden_layer = nn.Linear(input_dim, hidden_dim) + self.hidden_layer = nn.Linear(dim, hidden_dim) self.postnorm_layer = PostNorm(hidden_dim, nn.Linear(hidden_dim, output_dim)) def forward(self, x): @@ -41,9 +41,9 @@ class SimpleModel(nn.Module): # Usage: -input_dim, hidden_dim, output_dim = 10, 20, 2 -model = SimpleModel(input_dim, hidden_dim, output_dim) -inputs = torch.randn(64, input_dim) +dim, hidden_dim, output_dim = 10, 20, 2 +model = SimpleModel(dim, hidden_dim, output_dim) +inputs = torch.randn(64, dim) outputs = model(inputs) print(f"Input Shape: {inputs.shape}\nOutput Shape: {outputs.shape}") @@ -60,9 +60,9 @@ from zeta.nn import PostNorm # Define a model architecture for image data class ImageModel(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): super().__init__() - self.fc1 = nn.Linear(input_dim, hidden_dim) + self.fc1 = nn.Linear(dim, hidden_dim) self.fc2 = nn.Linear(hidden_dim, output_dim) self.postnorm = PostNorm(output_dim, nn.ReLU()) @@ -73,9 +73,9 @@ class ImageModel(nn.Module): # Usage: -input_dim, hidden_dim, output_dim = 784, 256, 10 # Applicable for MNIST data -model = ImageModel(input_dim, hidden_dim, output_dim) -inputs = torch.randn(64, input_dim) +dim, hidden_dim, output_dim = 784, 256, 10 # Applicable for MNIST data +model = ImageModel(dim, hidden_dim, output_dim) +inputs = torch.randn(64, dim) outputs = model(inputs) print(f"Input Shape: {inputs.shape}\nOutput Shape: {outputs.shape}") diff --git a/docs/zeta/nn/modules/vittransformerblock.md 
b/docs/zeta/nn/modules/vittransformerblock.md index cffaa4db..2ab6faa1 100644 --- a/docs/zeta/nn/modules/vittransformerblock.md +++ b/docs/zeta/nn/modules/vittransformerblock.md @@ -22,7 +22,7 @@ Parameters: import torch import torch.nn as nn -input_dim = 256 +dim = 256 num_heads = 3 dim_head = 64 feedforward_dim = 512 @@ -30,7 +30,7 @@ expansion_factor = 3 dropout_rate = 0.1 transformer_block = VitTransformerBlock( - input_dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate + dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate ) input_tensor = torch.randn( 1, 3, 256, 512 @@ -38,14 +38,14 @@ input_tensor = torch.randn( output = transformer_block(input_tensor) # Usage example 2: -input_dim = 256 +dim = 256 num_heads = 4 dim_head = 64 feedforward_dim = 512 expansion_factor = 3 dropout_rate = 0.1 transformer_block = VitTransformerBlock( - input_dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate + dim, num_heads, dim_head, feedforward_dim, expansion_factor, dropout_rate ) input_tensor = torch.randn( 1, 4, 64, 256 diff --git a/docs/zeta/rl/dpo.md b/docs/zeta/rl/dpo.md index 5867b89d..6279c6e3 100644 --- a/docs/zeta/rl/dpo.md +++ b/docs/zeta/rl/dpo.md @@ -43,17 +43,17 @@ from zeta.rl import DPO # Define a simple policy model class PolicyModel(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.fc = nn.Linear(input_dim, output_dim) + self.fc = nn.Linear(dim, output_dim) def forward(self, x): return self.fc(x) -input_dim = 10 +dim = 10 output_dim = 5 -policy_model = PolicyModel(input_dim, output_dim) +policy_model = PolicyModel(dim, output_dim) # Initialize DPO with the policy model dpo_model = DPO(model=policy_model, beta=0.1) diff --git a/docs/zeta/utils/save_load.md b/docs/zeta/utils/save_load.md index 07c303ac..e18babbf 100644 --- a/docs/zeta/utils/save_load.md +++ b/docs/zeta/utils/save_load.md @@ -67,9 +67,9 @@ from zeta.utils import save_load @save_load() class MyModel(Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.layer = Linear(input_dim, output_dim) + self.layer = Linear(dim, output_dim) def forward(self, x): return self.layer(x) diff --git a/pyproject.toml b/pyproject.toml index 7640601c..08490ce1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zetascale" -version = "2.7.0" +version = "2.7.1" description = "Rapidly Build, Optimize, and Train SOTA AI Models" authors = ["Zeta Team "] license = "MIT" diff --git a/tests/nn/attentions/test_xc_attention.py b/tests/nn/attentions/test_xc_attention.py index fdfc1615..9954e08c 100644 --- a/tests/nn/attentions/test_xc_attention.py +++ b/tests/nn/attentions/test_xc_attention.py @@ -61,11 +61,11 @@ def test_xc_attention_with_different_heads(): ) -def test_xc_attention_with_different_input_dims(): +def test_xc_attention_with_different_dims(): """Test case to check if XCAttention handles different input dimensions correctly.""" - input_dims = [128, 256, 512] + dims = [128, 256, 512] - for dim in input_dims: + for dim in dims: model = XCAttention(dim=dim, cond_dim=64, heads=8) assert isinstance(model, XCAttention) assert model.to_qkv[0].in_features == dim @@ -81,7 +81,7 @@ def test_xc_attention_with_different_cond_dims(): assert model.film[0].in_features == cond_dim * 2 -def test_xc_attention_negative_input_dim(): +def test_xc_attention_negative_dim(): """Test case to check if XCAttention handles negative 
input dimensions correctly.""" with pytest.raises(ValueError): XCAttention(dim=-256, cond_dim=64, heads=8) diff --git a/tests/nn/embeddings/test_patch_embedding.py b/tests/nn/embeddings/test_patch_embedding.py index bf78cccb..7f7fdbb0 100644 --- a/tests/nn/embeddings/test_patch_embedding.py +++ b/tests/nn/embeddings/test_patch_embedding.py @@ -52,7 +52,7 @@ def test_embedding_layers(): # Test case for different input dimensions -def test_different_input_dimensions(): +def test_different_dimensions(): dim_in = 3 dim_out = 4 seq_len = 5 @@ -63,7 +63,7 @@ def test_different_input_dimensions(): # Test case for large input dimensions -def test_large_input_dimensions(): +def test_large_dimensions(): dim_in = 256 dim_out = 512 seq_len = 16 diff --git a/tests/nn/modules/test_alr_block.py b/tests/nn/modules/test_alr_block.py index bc25b373..c8dcffc3 100644 --- a/tests/nn/modules/test_alr_block.py +++ b/tests/nn/modules/test_alr_block.py @@ -44,16 +44,16 @@ def test_alrblock_forward(sample_input, alrblock_model): # Parameterized testing for various input dimensions and dropout rates @pytest.mark.parametrize( - "input_dim, hidden_dim, dropout", + "dim, hidden_dim, dropout", [ (256, 1024, 0.2), (512, 2048, 0.0), (128, 512, 0.3), ], ) -def test_feedforward_parameterized(input_dim, hidden_dim, dropout): - model = FeedForward(input_dim, hidden_dim, dropout) - input_tensor = torch.randn(1, 1024, input_dim) +def test_feedforward_parameterized(dim, hidden_dim, dropout): + model = FeedForward(dim, hidden_dim, dropout) + input_tensor = torch.randn(1, 1024, dim) output = model(input_tensor) assert output.shape == input_tensor.shape diff --git a/tests/nn/modules/test_cross_attn_images.py b/tests/nn/modules/test_cross_attn_images.py index 219b5523..1f20bb2b 100644 --- a/tests/nn/modules/test_cross_attn_images.py +++ b/tests/nn/modules/test_cross_attn_images.py @@ -12,62 +12,62 @@ def cross_attention_module(): def test_forward_pass(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_conditional_layer_norm(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.qk = True # Enable conditional layer normalization output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_mask(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) mask = torch.randint(0, 2, (seq_len, seq_len), dtype=torch.bool) cross_attention_module.mask = mask output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_forward_pass_with_dropout(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, 
input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.dropout = nn.Dropout(0.5) # Set dropout rate to 50% output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, seq_len, input_dim) + assert output.shape == (1, seq_len, dim) def test_gradcheck(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim, requires_grad=True) + input_tensor = torch.randn(1, seq_len, dim, requires_grad=True) context_tensor = torch.randn(1, seq_len, context_dim, requires_grad=True) assert gradcheck( @@ -78,16 +78,16 @@ def test_gradcheck(cross_attention_module): def test_attention_strategy_average(cross_attention_module): - input_dim = 1024 + dim = 1024 seq_len = 32 context_dim = 1024 - input_tensor = torch.randn(1, seq_len, input_dim) + input_tensor = torch.randn(1, seq_len, dim) context_tensor = torch.randn(1, seq_len, context_dim) cross_attention_module.attention_strategy = "average" output = cross_attention_module(input_tensor, context_tensor) - assert output.shape == (1, input_dim) + assert output.shape == (1, dim) if __name__ == "__main__": diff --git a/tests/nn/modules/test_full_feedforward.py b/tests/nn/modules/test_full_feedforward.py index 9d23b9c7..65993c87 100644 --- a/tests/nn/modules/test_full_feedforward.py +++ b/tests/nn/modules/test_full_feedforward.py @@ -76,7 +76,7 @@ def test_feed_forward_swish(feed_forwardim): assert output.shape == (1, 2048) -def test_feed_forward_input_dim_mismatch(): +def test_feed_forward_dim_mismatch(): with pytest.raises(ValueError): FeedForward(768, 1024, 0.1)(torch.randn(1, 512)) diff --git a/tests/nn/modules/test_hebbian.py b/tests/nn/modules/test_hebbian.py index 5d9e76be..6b149f2a 100644 --- a/tests/nn/modules/test_hebbian.py +++ b/tests/nn/modules/test_hebbian.py @@ -9,10 +9,10 @@ # Fixture for creating an instance of the model @pytest.fixture def model_instance(): - input_dim = 512 + dim = 512 hidden_dim = 256 output_dim = 128 - model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) + model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) return model @@ -25,8 +25,8 @@ def test_model_instantiation(model_instance): def test_forward_pass(model_instance): batch_size = 32 seqlen = 10 - input_dim = 512 - input_tensor = torch.randn(batch_size, seqlen, input_dim) + dim = 512 + input_tensor = torch.randn(batch_size, seqlen, dim) output = model_instance(input_tensor) assert output.shape == (batch_size, seqlen, model_instance.output_dim) @@ -39,7 +39,7 @@ def test_weights_initialization(model_instance): # Test case for input dimension matching -def test_input_dimension_matching(model_instance): +def test_dimension_matching(model_instance): input_tensor = torch.randn(16, 20, 512) with pytest.raises(RuntimeError): _ = model_instance(input_tensor) diff --git a/tests/nn/modules/test_image_projector.py b/tests/nn/modules/test_image_projector.py index fcd0a5ac..0bfc0aaa 100644 --- a/tests/nn/modules/test_image_projector.py +++ b/tests/nn/modules/test_image_projector.py @@ -169,9 +169,9 @@ def test_patch_projector_invalid_patch_size(): # Test case for custom projection function def test_patch_projector_custom_projection(sample_input_tensor): class CustomProjection(nn.Module): - def __init__(self, input_dim, output_dim): + def __init__(self, dim, output_dim): super().__init__() - self.proj = nn.Linear(input_dim, output_dim) + self.proj = nn.Linear(dim, output_dim) def 
forward(self, x): return self.proj(x) diff --git a/tests/nn/modules/test_mm_adapter.py b/tests/nn/modules/test_mm_adapter.py index 7fef674c..221566dd 100644 --- a/tests/nn/modules/test_mm_adapter.py +++ b/tests/nn/modules/test_mm_adapter.py @@ -17,7 +17,7 @@ def test_creation(mm_adapter): # Example of a parameterized test with different input dimensions @pytest.mark.parametrize("dim", [256, 512, 1024]) -def test_input_dimensions(dim): +def test_dimensions(dim): mm_adapter = MultiModalAdapterDenseNetwork(dim=dim) assert mm_adapter.dim == dim diff --git a/zeta/nn/modules/audio_to_text.py b/zeta/nn/modules/audio_to_text.py index 92165f4d..6be316bd 100644 --- a/zeta/nn/modules/audio_to_text.py +++ b/zeta/nn/modules/audio_to_text.py @@ -7,7 +7,7 @@ def audio_to_text(x: Tensor, seqlen: int, dim: int, norm: bool = True): Reshapes and projects the audio input tensor to text representation. Args: - x (Tensor): Input audio tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): Input audio tensor of shape (batch_size, sequence_length, dim). seqlen (int): Length of the output sequence. dim (int): Dimension of the projected audio tensor. norm (bool, optional): Whether to apply layer normalization. Defaults to True. diff --git a/zeta/nn/modules/cog_vlm_two_adapter.py b/zeta/nn/modules/cog_vlm_two_adapter.py index 151a937a..4a8a2792 100644 --- a/zeta/nn/modules/cog_vlm_two_adapter.py +++ b/zeta/nn/modules/cog_vlm_two_adapter.py @@ -10,17 +10,17 @@ class CogVLMTwoAdapter(nn.Module): with linguistic representations using a 1D convolutional layer followed by a SwiGLU module. """ - def __init__(self, input_dim: int): + def __init__(self, dim: int): """ Initialize the CogVLMTwoAdapter module. Args: - input_dim (int): The dimension of the input features. + dim (int): The dimension of the input features. """ super(CogVLMTwoAdapter, self).__init__() self.conv = nn.Conv1d( - in_channels=input_dim, - out_channels=input_dim, + in_channels=dim, + out_channels=dim, kernel_size=2, stride=2, ) @@ -31,18 +31,18 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Forward pass of the CogVLMTwoAdapter module. Args: - x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, dim). Returns: torch.Tensor: The output tensor after applying the 1D convolution and SwiGLU module. 
""" - # Rearrange input tensor to match the expected input shape for Conv1d (batch, input_dim, sequence_length) + # Rearrange input tensor to match the expected input shape for Conv1d (batch, dim, sequence_length) x = rearrange(x, "b s d -> b d s") # Apply the convolution x = self.conv(x) - # Rearrange back to (batch, sequence_length, input_dim) + # Rearrange back to (batch, sequence_length, dim) x = rearrange(x, "b d s -> b s d") # Apply SwiGLU module @@ -55,5 +55,5 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # if __name__ == "__main__": # # Example input (batch, sequence_length, dimension) # x = torch.randn(2, 4, 3) # Adjust these dimensions as needed -# model = CogVLMTwoAdapter(input_dim=3) +# model = CogVLMTwoAdapter(dim=3) # print(model(x).shape) diff --git a/zeta/nn/modules/crome_adapter.py b/zeta/nn/modules/crome_adapter.py index 01c5fe87..239bd1a6 100644 --- a/zeta/nn/modules/crome_adapter.py +++ b/zeta/nn/modules/crome_adapter.py @@ -4,22 +4,22 @@ class CROMEAdapter(nn.Module): - def __init__(self, input_dim: int, bottleneck_dim: int): + def __init__(self, dim: int, bottleneck_dim: int): """ Initialize the CROMEAdapter module. Args: - input_dim (int): The dimension of the input features. + dim (int): The dimension of the input features. bottleneck_dim (int): The dimension of the bottleneck layer. """ super(CROMEAdapter, self).__init__() - self.Wd_text = nn.Linear(input_dim, bottleneck_dim) - self.Wg_text = nn.Linear(input_dim, bottleneck_dim) - self.Wd_image = nn.Linear(input_dim, bottleneck_dim) - self.Wg_image = nn.Linear(input_dim, bottleneck_dim) + self.Wd_text = nn.Linear(dim, bottleneck_dim) + self.Wg_text = nn.Linear(dim, bottleneck_dim) + self.Wd_image = nn.Linear(dim, bottleneck_dim) + self.Wg_image = nn.Linear(dim, bottleneck_dim) - self.Wu = nn.Linear(bottleneck_dim, input_dim) + self.Wu = nn.Linear(bottleneck_dim, dim) self.silu = nn.SiLU() diff --git a/zeta/nn/modules/fusion_ffn.py b/zeta/nn/modules/fusion_ffn.py index c206b1a7..04d01d5d 100644 --- a/zeta/nn/modules/fusion_ffn.py +++ b/zeta/nn/modules/fusion_ffn.py @@ -6,22 +6,22 @@ class MMFusionFFN(nn.Module): r"""Positionwise feed forward layer. Args: - input_dim (int): input dimension. + dim (int): input dimension. hidden_dim (int): hidden dimension. dropout (float, optional): dropout probability. (Default: 0.0) """ def __init__( self, - input_dim: int, + dim: int, hidden_dim: int, output_dim: int, dropout: float = 0.1, ) -> None: super().__init__() self.net = nn.Sequential( - nn.LayerNorm(input_dim), - nn.Linear(input_dim, hidden_dim, bias=True), + nn.LayerNorm(dim), + nn.Linear(dim, hidden_dim, bias=True), nn.SiLU(), nn.Dropout(dropout), nn.Linear(hidden_dim, output_dim, bias=True), diff --git a/zeta/nn/modules/hebbian.py b/zeta/nn/modules/hebbian.py index 1e98e4c7..03bfe5b6 100644 --- a/zeta/nn/modules/hebbian.py +++ b/zeta/nn/modules/hebbian.py @@ -12,7 +12,7 @@ class BasicHebbianGRUModel(nn.Module): introducing non-linearity. Parameters: - - input_dim: Dimension of the input features. + - dim: Dimension of the input features. - hidden_dim: Dimension of the hidden state in the GRU. - output_dim: Dimension of the output features. @@ -20,17 +20,17 @@ class BasicHebbianGRUModel(nn.Module): GRU, and finally applies a ReLU activation. """ - def __init__(self, input_dim, hidden_dim, output_dim): + def __init__(self, dim, hidden_dim, output_dim): """ Initializes the Basic Hebbian GRU model. Args: - - input_dim: Dimension of the input features. + - dim: Dimension of the input features. 
- hidden_dim: Dimension of the hidden state in the GRU. - output_dim: Dimension of the output features. """ super().__init__() - self.weights = nn.Parameter(torch.randn(input_dim, hidden_dim)) + self.weights = nn.Parameter(torch.randn(dim, hidden_dim)) self.gru = nn.GRU(hidden_dim, hidden_dim, batch_first=True) self.fc = nn.Linear(hidden_dim, output_dim) @@ -39,7 +39,7 @@ def forward(self, x): Forward pass of the model. Args: - - x: Input tensor of shape (B, Seqlen, input_dim) + - x: Input tensor of shape (B, Seqlen, dim) Returns: - Output tensor of shape (B, Seqlen, output_dim) @@ -59,10 +59,10 @@ def forward(self, x): # # Example usage -input_dim = 512 # Dimension of the input features +dim = 512 # Dimension of the input features hidden_dim = 256 # Dimension of the hidden state in the GRU output_dim = 128 # Dimension of the output features -model = BasicHebbianGRUModel(input_dim, hidden_dim, output_dim) +model = BasicHebbianGRUModel(dim, hidden_dim, output_dim) x = torch.randn(1, 512, 512) output = model(x) diff --git a/zeta/nn/modules/mm_ops.py b/zeta/nn/modules/mm_ops.py index 97ed4217..52c6e616 100644 --- a/zeta/nn/modules/mm_ops.py +++ b/zeta/nn/modules/mm_ops.py @@ -9,13 +9,13 @@ def threed_to_text( Converts a 3D tensor to text representation. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). max_seq_len (int): The maximum sequence length of the output tensor. dim (int): The dimension of the intermediate tensor. flatten (bool, optional): Whether to flatten the intermediate tensor. Defaults to False. Returns: - Tensor: The output tensor of shape (batch_size, max_seq_len, input_dim). + Tensor: The output tensor of shape (batch_size, max_seq_len, dim). """ b, s, d = x.shape @@ -29,11 +29,11 @@ def threed_to_text( def text_to_twod(x: Tensor, dim: int): """ - Converts a 3D tensor of shape (batch_size, sequence_length, input_dim) to a 2D tensor of shape (batch_size, dim) + Converts a 3D tensor of shape (batch_size, sequence_length, dim) to a 2D tensor of shape (batch_size, dim) by averaging the sequence dimension and applying a linear transformation. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). dim (int): The output dimension. Returns: diff --git a/zeta/nn/modules/multi_input_multi_output.py b/zeta/nn/modules/multi_input_multi_output.py index 34d1b312..4333e12e 100644 --- a/zeta/nn/modules/multi_input_multi_output.py +++ b/zeta/nn/modules/multi_input_multi_output.py @@ -154,7 +154,7 @@ class DynamicOutputDecoder(nn.Module): Decoder module for dynamic output. Args: - input_dim (int): The input dimension. + dim (int): The input dimension. robot_count (int): The number of robots. Attributes: @@ -162,10 +162,10 @@ class DynamicOutputDecoder(nn.Module): """ - def __init__(self, input_dim, robot_count): + def __init__(self, dim, robot_count): super().__init__() self.decoders = nn.ModuleList( - [nn.Linear(input_dim, input_dim) for _ in range(robot_count)] + [nn.Linear(dim, dim) for _ in range(robot_count)] ) def forward(self, x): @@ -188,7 +188,7 @@ class DynamicInputChannels(nn.Module): Args: num_robots (int): The number of robots. - input_dim (int): The input dimension. + dim (int): The input dimension. output_dim (int): The output dimension. 
Attributes: @@ -199,10 +199,10 @@ class DynamicInputChannels(nn.Module): """ - def __init__(self, num_robots, input_dim, output_dim): + def __init__(self, num_robots, dim, output_dim): super().__init__() self.layers = nn.ModuleList( - [nn.Linear(input_dim, output_dim) for _ in range(num_robots)] + [nn.Linear(dim, output_dim) for _ in range(num_robots)] ) def forward(self, x): @@ -216,7 +216,7 @@ class OutputDecoders(nn.Module): Args: num_robots (int): The number of robots. - input_dim (int): The input dimension. + dim (int): The input dimension. output_dim (int): The output dimension. Attributes: @@ -227,10 +227,10 @@ class OutputDecoders(nn.Module): """ - def __init__(self, num_robots, input_dim, output_dim): + def __init__(self, num_robots, dim, output_dim): super().__init__() self.decoders = nn.ModuleList( - [nn.Linear(input_dim, output_dim) for _ in range(num_robots)] + [nn.Linear(dim, output_dim) for _ in range(num_robots)] ) def forward(self, x): diff --git a/zeta/nn/modules/omnimodal_fusion.py b/zeta/nn/modules/omnimodal_fusion.py index a6e35a9b..c32d59aa 100644 --- a/zeta/nn/modules/omnimodal_fusion.py +++ b/zeta/nn/modules/omnimodal_fusion.py @@ -31,12 +31,12 @@ def __init__( def forward(self, *modalities: torch.Tensor) -> torch.Tensor: # Dynamically add encoders for new modalities while len(self.modality_encoders) < len(modalities): - input_dim = modalities[ + dim = modalities[ len(self.modality_encoders) ].nelement() // modalities[len(self.modality_encoders)].size( 0 ) # Compute flattened input dimension - self.modality_encoders.append(nn.Linear(input_dim, self.fusion_dim)) + self.modality_encoders.append(nn.Linear(dim, self.fusion_dim)) embeddings = [] for i, modality in enumerate(modalities): diff --git a/zeta/nn/modules/simple_lstm.py b/zeta/nn/modules/simple_lstm.py index 7d6e5e0e..a4365e99 100644 --- a/zeta/nn/modules/simple_lstm.py +++ b/zeta/nn/modules/simple_lstm.py @@ -33,7 +33,7 @@ def forward(self, x: Tensor, h: Tensor, c: Tensor) -> Tensor: Forward pass of the Simple LSTM cell. Args: - x (Tensor): The input tensor of shape (batch_size, input_dim). + x (Tensor): The input tensor of shape (batch_size, dim). h (Tensor): The previous hidden state tensor of shape (batch_size, hidden_dim). c (Tensor): The previous cell state tensor of shape (batch_size, hidden_dim). @@ -148,12 +148,12 @@ def forward(self, x: Tensor) -> Tensor: # if __name__ == "__main__": # batch_size = 32 # seq_length = 10 -# input_dim = 50 +# dim = 50 # hidden_dim = 100 # num_layers = 2 # output_dim = 30 -# model = SimpleLSTM(input_dim, hidden_dim, num_layers, output_dim) -# inputs = torch.randn(batch_size, seq_length, input_dim) +# model = SimpleLSTM(dim, hidden_dim, num_layers, output_dim) +# inputs = torch.randn(batch_size, seq_length, dim) # outputs = model(inputs) # print(outputs) # Expected output shape: (batch_size, seq_length, output_dim) diff --git a/zeta/nn/modules/simple_rnn.py b/zeta/nn/modules/simple_rnn.py index c6da2de6..17d0f139 100644 --- a/zeta/nn/modules/simple_rnn.py +++ b/zeta/nn/modules/simple_rnn.py @@ -30,7 +30,7 @@ def forward(self, x: Tensor) -> Tensor: Forward pass of the simple RNN module. Args: - x (Tensor): The input tensor of shape (batch_size, sequence_length, input_dim). + x (Tensor): The input tensor of shape (batch_size, sequence_length, dim). Returns: Tensor: The output tensor of shape (batch_size, sequence_length, hidden_dim). 
diff --git a/zeta/rl/__init__.py b/zeta/rl/__init__.py index a6877adc..54ac1b1f 100644 --- a/zeta/rl/__init__.py +++ b/zeta/rl/__init__.py @@ -7,10 +7,11 @@ ) from zeta.rl.hindsight_replay import HindsightExperienceReplay from zeta.rl.language_reward import LanguageReward -from zeta.rl.rewardim import RewardModel + +# from zeta.rl.rewardim import RewardModel __all__ = [ - "RewardModel", + # "RewardModel", "ActorCritic", "ppo", "HindsightExperienceReplay",
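For downstream users, the practical API change in this patch is the rename of the `input_dim` constructor argument to `dim` across the modules above, along with the `RewardModel` export being commented out of `zeta.rl`. Positional call sites keep working; keyword call sites need a one-token update. Below is a minimal before/after sketch using `BasicHebbianGRUModel`, with the import path and tensor shapes taken from the diff itself (`zeta/nn/modules/hebbian.py` and `tests/nn/modules/test_hebbian.py`); it is illustrative only and assumes `zetascale` 2.7.1 as patched here:

```python
import torch

from zeta.nn.modules.hebbian import BasicHebbianGRUModel

# Positional construction is unaffected by the input_dim -> dim rename.
model = BasicHebbianGRUModel(512, 256, 128)

# Keyword construction must be updated for 2.7.1:
# 2.7.0 and earlier: BasicHebbianGRUModel(input_dim=512, hidden_dim=256, output_dim=128)
model = BasicHebbianGRUModel(dim=512, hidden_dim=256, output_dim=128)

# Forward pass maps (batch, seq_len, dim) -> (batch, seq_len, output_dim),
# matching the shapes asserted in tests/nn/modules/test_hebbian.py above.
x = torch.randn(2, 10, 512)
out = model(x)
print(out.shape)  # torch.Size([2, 10, 128])
```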