diff --git a/.gitignore b/.gitignore
index d5aec461..ceb18764 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ data
 # Distribution / packaging
 .Python
 build/
+.ruff_cache
 .vscode
 develop-eggs/
 dist/
diff --git a/docs/corporate/zeta_cloud.md b/docs/corporate/zeta_cloud.md
index f424dd34..5f20b967 100644
--- a/docs/corporate/zeta_cloud.md
+++ b/docs/corporate/zeta_cloud.md
@@ -58,3 +58,5 @@ The estimated timeline for shipping Zeta Cloud is as follows:
 | Marketplace for Pre-Trained Models | A platform for users to buy, sell, or license pre-trained models. | AI developers, companies looking for ready-to-use models. | Transaction fees, subscription for premium listings. |
 | Data Storage and Management | Integrated solutions for data storage, processing, and management. | All users of the platform. | Based on the amount of data stored/processed. |
 | API Access for Third-Party Integrations | Providing API access for integration with other tools and services. | Developers, businesses needing integrations. | Monthly/Annual subscription or pay-per-use. |
+
+
diff --git a/playground/models/flamingo.py b/playground/models/flamingo.py
index 52f3d818..66ebaa2c 100644
--- a/playground/models/flamingo.py
+++ b/playground/models/flamingo.py
@@ -2,7 +2,6 @@
 import torch.nn.functional as F
 from einops import rearrange
 from torch import einsum, nn
-from zeta.nn.modules.simple_feedforward import SimpleFeedForward
 from zeta.nn.attention.cross_attn_images import MultiModalCrossAttention
 import zeta.nn as znn
diff --git a/playground/models/simple_transformer.py b/playground/models/simple_transformer.py
index 7bd8e82d..9af78d10 100644
--- a/playground/models/simple_transformer.py
+++ b/playground/models/simple_transformer.py
@@ -3,7 +3,6 @@
 from zeta.nn.modules.feedforward import FeedForward
 from zeta.nn.attention.shaped_attention import ShapedAttention
 from zeta.nn.modules.residual import Residual
-from zeta.nn.attention import FlashAttention
 class SimpleTransformerBlock(nn.Module):
diff --git a/tests/nn/attentions/test_cross_attn.py b/tests/nn/attentions/test_cross_attn.py
index ce96f326..6bff17b8 100644
--- a/tests/nn/attentions/test_cross_attn.py
+++ b/tests/nn/attentions/test_cross_attn.py
@@ -1,6 +1,4 @@
-import pytest
 import torch
-from torch import nn
 from zeta.nn.attention.cross_attention import CrossAttention
 # Create an instance of CrossAttention for testing
diff --git a/tests/nn/attentions/test_local_attn_mha.py b/tests/nn/attentions/test_local_attn_mha.py
index 0a5d89f3..91894024 100644
--- a/tests/nn/attentions/test_local_attn_mha.py
+++ b/tests/nn/attentions/test_local_attn_mha.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-import torch.nn as nn
 from torch.autograd import gradcheck
 from zeta.nn.attention.local_attention_mha import LocalMHA
diff --git a/tests/nn/attentions/test_mgqa.py b/tests/nn/attentions/test_mgqa.py
index 70f9664c..36a66bd9 100644
--- a/tests/nn/attentions/test_mgqa.py
+++ b/tests/nn/attentions/test_mgqa.py
@@ -1,7 +1,6 @@
 import pytest
 import torch
 from zeta.nn.attention.mgqa import MGQA, CacheView
-from zeta.utils.main import exists
 # Create an instance of MGQA for testing
diff --git a/tests/nn/attentions/test_shaped_attn.py b/tests/nn/attentions/test_shaped_attn.py
index 3c2071be..097dff66 100644
--- a/tests/nn/attentions/test_shaped_attn.py
+++ b/tests/nn/attentions/test_shaped_attn.py
@@ -1,7 +1,4 @@
-import pytest
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 from zeta.nn.attention.shaped_attention import ShapedAttention
diff --git a/tests/nn/attentions/test_sparse_attn.py b/tests/nn/attentions/test_sparse_attn.py
index 39682f75..f3006df0 100644
--- a/tests/nn/attentions/test_sparse_attn.py
+++ b/tests/nn/attentions/test_sparse_attn.py
@@ -65,10 +65,6 @@ def test_sparse_attention_forward():
     n_batch = 4
     n_ctx = 1024
     n_embd = 256
-    heads = 4
-    attn_mode = "all"
-    local_attn_ctx = 32
-    blocksize = 32
     q = torch.randn(n_batch, n_ctx, n_embd)
     k = torch.randn(n_batch, n_ctx, n_embd)
diff --git a/tests/nn/attentions/test_xc_attention.py b/tests/nn/attentions/test_xc_attention.py
index d67a28eb..d5558996 100644
--- a/tests/nn/attentions/test_xc_attention.py
+++ b/tests/nn/attentions/test_xc_attention.py
@@ -42,7 +42,7 @@ def test_xc_attention_forward_with_invalid_inputs(xc_attention_model):
     with pytest.raises(Exception):
         x = torch.randn(1, 256, 16, 16)
         cond = torch.randn(1, 128) # Mismatched conditioning dimension
-        output = xc_attention_model(x, cond)
+        xc_attention_model(x, cond)
 # Test case to check if XCAttention handles different head configurations correctly
@@ -81,10 +81,10 @@ def test_xc_attention_with_different_cond_dims():
 # Test case to check if XCAttention handles negative input dimensions correctly
 def test_xc_attention_negative_input_dim():
     with pytest.raises(ValueError):
-        model = XCAttention(dim=-256, cond_dim=64, heads=8)
+        XCAttention(dim=-256, cond_dim=64, heads=8)
 # Test case to check if XCAttention handles negative conditioning dimensions correctly
 def test_xc_attention_negative_cond_dim():
     with pytest.raises(ValueError):
-        model = XCAttention(dim=256, cond_dim=-64, heads=8)
+        XCAttention(dim=256, cond_dim=-64, heads=8)
diff --git a/tests/nn/biases/test_alibi.py b/tests/nn/biases/test_alibi.py
index 2e433fac..1842c421 100644
--- a/tests/nn/biases/test_alibi.py
+++ b/tests/nn/biases/test_alibi.py
@@ -152,9 +152,9 @@ def tensors_equal(tensor1, tensor2):
 # Test for the existence of a helper function exists
 def test_exists_function():
-    assert exists(None) == False
-    assert exists(0) == True
-    assert exists("Hello") == True
+    assert exists(None) is False
+    assert exists(0) is True
+    assert exists("Hello") is True
 # Test for the pad_at_dim helper function
@@ -170,8 +170,8 @@ def test_tensors_equal_function():
     tensor2 = torch.tensor([1.0, 2.0, 3.0])
     tensor3 = torch.tensor([1.0, 2.0, 3.1])
-    assert tensors_equal(tensor1, tensor2) == True
-    assert tensors_equal(tensor1, tensor3) == False
+    assert tensors_equal(tensor1, tensor2) is True
+    assert tensors_equal(tensor1, tensor3) is False
 # Additional tests for tensor manipulation functions
@@ -193,8 +193,8 @@ def test_einops_rearrange_function():
 # Test for the nn.Module class inheritance
 def test_nn_module_inheritance():
-    assert issubclass(AlibiPositionalBias, nn.Module) == True
-    assert issubclass(LearnedAlibiPositionalBias, nn.Module) == True
+    assert issubclass(AlibiPositionalBias, nn.Module) is True
+    assert issubclass(LearnedAlibiPositionalBias, nn.Module) is True
 # Helper function to create random data
diff --git a/tests/nn/biases/test_relative_position_bias.py b/tests/nn/biases/test_relative_position_bias.py
index c7b2fdf9..9b3ab839 100644
--- a/tests/nn/biases/test_relative_position_bias.py
+++ b/tests/nn/biases/test_relative_position_bias.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-import torch.nn as nn
 from zeta.nn.biases.relative_position_bias import RelativePositionBias
@@ -238,13 +237,13 @@ def test_different_bidirectional_bias_values():
 # Test case for initializing with negative max distance
 def test_negative_max_distance_init():
     with pytest.raises(ValueError):
-        bias = RelativePositionBias(max_distance=-128)
+        RelativePositionBias(max_distance=-128)
 # Test case for initializing with negative num buckets
 def test_negative_num_buckets_init():
     with pytest.raises(ValueError):
-        bias = RelativePositionBias(num_buckets=-32)
+        RelativePositionBias(num_buckets=-32)
 # Test case for initializing with a large max distance
@@ -280,4 +279,4 @@ def test_large_num_buckets():
 # Test case for bidirectional bias with negative max distance
 def test_bidirectional_bias_negative_max_distance():
     with pytest.raises(ValueError):
-        bias = RelativePositionBias(bidirectional=True, max_distance=-128)
+        RelativePositionBias(bidirectional=True, max_distance=-128)
diff --git a/tests/nn/embeddings/test_QFTSPEmbeddings.py b/tests/nn/embeddings/test_QFTSPEmbeddings.py
index 4e3f334c..bb353af9 100644
--- a/tests/nn/embeddings/test_QFTSPEmbeddings.py
+++ b/tests/nn/embeddings/test_QFTSPEmbeddings.py
@@ -69,18 +69,18 @@ def test_qftspembeddings_forward_negative_dim():
     vocab_size = 10000
     dim = -512
     with pytest.raises(ValueError):
-        model = QFTSPEmbeddings(vocab_size, dim)
+        QFTSPEmbeddings(vocab_size, dim)
 def test_qftspembeddings_forward_negative_vocab_size():
     vocab_size = -10000
     dim = 512
     with pytest.raises(ValueError):
-        model = QFTSPEmbeddings(vocab_size, dim)
+        QFTSPEmbeddings(vocab_size, dim)
 def test_qftspembeddings_forward_zero_vocab_size():
     vocab_size = 0
     dim = 512
     with pytest.raises(ValueError):
-        model = QFTSPEmbeddings(vocab_size, dim)
+        QFTSPEmbeddings(vocab_size, dim)
diff --git a/tests/nn/embeddings/test_patch_embedding.py b/tests/nn/embeddings/test_patch_embedding.py
index e02e83a4..2a4aafec 100644
--- a/tests/nn/embeddings/test_patch_embedding.py
+++ b/tests/nn/embeddings/test_patch_embedding.py
@@ -1,4 +1,3 @@
-import pytest
 import torch
 from torch import nn
 from einops.layers.torch import Rearrange
diff --git a/tests/nn/embeddings/test_rope.py b/tests/nn/embeddings/test_rope.py
index b357f37f..4e475253 100644
--- a/tests/nn/embeddings/test_rope.py
+++ b/tests/nn/embeddings/test_rope.py
@@ -1,6 +1,4 @@
-import pytest
 import torch
-from torch import nn
 from zeta.nn.embeddings.rope import (
     RotaryEmbedding,
diff --git a/tests/nn/embeddings/test_sine_positional_embs.py b/tests/nn/embeddings/test_sine_positional_embs.py
index b46991e2..df6ceba2 100644
--- a/tests/nn/embeddings/test_sine_positional_embs.py
+++ b/tests/nn/embeddings/test_sine_positional_embs.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-from torch import nn
 from zeta.nn.embeddings.sine_positional import SinePositionalEmbedding
@@ -76,11 +75,11 @@ def test_extend_pe():
 def test_negative_dimension():
     dim_model = -512
     with pytest.raises(ValueError):
-        module = SinePositionalEmbedding(dim_model)
+        SinePositionalEmbedding(dim_model)
 # Test case for initializing with alpha=True and dropout > 0
 def test_alpha_and_dropout():
     dim_model = 512
     with pytest.raises(ValueError):
-        module = SinePositionalEmbedding(dim_model, alpha=True, dropout=0.2)
+        SinePositionalEmbedding(dim_model, alpha=True, dropout=0.2)
diff --git a/tests/nn/embeddings/test_truncated_rotary_emb.py b/tests/nn/embeddings/test_truncated_rotary_emb.py
index be595ac8..f7c51814 100644
--- a/tests/nn/embeddings/test_truncated_rotary_emb.py
+++ b/tests/nn/embeddings/test_truncated_rotary_emb.py
@@ -1,6 +1,4 @@
 import pytest
-import torch
-from torch import nn
 from zeta.nn.embeddings.truncated_rope import TruncatedRotaryEmbedding
@@ -50,7 +48,7 @@ def test_negative_dimension():
     b = 1.0
     rho = 0.0
     with pytest.raises(ValueError):
-        module = TruncatedRotaryEmbedding(dim, a, b, rho)
+        TruncatedRotaryEmbedding(dim, a, b, rho)
 # Test case for initializing with a > b
@@ -60,7 +58,7 @@ def test_a_greater_than_b():
     b = 0.5
     rho = 0.0
     with pytest.raises(ValueError):
-        module = TruncatedRotaryEmbedding(dim, a, b, rho)
+        TruncatedRotaryEmbedding(dim, a, b, rho)
 # Test case for initializing with rho > b
@@ -70,4 +68,4 @@ def test_rho_greater_than_b():
     b = 1.0
     rho = 1.5
     with pytest.raises(ValueError):
-        module = TruncatedRotaryEmbedding(dim, a, b, rho)
+        TruncatedRotaryEmbedding(dim, a, b, rho)
diff --git a/tests/nn/embeddings/test_vision_embeddings.py b/tests/nn/embeddings/test_vision_embeddings.py
index cd99e367..48b89da0 100644
--- a/tests/nn/embeddings/test_vision_embeddings.py
+++ b/tests/nn/embeddings/test_vision_embeddings.py
@@ -98,25 +98,25 @@ def test_forward_custom():
 # Test case for initializing with incorrect image size
 def test_incorrect_img_size_init():
     with pytest.raises(AssertionError):
-        module = VisionEmbedding(img_size=256)
+        VisionEmbedding(img_size=256)
 # Test case for initializing with incorrect patch size
 def test_incorrect_patch_size_init():
     with pytest.raises(AssertionError):
-        module = VisionEmbedding(patch_size=64)
+        VisionEmbedding(patch_size=64)
 # Test case for initializing with negative in_chans
 def test_negative_in_chans_init():
     with pytest.raises(ValueError):
-        module = VisionEmbedding(in_chans=-3)
+        VisionEmbedding(in_chans=-3)
 # Test case for initializing with negative embed_dim
 def test_negative_embed_dim_init():
     with pytest.raises(ValueError):
-        module = VisionEmbedding(embed_dim=-768)
+        VisionEmbedding(embed_dim=-768)
 # Test case for initializing with invalid masked_position
diff --git a/tests/nn/embeddings/test_vision_lang_embeddings.py b/tests/nn/embeddings/test_vision_lang_embeddings.py
index 96cf5995..a72e497d 100644
--- a/tests/nn/embeddings/test_vision_lang_embeddings.py
+++ b/tests/nn/embeddings/test_vision_lang_embeddings.py
@@ -49,7 +49,7 @@ def test_incorrect_text_embedding_init():
     text_embed = nn.Linear(10, 10)
     vision_embed = nn.Embedding(10, 10)
     with pytest.raises(AssertionError):
-        module = VisionLanguageEmbedding(text_embed, vision_embed)
+        VisionLanguageEmbedding(text_embed, vision_embed)
 # Test case for initializing with incorrect vision embedding
@@ -57,7 +57,7 @@ def test_incorrect_vision_embedding_init():
     text_embed = nn.Embedding(10, 10)
     vision_embed = nn.Linear(10, 10)
     with pytest.raises(AssertionError):
-        module = VisionLanguageEmbedding(text_embed, vision_embed)
+        VisionLanguageEmbedding(text_embed, vision_embed)
 # Test case for forward pass with text input being None
diff --git a/tests/nn/modules/test_cross_attn_images.py b/tests/nn/modules/test_cross_attn_images.py
index 8b4f3e7a..6651d72f 100644
--- a/tests/nn/modules/test_cross_attn_images.py
+++ b/tests/nn/modules/test_cross_attn_images.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-import numpy as np
 import pytest
 from torch.autograd import gradcheck
 from zeta.nn.attention.cross_attn_images import MultiModalCrossAttention
diff --git a/tests/nn/modules/test_custom_mlp.py b/tests/nn/modules/test_custom_mlp.py
index e2eec696..22d0eefd 100644
--- a/tests/nn/modules/test_custom_mlp.py
+++ b/tests/nn/modules/test_custom_mlp.py
@@ -1,7 +1,6 @@
 import pytest
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from zeta.nn.modules.flexible_mlp import CustomMLP
diff --git a/tests/nn/modules/test_hebbian.py b/tests/nn/modules/test_hebbian.py
index 0ef274ea..5d9e76be 100644
--- a/tests/nn/modules/test_hebbian.py
+++ b/tests/nn/modules/test_hebbian.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-import torch.nn as nn
 from zeta.nn.modules.hebbian import (
     BasicHebbianGRUModel,
diff --git a/tests/nn/modules/test_image_projector.py b/tests/nn/modules/test_image_projector.py
index f6acab3f..58f3e2a2 100644
--- a/tests/nn/modules/test_image_projector.py
+++ b/tests/nn/modules/test_image_projector.py
@@ -90,7 +90,7 @@ def test_patch_projector_performance(sample_input_tensor):
     # Measure the time taken for 100 forward passes
     start_time = time.time()
     for _ in range(100):
-        output_tensor = patch_projector(input_tensor)
+        patch_projector(input_tensor)
     end_time = time.time()
     elapsed_time = end_time - start_time
@@ -211,7 +211,7 @@ def test_patch_projector_performance_various_input_sizes(
     # Measure the time taken for 100 forward passes
     start_time = time.time()
     for _ in range(100):
-        output_tensor = patch_projector(input_tensor)
+        patch_projector(input_tensor)
     end_time = time.time()
     elapsed_time = end_time - start_time
@@ -249,7 +249,7 @@ def test_patch_projector_output_shape_consistency(sample_input_tensor):
 # Test case for edge case: invalid max_patch_size
 def test_patch_projector_invalid_max_patch_size():
     with pytest.raises(ValueError):
-        patch_projector = ImagePatchCreatorProjector(
+        ImagePatchCreatorProjector(
             max_patch_size=0, embedding_dim=768
         )
@@ -257,7 +257,7 @@ def test_patch_projector_invalid_max_patch_size():
 # Test case for edge case: invalid embedding_dim
 def test_patch_projector_invalid_embedding_dim():
     with pytest.raises(ValueError):
-        patch_projector = ImagePatchCreatorProjector(
+        ImagePatchCreatorProjector(
             max_patch_size=16, embedding_dim=0
         )
@@ -270,7 +270,7 @@ def test_patch_projector_invalid_input_shape():
     input_tensor = torch.randn(1, 3, 32, 32) # Smaller image
     with pytest.raises(ValueError):
-        output_tensor = patch_projector(input_tensor)
+        patch_projector(input_tensor)
 # Test case for dynamic patch size calculation
diff --git a/tests/nn/modules/test_log_ff.py b/tests/nn/modules/test_log_ff.py
index 08207d76..e2d5f109 100644
--- a/tests/nn/modules/test_log_ff.py
+++ b/tests/nn/modules/test_log_ff.py
@@ -1,6 +1,6 @@
 import torch
 import pytest
-from zeta.nn.modules.log_ff import LogFF, compute_entropy_safe
+from zeta.nn.modules.log_ff import LogFF
 # Test fixture for a sample input tensor
diff --git a/tests/nn/modules/test_test_conv_lang.py b/tests/nn/modules/test_test_conv_lang.py
index 91501991..9e776974 100644
--- a/tests/nn/modules/test_test_conv_lang.py
+++ b/tests/nn/modules/test_test_conv_lang.py
@@ -78,7 +78,7 @@ def test_with_mocked_convolution_layer():
     block = ConvolutionLanguageBlock(128, 256, 3, 1)
     block.conv_layers[0] = mock_convolution
     x = torch.randn(1, 128, 1024)
-    output = block(x)
+    block(x)
     assert mock_convolution.called
diff --git a/tests/ops/test_einops_poly.py b/tests/ops/test_einops_poly.py
index 304055f8..a1ad7c44 100644
--- a/tests/ops/test_einops_poly.py
+++ b/tests/ops/test_einops_poly.py
@@ -71,7 +71,7 @@ def test_reduce_with_anon_dims(pattern, a_list):
 # Additional tests for rearrange_many function
 def test_rearrange_many_invalid_pattern():
     with pytest.raises(ValueError):
-        output = list(
+        list(
             rearrange_many([input_data, input_data], pattern="invalid_pattern")
         )
@@ -86,7 +86,7 @@ def test_rearrange_many_with_multiple_patterns():
 # Additional tests for repeat_many function
 def test_repeat_many_invalid_pattern():
     with pytest.raises(ValueError):
-        output = list(
+        list(
             repeat_many(
                 [input_data, input_data],
                 pattern="invalid_pattern",
@@ -97,7 +97,7 @@ def test_repeat_many_invalid_pattern():
 def test_repeat_many_invalid_repeats():
     with pytest.raises(ValueError):
-        output = list(
+        list(
             repeat_many(
                 [input_data, input_data], pattern="b h w c", repeats=[2]
             )
@@ -115,7 +115,7 @@ def test_repeat_many_with_single_repeat():
 # Additional tests for reduce_many function
 def test_reduce_many_invalid_pattern():
     with pytest.raises(ValueError):
-        output = list(
+        list(
             reduce_many(
                 [input_data, input_data],
                 pattern="invalid_pattern",
@@ -126,7 +126,7 @@ def test_reduce_many_invalid_pattern():
 def test_reduce_many_invalid_reduction():
     with pytest.raises(ValueError):
-        output = list(
+        list(
             reduce_many(
                 [input_data, input_data],
                 pattern="b h w c",
@@ -148,14 +148,14 @@ def test_reduce_many_with_sum_reduction():
 # Additional tests for rearrange_with_anon_dims function
 def test_rearrange_with_anon_dims_invalid_dim_list():
     with pytest.raises(ValueError):
-        output = rearrange_with_anon_dims(
+        rearrange_with_anon_dims(
             input_data, pattern="...a b c", a=(1,)
         )
 def test_rearrange_with_anon_dims_invalid_pattern():
     with pytest.raises(ValueError):
-        output = rearrange_with_anon_dims(
+        rearrange_with_anon_dims(
             input_data, pattern="invalid_pattern", a=[(1, 2), (2, 3)]
         )
@@ -163,12 +163,12 @@ def test_rearrange_with_anon_dims_invalid_pattern():
 # Additional tests for repeat_with_anon_dims function
 def test_repeat_with_anon_dims_invalid_dim_list():
     with pytest.raises(ValueError):
-        output = repeat_with_anon_dims(input_data, pattern="...a b c", a=(2,))
+        repeat_with_anon_dims(input_data, pattern="...a b c", a=(2,))
 def test_repeat_with_anon_dims_invalid_pattern():
     with pytest.raises(ValueError):
-        output = repeat_with_anon_dims(
+        repeat_with_anon_dims(
             input_data, pattern="invalid_pattern", a=[(2, 3), (3, 4)]
         )
@@ -176,11 +176,11 @@ def test_repeat_with_anon_dims_invalid_pattern():
 # Additional tests for reduce_with_anon_dims function
 def test_reduce_with_anon_dims_invalid_dim_list():
     with pytest.raises(ValueError):
-        output = reduce_with_anon_dims(input_data, pattern="...a b c", a=(2,))
+        reduce_with_anon_dims(input_data, pattern="...a b c", a=(2,))
 def test_reduce_with_anon_dims_invalid_pattern():
     with pytest.raises(ValueError):
-        output = reduce_with_anon_dims(
+        reduce_with_anon_dims(
             input_data, pattern="invalid_pattern", a=[(2, 3), (3, 4)]
         )
diff --git a/tests/optim/test_gradient_equillibrum.py b/tests/optim/test_gradient_equillibrum.py
index 256549b4..84a4f113 100644
--- a/tests/optim/test_gradient_equillibrum.py
+++ b/tests/optim/test_gradient_equillibrum.py
@@ -121,7 +121,7 @@ def test_optimizer_with_custom_lr_and_weight_decay():
 # Test optimizer with a custom clip threshold
 def test_optimizer_with_custom_clip_threshold():
     model, loss_fn = create_model_and_loss()
-    optimizer = GradientEquilibrum(model.parameters(), clip_thresh=0.5)
+    GradientEquilibrum(model.parameters(), clip_thresh=0.5)
     assert True # No exceptions were raised
diff --git a/tests/optim/test_stable_adamw.py b/tests/optim/test_stable_adamw.py
index 18953d97..b2ac2b87 100644
--- a/tests/optim/test_stable_adamw.py
+++ b/tests/optim/test_stable_adamw.py
@@ -165,21 +165,21 @@ def test_optimizer_with_zero_gradients():
 def test_optimizer_with_negative_learning_rate():
     model = torch.nn.Linear(10, 10)
     with pytest.raises(ValueError):
-        optimizer = StableAdamWUnfused(model.parameters(), lr=-0.001)
+        StableAdamWUnfused(model.parameters(), lr=-0.001)
 # Test optimizer with a negative weight decay (should raise a ValueError)
 def test_optimizer_with_negative_weight_decay():
     model = torch.nn.Linear(10, 10)
     with pytest.raises(ValueError):
-        optimizer = StableAdamWUnfused(model.parameters(), weight_decay=-0.1)
+        StableAdamWUnfused(model.parameters(), weight_decay=-0.1)
 # Test optimizer with a negative custom scalar (should raise a ValueError)
 def test_optimizer_with_negative_custom_scalar():
     model = torch.nn.Linear(10, 10)
     with pytest.raises(ValueError):
-        optimizer = StableAdamWUnfused(
+        StableAdamWUnfused(
             model.parameters(), precision="custom_fp16", custom_scalar=-65536
         )
diff --git a/tests/test_init.py b/tests/test_init.py
index 2a97119b..ab227e39 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -1,4 +1,3 @@
-import pytest
 import zeta
diff --git a/tests/tokenizers/test_llama_tokenizer.py b/tests/tokenizers/test_llama_tokenizer.py
index 726c193e..52f89310 100644
--- a/tests/tokenizers/test_llama_tokenizer.py
+++ b/tests/tokenizers/test_llama_tokenizer.py
@@ -72,5 +72,5 @@ def test_llama_tokenizer_encode_decode(text):
     ],
 )
 def test_llama_tokenizer_download_tokenizer(tokenizer_name):
-    tokenizer = LLamaTokenizer(tokenizer_name=tokenizer_name)
+    LLamaTokenizer(tokenizer_name=tokenizer_name)
     assert os.path.isfile("data/tokenizer.model")
diff --git a/zeta/models/__init__.py b/zeta/models/__init__.py
index 454352b0..9dab6ca3 100644
--- a/zeta/models/__init__.py
+++ b/zeta/models/__init__.py
@@ -9,3 +9,17 @@ from zeta.models.palme import PalmE
 from zeta.models.vit import ViT
 from zeta.models.navit import NaViT
+
+
+__all__ = [
+    "BaseModel",
+    "ViT",
+    "MaxVit",
+    "MegaVit",
+    "PalmE",
+    "GPT4",
+    "GPT4MultiModal",
+    "LLama2",
+    "Andromeda",
+    "NaViT",
+]
\ No newline at end of file
diff --git a/zeta/models/base.py b/zeta/models/base.py
index 71424276..04f7a4b0 100644
--- a/zeta/models/base.py
+++ b/zeta/models/base.py
@@ -1,4 +1,4 @@
-from abc import ABC, abstractmethod
+from abc import ABC
 class BaseModel(ABC):
diff --git a/zeta/nn/attention/local_attention_mha.py b/zeta/nn/attention/local_attention_mha.py
index 18a99ca6..8a331531 100644
--- a/zeta/nn/attention/local_attention_mha.py
+++ b/zeta/nn/attention/local_attention_mha.py
@@ -1,5 +1,4 @@
 import torch
-import torch.nn.functional as F
 from einops import rearrange
 from torch import nn
diff --git a/zeta/nn/attention/multiquery_attention.py b/zeta/nn/attention/multiquery_attention.py
index d94dcf53..37808373 100644
--- a/zeta/nn/attention/multiquery_attention.py
+++ b/zeta/nn/attention/multiquery_attention.py
@@ -1,6 +1,6 @@
 import math
 import warnings
-from typing import Dict, Optional, Type
+from typing import Optional
 import torch
 import torch.nn as nn
diff --git a/zeta/nn/attention/spatial_linear_attention.py b/zeta/nn/attention/spatial_linear_attention.py
index 736bf781..35fbd4b3 100644
--- a/zeta/nn/attention/spatial_linear_attention.py
+++ b/zeta/nn/attention/spatial_linear_attention.py
@@ -3,7 +3,7 @@
 from einops import rearrange
-from einops_exts import check_shape, rearrange_many
+from einops_exts import rearrange_many
 class SpatialLinearAttention(nn.Module):
diff --git a/zeta/nn/embeddings/sinusoidal.py b/zeta/nn/embeddings/sinusoidal.py
index 430cd396..5a5f9e7f 100644
--- a/zeta/nn/embeddings/sinusoidal.py
+++ b/zeta/nn/embeddings/sinusoidal.py
@@ -1,5 +1,5 @@
 import torch
-from torch import nn, einsum
+from torch import nn
 from einops import rearrange
diff --git a/zeta/nn/modules/__init__.py b/zeta/nn/modules/__init__.py
index fe90f8bb..a94e436f 100644
--- a/zeta/nn/modules/__init__.py
+++ b/zeta/nn/modules/__init__.py
@@ -11,13 +11,12 @@
 from zeta.nn.modules.feedforward import FeedForward
 from zeta.nn.modules.feedforward_network import FeedForwardNetwork
 from zeta.nn.modules.flexible_mlp import CustomMLP
-from zeta.nn.modules.fractorial_net import FractalBlock, FractalNetwork
 from zeta.nn.modules.h3 import H3Layer
 from zeta.nn.modules.itca import IterativeCrossSelfAttention
 from zeta.nn.modules.lang_conv_module import ConvolutionLanguageBlock
 from zeta.nn.modules.layernorm import LayerNorm, l2norm
 from zeta.nn.modules.leaky_relu import LeakyRELU
-from zeta.nn.modules.log_ff import LogFF, compute_entropy_safe
+from zeta.nn.modules.log_ff import LogFF
 from zeta.nn.modules.lora import Lora
 from zeta.nn.modules.mbconv import MBConv
 from zeta.nn.modules.mlp import MLP
@@ -31,7 +30,6 @@
 from zeta.nn.modules.resnet import ResNet
 from zeta.nn.modules.rms_norm import RMSNorm
 from zeta.nn.modules.rnn_nlp import RNNL
-from zeta.nn.modules.s4 import s4d_kernel
 from zeta.nn.modules.shufflenet import ShuffleNet
 from zeta.nn.modules.sig_lip import SigLipLoss
 from zeta.nn.modules.simple_attention import simple_attention
diff --git a/zeta/nn/modules/batched_dp.py b/zeta/nn/modules/batched_dp.py
index 6382df1e..a02b0764 100644
--- a/zeta/nn/modules/batched_dp.py
+++ b/zeta/nn/modules/batched_dp.py
@@ -1,4 +1,3 @@
-import torch
 from einops import rearrange
diff --git a/zeta/nn/modules/clex.py b/zeta/nn/modules/clex.py
index b0cf211c..932e2f38 100644
--- a/zeta/nn/modules/clex.py
+++ b/zeta/nn/modules/clex.py
@@ -152,7 +152,6 @@ def forward(self, device, dtype, seq_len, do_train=False):
         scale_factor = seq_len // self.max_position_embeddings
         if do_train:
             t_val = self.sample_random_times(self.max_t + 1, device)[0]
-            import math
             sampled_position_ids = self.get_random_position_ids(
                 n=seq_len - 2, max=seq_len * t_val - 2
diff --git a/zeta/nn/modules/decision_tree.py b/zeta/nn/modules/decision_tree.py
index 1456f82e..61b3fab7 100644
--- a/zeta/nn/modules/decision_tree.py
+++ b/zeta/nn/modules/decision_tree.py
@@ -1,6 +1,5 @@
 import torch
 from torch import nn
-import torch.nn.functional as F
 class SimpleDecisionTree(nn.Module):
diff --git a/zeta/nn/modules/diffusion.py b/zeta/nn/modules/diffusion.py
index 92e2f93e..d22bdd6c 100644
--- a/zeta/nn/modules/diffusion.py
+++ b/zeta/nn/modules/diffusion.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 class Diffuser(nn.Module):
diff --git a/zeta/nn/modules/flatten_features.py b/zeta/nn/modules/flatten_features.py
index 39082a08..012def81 100644
--- a/zeta/nn/modules/flatten_features.py
+++ b/zeta/nn/modules/flatten_features.py
@@ -1,4 +1,3 @@
-import torch
 from einops import rearrange
diff --git a/zeta/nn/modules/image_projector.py b/zeta/nn/modules/image_projector.py
index 5517be8e..0db1fa77 100644
--- a/zeta/nn/modules/image_projector.py
+++ b/zeta/nn/modules/image_projector.py
@@ -1,6 +1,4 @@
-import torch
 import torch.nn as nn
-import torch.nn.functional as F
 class ImagePatchCreatorProjector(nn.Module):
diff --git a/zeta/nn/modules/lang_conv_module.py b/zeta/nn/modules/lang_conv_module.py
index aa71d2b4..eb65edff 100644
--- a/zeta/nn/modules/lang_conv_module.py
+++ b/zeta/nn/modules/lang_conv_module.py
@@ -1,4 +1,3 @@
-import torch
 from torch import nn
diff --git a/zeta/nn/modules/mm_fusion.py b/zeta/nn/modules/mm_fusion.py
index 6c20b4b4..8f37d973 100644
--- a/zeta/nn/modules/mm_fusion.py
+++ b/zeta/nn/modules/mm_fusion.py
@@ -1,6 +1,5 @@
 import torch
 from torch import nn
-from einops import rearrange
 class MultiModalFusion(nn.Module):
diff --git a/zeta/nn/modules/modality_adaptive_module.py b/zeta/nn/modules/modality_adaptive_module.py
index 06343b1d..74bae13e 100644
--- a/zeta/nn/modules/modality_adaptive_module.py
+++ b/zeta/nn/modules/modality_adaptive_module.py
@@ -35,7 +35,7 @@ def __init__(self, dim: int, heads: int, dropout: float = 0.1):
         self.heads = heads
         self.dropout = dropout
         self.scale = dim**-0.5
-        assert dim % heads == 0, f"dim must alwasy be divisible by heads"
+        assert dim % heads == 0, "dim must alwasy be divisible by heads"
         # Initialize the normalization layers for each modality
         self.norm_text = nn.LayerNorm(dim)
diff --git a/zeta/nn/modules/multimodal_concat.py b/zeta/nn/modules/multimodal_concat.py
index 0a7f00a4..40e2060b 100644
--- a/zeta/nn/modules/multimodal_concat.py
+++ b/zeta/nn/modules/multimodal_concat.py
@@ -1,4 +1,3 @@
-import torch
 from einops import rearrange
diff --git a/zeta/nn/modules/nebula.py b/zeta/nn/modules/nebula.py
index f1b0bc88..c372c8c1 100644
--- a/zeta/nn/modules/nebula.py
+++ b/zeta/nn/modules/nebula.py
@@ -203,7 +203,7 @@ def determine_loss_function(self, y_pred, y_true):
         y_true_flat = y_true.flatten()
         if y_pred_flat.shape != y_true_flat.shape:
             y_pred_flat = y_pred_flat[: y_true_flat.numel()]
-        correlation = torch.tensor(
+        torch.tensor(
             np.corrcoef(y_pred_flat.cpu().numpy(), y_true_flat.cpu().numpy())[
                 0, 1
             ]
diff --git a/zeta/nn/modules/s4.py b/zeta/nn/modules/s4.py
index dd41d306..10bec348 100644
--- a/zeta/nn/modules/s4.py
+++ b/zeta/nn/modules/s4.py
@@ -1,5 +1,4 @@
 import torch
-from typing import Tuple
 def s4d_kernel(
diff --git a/zeta/nn/modules/scale.py b/zeta/nn/modules/scale.py
index e2af7571..443ab49a 100644
--- a/zeta/nn/modules/scale.py
+++ b/zeta/nn/modules/scale.py
@@ -1,4 +1,3 @@
-import torch
 from torch import nn
diff --git a/zeta/nn/modules/shift_tokens.py b/zeta/nn/modules/shift_tokens.py
index aeb34c9e..62723736 100644
--- a/zeta/nn/modules/shift_tokens.py
+++ b/zeta/nn/modules/shift_tokens.py
@@ -1,6 +1,5 @@
 import torch
 from torch import nn
-from einops import rearrange
 import torch.nn.functional as F
diff --git a/zeta/nn/modules/simple_res_block.py b/zeta/nn/modules/simple_res_block.py
index 106c6ba6..3b6cdede 100644
--- a/zeta/nn/modules/simple_res_block.py
+++ b/zeta/nn/modules/simple_res_block.py
@@ -1,4 +1,3 @@
-import torch
 from torch import nn
diff --git a/zeta/nn/modules/simple_rmsnorm.py b/zeta/nn/modules/simple_rmsnorm.py
index 7c5e7bd1..e3966ba7 100644
--- a/zeta/nn/modules/simple_rmsnorm.py
+++ b/zeta/nn/modules/simple_rmsnorm.py
@@ -1,4 +1,3 @@
-import torch
 import torch.nn.functional as F
 from torch import nn
diff --git a/zeta/nn/modules/spatial_downsample.py b/zeta/nn/modules/spatial_downsample.py
index b9f62fee..0b2a7de2 100644
--- a/zeta/nn/modules/spatial_downsample.py
+++ b/zeta/nn/modules/spatial_downsample.py
@@ -1,4 +1,3 @@
-import torch
 from torch import nn
 from einops import rearrange, pack, unpack
diff --git a/zeta/nn/modules/subln.py b/zeta/nn/modules/subln.py
index 01041e87..3b55ff1d 100644
--- a/zeta/nn/modules/subln.py
+++ b/zeta/nn/modules/subln.py
@@ -1,4 +1,3 @@
-import torch
 from torch import nn
diff --git a/zeta/nn/modules/transformations.py b/zeta/nn/modules/transformations.py
index f938c179..d72c407f 100644
--- a/zeta/nn/modules/transformations.py
+++ b/zeta/nn/modules/transformations.py
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates
-from typing import Optional, Sequence, Tuple
+from typing import Optional, Tuple
 import torch
 import torch.nn as nn
diff --git a/zeta/nn/modules/video_autoencoder.py b/zeta/nn/modules/video_autoencoder.py
index 3ead357d..3576c368 100644
--- a/zeta/nn/modules/video_autoencoder.py
+++ b/zeta/nn/modules/video_autoencoder.py
@@ -1,8 +1,7 @@
-import torch
 from torch import nn
 from typing import Union, Tuple
 import torch.nn.functional as F
-from einops import rearrange, reduce, repeat, pack, unpack
+from einops import pack, unpack
 # helper
diff --git a/zeta/ops/async_softmax.py b/zeta/ops/async_softmax.py
index 5fede6a9..85cac3c8 100644
--- a/zeta/ops/async_softmax.py
+++ b/zeta/ops/async_softmax.py
@@ -1,6 +1,5 @@
 # Import necessary libraries
 import torch
-import torch.nn.functional as F
 from torch import nn
diff --git a/zeta/optim/batched_optimizer.py b/zeta/optim/batched_optimizer.py
index 71248d7c..36cc0b5e 100644
--- a/zeta/optim/batched_optimizer.py
+++ b/zeta/optim/batched_optimizer.py
@@ -1,6 +1,5 @@
 import contextlib
 import logging
-import random
 from collections import defaultdict
 from typing import List, Optional, Tuple, Union
@@ -207,7 +206,6 @@ def step(self, closure=None):
             with torch.enable_grad():
                 loss = closure()
-        batch = True
         for group, group_params_names in zip(
             self.param_groups, self.parameters_names
@@ -471,7 +469,7 @@ def _step_one_batch(
         as a batch)
         state: state-dict for p, to look up the optimizer state
         """
-        lr = group["lr"]
+        group["lr"]
         size_update_period = group["size_update_period"]
         beta1 = group["betas"][0]
@@ -535,7 +533,7 @@ def _size_update(
         param_max_rms = group["param_max_rms"]
         eps = group["eps"]
         step = state["step"]
-        batch_size = p.shape[0]
+        p.shape[0]
         size_update_period = scale_grads.shape[0]
         # correct beta2 for the size update period: we will have
@@ -596,7 +594,7 @@ def _step(self, group: dict, p: Tensor, state: dict):
         beta1, beta2 = group["betas"]
         eps = group["eps"]
         param_min_rms = group["param_min_rms"]
-        step = state["step"]
+        state["step"]
         exp_avg_sq = state["exp_avg_sq"]
         exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=(1 - beta2))
diff --git a/zeta/rl/actor_critic.py b/zeta/rl/actor_critic.py
index 8b50b4c0..80e705a9 100644
--- a/zeta/rl/actor_critic.py
+++ b/zeta/rl/actor_critic.py
@@ -1,6 +1,5 @@
 import torch
 from torch import nn
-import torch.nn as optim
 class ActorCritic(nn.Module):
diff --git a/zeta/rl/ppo.py b/zeta/rl/ppo.py
index 0f4e5026..00bd243d 100644
--- a/zeta/rl/ppo.py
+++ b/zeta/rl/ppo.py
@@ -1,7 +1,5 @@
-import numpy as np
 import torch
 import torch.nn as nn
-import torch.optim as optim
 class ActorCritic(nn.Module):
diff --git a/zeta/structs/hierarchical_transformer.py b/zeta/structs/hierarchical_transformer.py
index 7447c24e..d7c75d1b 100644
--- a/zeta/structs/hierarchical_transformer.py
+++ b/zeta/structs/hierarchical_transformer.py
@@ -7,7 +7,7 @@
 import torch.nn.functional as F
 from einops import rearrange, repeat
 from einops.layers.torch import Rearrange
-from torch import einsum, nn
+from torch import nn
 from vector_quantize_pytorch import RandomProjectionQuantizer
 from zeta.structs.attn_layers import rotate_half
diff --git a/zeta/structs/mag_vit.py b/zeta/structs/mag_vit.py
index 4f5f102d..e31350d1 100644
--- a/zeta/structs/mag_vit.py
+++ b/zeta/structs/mag_vit.py
@@ -1,10 +1,9 @@
 # from lucidrain
-from math import log2
 import torch
 import torch.nn.functional as F
-from torch import nn, einsum, Tensor
+from torch import nn, Tensor
 from torch.nn import Module, ModuleList
 from collections import namedtuple
diff --git a/zeta/structs/multi_modal_projector.py b/zeta/structs/multi_modal_projector.py
index 8ce56246..c5e3eefb 100644
--- a/zeta/structs/multi_modal_projector.py
+++ b/zeta/structs/multi_modal_projector.py
@@ -1,4 +1,3 @@
-import torch
 import torch.nn as nn
 import re
diff --git a/zeta/tokenizers/tokenmonster.py b/zeta/tokenizers/tokenmonster.py
index b4bf5570..b6302b4a 100644
--- a/zeta/tokenizers/tokenmonster.py
+++ b/zeta/tokenizers/tokenmonster.py
@@ -1,4 +1,3 @@
-import numpy as np
 import tokenmonster
diff --git a/zeta/training/hive_trainer.py b/zeta/training/hive_trainer.py
index f5fc8002..9496d8fd 100644
--- a/zeta/training/hive_trainer.py
+++ b/zeta/training/hive_trainer.py
@@ -17,8 +17,6 @@
 """
-import torch
-import torch.distributed as dist
 import threading
 from zeta.training.train import Trainer
diff --git a/zeta/utils/save_load_wrapper.py b/zeta/utils/save_load_wrapper.py
index 133114ea..b1d63e19 100644
--- a/zeta/utils/save_load_wrapper.py
+++ b/zeta/utils/save_load_wrapper.py
@@ -3,7 +3,6 @@
 import torch
 from beartype import beartype
 from beartype.typing import Optional, Callable
-from packaging import version
 from torch.nn import Module