Commit
feat: add tests for embedding layer
SauravMaheshkar committed Sep 15, 2024
1 parent 8a64274 commit 3d9dc3f
Showing 4 changed files with 335 additions and 7 deletions.
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -14,14 +14,19 @@ dependencies = [
"jax>=0.4.31",
"mypy>=1.11.2",
"pillow>=10.4.0",
"pytest>=8.3.2",
"ruff>=0.6.3",
"transformers>=4.44.2",
]

[project.scripts]
jflux = "jflux.cli:app"

[tool.uv]
dev-dependencies = [
"pytest>=8.3.3",
"torch>=2.4.1",
]

[tool.uv.sources]
flux-jax = { workspace = true }

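This hunk appears to move pytest from the runtime dependencies into uv's dev-dependency group (bumped to >=8.3.3) and to add torch there, since PyTorch is only needed as a reference implementation inside the new tests. Assuming a standard uv setup, the suite can then be run with uv sync followed by uv run pytest from the repository root.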
60 changes: 60 additions & 0 deletions tests/test_layers.py
@@ -0,0 +1,60 @@
import chex
import jax.numpy as jnp
import torch
import torch.nn as nn
from einops import rearrange

from jflux.layers import Embed


def torch_rope(pos, dim: int, theta: int):
    assert dim % 2 == 0
    scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim
    omega = 1.0 / (theta**scale)
    out = torch.einsum("...n,d->...nd", pos, omega)
    out = torch.stack(
        [torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1
    )
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
    return out.float()


class EmbedND(nn.Module):
    def __init__(self, dim: int, theta: int, axes_dim: list[int]):
        super().__init__()
        self.dim = dim
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids):
        n_axes = ids.shape[-1]
        emb = torch.cat(
            [
                torch_rope(ids[..., i], self.axes_dim[i], self.theta)
                for i in range(n_axes)
            ],
            dim=-3,
        )

        return emb.unsqueeze(1)


class EmbedTestCase(chex.TestCase):
    def test_embed(self):
        # Initialize layers
        pytorch_embed_layer = EmbedND(512, 10000, [64, 64, 64, 64])
        jax_embed_layer = Embed(512, 10000, [64, 64, 64, 64])

        # Generate random inputs
        torch_ids = torch.randint(0, 10000, (1, 32, 4), dtype=torch.float64)
        jax_ids = jnp.asarray(torch_ids.numpy())

        # Forward pass
        jax_output = jax_embed_layer(jax_ids)
        pytorch_output = pytorch_embed_layer(torch_ids)

        # Assertions
        chex.assert_equal_shape([jax_output, jnp.asarray(pytorch_output.numpy())])
        chex.assert_trees_all_close(
            jax_output, jnp.asarray(pytorch_output.numpy()), rtol=1e-3, atol=1e-3
        )
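For reference, below is a minimal jax.numpy sketch of the rotary-embedding math that Embed is expected to reproduce; it simply mirrors torch_rope above. The actual implementation lives in jflux (jflux.layers.Embed, building on the rope function from jflux.math) and may differ in structure, so treat this as an illustration of what the parity test compares, not as project code.

# Assumed JAX counterpart of torch_rope (illustrative only).
import jax.numpy as jnp
from einops import rearrange

def jax_rope(pos, dim: int, theta: int):
    assert dim % 2 == 0
    # JAX defaults to float32, whereas the torch reference runs in float64.
    scale = jnp.arange(0, dim, 2, dtype=jnp.float32) / dim
    omega = 1.0 / (theta**scale)
    out = jnp.einsum("...n,d->...nd", pos, omega)
    out = jnp.stack(
        [jnp.cos(out), -jnp.sin(out), jnp.sin(out), jnp.cos(out)], axis=-1
    )
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
    return out.astype(jnp.float32)

The float64 (PyTorch) versus float32 (JAX default) mismatch is presumably why the test compares outputs with rtol=1e-3 and atol=1e-3 rather than requiring exact equality.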
3 changes: 2 additions & 1 deletion tests/test_math.py
@@ -5,7 +5,6 @@
from jflux.math import attention, rope, apply_rope


@pytest.mark.xfail
class TestAttentionMechanism(unittest.TestCase):
    def setUp(self):
        self.batch_size = 2
@@ -29,6 +28,7 @@ def test_rope(self):
            rope_output.shape, expected_shape, "rope function output shape is incorrect"
        )

    @pytest.mark.xfail
    def test_apply_rope(self):
        pos = jnp.expand_dims(jnp.arange(self.seq_len), axis=0)
        pos = jnp.repeat(pos, self.batch_size, axis=0)
@@ -43,6 +43,7 @@
            xk_out.shape, self.k.shape, "apply_rope xk output shape is incorrect"
        )

    @pytest.mark.xfail
    def test_attention(self):
        pos = jnp.expand_dims(jnp.arange(self.seq_len), axis=0)
        pos = jnp.repeat(pos, self.batch_size, axis=0)
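This change appears to narrow the expected-failure scope: the -5,7 +5,6 hunk drops the class-level @pytest.mark.xfail, and the two later hunks mark only test_apply_rope and test_attention, so a regression in test_rope would once again fail the suite. A minimal illustration of the pattern (example code, not part of the jflux tests) is shown below; pytest's xfail mark can be applied directly to unittest.TestCase methods.

import unittest

import pytest


class Example(unittest.TestCase):
    def test_enforced(self):
        # Runs normally; a failure here fails the run.
        self.assertEqual(1 + 1, 2)

    @pytest.mark.xfail
    def test_known_gap(self):
        # Expected to fail; reported as xfail instead of a failure.
        self.assertEqual(1 + 1, 3)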
