Further push of embedding op #104. Hitting L1 issues on Metal. (#197)
nvukobratTT authored Sep 2, 2024
1 parent 831b1b5 commit 3dec39a
Showing 3 changed files with 36 additions and 5 deletions.
3 changes: 1 addition & 2 deletions pybuda/csrc/passes/lower_to_mlir.cpp
@@ -497,11 +497,10 @@ class MLIRGenerator
void init_lowering_handler_map()
{
    lowering_handler_map["add"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::AddOp>;
+   lowering_handler_map["embedding"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::EmbeddingOp>;
    lowering_handler_map["matmul"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::MatmulOp>;
    lowering_handler_map["multiply"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::MultiplyOp>;
    lowering_handler_map["reduce_avg"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::MeanOp>;
-   lowering_handler_map["reduce_avg"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::MeanOp>;
    lowering_handler_map["reduce_sum"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::SumOp>;
-   lowering_handler_map["reduce_sum"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::SumOp>;
    lowering_handler_map["relu"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::ReluOp>;
    lowering_handler_map["softmax"] = &MLIRGenerator::emit_mlir_ttforge_op<mlir::tt::ttir::SoftmaxOp>;
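For context, this table maps TTForge op names to MLIRGenerator member functions, so lowering an op becomes a single name lookup plus dispatch. A minimal Python sketch of the pattern (illustrative only; handlers and emit_op are hypothetical stand-ins, not the actual C++ API):

    # Hypothetical sketch of the name -> emitter dispatch used above.
    handlers = {
        "add": lambda op: f"ttir.add({op})",
        "embedding": lambda op: f"ttir.embedding({op})",
    }

    def emit_op(name: str, op: str) -> str:
        if name not in handlers:
            raise KeyError(f"no lowering handler for op: {name}")
        return handlers[name](op)  # dispatch to the registered emitter

A side note on the cleanup above: assigning the same key twice in a map simply overwrites the first entry, so the duplicate reduce_avg/reduce_sum registrations were harmless but dead code.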
5 changes: 2 additions & 3 deletions pybuda/test/mlir/llama/tests/test_llama_embedding.py
@@ -9,7 +9,7 @@
from pybuda.op.eval.common import compare_with_golden_pcc


-@pytest.mark.xfail(reason="Embedding op is not supported on MLIR.")
+@pytest.mark.xfail(reason="L1 allocation issue on Metal")
def test_llama_embedding():
# Load Llama 3B model and tokenizer
framework_model = load_model()
@@ -20,7 +20,7 @@ def test_llama_embedding():
inputs = [
torch.randint(0, vocab_size, (1, 12)), # Input token IDs
]

# Sanity run
golden_output = framework_model(*inputs)

@@ -33,4 +33,3 @@

# Validate results
assert compare_with_golden_pcc(golden=golden_output, calculated=tt_out[0], pcc=0.99)
-
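Both this test and the new one below gate on compare_with_golden_pcc, which passes when the Pearson correlation coefficient (PCC) between the golden and device outputs clears the given threshold. A minimal sketch of an equivalent check (illustrative; not pybuda's actual implementation):

    import torch

    def pcc(golden: torch.Tensor, calculated: torch.Tensor) -> float:
        # Pearson correlation of the flattened, mean-centered tensors.
        g = golden.flatten().double() - golden.double().mean()
        c = calculated.flatten().double() - calculated.double().mean()
        return float((g @ c) / (g.norm() * c.norm()))

    # A 0.99 threshold tolerates small numeric drift between framework
    # and device execution while still catching real mismatches.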
33 changes: 33 additions & 0 deletions pybuda/test/mlir/test_ops.py
@@ -295,3 +295,36 @@ def forward(self, x):

co_out = [co.to("cpu") for co in co_out]
assert compare_with_golden_pcc(golden=fw_out, calculated=co_out[0], pcc=0.99)


# @pytest.mark.parametrize("vocab_size", [2048, 16384, 32000])
# @pytest.mark.parametrize("token_num", [1, 7, 32])
# @pytest.mark.parametrize("embedding_dim", [128, 512, 3200])
@pytest.mark.xfail(reason="L1 allocation issue on Metal")
@pytest.mark.parametrize("vocab_size", [32000])
@pytest.mark.parametrize("token_num", [12])
@pytest.mark.parametrize("embedding_dim", [3200])
def test_embedding(vocab_size, token_num, embedding_dim):
compiler_cfg = pybuda.config._get_global_compiler_config()
compiler_cfg.enable_tvm_cpu_fallback = False

class Embedding(nn.Module):
def __init__(self):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)

def forward(self, x):
return self.embedding(x)

inputs = [
torch.randint(0, vocab_size, (1, token_num)),
]

framework_model = Embedding()
fw_out = framework_model(*inputs)

compiled_model = pybuda.compile(framework_model, sample_inputs=inputs)
co_out = compiled_model(*inputs)

co_out = [co.to("cpu") for co in co_out]
assert compare_with_golden_pcc(golden=fw_out, calculated=co_out[0], pcc=0.99)
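For reference on why this configuration is demanding: nn.Embedding is a learned lookup table whose forward pass is a row-gather on the weight matrix, so the 32000 x 3200 table here (roughly 102M parameters) must be resident on the device, far more than fits in a single core's L1 SRAM, which lines up with the xfail reason above. A minimal sketch of the equivalent lookup:

    import torch

    vocab_size, embedding_dim, token_num = 32000, 3200, 12
    weight = torch.randn(vocab_size, embedding_dim)     # the embedding table
    ids = torch.randint(0, vocab_size, (1, token_num))  # input token IDs

    # An embedding forward pass is a plain row-gather on the weight table:
    out = weight[ids]
    assert out.shape == (1, token_num, embedding_dim)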
