#0: use prepare_device_inputs
kpaigwar committed Oct 23, 2024
1 parent 7c77caa commit f226c3b
Showing 1 changed file with 0 additions and 6 deletions.
6 changes: 0 additions & 6 deletions models/demos/t3000/llama2_70b/tests/test_llama_perf_decode.py
@@ -321,12 +321,6 @@ def run_test_LlamaModel_end_to_end_hybrid_data_tensor_parallel(
     ##### Prepare Inputs #####
     prev_pos = total_len - 1
     tt_inp_emb, prev_pos, rot_mat, cache_idxs, _ = tt_model.prepare_device_inputs(tokens, prev_pos, mode="decode")
-    tt_inp_emb = ttnn.to_device(tt_inp_emb, submesh, memory_config=ttnn.DRAM_MEMORY_CONFIG)
-    tt_inp_emb = tt_model.tt_embd(tt_inp_emb)
-    tt_inp_emb = ttnn.interleaved_to_sharded(tt_inp_emb, tt_model.model_config["WORD_EMBEDDING_OUTPUT_MEMCFG"])
-
-    rot_mat = ttnn.to_device(rot_mat, submesh, memory_config=tt_model.model_config["ROT_MAT_MM_IN1_MEMCFG"])
-    cache_idxs = ttnn.to_device(cache_idxs, submesh, memory_config=ttnn.DRAM_MEMORY_CONFIG)

     ##### Compile Model #####
     logger.info("Compiling model")
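In short, this commit leans on prepare_device_inputs to hand back decode inputs that are already placed on the device, which makes the manual transfer, embedding lookup, and resharding steps after the call redundant. The sketch below shows the resulting flow; it assumes, based only on this diff and not on the helper's source, that prepare_device_inputs(tokens, prev_pos, mode="decode") returns tt_inp_emb, rot_mat, and cache_idxs already resident and sharded on the submesh.

# Decode-input preparation after this commit. Assumption (inferred from the diff,
# not from the implementation of prepare_device_inputs): the helper now returns
# device-resident, correctly sharded tensors, so no further ttnn.to_device /
# tt_embd / interleaved_to_sharded calls are needed.

##### Prepare Inputs #####
prev_pos = total_len - 1
tt_inp_emb, prev_pos, rot_mat, cache_idxs, _ = tt_model.prepare_device_inputs(
    tokens, prev_pos, mode="decode"
)

# The removed lines did this placement by hand:
#   tt_inp_emb: ttnn.to_device -> tt_model.tt_embd -> ttnn.interleaved_to_sharded
#   rot_mat:    ttnn.to_device with ROT_MAT_MM_IN1_MEMCFG
#   cache_idxs: ttnn.to_device with ttnn.DRAM_MEMORY_CONFIG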
