#8729: TESTING - xdist + reset mechanism on pipelines
- fd nightly
- model perf
- t3k frequent + demo + model perf
vtangTT committed Jun 14, 2024
1 parent 2ec4c93 commit 70db4f1
Showing 13 changed files with 70 additions and 68 deletions.
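Every pytest invocation touched by this commit follows the same pattern: the run gains pytest-xdist's -n auto flag, which starts as many worker processes as detected CPUs, plus a --metal-cleanup=1 option that appears to drive the reset mechanism named in the title. --metal-cleanup is not a built-in pytest flag, so it is presumably registered through a pytest_addoption hook in a conftest.py among the changed files not rendered below; that hook is an assumption, not something shown in this diff. A minimal before/after sketch of the pattern, using a path and marker taken from run_performance.sh:

# before: serial run, no explicit cleanup between tests
env pytest models/demos/ttnn_falcon7b/tests -m $test_marker

# after: parallel workers via pytest-xdist, plus the (assumed custom) cleanup/reset flag
env pytest -n auto models/demos/ttnn_falcon7b/tests -m $test_marker --metal-cleanup=1

Whether -n auto is safe on these runners depends on each worker getting its own device; the TESTING label in the commit title suggests this is still being validated.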
1 change: 1 addition & 0 deletions test.sh
@@ -0,0 +1 @@
+pytest tests/ttnn/python_api_testing/non_working_unit_tests/grayskull/test_eltwise_softplus_inf.py
42 changes: 21 additions & 21 deletions tests/scripts/run_performance.sh
@@ -11,19 +11,19 @@ run_perf_models_other() {
local tt_arch=$1
local test_marker=$2

-env pytest tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker
+env pytest -n auto tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker --metal-cleanup=1

-env pytest tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker
+env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker --metal-cleanup=1

-env pytest models/demos/ttnn_falcon7b/tests -m $test_marker
+env pytest -n auto models/demos/ttnn_falcon7b/tests -m $test_marker --metal-cleanup=1

# Separate calls since we can't mix switching between number of cqs
-env pytest models/demos/resnet/tests/test_perf_resnet.py -m $test_marker
-env pytest models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker
+env pytest -n auto models/demos/resnet/tests/test_perf_resnet.py -m $test_marker --metal-cleanup=1
+env pytest -n auto models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker --metal-cleanup=1

-env pytest tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker
+env pytest -n auto tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker --metal-cleanup=1

-env pytest models/demos/metal_BERT_large_11/tests -m $test_marker
+env pytest -n auto models/demos/metal_BERT_large_11/tests -m $test_marker --metal-cleanup=1

## Merge all the generated reports
env python models/perf/merge_perf_results.py
@@ -33,13 +33,13 @@ run_perf_models_llm_javelin() {
local tt_arch=$1
local test_marker=$2

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m $test_marker
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m $test_marker --metal-cleanup=1

if [ "$tt_arch" == "wormhole_b0" ]; then
-env pytest models/demos/mamba/tests -m $test_marker
+env pytest -n auto models/demos/mamba/tests -m $test_marker --metal-cleanup=1
fi

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/mistral7b/tests -m $test_marker
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/wormhole/mistral7b/tests -m $test_marker --metal-cleanup=1

## Merge all the generated reports
env python models/perf/merge_perf_results.py
@@ -50,7 +50,7 @@ run_perf_models_cnn_javelin() {
local test_marker=$2

# Run tests
-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/device_perf_tests/stable_diffusion -m $test_marker
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/device_perf_tests/stable_diffusion -m $test_marker --metal-cleanup=1
#env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker

## Merge all the generated reports
@@ -60,30 +60,30 @@ run_perf_models_cnn_javelin() {
run_device_perf_models() {
local test_marker=$1

-env pytest tests/device_perf_tests/stable_diffusion -m $test_marker
+env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --metal-cleanup=1

if [ "$tt_arch" == "grayskull" ]; then
#TODO(MO): Until #6560 is fixed, GS device profiler tests are grouped with
#Model Device perf regression tests to make sure they run on no-soft-reset BMs
tests/scripts/run_profiler_regressions.sh PROFILER_NO_RESET

-env pytest models/demos/metal_BERT_large_11/tests -m $test_marker
+env pytest models/demos/metal_BERT_large_11/tests -m $test_marker --metal-cleanup=1

-env pytest models/demos/ttnn_falcon7b/tests -m $test_marker
+env pytest models/demos/ttnn_falcon7b/tests -m $test_marker --metal-cleanup=1

-env pytest models/demos/bert/tests -m $test_marker
+env pytest models/demos/bert/tests -m $test_marker --metal-cleanup=1

-env pytest models/demos/wormhole/mistral7b/tests -m $test_marker
+env pytest models/demos/wormhole/mistral7b/tests -m $test_marker --metal-cleanup=1

-env pytest "tests/ttnn/integration_tests/resnet/test_performance.py" -m $test_marker
+env pytest "tests/ttnn/integration_tests/resnet/test_performance.py" -m $test_marker --metal-cleanup=1

-env pytest models/demos/resnet/tests -m $test_marker
+env pytest models/demos/resnet/tests -m $test_marker --metal-cleanup=1
fi

if [ "$tt_arch" == "wormhole_b0" ]; then
-env pytest models/demos/mamba/tests -m $test_marker
+env pytest models/demos/mamba/tests -m $test_marker --metal-cleanup=1

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/metal_BERT_large_11/tests -m $test_marker
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/metal_BERT_large_11/tests -m $test_marker --metal-cleanup=1
#env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker
fi

2 changes: 1 addition & 1 deletion tests/scripts/run_tests.sh
@@ -81,7 +81,7 @@ run_frequent_api_pipeline_tests() {
./tests/scripts/run_python_api_unit_tests.sh
else
if [[ $tt_arch == "wormhole_b0" ]]; then
-pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly
+pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly --metal-cleanup=1
else
echo "API tests are not available for fast dispatch because they're already covered in post-commit"
fi
2 changes: 1 addition & 1 deletion tests/scripts/single_card/nightly/run_common_models.sh
@@ -9,4 +9,4 @@ fi

echo "Running common models for archs"

-env pytest tests/nightly/common_models/
+env pytest -n auto tests/nightly/common_models/ --metal-cleanup=1
4 changes: 2 additions & 2 deletions tests/scripts/single_card/nightly/run_gs_only.sh
@@ -9,6 +9,6 @@ fi

echo "Running model nightly tests for GS only"

-env pytest models/demos/resnet/tests/test_metal_resnet50_performant.py
+env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_performant.py --metal-cleanup=1

-env pytest models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py
+env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py --metal-cleanup=1
2 changes: 1 addition & 1 deletion tests/scripts/single_card/nightly/run_ttnn.sh
@@ -9,4 +9,4 @@ fi

echo "Running ttnn nightly tests for GS only"

-env pytest tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal"
+env pytest -n auto tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal" --metal-cleanup=1
4 changes: 2 additions & 2 deletions tests/scripts/single_card/nightly/run_wh_b0_only.sh
@@ -8,5 +8,5 @@ if [[ -z "$TT_METAL_HOME" ]]; then
fi

echo "Running nightly tests for WH B0 only"
-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/nightly/wh_b0_only_eth
-env pytest tests/nightly/wh_b0_only
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/nightly/wh_b0_only_eth --metal-cleanup=1
+env pytest -n auto tests/nightly/wh_b0_only --metal-cleanup=1
2 changes: 1 addition & 1 deletion tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
@@ -9,4 +9,4 @@ fi

echo "Running unstable nightly tests for WH B0 only"

-SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest tests/ttnn/integration_tests/stable_diffusion
+SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest -n auto tests/ttnn/integration_tests/stable_diffusion --metal-cleanup=1
14 changes: 7 additions & 7 deletions tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -9,10 +9,10 @@ run_t3000_falcon40b_tests() {
echo "LOG_METAL: Running run_t3000_falcon40b_tests"

# Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py --metal-cleanup=1

# Falcon40B end to end demo (prefill + decode)
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -27,12 +27,12 @@ run_t3000_falcon7b_tests(){
echo "LOG_METAL: Running run_t3000_falcon7b_tests"

# Falcon7B demo (perf verification and output verification)
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_stochastic_verify]
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_greedy_verify]
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_stochastic_verify] --metal-cleanup=1
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_greedy_verify] --metal-cleanup=1

# Falcon7B perplexity test (prefill and decode)
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[1-True-prefill_seq1024_dram]
-# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[1-True-decode_1024_l1_sharded] # Disabled due to Issue #9268
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[1-True-prefill_seq1024_dram] --metal-cleanup=1
+# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[1-True-decode_1024_l1_sharded] # Disabled due to Issue #9268

# Record the end time
end_time=$(date +%s)
@@ -47,7 +47,7 @@ run_t3000_mixtral_tests() {
echo "LOG_METAL: Running run_t3000_mixtral8x7b_tests"

# mixtral8x7b 8 chip demo test - 100 token generation with general weights (env flags set inside the test)
-pytest models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights]
+pytest -n auto models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
28 changes: 14 additions & 14 deletions tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -8,8 +8,8 @@ run_t3000_ethernet_tests() {

echo "LOG_METAL: Running run_t3000_ethernet_tests"

-pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py
-pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py
+pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py --metal-cleanup=1
+pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -28,10 +28,10 @@ run_t3000_llama2_70b_tests() {
export LLAMA_TOKENIZER_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/tokenizer.model
export LLAMA_CACHE_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/llama-data-cache/weights-cache-2

-pytest models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py
-pytest models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py
-pytest models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py
-pytest models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py
+pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py --metal-cleanup=1
+pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py --metal-cleanup=1
+pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py --metal-cleanup=1
+pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -48,8 +48,8 @@ run_t3000_llama2_70b_experimental_tests() {
# Removing tests to reduce the time taken to run the tests
# pytest models/experimental/llama2_70b/tests/test_llama_mlp_t3000.py
# pytest models/experimental/llama2_70b/tests/test_llama_attention_t3000.py
-pytest models/experimental/llama2_70b/tests/test_llama_decoder_t3000.py
-pytest models/experimental/llama2_70b/tests/test_llama_model_t3000.py
+pytest -n auto models/experimental/llama2_70b/tests/test_llama_decoder_t3000.py --metal-cleanup=1
+pytest -n auto models/experimental/llama2_70b/tests/test_llama_model_t3000.py --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -64,7 +64,7 @@ run_t3000_mixtral_tests() {
echo "LOG_METAL: Running run_t3000_mixtral_tests"

# mixtral8x7b 8 chip decode model test (env flags set inside the test)
-pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc]
+pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc] --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -78,7 +78,7 @@ run_t3000_tteager_tests() {

echo "LOG_METAL: Running run_t3000_tteager_tests"

-pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit
+pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -92,10 +92,10 @@ run_t3000_falcon40b_tests() {

echo "LOG_METAL: Running run_t3000_falcon40b_tests"

-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py
-WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_mlp.py --metal-cleanup=1
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_attention.py --metal-cleanup=1
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_decoder.py --metal-cleanup=1
+WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_causallm.py --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
8 changes: 4 additions & 4 deletions tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -8,7 +8,7 @@ run_t3000_falcon7b_tests() {

echo "LOG_METAL: Running run_t3000_falcon7b_tests"

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m "model_perf_t3000"
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m "model_perf_t3000" --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -22,7 +22,7 @@ run_t3000_mixtral_tests() {

echo "LOG_METAL: Running run_t3000_mixtral_tests"

-env pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py::test_mixtral_model_perf[wormhole_b0-True-2048-150-0.025] -m "model_perf_t3000"
+env pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py::test_mixtral_model_perf[wormhole_b0-True-2048-150-0.025] -m "model_perf_t3000" --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -36,7 +36,7 @@ run_t3000_llama2_70b_tests() {

echo "LOG_METAL: Running run_t3000_llama2_70b_tests"

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000"
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" --metal-cleanup=1

# Record the end time
end_time=$(date +%s)
@@ -50,7 +50,7 @@ run_t3000_falcon40b_tests() {

echo "LOG_METAL: Running run_t3000_falcon40b_tests"

-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000"
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" --metal-cleanup=1

# Record the end time
end_time=$(date +%s)