diff --git a/models/demos/falcon7b/tests/ci/test_falcon_end_to_end_prefill.py b/models/demos/falcon7b/tests/ci/test_falcon_end_to_end_prefill.py index 2b2f05b3e74..b61ce7278ca 100644 --- a/models/demos/falcon7b/tests/ci/test_falcon_end_to_end_prefill.py +++ b/models/demos/falcon7b/tests/ci/test_falcon_end_to_end_prefill.py @@ -13,7 +13,7 @@ "llm_mode, num_layers, batch, seq_len, kv_cache_len, model_config_str, expected_output_pcc, expected_k_cache_pcc, expected_v_cache_pcc", ( ("prefill", 32, 1, 32, 0, "BFLOAT16-DRAM", 0.97, 0.95, 0.95), - ("prefill", 32, 1, 128, 0, "BFLOAT16-DRAM", 0.97, 0.99, 0.96), + ("prefill", 32, 1, 128, 0, "BFLOAT16-DRAM", 0.98, 0.99, 0.97), ("prefill", 32, 1, 1024, 0, "BFLOAT16-DRAM", 0.99, 0.99, 0.97), # ("prefill", 32, 1, 2048, 0, "BFLOAT16-DRAM", 0.99, 0.99, 0.97), # CI machines don't have enough RAM memory to run this test atm; to reduce memory usage (#8349) ), diff --git a/models/demos/falcon7b/tests/test_perf_falcon.py b/models/demos/falcon7b/tests/test_perf_falcon.py index f7714ee69d7..8bad903a6c5 100644 --- a/models/demos/falcon7b/tests/test_perf_falcon.py +++ b/models/demos/falcon7b/tests/test_perf_falcon.py @@ -509,7 +509,7 @@ def run_perf_wh_bare_metal( @pytest.mark.parametrize( "llm_mode, num_layers, batch, seq_len, kv_cache_len, model_config_str, expected_output_pcc, expected_k_cache_pcc, expected_v_cache_pcc, expected_inference_time", ( - ("prefill", 32, 1, 128, 0, "BFLOAT16-DRAM", 0.97, 0.99, 0.96, 0.1), + ("prefill", 32, 1, 128, 0, "BFLOAT16-DRAM", 0.98, 0.99, 0.97, 0.1), ("prefill", 32, 1, 1024, 0, "BFLOAT16-DRAM", 0.99, 0.99, 0.97, 1), ("prefill", 32, 1, 2048, 0, "BFLOAT16-DRAM", 0.99, 0.99, 0.97, 2), ("decode", 32, 32, 1, 128, "BFLOAT16-DRAM", 0.91, 0.92, 0.93, 0.15),