Unit scales in FP8 CI scenarios (HabanaAI#633)

nageshdn · Dec 16, 2024 · da61ecf
1 parent c9a740f
commit da61ecf
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 8 deletions.
diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-fp8.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-fp8.yaml
@@ -5,10 +5,10 @@ tasks:
 - name: "gsm8k_cot_llama"
   metrics:
   - name: "exact_match,strict-match"
-    value: 0.8317
+    value: 0.664
   - name: "exact_match,flexible-extract"
-    value: 0.8355
-limit: null
+    value: 0.676
+limit: 250
 num_fewshot: 8
 dtype: "bfloat16"
 fewshot_as_multiturn: true

diff --git a/.jenkins/lm-eval-harness/inc_unit_scales_config.json b/.jenkins/lm-eval-harness/inc_unit_scales_config.json
@@ -0,0 +1,16 @@
+{
+    "mode": "QUANTIZE",
+    "observer": "maxabs",
+    "scale_method": "unit_scale",
+    "allowlist": {
+        "types": [],
+        "names": []
+    },
+    "blocklist": {
+        "types": [],
+        "names": [
+            "lm_head"
+        ]
+    },
+    "dump_stats_path": ""
+}
diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py
@@ -27,12 +27,10 @@
 TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)
 
 
-def setup_fp8(model_path, device_type):
-    flavor = f"g{device_type[-1]}"
-    normalized_model_name = Path(model_path).parts[-1].lower()
+def setup_fp8():
     os.environ[
         "QUANT_CONFIG"] = \
-            f"/software/data/vllm-benchmarks/inc/{normalized_model_name}/maxabs_quant_{flavor}.json"
+            "inc_unit_scales_config.json"
 
 
 def fail_on_exit():
@@ -147,7 +145,7 @@ def test_lm_eval_correctness(record_xml_attribute, record_property):
 
         # Set up environment for FP8 inference
         if eval_config.get("fp8"):
-            setup_fp8(eval_config["model_name"], platform)
+            setup_fp8()
         # Launch eval requests.
         start_time = time.perf_counter()
         results = launch_lm_eval(eval_config)