Skip to content

Commit

Permalink
Unit scales in FP8 CI scenarios (HabanaAI#633)
Browse files (browse the repository at this point in the history)
  • Loading branch information
afierka-intel authored Dec 16, 2024
1 parent c9a740f commit da61ecf
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,10 +5,10 @@ tasks:
- name: "gsm8k_cot_llama"
metrics:
- name: "exact_match,strict-match"
value: 0.8317
value: 0.664
- name: "exact_match,flexible-extract"
value: 0.8355
limit: null
value: 0.676
limit: 250
num_fewshot: 8
dtype: "bfloat16"
fewshot_as_multiturn: true
Expand Down
16 changes: 16 additions & 0 deletions .jenkins/lm-eval-harness/inc_unit_scales_config.json
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
{
"mode": "QUANTIZE",
"observer": "maxabs",
"scale_method": "unit_scale",
"allowlist": {
"types": [],
"names": []
},
"blocklist": {
"types": [],
"names": [
"lm_head"
]
},
"dump_stats_path": ""
}
8 changes: 3 additions & 5 deletions .jenkins/lm-eval-harness/test_lm_eval_correctness.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,12 +27,10 @@
TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)


def setup_fp8(model_path, device_type):
flavor = f"g{device_type[-1]}"
normalized_model_name = Path(model_path).parts[-1].lower()
def setup_fp8():
os.environ[
"QUANT_CONFIG"] = \
f"/software/data/vllm-benchmarks/inc/{normalized_model_name}/maxabs_quant_{flavor}.json"
"inc_unit_scales_config.json"


def fail_on_exit():
Expand Down Expand Up @@ -147,7 +145,7 @@ def test_lm_eval_correctness(record_xml_attribute, record_property):

# Set up environment for FP8 inference
if eval_config.get("fp8"):
setup_fp8(eval_config["model_name"], platform)
setup_fp8()
# Launch eval requests.
start_time = time.perf_counter()
results = launch_lm_eval(eval_config)
Expand Down

0 comments on commit da61ecf

Please sign in to comment.