From 516f2a35cd6b809dbbe518eacb791f6e3acf632e Mon Sep 17 00:00:00 2001
From: Alexander Kozlov
Date: Wed, 11 Dec 2024 18:35:55 +0300
Subject: [PATCH] Set KV-cache to FP16 in LLM evaluation tests (#27956)

Co-authored-by: Alina Kladieva
---
 tests/llm/accuracy_conformance.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/llm/accuracy_conformance.py b/tests/llm/accuracy_conformance.py
index 7f75a8e912bbd6..4c6a1e140e597b 100644
--- a/tests/llm/accuracy_conformance.py
+++ b/tests/llm/accuracy_conformance.py
@@ -98,7 +98,7 @@ def teardown_module():
     test_scope,
 )
 def test_accuracy_conformance(model_path, model_type, precision, gt_data, device):
-    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device)
+    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device, ov_config={"KV_CACHE_PRECISION": "f16"})
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     evaluator = wwb.Evaluator(
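
Note: a minimal, self-contained sketch of what this ov_config option does when
loading a model through optimum-intel. The model path, device, and prompt below
are placeholder assumptions for illustration, not part of the patch:

    from optimum.intel import OVModelForCausalLM
    from transformers import AutoTokenizer

    # Hypothetical path; any OpenVINO-exported causal LM directory works here.
    model_path = "path/to/ov_model"

    # KV_CACHE_PRECISION pins the key/value cache to FP16 instead of the
    # plugin default (which may be a compressed type such as u8), trading
    # some memory for more stable accuracy in evaluation runs.
    target_model = OVModelForCausalLM.from_pretrained(
        model_path,
        device="CPU",
        ov_config={"KV_CACHE_PRECISION": "f16"},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Quick smoke test: generate a few tokens with the FP16 KV-cache active.
    inputs = tokenizer("Hello, OpenVINO!", return_tensors="pt")
    output_ids = target_model.generate(**inputs, max_new_tokens=8)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))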