Set KV-cache to FP16 in LLM evaluation tests (#27956)
Co-authored-by: Alina Kladieva <[email protected]>
AlexKoff88 and akladiev authored Dec 11, 2024
1 parent 06de644 commit 516f2a3
Showing 1 changed file with 1 addition and 1 deletion.
tests/llm/accuracy_conformance.py (1 addition, 1 deletion)
@@ -98,7 +98,7 @@ def teardown_module():
     test_scope,
 )
 def test_accuracy_conformance(model_path, model_type, precision, gt_data, device):
-    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device)
+    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device, ov_config={"KV_CACHE_PRECISION": "f16"})
     tokenizer = AutoTokenizer.from_pretrained(model_path)

     evaluator = wwb.Evaluator(
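For readers unfamiliar with the optimum-intel API in the changed line: passing ov_config to OVModelForCausalLM.from_pretrained forwards OpenVINO runtime properties to the compiled model, and pinning KV_CACHE_PRECISION to "f16" keeps the key/value cache in FP16 rather than letting the runtime pick a lower (compressed) precision, which makes accuracy measurements more stable. Below is a minimal, self-contained sketch of the same pattern; the model path, device choice, prompt, and generation settings are illustrative assumptions, not part of this commit.

    # Sketch of the pattern used in the test. Assumptions: "ov_model_dir" is a
    # hypothetical local directory with an exported OpenVINO model; device and
    # prompt are illustrative.
    from optimum.intel import OVModelForCausalLM
    from transformers import AutoTokenizer

    model_path = "ov_model_dir"  # hypothetical path, for illustration only

    # Pinning KV_CACHE_PRECISION to f16 avoids runtime-selected KV-cache
    # compression, trading some memory for more reproducible accuracy.
    target_model = OVModelForCausalLM.from_pretrained(
        model_path,
        device="CPU",
        ov_config={"KV_CACHE_PRECISION": "f16"},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Quick smoke test: generate a few tokens with the configured model.
    inputs = tokenizer("The quick brown fox", return_tensors="pt")
    outputs = target_model.generate(**inputs, max_new_tokens=8)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))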
