From 516f2a35cd6b809dbbe518eacb791f6e3acf632e Mon Sep 17 00:00:00 2001
From: Alexander Kozlov
Date: Wed, 11 Dec 2024 18:35:55 +0300
Subject: [PATCH] Set KV-cache to FP16 in LLM evaluation tests (#27956)

Co-authored-by: Alina Kladieva
---
 tests/llm/accuracy_conformance.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/llm/accuracy_conformance.py b/tests/llm/accuracy_conformance.py
index 7f75a8e912bbd6..4c6a1e140e597b 100644
--- a/tests/llm/accuracy_conformance.py
+++ b/tests/llm/accuracy_conformance.py
@@ -98,7 +98,7 @@ def teardown_module():
     test_scope,
 )
 def test_accuracy_conformance(model_path, model_type, precision, gt_data, device):
-    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device)
+    target_model = OVModelForCausalLM.from_pretrained(model_path, device=device, ov_config={"KV_CACHE_PRECISION": "f16"})
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     evaluator = wwb.Evaluator(
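
Note: a minimal, self-contained sketch of what this ov_config option does when
loading a model through optimum-intel. The model path, device, and prompt below
are placeholder assumptions for illustration, not part of the patch:

    from optimum.intel import OVModelForCausalLM
    from transformers import AutoTokenizer

    # Hypothetical path; any OpenVINO-exported causal LM directory works here.
    model_path = "path/to/ov_model"

    # KV_CACHE_PRECISION pins the key/value cache to FP16 instead of the
    # plugin default (which may be a compressed type such as u8), trading
    # some memory for more stable accuracy in evaluation runs.
    target_model = OVModelForCausalLM.from_pretrained(
        model_path,
        device="CPU",
        ov_config={"KV_CACHE_PRECISION": "f16"},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Quick smoke test: generate a few tokens with the FP16 KV-cache active.
    inputs = tokenizer("Hello, OpenVINO!", return_tensors="pt")
    output_ids = target_model.generate(**inputs, max_new_tokens=8)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))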