From a850c2680171853f4402c9f80acedde717fca451 Mon Sep 17 00:00:00 2001
From: Wangchong Zhou <wangchong@konghq.com>
Date: Mon, 2 Dec 2024 15:49:23 +0800
Subject: [PATCH] fix(ai-proxy): set content-length for non-compressed response

---
 kong/llm/drivers/shared.lua                       |  2 +-
 kong/llm/plugin/base.lua                          |  2 ++
 .../shared-filters/normalize-json-response.lua    | 15 ++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua
index 03c00bbcddb9..55169a29b97d 100644
--- a/kong/llm/drivers/shared.lua
+++ b/kong/llm/drivers/shared.lua
@@ -175,7 +175,7 @@ _M.operation_map = {
 }
 
 _M.clear_response_headers = {
-  shared = {
+  shared = { -- deprecated, no longer used
     "Content-Length",
   },
   openai = {
diff --git a/kong/llm/plugin/base.lua b/kong/llm/plugin/base.lua
index 0daca7a29419..4bafcecefb5a 100644
--- a/kong/llm/plugin/base.lua
+++ b/kong/llm/plugin/base.lua
@@ -110,6 +110,8 @@ function MetaPlugin:header_filter(sub_plugin, conf)
       -- and seems nginx doesn't support it
 
     elseif get_global_ctx("accept_gzip") then
+      -- for gzip response, don't set content-length at all to align with upstream
+      kong.response.clear_header("Content-Length")
       kong.response.set_header("Content-Encoding", "gzip")
     end
 
diff --git a/kong/llm/plugin/shared-filters/normalize-json-response.lua b/kong/llm/plugin/shared-filters/normalize-json-response.lua
index 1e0988f52495..f98b0d07bf12 100644
--- a/kong/llm/plugin/shared-filters/normalize-json-response.lua
+++ b/kong/llm/plugin/shared-filters/normalize-json-response.lua
@@ -2,7 +2,6 @@ local cjson = require("cjson")
 
 local ai_plugin_ctx = require("kong.llm.plugin.ctx")
 local ai_plugin_o11y = require("kong.llm.plugin.observability")
-local ai_shared = require("kong.llm.drivers.shared")
 
 local _M = {
   NAME = "normalize-json-response",
@@ -57,6 +56,8 @@ local function transform_body(conf)
   end
 
   set_global_ctx("response_body", response_body) -- to be sent out later or consumed by other plugins
+
+  return #response_body
 end
 
 function _M:run(conf)
@@ -81,8 +82,9 @@ function _M:run(conf)
   -- if not streaming, prepare the response body buffer
   -- this must be called before sending any response headers so that
   -- we can modify status code if needed
+  local body_length
   if not get_global_ctx("stream_mode") then
-    transform_body(conf)
+    body_length = transform_body(conf)
   end
 
   -- populate cost
@@ -94,12 +96,11 @@ function _M:run(conf)
     ai_plugin_o11y.metrics_set("llm_usage_cost", 0)
   end
 
-  -- clear shared restricted headers
-  for _, v in ipairs(ai_shared.clear_response_headers.shared) do
-    kong.response.clear_header(v)
+  if not get_global_ctx("accept_gzip") and not get_global_ctx("stream_mode") then
+    -- non-gzip, non-streaming response: use our transformed body length
+    kong.response.set_header("Content-Length", body_length)
   end
 
-
   if ngx.var.http_kong_debug or conf.model_name_header then
     local model_t = ai_plugin_ctx.get_request_model_table_inuse()
     assert(model_t and model_t.name, "model name is missing")
@@ -109,4 +110,4 @@ function _M:run(conf)
   return true
 end
 
-return _M
\ No newline at end of file
+return _M