From e69ebdf81b7e60055ff097fba84d74d978f520df Mon Sep 17 00:00:00 2001 From: Wangchong Zhou Date: Fri, 9 Aug 2024 16:03:02 +0800 Subject: [PATCH] feat(ai-proxy): add option to return model name in response header (#13472) --- changelog/unreleased/kong/ai-proxy-model-header.yml | 3 +++ kong/clustering/compat/removed_fields.lua | 1 + kong/llm/proxy/handler.lua | 5 +++++ kong/plugins/ai-proxy/schema.lua | 2 ++ .../09-hybrid_mode/09-config-compat_spec.lua | 8 ++++++++ .../03-plugins/38-ai-proxy/02-openai_integration_spec.lua | 1 + .../38-ai-proxy/03-anthropic_integration_spec.lua | 1 + .../03-plugins/38-ai-proxy/04-cohere_integration_spec.lua | 1 + spec/03-plugins/38-ai-proxy/05-azure_integration_spec.lua | 1 + .../38-ai-proxy/06-mistral_integration_spec.lua | 1 + 10 files changed, 24 insertions(+) create mode 100644 changelog/unreleased/kong/ai-proxy-model-header.yml diff --git a/changelog/unreleased/kong/ai-proxy-model-header.yml b/changelog/unreleased/kong/ai-proxy-model-header.yml new file mode 100644 index 0000000000000..95c80d75e966a --- /dev/null +++ b/changelog/unreleased/kong/ai-proxy-model-header.yml @@ -0,0 +1,3 @@ +message: '**ai-proxy**: Added a new response header X-Kong-LLM-Model that displays the name of the language model used in the AI-Proxy plugin.' +type: feature +scope: Plugin diff --git a/kong/clustering/compat/removed_fields.lua b/kong/clustering/compat/removed_fields.lua index 5c1b7404fe8a0..d37db9a4172de 100644 --- a/kong/clustering/compat/removed_fields.lua +++ b/kong/clustering/compat/removed_fields.lua @@ -175,6 +175,7 @@ return { "model.options.bedrock", "auth.aws_access_key_id", "auth.aws_secret_access_key", + "model_name_header", }, ai_prompt_decorator = { "max_request_body_size", diff --git a/kong/llm/proxy/handler.lua b/kong/llm/proxy/handler.lua index d6c7fd1ec6fc8..8177da5a4a7d5 100644 --- a/kong/llm/proxy/handler.lua +++ b/kong/llm/proxy/handler.lua @@ -335,6 +335,11 @@ function _M:header_filter(conf) kong.response.clear_header(v) end + if ngx.var.http_kong_debug or conf.model_name_header then + local name = conf.model.provider .. "/" .. (kong.ctx.plugin.llm_model_requested or conf.model.name) + kong.response.set_header("X-Kong-LLM-Model", name) + end + -- we use openai's streaming mode (SSE) if llm_state.is_streaming_mode() then -- we are going to send plaintext event-stream frames for ALL models diff --git a/kong/plugins/ai-proxy/schema.lua b/kong/plugins/ai-proxy/schema.lua index 0754a0348cd77..4db75b46d0145 100644 --- a/kong/plugins/ai-proxy/schema.lua +++ b/kong/plugins/ai-proxy/schema.lua @@ -20,6 +20,8 @@ local ai_proxy_only_config = { gt = 0, description = "max allowed body size allowed to be introspected",} }, + { model_name_header = { description = "Display the model name selected in the X-Kong-LLM-Model response header", + type = "boolean", default = true, }}, } for i, v in pairs(ai_proxy_only_config) do diff --git a/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua b/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua index 9eecc8ec7a45d..808f4cd5ade3e 100644 --- a/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua +++ b/spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua @@ -510,6 +510,7 @@ describe("CP/DP config compat transformations #" .. strategy, function() }, }, max_request_body_size = 8192, + model_name_header = true, }, } -- ]] @@ -519,6 +520,9 @@ describe("CP/DP config compat transformations #" .. strategy, function() -- max body size expected.config.max_request_body_size = nil + -- model name header + expected.config.model_name_header = nil + -- gemini fields expected.config.auth.gcp_service_account_json = nil expected.config.auth.gcp_use_service_account = nil @@ -695,6 +699,7 @@ describe("CP/DP config compat transformations #" .. strategy, function() }, }, max_request_body_size = 8192, + model_name_header = true, }, } -- ]] @@ -704,6 +709,9 @@ describe("CP/DP config compat transformations #" .. strategy, function() -- max body size expected.config.max_request_body_size = nil + -- model name header + expected.config.model_name_header = nil + -- gemini fields expected.config.auth.gcp_service_account_json = nil expected.config.auth.gcp_use_service_account = nil diff --git a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua index e963d908e3247..b1b772bfbedae 100644 --- a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua @@ -858,6 +858,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then assert.equals(json.id, "chatcmpl-8T6YwgvjQVVnGbJ2w8hpOA17SeNy2") assert.equals(json.model, "gpt-3.5-turbo-0613") assert.equals(json.object, "chat.completion") + assert.equals(r.headers["X-Kong-LLM-Model"], "openai/gpt-3.5-turbo") assert.is_table(json.choices) assert.is_table(json.choices[1].message) diff --git a/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua b/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua index 78f990fe6161c..dd52f7e066a93 100644 --- a/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua @@ -541,6 +541,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then -- assert.equals(json.id, "chatcmpl-8T6YwgvjQVVnGbJ2w8hpOA17SeNy2") assert.equals(json.model, "claude-2.1") assert.equals(json.object, "chat.content") + assert.equals(r.headers["X-Kong-LLM-Model"], "anthropic/claude-2.1") assert.is_table(json.choices) assert.is_table(json.choices[1].message) diff --git a/spec/03-plugins/38-ai-proxy/04-cohere_integration_spec.lua b/spec/03-plugins/38-ai-proxy/04-cohere_integration_spec.lua index 548db5e59be1e..eb52249c8fb4b 100644 --- a/spec/03-plugins/38-ai-proxy/04-cohere_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/04-cohere_integration_spec.lua @@ -416,6 +416,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then -- check this is in the 'kong' response format assert.equals(json.model, "command") assert.equals(json.object, "chat.completion") + assert.equals(r.headers["X-Kong-LLM-Model"], "cohere/command") assert.is_table(json.choices) assert.is_table(json.choices[1].message) diff --git a/spec/03-plugins/38-ai-proxy/05-azure_integration_spec.lua b/spec/03-plugins/38-ai-proxy/05-azure_integration_spec.lua index d76d0c4ac50d7..82385720efcfb 100644 --- a/spec/03-plugins/38-ai-proxy/05-azure_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/05-azure_integration_spec.lua @@ -493,6 +493,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then assert.equals("cmpl-8TBeaJVQIhE9kHEJbk1RnKzgFxIqN", json.id) assert.equals("gpt-3.5-turbo-instruct", json.model) assert.equals("text_completion", json.object) + assert.equals(r.headers["X-Kong-LLM-Model"], "azure/gpt-3.5-turbo-instruct") assert.is_table(json.choices) assert.is_table(json.choices[1]) diff --git a/spec/03-plugins/38-ai-proxy/06-mistral_integration_spec.lua b/spec/03-plugins/38-ai-proxy/06-mistral_integration_spec.lua index 94058750ff1df..26bc21acf9993 100644 --- a/spec/03-plugins/38-ai-proxy/06-mistral_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/06-mistral_integration_spec.lua @@ -338,6 +338,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then assert.equals(json.id, "chatcmpl-8T6YwgvjQVVnGbJ2w8hpOA17SeNy2") assert.equals(json.model, "mistralai/Mistral-7B-Instruct-v0.1-instruct") assert.equals(json.object, "chat.completion") + assert.equals(r.headers["X-Kong-LLM-Model"], "mistral/mistralai/Mistral-7B-Instruct-v0.1-instruct") assert.is_table(json.choices) assert.is_table(json.choices[1].message)