From 19cdafa6e96bc757540f1d8bab53ee041f0753f1 Mon Sep 17 00:00:00 2001
From: Robin Xiang
Date: Fri, 12 Apr 2024 11:38:32 +0800
Subject: [PATCH 1/4] fix(AI-Proxy): enhance the robustness of anthropic's
 statistics to prevent errors when: 1. no usage data is provided; 2. the
 usage data format changes

---
 .../03-anthropic_integration_spec.lua         | 142 +++++++++++++++++-
 .../responses/malformed_usage_response.json   |  15 ++
 .../responses/no_usage_response.json          |  11 ++
 3 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json
 create mode 100644 spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json

diff --git a/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua b/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua
index d5b36fce7b2..43067360b18 100644
--- a/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua
+++ b/spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua
@@ -1,6 +1,7 @@
 local helpers = require "spec.helpers"
 local cjson = require "cjson"
 local pl_file = require "pl.file"
+local deepcompare = require("pl.tablex").deepcompare
 
 local PLUGIN_NAME = "ai-proxy"
 local MOCK_PORT = helpers.get_available_port()
@@ -75,6 +76,56 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
               }
             }
 
+            location = "/llm/v1/chat/no_usage_upstream_response" {
+              content_by_lua_block {
+                local pl_file = require "pl.file"
+                local json = require("cjson.safe")
+
+                local token = ngx.req.get_headers()["x-api-key"]
+                if token == "anthropic-key" then
+                  ngx.req.read_body()
+                  local body, err = ngx.req.get_body_data()
+                  body, err = json.decode(body)
+
+                  if err or (not body.messages) then
+                    ngx.status = 400
+                    ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/bad_request.json"))
+                  else
+                    ngx.status = 200
+                    ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json"))
+                  end
+                else
+                  ngx.status = 401
+                  ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/unauthorized.json"))
+                end
+              }
+            }
+
+            location = "/llm/v1/chat/malformed_usage_upstream_response" {
+              content_by_lua_block {
+                local pl_file = require "pl.file"
+                local json = require("cjson.safe")
+
+                local token = ngx.req.get_headers()["x-api-key"]
+                if token == "anthropic-key" then
+                  ngx.req.read_body()
+                  local body, err = ngx.req.get_body_data()
+                  body, err = json.decode(body)
+
+                  if err or (not body.messages) then
+                    ngx.status = 400
+                    ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/bad_request.json"))
+                  else
+                    ngx.status = 200
+                    ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json"))
+                  end
+                else
+                  ngx.status = 401
+                  ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/unauthorized.json"))
+                end
+              }
+            }
+
             location = "/llm/v1/chat/bad_request" {
               content_by_lua_block {
                 local pl_file = require "pl.file"
@@ -170,7 +221,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
       --
 
       -- 200 chat bad upstream response with one option
-      local chat_good = assert(bp.routes:insert {
+      local chat_bad = assert(bp.routes:insert {
         service = empty_service,
         protocols = { "http" },
         strip_path = true,
@@ -178,7 +229,7 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
       })
       bp.plugins:insert {
         name = PLUGIN_NAME,
-        route = { id = chat_good.id },
+        route = { id = chat_bad.id },
         config = {
           route_type = "llm/v1/chat",
           auth = {
@@ -199,6 +250,65 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
       }
       --
 
+      -- 200 chat no-usage response
+      local chat_no_usage = assert(bp.routes:insert {
+        service = empty_service,
+        protocols = { "http" },
+        strip_path = true,
+        paths = { "/anthropic/llm/v1/chat/no_usage_upstream_response" }
+      })
+      bp.plugins:insert {
+        name = PLUGIN_NAME,
+        route = { id = chat_no_usage.id },
+        config = {
+          route_type = "llm/v1/chat",
+          auth = {
+            header_name = "x-api-key",
+            header_value = "anthropic-key",
+          },
+          model = {
+            name = "claude-2.1",
+            provider = "anthropic",
+            options = {
+              max_tokens = 256,
+              temperature = 1.0,
+              upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/no_usage_upstream_response",
+              anthropic_version = "2023-06-01",
+            },
+          },
+        },
+      }
+      --
+
+      -- 200 chat malformed-usage response
+      local chat_malformed_usage = assert(bp.routes:insert {
+        service = empty_service,
+        protocols = { "http" },
+        strip_path = true,
+        paths = { "/anthropic/llm/v1/chat/malformed_usage_upstream_response" }
+      })
+      bp.plugins:insert {
+        name = PLUGIN_NAME,
+        route = { id = chat_malformed_usage.id },
+        config = {
+          route_type = "llm/v1/chat",
+          auth = {
+            header_name = "x-api-key",
+            header_value = "anthropic-key",
+          },
+          model = {
+            name = "claude-2.1",
+            provider = "anthropic",
+            options = {
+              max_tokens = 256,
+              temperature = 1.0,
+              upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/malformed_usage_upstream_response",
+              anthropic_version = "2023-06-01",
+            },
+          },
+        },
+      }
+
       -- 200 completions good with one option
       local completions_good = assert(bp.routes:insert {
         service = empty_service,
@@ -487,6 +597,34 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
        -- check this is in the 'kong' response format
        assert.equals(json.error.message, "request format not recognised")
      end)
+
+      it("no usage response", function()
+        local r = client:get("/anthropic/llm/v1/chat/no_usage_upstream_response", {
+          headers = {
+            ["content-type"] = "application/json",
+            ["accept"] = "application/json",
+          },
+          body = pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/requests/good.json"),
+        })
+
+        local body = assert.res_status(200, r)
+        local json = cjson.decode(body)
+        assert.equals(json.usage, "no usage data returned from upstream")
+      end)
+
+      it("malformed usage response", function()
+        local r = client:get("/anthropic/llm/v1/chat/malformed_usage_upstream_response", {
+          headers = {
+            ["content-type"] = "application/json",
+            ["accept"] = "application/json",
+          },
+          body = pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/requests/good.json"),
+        })
+
+        local body = assert.res_status(200, r)
+        local json = cjson.decode(body)
+        assert.is_truthy(deepcompare(json.usage, {}))
+      end)
     end)
 
     describe("anthropic llm/v1/completions", function()

diff --git a/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json b/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json
new file mode 100644
index 00000000000..0a8ec4da8e3
--- /dev/null
+++ b/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json
@@ -0,0 +1,15 @@
+{
+  "content": [
+    {
+      "text": "The sum of 1 + 1 is 2.",
+      "type": "text"
+    }
+  ],
+  "model": "claude-2.1",
+  "stop_reason": "end_turn",
+  "stop_sequence": "string",
+  "usage": {
+    "foo": 0,
+    "bar": 0
+  }
+}
diff --git a/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json b/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json
new file mode 100644
index 00000000000..6f10d884823
--- /dev/null
+++ b/spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json
@@ -0,0 +1,11 @@
+{
+  "content": [
+    {
+      "text": "The sum of 1 + 1 is 2.",
+      "type": "text"
+    }
+  ],
+  "model": "claude-2.1",
+  "stop_reason": "end_turn",
+  "stop_sequence": "string"
+}

From 48e6908525d79dc813ea2faf284e74807fa28d40 Mon Sep 17 00:00:00 2001
From: Robin Xiang
Date: Fri, 12 Apr 2024 12:26:44 +0800
Subject: [PATCH 2/4] update kong/llm/drivers/anthropic.lua

---
 kong/llm/drivers/anthropic.lua | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kong/llm/drivers/anthropic.lua b/kong/llm/drivers/anthropic.lua
index 18c3f2bce5b..d3c40d31fd7 100644
--- a/kong/llm/drivers/anthropic.lua
+++ b/kong/llm/drivers/anthropic.lua
@@ -137,6 +137,7 @@ local transformers_from = {
     end
 
     if response_table.content then
+      local usage = response_table.usage
       local res = {
         choices = {
           {
@@ -148,12 +149,12 @@ local transformers_from = {
             finish_reason = response_table.stop_reason,
           },
         },
-        usage = {
-          prompt_tokens = response_table.usage.input_tokens or 0,
-          completion_tokens = response_table.usage.output_tokens or 0,
-          total_tokens = response_table.usage.input_tokens and response_table.usage.output_tokens and
-                         response_table.usage.input_tokens + response_table.usage.output_tokens or 0,
-        },
+        usage = usage and {
+          prompt_tokens = usage and usage.input_tokens or nil,
+          completion_tokens = usage and usage.output_tokens or nil,
+          total_tokens = usage and usage.input_tokens and usage.output_tokens and
+                         usage.input_tokens + usage.output_tokens or nil,
+        } or "no usage data returned from upstream",
         model = response_table.model,
         object = "chat.content",
       }

From 5be0f8c988414430276f67533e41390c02f83915 Mon Sep 17 00:00:00 2001
From: Robin Xiang
Date: Tue, 16 Apr 2024 17:07:30 +0800
Subject: [PATCH 3/4] reconstruct usage

---
 kong/llm/drivers/anthropic.lua | 22 +++++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/kong/llm/drivers/anthropic.lua b/kong/llm/drivers/anthropic.lua
index d3c40d31fd7..63ae99ebd09 100644
--- a/kong/llm/drivers/anthropic.lua
+++ b/kong/llm/drivers/anthropic.lua
@@ -138,6 +138,19 @@ local transformers_from = {
 
     if response_table.content then
       local usage = response_table.usage
+
+      if usage then
+        usage = {
+          prompt_tokens = usage.input_tokens or nil,
+          completion_tokens = usage.output_tokens or nil,
+          total_tokens = usage.input_tokens and usage.output_tokens and
+                         usage.input_tokens + usage.output_tokens or nil,
+        }
+
+      else
+        usage = "no usage data returned from upstream"
+      end
+
       local res = {
         choices = {
           {
@@ -149,16 +162,11 @@ local transformers_from = {
             finish_reason = response_table.stop_reason,
           },
         },
-        usage = usage and {
-          prompt_tokens = usage and usage.input_tokens or nil,
-          completion_tokens = usage and usage.output_tokens or nil,
-          total_tokens = usage and usage.input_tokens and usage.output_tokens and
-                         usage.input_tokens + usage.output_tokens or nil,
-        } or "no usage data returned from upstream",
+        usage = usage,
         model = response_table.model,
         object = "chat.content",
       }
-
+
       return cjson.encode(res)
     else
       -- it's probably an error block, return generic error

From 47f09ca47156e8314ca28e95c2cb2360d49bc588 Mon Sep 17 00:00:00 2001
From: Robin Xiang
Date: Tue, 16 Apr 2024 18:11:58 +0800
Subject: [PATCH 4/4] update test case

---
 kong/llm/drivers/anthropic.lua | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kong/llm/drivers/anthropic.lua b/kong/llm/drivers/anthropic.lua
index 63ae99ebd09..08d86cbc06c 100644
--- a/kong/llm/drivers/anthropic.lua
+++ b/kong/llm/drivers/anthropic.lua
@@ -141,8 +141,8 @@ local transformers_from = {
 
       if usage then
         usage = {
-          prompt_tokens = usage.input_tokens or nil,
-          completion_tokens = usage.output_tokens or nil,
+          prompt_tokens = usage.input_tokens,
+          completion_tokens = usage.output_tokens,
           total_tokens = usage.input_tokens and usage.output_tokens and
                          usage.input_tokens + usage.output_tokens or nil,
         }
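
For reference, a minimal standalone sketch (not part of the patches above) of the
usage handling the series converges on. `map_usage` is a hypothetical helper name
for illustration only; in the driver the equivalent logic is inlined in
transformers_from["llm/v1/chat"]. It reproduces the three cases covered by the
new fixtures:

local cjson = require "cjson"

-- Sketch of the reconstructed usage block (map_usage is hypothetical): map
-- Anthropic's token fields when usage is present, otherwise fall back to a
-- marker string.
local function map_usage(usage)
  if usage then
    return {
      prompt_tokens = usage.input_tokens,
      completion_tokens = usage.output_tokens,
      total_tokens = usage.input_tokens and usage.output_tokens and
                     usage.input_tokens + usage.output_tokens or nil,
    }
  end
  return "no usage data returned from upstream"
end

-- Well-formed usage: all three counters are populated.
print(cjson.encode(map_usage({ input_tokens = 10, output_tokens = 5 })))
-- e.g. {"prompt_tokens":10,"completion_tokens":5,"total_tokens":15}

-- Malformed usage (unrecognized keys): every field evaluates to nil, so the
-- result encodes as {}, matching deepcompare(json.usage, {}) in the spec.
print(cjson.encode(map_usage({ foo = 0, bar = 0 })))

-- Missing usage: the marker string asserted by the "no usage response" test.
print(map_usage(nil))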