From f6ef87982db8256e371377d0af1310e6f18801e8 Mon Sep 17 00:00:00 2001
From: Jack Tysoe
Date: Wed, 16 Oct 2024 16:01:26 +0100
Subject: [PATCH] fix(ai-proxy): (Gemini)(AG-154) fixed tool (function) calls
 coming back empty

---
 .../kong/ai-gemini-fix-function-calling.yml |   3 +
 kong/llm/drivers/bedrock.lua                |  53 +++---
 kong/llm/drivers/gemini.lua                 | 168 +++++++++++++-----
 3 files changed, 150 insertions(+), 74 deletions(-)
 create mode 100644 changelog/unreleased/kong/ai-gemini-fix-function-calling.yml

diff --git a/changelog/unreleased/kong/ai-gemini-fix-function-calling.yml b/changelog/unreleased/kong/ai-gemini-fix-function-calling.yml
new file mode 100644
index 0000000000000..59e6f5baa27e0
--- /dev/null
+++ b/changelog/unreleased/kong/ai-gemini-fix-function-calling.yml
@@ -0,0 +1,3 @@
+message: "**ai-proxy**: Fixed a bug where tool (function) calls to Gemini (or via Vertex) would return empty results."
+type: bugfix
+scope: Plugin

diff --git a/kong/llm/drivers/bedrock.lua b/kong/llm/drivers/bedrock.lua
index d5b6620fec0a9..90b97fefbe0f4 100644
--- a/kong/llm/drivers/bedrock.lua
+++ b/kong/llm/drivers/bedrock.lua
@@ -79,24 +79,33 @@ local function to_tools(in_tools)
   return out_tools
 end
 
-local function from_tool_call_response(tool_use)
-  local arguments
+local function from_tool_call_response(content)
+  if not content then return nil end
 
-  if tool_use['input'] and next(tool_use['input']) then
-    arguments = cjson.encode(tool_use['input'])
+  local tools_used
+
+  for _, t in ipairs(content) do
+    if t.toolUse then
+      tools_used = tools_used or {}
+
+      local arguments
+      if t.toolUse['input'] and next(t.toolUse['input']) then
+        arguments = cjson.encode(t.toolUse['input'])
+      end
+
+      tools_used[#tools_used+1] = {
+        -- append in call order, so ordering stays stable for later modifications
+        ['function'] = {
+          arguments = arguments,
+          name = t.toolUse.name,
+        },
+        id = t.toolUse.toolUseId,
+        type = "function",
+      }
+    end
   end
 
-  return {
-    -- set explicit numbering to ensure ordering in later modifications
-    [1] = {
-      ['function'] = {
-        arguments = arguments,
-        name = tool_use.name,
-      },
-      id = tool_use.toolUseId,
-      type = "function",
-    },
-  }
+  return tools_used
 end
 
 local function handle_stream_event(event_t, model_info, route_type)
@@ -326,23 +335,15 @@ local function from_bedrock_chat_openai(response, model_info, route_type)
   if response.output
      and response.output.message
      and response.output.message.content
-     and #response.output.message.content > 0
-     and response.output.message.content[1].text then
+     and #response.output.message.content > 0 then
 
-    local tool_use, err
-    if #response.output.message.content > 1 and response.output.message.content[2].toolUse then
-      tool_use, err = from_tool_call_response(response.output.message.content[2].toolUse)
-
-      if err then
-        return nil, fmt("unable to process function call response arguments: %s", err)
-      end
-    end
+    local tool_use, err = from_tool_call_response(response.output.message.content)
 
     client_response.choices[1] = {
       index = 0,
       message = {
         role = "assistant",
-        content = response.output.message.content[1].text,
+        content = response.output.message.content[1].text,  -- may be nil
         tool_calls = tool_use,
       },
       finish_reason = _OPENAI_STOP_REASON_MAPPING[response.stopReason] or "stop",

diff --git a/kong/llm/drivers/gemini.lua b/kong/llm/drivers/gemini.lua
index 16f5b25c36f4c..bfab0b15743e0 100644
--- a/kong/llm/drivers/gemini.lua
+++ b/kong/llm/drivers/gemini.lua
@@ -42,6 +42,25 @@ local function is_response_content(content)
          and content.candidates[1].content.parts[1].text
 end
 
+local function is_tool_content(content)
+  return content
+         and content.candidates
+         and #content.candidates > 0
+         and content.candidates[1].content
+         and content.candidates[1].content.parts
+         and #content.candidates[1].content.parts > 0
+         and content.candidates[1].content.parts[1].functionCall
+end
+
+local function is_function_call_message(message)
+  return message
+     and message.role
+     and message.role == "assistant"
+     and message.tool_calls
+     and type(message.tool_calls) == "table"
+     and #message.tool_calls > 0
+end
+
 local function handle_stream_event(event_t, model_info, route_type)
   -- discard empty frames, it should either be a random new line, or comment
   if (not event_t.data) or (#event_t.data < 1) then
@@ -83,10 +102,28 @@ local function handle_stream_event(event_t, model_info, route_type)
   end
 end
 
+local function to_tools(in_tools)
+  local out_tools
+
+  for i, v in ipairs(in_tools) do
+    if v['function'] then
+      out_tools = out_tools or {
+        [1] = {
+          function_declarations = {}
+        }
+      }
+
+      out_tools[1].function_declarations[i] = v['function']
+    end
+  end
+
+  return out_tools
+end
+
 local function to_gemini_chat_openai(request_table, model_info, route_type)
-  if request_table then  -- try-catch type mechanism
-    local new_r = {}
+  local new_r = {}
 
+  if request_table then
     if request_table.messages and #request_table.messages > 0 then
       local system_prompt
 
@@ -96,18 +133,60 @@
       if v.role and v.role == "system" then
         system_prompt = system_prompt or buffer.new()
         system_prompt:put(v.content or "")
+
+      elseif v.role and v.role == "tool" then
+        -- handle tool execution output
+        table_insert(new_r.contents, {
+          role = "function",
+          parts = {
+            {
+              function_response = {
+                response = {
+                  content = {
+                    v.content,
+                  },
+                },
+                name = v.name,  -- name of the function this tool result answers
+              },
+            },
+          },
+        })
+
+      elseif is_function_call_message(v) then
+        -- treat specific 'assistant function call' tool execution input message
+        local function_calls = {}
+        for i, t in ipairs(v.tool_calls) do
+          function_calls[i] = {
+            function_call = {
+              name = t['function'].name,
+            },
+          }
+        end
+
+        table_insert(new_r.contents, {
+          role = "function",
+          parts = function_calls,
+        })
+
       else
         -- for any other role, just construct the chat history as 'parts.text' type
         new_r.contents = new_r.contents or {}
+
+        local part = v.content
+        if type(v.content) == "string" then
+          part = {
+            text = v.content
+          }
+        end
+
         table_insert(new_r.contents, {
           role = _OPENAI_ROLE_MAPPING[v.role or "user"], -- default to 'user'
           parts = {
-            {
-              text = v.content or ""
-            },
+            part,
           },
         })
       end
+
     end
 
     -- This was only added in Gemini 1.5
@@ -127,36 +206,11 @@
 
     new_r.generationConfig = to_gemini_generation_config(request_table)
 
-    return new_r, "application/json", nil
-  end
-
-  local new_r = {}
-
-  if request_table.messages and #request_table.messages > 0 then
-    local system_prompt
-
-    for i, v in ipairs(request_table.messages) do
-
-      -- for 'system', we just concat them all into one Gemini instruction
-      if v.role and v.role == "system" then
-        system_prompt = system_prompt or buffer.new()
-        system_prompt:put(v.content or "")
-      else
-        -- for any other role, just construct the chat history as 'parts.text' type
-        new_r.contents = new_r.contents or {}
-        table_insert(new_r.contents, {
-          role = _OPENAI_ROLE_MAPPING[v.role or "user"], -- default to 'user'
-          parts = {
-            {
-              text = v.content or ""
-            },
-          },
-        })
-      end
-    end
+    -- handle function calling translation from OpenAI format
+    new_r.tools = request_table.tools and to_tools(request_table.tools)
   end
 
-  new_r.generationConfig = to_gemini_generation_config(request_table)
+  kong.log.debug(cjson.encode(new_r))
 
   return new_r, "application/json", nil
 end
@@ -174,20 +228,38 @@ local function from_gemini_chat_openai(response, model_info, route_type)
   local messages = {}
   messages.choices = {}
 
-  if response.candidates
-     and #response.candidates > 0
-     and is_response_content(response) then
-
-    messages.choices[1] = {
-      index = 0,
-      message = {
-        role = "assistant",
-        content = response.candidates[1].content.parts[1].text,
-      },
-      finish_reason = string_lower(response.candidates[1].finishReason),
-    }
-    messages.object = "chat.completion"
-    messages.model = model_info.name
+  if response.candidates and #response.candidates > 0 then
+    if is_response_content(response) then
+      messages.choices[1] = {
+        index = 0,
+        message = {
+          role = "assistant",
+          content = response.candidates[1].content.parts[1].text,
+        },
+        finish_reason = string_lower(response.candidates[1].finishReason),
+      }
+      messages.object = "chat.completion"
+      messages.model = model_info.name
+
+    elseif is_tool_content(response) then
+      local function_call_responses = response.candidates[1].content.parts
+      for i, v in ipairs(function_call_responses) do
+        messages.choices[i] = {
+          index = 0,
+          message = {
+            role = "assistant",
+            tool_calls = {
+              {
+                ['function'] = {
+                  name = v.functionCall.name,
+                  arguments = cjson.encode(v.functionCall.args),
+                },
+              },
+            },
+          },
+        }
+      end
+    end
 
     -- process analytics
     if response.usageMetadata then
@@ -206,7 +278,7 @@
       ngx.log(ngx.ERR, err)
       return nil, err
 
-  else-- probably a server fault or other unexpected response
+  else -- probably a server fault or other unexpected response
     local err = "no generation candidates received from Gemini, or max_tokens too short"
     ngx.log(ngx.ERR, err)
     return nil, err
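
Note (not part of the patch): the sketch below is illustrative only. It assumes a plain Lua interpreter with lua-cjson installed, and the weather tool is a made-up example; the field shapes mirror the mappings handled by to_gemini_chat_openai() and from_gemini_chat_openai() above.

-- Illustrative sketch: OpenAI-style tool-calling shapes on one side of the
-- translation, and the Gemini shapes the driver produces/consumes on the other.
local cjson = require("cjson")

-- OpenAI-format request fragment: one declared tool.
local openai_tools = {
  {
    type = "function",
    ["function"] = {
      name = "get_weather",
      description = "Look up the current weather for a city",
      parameters = {
        type = "object",
        properties = { city = { type = "string" } },
        required = { "city" },
      },
    },
  },
}

-- Gemini-format equivalent: a single tools entry carrying function_declarations.
local gemini_tools = {
  {
    function_declarations = {
      openai_tools[1]["function"],
    },
  },
}

-- A Gemini candidate part for a model-issued call ...
local gemini_part = {
  functionCall = { name = "get_weather", args = { city = "London" } },
}

-- ... and the OpenAI-style tool_calls entry handed back to the client,
-- with the args table re-encoded as a JSON string.
local openai_tool_call = {
  type = "function",
  ["function"] = {
    name = gemini_part.functionCall.name,
    arguments = cjson.encode(gemini_part.functionCall.args),
  },
}

print(cjson.encode(gemini_tools))
print(cjson.encode(openai_tool_call))

In short: on the request side every OpenAI "function" tool is folded into one function_declarations list, and on the response side each Gemini functionCall part becomes one OpenAI tool_calls entry.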