Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(prometheus): add AI metrics and fix #9320 #13148

Merged
merged 29 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
25610e9
prometheus + fix
AntoineJac Jun 3, 2024
556f74c
prometheus + fix
AntoineJac Jun 3, 2024
b08ee83
fix
AntoineJac Jun 3, 2024
92ae81c
add test details
AntoineJac Jun 3, 2024
30ed8cb
add test
AntoineJac Jun 3, 2024
d284742
add changelogs
AntoineJac Jun 3, 2024
008c3bb
rename with llm prefix
AntoineJac Jun 7, 2024
c1a1960
Merge branch 'master' into feat/FTI-5861-AI-Metrics-Prometheus
AntoineJac Jun 7, 2024
81d6814
Update init.lua
AntoineJac Jun 7, 2024
a7f3f78
Update init.lua
AntoineJac Jun 7, 2024
ce19b11
Update changelog/unreleased/kong/add-ai-data-prometheus.yml
AntoineJac Jun 17, 2024
e88453b
new version
AntoineJac Jun 17, 2024
1e6d3e2
new version
AntoineJac Jun 17, 2024
a4db9df
Merge branch 'master' into feat/FTI-5861-AI-Metrics-Prometheus
AntoineJac Jun 17, 2024
7a80dd6
fix test
AntoineJac Jun 17, 2024
06bbcbe
test
AntoineJac Jun 17, 2024
cd85513
edit cost name
AntoineJac Jun 17, 2024
7c7afb7
remove bool
AntoineJac Jun 18, 2024
3872f5e
Update kong/plugins/prometheus/exporter.lua
AntoineJac Jun 18, 2024
17ebe7a
Update kong/plugins/prometheus/handler.lua
AntoineJac Jun 18, 2024
7c79cf9
Update kong/plugins/prometheus/schema.lua
AntoineJac Jun 18, 2024
3e7a964
d
AntoineJac Jun 18, 2024
8785af5
rename containers
AntoineJac Jun 18, 2024
2124a2b
fix order
AntoineJac Jun 18, 2024
f71ef89
remove unneeded info
AntoineJac Jun 18, 2024
763cb6a
remove unneeded info
AntoineJac Jun 18, 2024
ff63000
prepare to merge
AntoineJac Jun 20, 2024
e6f6d60
Merge branch 'master' into feat/FTI-5861-AI-Metrics-Prometheus
AntoineJac Jun 20, 2024
486f6eb
Update changelog/unreleased/kong/add-ai-data-prometheus.yml
AntoineJac Jun 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/unreleased/kong/add-ai-data-prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"message": "**prometheus**: Added `ai_llm_requests_total`, `ai_llm_cost_total` and `ai_llm_tokens_total` metrics in the Prometheus plugin to start counting AI usage."
"type": feature
"scope": Core
70 changes: 40 additions & 30 deletions kong/llm/drivers/shared.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,32 @@ end
--

local log_entry_keys = {
TOKENS_CONTAINER = "usage",
USAGE_CONTAINER = "usage",
META_CONTAINER = "meta",
PAYLOAD_CONTAINER = "payload",
CACHE_CONTAINER = "cache",

-- payload keys
REQUEST_BODY = "request",
RESPONSE_BODY = "response",

-- meta keys
PLUGIN_ID = "plugin_id",
PROVIDER_NAME = "provider_name",
REQUEST_MODEL = "request_model",
RESPONSE_MODEL = "response_model",
PROVIDER_NAME = "provider_name",
PLUGIN_ID = "plugin_id",

-- usage keys
PROCESSING_TIME = "processing_time",
PROMPT_TOKEN = "prompt_token",
COMPLETION_TOKEN = "completion_token",
PROMPT_TOKENS = "prompt_tokens",
COMPLETION_TOKENS = "completion_tokens",
TOTAL_TOKENS = "total_tokens",
COST = "cost",

-- cache keys
VECTOR_DB = "vector_db",
EMBEDDINGS_PROVIDER = "embeddings_provider",
EMBEDDINGS_MODEL = "embeddings_model",
CACHE_STATUS = "cache_status",
}

local openai_override = os.getenv("OPENAI_TEST_PORT")
Expand Down Expand Up @@ -487,26 +494,18 @@ function _M.post_request(conf, response_object)
request_analytics = {}
end

-- check if we already have analytics for this provider
local request_analytics_plugin = request_analytics[plugin_name]

-- create a new structure if not
if not request_analytics_plugin then
request_analytics_plugin = {
[log_entry_keys.META_CONTAINER] = {},
[log_entry_keys.TOKENS_CONTAINER] = {
[log_entry_keys.PROMPT_TOKEN] = 0,
[log_entry_keys.COMPLETION_TOKEN] = 0,
[log_entry_keys.TOTAL_TOKENS] = 0,
},
}
end
-- create a new analytics structure for this plugin
local request_analytics_plugin = {
[log_entry_keys.META_CONTAINER] = {},
[log_entry_keys.USAGE_CONTAINER] = {},
[log_entry_keys.CACHE_CONTAINER] = {},
}

-- Set the model, response, and provider names in the current try context
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PLUGIN_ID] = conf.__plugin_id
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PROVIDER_NAME] = provider_name
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.REQUEST_MODEL] = kong.ctx.plugin.llm_model_requested or conf.model.name
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.RESPONSE_MODEL] = response_object.model or conf.model.name
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PROVIDER_NAME] = provider_name
request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PLUGIN_ID] = conf.__plugin_id

-- set extra per-provider meta
if kong.ctx.plugin.ai_extra_meta and type(kong.ctx.plugin.ai_extra_meta) == "table" then
Expand All @@ -518,13 +517,20 @@ function _M.post_request(conf, response_object)
-- Capture openai-format usage stats from the transformed response body
if response_object.usage then
if response_object.usage.prompt_tokens then
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.PROMPT_TOKEN] = request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.PROMPT_TOKEN] + response_object.usage.prompt_tokens
request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.PROMPT_TOKENS] = response_object.usage.prompt_tokens
end
if response_object.usage.completion_tokens then
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.COMPLETION_TOKEN] = request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.COMPLETION_TOKEN] + response_object.usage.completion_tokens
request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.COMPLETION_TOKENS] = response_object.usage.completion_tokens
end
if response_object.usage.total_tokens then
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.TOTAL_TOKENS] = request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.TOTAL_TOKENS] + response_object.usage.total_tokens
request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.TOTAL_TOKENS] = response_object.usage.total_tokens
end

if response_object.usage.prompt_tokens and response_object.usage.completion_tokens
and conf.model.options.input_cost and conf.model.options.output_cost then
request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.COST] =
(response_object.usage.prompt_tokens * conf.model.options.input_cost
+ response_object.usage.completion_tokens * conf.model.options.output_cost) / 1000000 -- 1 million
end
end

Expand All @@ -541,13 +547,17 @@ function _M.post_request(conf, response_object)
kong.ctx.shared.analytics = request_analytics

if conf.logging and conf.logging.log_statistics then
-- Log analytics data
kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.TOKENS_CONTAINER),
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER])

-- Log meta
-- Log meta data
kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.META_CONTAINER),
request_analytics_plugin[log_entry_keys.META_CONTAINER])

-- Log usage data
kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.USAGE_CONTAINER),
request_analytics_plugin[log_entry_keys.USAGE_CONTAINER])

-- Log cache data
kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.CACHE_CONTAINER),
request_analytics_plugin[log_entry_keys.CACHE_CONTAINER])
end

-- log tokens response for reports and billing
Expand Down
10 changes: 10 additions & 0 deletions kong/llm/schemas/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ local model_options_schema = {
description = "Defines the max_tokens, if using chat or completion models.",
required = false,
default = 256 }},
{ input_cost = {
type = "number",
description = "Defines the cost per 1M tokens in your prompt.",
required = false,
gt = 0}},
{ output_cost = {
type = "number",
description = "Defines the cost per 1M tokens in the output of the AI.",
required = false,
gt = 0}},
{ temperature = {
type = "number",
description = "Defines the matching temperature, if using chat or completion models.",
Expand Down
62 changes: 61 additions & 1 deletion kong/plugins/prometheus/exporter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ package.loaded['prometheus_resty_counter'] = require("resty.counter")
local kong_subsystem = ngx.config.subsystem
local http_subsystem = kong_subsystem == "http"


local function init()
local shm = "prometheus_metrics"
if not ngx.shared[shm] then
Expand Down Expand Up @@ -145,6 +144,19 @@ local function init()
{"service", "route", "direction", "workspace"})
end

-- AI mode
metrics.ai_llm_requests = prometheus:counter("ai_llm_requests_total",
"AI requests total per ai_provider in Kong",
{"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "workspace"})

metrics.ai_llm_cost = prometheus:counter("ai_llm_cost_total",
"AI requests cost per ai_provider/cache in Kong",
{"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "workspace"})

metrics.ai_llm_tokens = prometheus:counter("ai_llm_tokens_total",
"AI requests cost per ai_provider/cache in Kong",
{"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "token_type", "workspace"})

-- Hybrid mode status
if role == "control_plane" then
metrics.data_plane_last_seen = prometheus:gauge("data_plane_last_seen",
Expand Down Expand Up @@ -207,6 +219,9 @@ local upstream_target_addr_health_table = {
{ value = 0, labels = { 0, 0, 0, "unhealthy", ngx.config.subsystem } },
{ value = 0, labels = { 0, 0, 0, "dns_error", ngx.config.subsystem } },
}
-- ai
local labels_table_ai_llm_status = {0, 0, 0, 0, 0, 0, 0}
local labels_table_ai_llm_tokens = {0, 0, 0, 0, 0, 0, 0, 0}

local function set_healthiness_metrics(table, upstream, target, address, status, metrics_bucket)
for i = 1, #table do
Expand Down Expand Up @@ -313,6 +328,51 @@ local function log(message, serialized)
metrics.kong_latency:observe(kong_proxy_latency, labels_table_latency)
end
end

if serialized.ai_metrics then
for _, ai_plugin in pairs(serialized.ai_metrics) do
local cache_status = ai_plugin.cache.cache_status or ""
local vector_db = ai_plugin.cache.vector_db or ""
local embeddings_provider = ai_plugin.cache.embeddings_provider or ""
local embeddings_model = ai_plugin.cache.embeddings_model or ""

labels_table_ai_llm_status[1] = ai_plugin.meta.provider_name
labels_table_ai_llm_status[2] = ai_plugin.meta.request_model
labels_table_ai_llm_status[3] = cache_status
labels_table_ai_llm_status[4] = vector_db
labels_table_ai_llm_status[5] = embeddings_provider
labels_table_ai_llm_status[6] = embeddings_model
labels_table_ai_llm_status[7] = workspace
metrics.ai_llm_requests:inc(1, labels_table_ai_llm_status)

if ai_plugin.usage.cost and ai_plugin.usage.cost > 0 then
metrics.ai_llm_cost:inc(ai_plugin.usage.cost, labels_table_ai_llm_status)
end

labels_table_ai_llm_tokens[1] = ai_plugin.meta.provider_name
labels_table_ai_llm_tokens[2] = ai_plugin.meta.request_model
labels_table_ai_llm_tokens[3] = cache_status
labels_table_ai_llm_tokens[4] = vector_db
labels_table_ai_llm_tokens[5] = embeddings_provider
labels_table_ai_llm_tokens[6] = embeddings_model
labels_table_ai_llm_tokens[8] = workspace

if ai_plugin.usage.prompt_tokens and ai_plugin.usage.prompt_tokens > 0 then
labels_table_ai_llm_tokens[7] = "prompt_tokens"
metrics.ai_llm_tokens:inc(ai_plugin.usage.prompt_tokens, labels_table_ai_llm_tokens)
end

if ai_plugin.usage.completion_tokens and ai_plugin.usage.completion_tokens > 0 then
labels_table_ai_llm_tokens[7] = "completion_tokens"
metrics.ai_llm_tokens:inc(ai_plugin.usage.completion_tokens, labels_table_ai_llm_tokens)
end

if ai_plugin.usage.total_tokens and ai_plugin.usage.total_tokens > 0 then
labels_table_ai_llm_tokens[7] = "total_tokens"
metrics.ai_llm_tokens:inc(ai_plugin.usage.total_tokens, labels_table_ai_llm_tokens)
end
end
end
end

-- The upstream health metrics is turned on if at least one of
Expand Down
4 changes: 4 additions & 0 deletions kong/plugins/prometheus/handler.lua
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ function PrometheusHandler:log(conf)
serialized.latencies = message.latencies
end

if conf.ai_metrics then
serialized.ai_metrics = message.ai
end

if conf.upstream_health_metrics then
exporter.set_export_upstream_health_metrics(true)
else
Expand Down
1 change: 1 addition & 0 deletions kong/plugins/prometheus/schema.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ return {
fields = {
{ per_consumer = { description = "A boolean value that determines if per-consumer metrics should be collected. If enabled, the `kong_http_requests_total` and `kong_bandwidth_bytes` metrics fill in the consumer label when available.", type = "boolean", default = false }, },
{ status_code_metrics = { description = "A boolean value that determines if status code metrics should be collected. If enabled, `http_requests_total`, `stream_sessions_total` metrics will be exported.", type = "boolean", default = false }, },
{ ai_metrics = { description = "A boolean value that determines if ai metrics should be collected. If enabled, the `ai_llm_requests_total`, `ai_llm_cost_total` and `ai_llm_tokens_total` metrics will be exported.", type = "boolean", default = false }, },
{ latency_metrics = { description = "A boolean value that determines if latency metrics should be collected. If enabled, `kong_latency_ms`, `upstream_latency_ms` and `request_latency_ms` metrics will be exported.", type = "boolean", default = false }, },
{ bandwidth_metrics = { description = "A boolean value that determines if bandwidth metrics should be collected. If enabled, `bandwidth_bytes` and `stream_sessions_total` metrics will be exported.", type = "boolean", default = false }, },
{ upstream_health_metrics = { description = "A boolean value that determines if upstream metrics should be collected. If enabled, `upstream_target_health` metric will be exported.", type = "boolean", default = false }, },
Expand Down
Loading
Loading