diff --git a/changelog/unreleased/kong/add-ai-data-prometheus.yml b/changelog/unreleased/kong/add-ai-data-prometheus.yml new file mode 100644 index 000000000000..284c4fd933ce --- /dev/null +++ b/changelog/unreleased/kong/add-ai-data-prometheus.yml @@ -0,0 +1,3 @@ +"message": "**prometheus**: Added `ai_llm_requests_total`, `ai_llm_cost_total` and `ai_llm_tokens_total` metrics in the Prometheus plugin to start counting AI usage." +"type": feature +"scope": Core diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua index a41a6e664c7f..9d62998c34cd 100644 --- a/kong/llm/drivers/shared.lua +++ b/kong/llm/drivers/shared.lua @@ -21,25 +21,32 @@ end -- local log_entry_keys = { - TOKENS_CONTAINER = "usage", + USAGE_CONTAINER = "usage", META_CONTAINER = "meta", PAYLOAD_CONTAINER = "payload", + CACHE_CONTAINER = "cache", -- payload keys REQUEST_BODY = "request", RESPONSE_BODY = "response", -- meta keys + PLUGIN_ID = "plugin_id", + PROVIDER_NAME = "provider_name", REQUEST_MODEL = "request_model", RESPONSE_MODEL = "response_model", - PROVIDER_NAME = "provider_name", - PLUGIN_ID = "plugin_id", -- usage keys - PROCESSING_TIME = "processing_time", - PROMPT_TOKEN = "prompt_token", - COMPLETION_TOKEN = "completion_token", + PROMPT_TOKENS = "prompt_tokens", + COMPLETION_TOKENS = "completion_tokens", TOTAL_TOKENS = "total_tokens", + COST = "cost", + + -- cache keys + VECTOR_DB = "vector_db", + EMBEDDINGS_PROVIDER = "embeddings_provider", + EMBEDDINGS_MODEL = "embeddings_model", + CACHE_STATUS = "cache_status", } local openai_override = os.getenv("OPENAI_TEST_PORT") @@ -487,26 +494,18 @@ function _M.post_request(conf, response_object) request_analytics = {} end - -- check if we already have analytics for this provider - local request_analytics_plugin = request_analytics[plugin_name] - - -- create a new structure if not - if not request_analytics_plugin then - request_analytics_plugin = { - [log_entry_keys.META_CONTAINER] = {}, - [log_entry_keys.TOKENS_CONTAINER] = { - [log_entry_keys.PROMPT_TOKEN] = 0, - [log_entry_keys.COMPLETION_TOKEN] = 0, - [log_entry_keys.TOTAL_TOKENS] = 0, - }, - } - end + -- create a new analytics structure for this plugin + local request_analytics_plugin = { + [log_entry_keys.META_CONTAINER] = {}, + [log_entry_keys.USAGE_CONTAINER] = {}, + [log_entry_keys.CACHE_CONTAINER] = {}, + } -- Set the model, response, and provider names in the current try context + request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PLUGIN_ID] = conf.__plugin_id + request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PROVIDER_NAME] = provider_name request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.REQUEST_MODEL] = kong.ctx.plugin.llm_model_requested or conf.model.name request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.RESPONSE_MODEL] = response_object.model or conf.model.name - request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PROVIDER_NAME] = provider_name - request_analytics_plugin[log_entry_keys.META_CONTAINER][log_entry_keys.PLUGIN_ID] = conf.__plugin_id -- set extra per-provider meta if kong.ctx.plugin.ai_extra_meta and type(kong.ctx.plugin.ai_extra_meta) == "table" then @@ -518,13 +517,20 @@ function _M.post_request(conf, response_object) -- Capture openai-format usage stats from the transformed response body if response_object.usage then if response_object.usage.prompt_tokens then - request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.PROMPT_TOKEN] = 
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.PROMPT_TOKEN] + response_object.usage.prompt_tokens + request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.PROMPT_TOKENS] = response_object.usage.prompt_tokens end if response_object.usage.completion_tokens then - request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.COMPLETION_TOKEN] = request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.COMPLETION_TOKEN] + response_object.usage.completion_tokens + request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.COMPLETION_TOKENS] = response_object.usage.completion_tokens end if response_object.usage.total_tokens then - request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.TOTAL_TOKENS] = request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.TOTAL_TOKENS] + response_object.usage.total_tokens + request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.TOTAL_TOKENS] = response_object.usage.total_tokens + end + + if response_object.usage.prompt_tokens and response_object.usage.completion_tokens + and conf.model.options.input_cost and conf.model.options.output_cost then + request_analytics_plugin[log_entry_keys.USAGE_CONTAINER][log_entry_keys.COST] = + (response_object.usage.prompt_tokens * conf.model.options.input_cost + + response_object.usage.completion_tokens * conf.model.options.output_cost) / 1000000 -- 1 million end end @@ -541,13 +547,17 @@ function _M.post_request(conf, response_object) kong.ctx.shared.analytics = request_analytics if conf.logging and conf.logging.log_statistics then - -- Log analytics data - kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.TOKENS_CONTAINER), - request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER]) - - -- Log meta + -- Log meta data kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.META_CONTAINER), request_analytics_plugin[log_entry_keys.META_CONTAINER]) + + -- Log usage data + kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.USAGE_CONTAINER), + request_analytics_plugin[log_entry_keys.USAGE_CONTAINER]) + + -- Log cache data + kong.log.set_serialize_value(fmt("ai.%s.%s", plugin_name, log_entry_keys.CACHE_CONTAINER), + request_analytics_plugin[log_entry_keys.CACHE_CONTAINER]) end -- log tokens response for reports and billing diff --git a/kong/llm/schemas/init.lua b/kong/llm/schemas/init.lua index 37b5aaf34761..15ce1a2a1ef0 100644 --- a/kong/llm/schemas/init.lua +++ b/kong/llm/schemas/init.lua @@ -49,6 +49,16 @@ local model_options_schema = { description = "Defines the max_tokens, if using chat or completion models.", required = false, default = 256 }}, + { input_cost = { + type = "number", + description = "Defines the cost per 1M tokens in your prompt.", + required = false, + gt = 0}}, + { output_cost = { + type = "number", + description = "Defines the cost per 1M tokens in the output of the AI.", + required = false, + gt = 0}}, { temperature = { type = "number", description = "Defines the matching temperature, if using chat or completion models.", diff --git a/kong/plugins/prometheus/exporter.lua b/kong/plugins/prometheus/exporter.lua index d94d9a08e14b..2a94ebac272c 100644 --- a/kong/plugins/prometheus/exporter.lua +++ b/kong/plugins/prometheus/exporter.lua @@ -34,7 +34,6 @@ package.loaded['prometheus_resty_counter'] = require("resty.counter") local kong_subsystem = ngx.config.subsystem local http_subsystem = kong_subsystem == "http" - 
local function init() local shm = "prometheus_metrics" if not ngx.shared[shm] then @@ -145,6 +144,19 @@ local function init() {"service", "route", "direction", "workspace"}) end + -- AI mode + metrics.ai_llm_requests = prometheus:counter("ai_llm_requests_total", + "AI requests total per ai_provider in Kong", + {"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "workspace"}) + + metrics.ai_llm_cost = prometheus:counter("ai_llm_cost_total", + "AI requests cost per ai_provider/cache in Kong", + {"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "workspace"}) + + metrics.ai_llm_tokens = prometheus:counter("ai_llm_tokens_total", + "AI tokens total per ai_provider/cache in Kong", + {"ai_provider", "ai_model", "cache_status", "vector_db", "embeddings_provider", "embeddings_model", "token_type", "workspace"}) + -- Hybrid mode status if role == "control_plane" then metrics.data_plane_last_seen = prometheus:gauge("data_plane_last_seen", @@ -207,6 +219,9 @@ local upstream_target_addr_health_table = { { value = 0, labels = { 0, 0, 0, "unhealthy", ngx.config.subsystem } }, { value = 0, labels = { 0, 0, 0, "dns_error", ngx.config.subsystem } }, } +-- ai +local labels_table_ai_llm_status = {0, 0, 0, 0, 0, 0, 0} +local labels_table_ai_llm_tokens = {0, 0, 0, 0, 0, 0, 0, 0} local function set_healthiness_metrics(table, upstream, target, address, status, metrics_bucket) for i = 1, #table do @@ -313,6 +328,51 @@ local function log(message, serialized) metrics.kong_latency:observe(kong_proxy_latency, labels_table_latency) end end + + if serialized.ai_metrics then + for _, ai_plugin in pairs(serialized.ai_metrics) do + local cache_status = ai_plugin.cache.cache_status or "" + local vector_db = ai_plugin.cache.vector_db or "" + local embeddings_provider = ai_plugin.cache.embeddings_provider or "" + local embeddings_model = ai_plugin.cache.embeddings_model or "" + + labels_table_ai_llm_status[1] = ai_plugin.meta.provider_name + labels_table_ai_llm_status[2] = ai_plugin.meta.request_model + labels_table_ai_llm_status[3] = cache_status + labels_table_ai_llm_status[4] = vector_db + labels_table_ai_llm_status[5] = embeddings_provider + labels_table_ai_llm_status[6] = embeddings_model + labels_table_ai_llm_status[7] = workspace + metrics.ai_llm_requests:inc(1, labels_table_ai_llm_status) + + if ai_plugin.usage.cost and ai_plugin.usage.cost > 0 then + metrics.ai_llm_cost:inc(ai_plugin.usage.cost, labels_table_ai_llm_status) + end + + labels_table_ai_llm_tokens[1] = ai_plugin.meta.provider_name + labels_table_ai_llm_tokens[2] = ai_plugin.meta.request_model + labels_table_ai_llm_tokens[3] = cache_status + labels_table_ai_llm_tokens[4] = vector_db + labels_table_ai_llm_tokens[5] = embeddings_provider + labels_table_ai_llm_tokens[6] = embeddings_model + labels_table_ai_llm_tokens[8] = workspace + + if ai_plugin.usage.prompt_tokens and ai_plugin.usage.prompt_tokens > 0 then + labels_table_ai_llm_tokens[7] = "prompt_tokens" + metrics.ai_llm_tokens:inc(ai_plugin.usage.prompt_tokens, labels_table_ai_llm_tokens) + end + + if ai_plugin.usage.completion_tokens and ai_plugin.usage.completion_tokens > 0 then + labels_table_ai_llm_tokens[7] = "completion_tokens" + metrics.ai_llm_tokens:inc(ai_plugin.usage.completion_tokens, labels_table_ai_llm_tokens) + end + + if ai_plugin.usage.total_tokens and ai_plugin.usage.total_tokens > 0 then + labels_table_ai_llm_tokens[7] = "total_tokens" + 
metrics.ai_llm_tokens:inc(ai_plugin.usage.total_tokens, labels_table_ai_llm_tokens) + end + end + end end -- The upstream health metrics is turned on if at least one of diff --git a/kong/plugins/prometheus/handler.lua b/kong/plugins/prometheus/handler.lua index d7bce154eb74..3666b406f009 100644 --- a/kong/plugins/prometheus/handler.lua +++ b/kong/plugins/prometheus/handler.lua @@ -54,6 +54,10 @@ function PrometheusHandler:log(conf) serialized.latencies = message.latencies end + if conf.ai_metrics then + serialized.ai_metrics = message.ai + end + if conf.upstream_health_metrics then exporter.set_export_upstream_health_metrics(true) else diff --git a/kong/plugins/prometheus/schema.lua b/kong/plugins/prometheus/schema.lua index 9b067e3bf877..a23e3b3fc5ed 100644 --- a/kong/plugins/prometheus/schema.lua +++ b/kong/plugins/prometheus/schema.lua @@ -18,6 +18,7 @@ return { fields = { { per_consumer = { description = "A boolean value that determines if per-consumer metrics should be collected. If enabled, the `kong_http_requests_total` and `kong_bandwidth_bytes` metrics fill in the consumer label when available.", type = "boolean", default = false }, }, { status_code_metrics = { description = "A boolean value that determines if status code metrics should be collected. If enabled, `http_requests_total`, `stream_sessions_total` metrics will be exported.", type = "boolean", default = false }, }, + { ai_metrics = { description = "A boolean value that determines if ai metrics should be collected. If enabled, the `ai_llm_requests_total`, `ai_llm_cost_total` and `ai_llm_tokens_total` metrics will be exported.", type = "boolean", default = false }, }, { latency_metrics = { description = "A boolean value that determines if latency metrics should be collected. If enabled, `kong_latency_ms`, `upstream_latency_ms` and `request_latency_ms` metrics will be exported.", type = "boolean", default = false }, }, { bandwidth_metrics = { description = "A boolean value that determines if bandwidth metrics should be collected. If enabled, `bandwidth_bytes` and `stream_sessions_total` metrics will be exported.", type = "boolean", default = false }, }, { upstream_health_metrics = { description = "A boolean value that determines if upstream metrics should be collected. 
If enabled, `upstream_target_health` metric will be exported.", type = "boolean", default = false }, }, diff --git a/spec/03-plugins/26-prometheus/02-access_spec.lua b/spec/03-plugins/26-prometheus/02-access_spec.lua index f1478b558383..9138637d2f27 100644 --- a/spec/03-plugins/26-prometheus/02-access_spec.lua +++ b/spec/03-plugins/26-prometheus/02-access_spec.lua @@ -1,8 +1,10 @@ local helpers = require "spec.helpers" local shell = require "resty.shell" +local pl_file = require "pl.file" local tcp_service_port = helpers.get_available_port() local tcp_proxy_port = helpers.get_available_port() +local MOCK_PORT = helpers.get_available_port() local UUID_PATTERN = "%x%x%x%x%x%x%x%x%-%x%x%x%x%-%x%x%x%x%-%x%x%x%x%-%x%x%x%x%x%x%x%x%x%x%x%x" describe("Plugin: prometheus (access)", function() @@ -611,3 +613,287 @@ describe("Plugin: prometheus (access) granular metrics switch", function() end) end + +describe("Plugin: prometheus (access) AI metrics", function() + local proxy_client + local admin_client + local prometheus_plugin + + setup(function() + local bp = helpers.get_db_utils() + + local fixtures = { + http_mock = {}, + } + + fixtures.http_mock.openai = [[ + server { + server_name openai; + listen ]]..MOCK_PORT..[[; + + default_type 'application/json'; + + + location = "/llm/v1/chat/good" { + content_by_lua_block { + local pl_file = require "pl.file" + local json = require("cjson.safe") + + ngx.req.read_body() + local body, err = ngx.req.get_body_data() + body, err = json.decode(body) + + local token = ngx.req.get_headers()["authorization"] + local token_query = ngx.req.get_uri_args()["apikey"] + + if token == "Bearer openai-key" or token_query == "openai-key" or body.apikey == "openai-key" then + ngx.req.read_body() + local body, err = ngx.req.get_body_data() + body, err = json.decode(body) + + if err or (body.messages == ngx.null) then + ngx.status = 400 + ngx.print(pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/responses/bad_request.json")) + else + ngx.status = 200 + ngx.print(pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/responses/good.json")) + end + else + ngx.status = 401 + ngx.print(pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/responses/unauthorized.json")) + end + } + } + } + ]] + + local empty_service = assert(bp.services:insert { + name = "empty_service", + host = "localhost", --helpers.mock_upstream_host, + port = 8080, --MOCK_PORT, + path = "/", + }) + + -- 200 chat good with one option + local chat_good = assert(bp.routes:insert { + service = empty_service, + name = "http-route", + protocols = { "http" }, + strip_path = true, + paths = { "/" } + }) + + bp.plugins:insert { + name = "ai-proxy", + route = { id = chat_good.id }, + config = { + route_type = "llm/v1/chat", + logging = { + log_payloads = false, + log_statistics = true, + }, + auth = { + header_name = "Authorization", + header_value = "Bearer openai-key", + }, + model = { + name = "gpt-3.5-turbo", + provider = "openai", + options = { + max_tokens = 256, + temperature = 1.0, + upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/good", + input_cost = 10.0, + output_cost = 10.0, + }, + }, + }, + } + + prometheus_plugin = assert(bp.plugins:insert { + protocols = { "http", "https", "grpc", "grpcs", "tcp", "tls" }, + name = "prometheus", + config = { + -- ai_metrics = true, + status_code_metrics = true, + }, + }) + + assert(helpers.start_kong ({ + nginx_conf = "spec/fixtures/custom_nginx.template", + plugins = "bundled, prometheus", + }, nil, nil, fixtures)) + 
proxy_client = helpers.proxy_client() + admin_client = helpers.admin_client() + end) + + teardown(function() + if proxy_client then + proxy_client:close() + end + if admin_client then + admin_client:close() + end + + helpers.stop_kong() + end) + + it("no AI metrics when not enabled in Prometheus plugin", function() + local res = assert(proxy_client:send { + method = "GET", + path = "/status/200", + headers = { + host = helpers.mock_upstream_host, + authorization = 'Bearer openai-key', + ["content-type"] = 'application/json', + accept = 'application/json', + }, + body = pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/requests/good.json"), + }) + assert.res_status(200, res) + + local body + helpers.wait_until(function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + body = assert.res_status(200, res) + return res.status == 200 + end) + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + assert.matches('http_requests_total{service="empty_service",route="http-route",code="200",source="service",workspace="default",consumer=""} 1', body, nil, true) + + assert.not_match('ai_llm_requests_total', body, nil, true) + assert.not_match('ai_llm_cost_total', body, nil, true) + assert.not_match('ai_llm_tokens_total', body, nil, true) + end) + + it("update prometheus plugin config", function() + local body + helpers.wait_until(function() + local res = assert(admin_client:send { + method = "PATCH", + path = "/plugins/" .. prometheus_plugin.id, + body = { + name = "prometheus", + config = { + status_code_metrics = true, + ai_metrics = true, + } + }, + headers = { + ["Content-Type"] = "application/json" + } + }) + body = assert.res_status(200, res) + return res.status == 200 + end) + + local cjson = require "cjson" + local json = cjson.decode(body) + assert.equal(true, json.config.ai_metrics) + + ngx.sleep(2) + end) + + it("adds the count for proxied AI requests", function() + local res = assert(proxy_client:send { + method = "GET", + path = "/status/200", + headers = { + host = helpers.mock_upstream_host, + authorization = 'Bearer openai-key', + ["content-type"] = 'application/json', + accept = 'application/json', + }, + body = pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/requests/good.json"), + }) + assert.res_status(200, res) + + local body + helpers.wait_until(function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + body = assert.res_status(200, res) + return res.status == 200 + end) + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + assert.matches('http_requests_total{service="empty_service",route="http-route",code="200",source="service",workspace="default",consumer=""} 2', body, nil, true) + + assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 1', body, nil, true) + + assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00037', body, nil, true) + + assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="completion_tokens",workspace="default"} 12', body, nil, true) + 
assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="prompt_tokens",workspace="default"} 25', body, nil, true) + assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="total_tokens",workspace="default"} 37', body, nil, true) + end) + + it("increments the count for proxied AI requests", function() + local res = assert(proxy_client:send { + method = "GET", + path = "/status/200", + headers = { + host = helpers.mock_upstream_host, + authorization = 'Bearer openai-key', + ["content-type"] = 'application/json', + accept = 'application/json', + }, + body = pl_file.read("spec/fixtures/ai-proxy/openai/llm-v1-chat/requests/good.json"), + }) + assert.res_status(200, res) + + local body + helpers.wait_until(function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + body = assert.res_status(200, res) + return res.status == 200 + end) + + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + assert.matches('http_requests_total{service="empty_service",route="http-route",code="200",source="service",workspace="default",consumer=""} 3', body, nil, true) + + assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 2', body, nil, true) + + assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00074', body, nil, true) + + assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="completion_tokens",workspace="default"} 24', body, nil, true) + assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="prompt_tokens",workspace="default"} 50', body, nil, true) + assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",token_type="total_tokens",workspace="default"} 74', body, nil, true) + end) + + it("behave correctly if AI metrics are not found", function() + local res = assert(proxy_client:send { + method = "GET", + path = "/status/400", + headers = { + host = helpers.mock_upstream_host, + } + }) + assert.res_status(400, res) + + local body + helpers.wait_until(function() + local res = assert(admin_client:send { + method = "GET", + path = "/metrics", + }) + body = assert.res_status(200, res) + return res.status == 200 + end) + + assert.matches('http_requests_total{service="empty_service",route="http-route",code="400",source="kong",workspace="default",consumer=""} 1', body, nil, true) + assert.matches('kong_nginx_metric_errors_total 0', body, nil, true) + + assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 2', body, nil, true) + assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache_status="",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00074', body, nil, true) + end) +end) \ No newline at end of file diff 
--git a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua index b0c6e4ee7ef6..b67d815fa07e 100644 --- a/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua +++ b/spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua @@ -46,10 +46,12 @@ local _EXPECTED_CHAT_STATS = { response_model = 'gpt-3.5-turbo-0613', }, usage = { - completion_token = 12, - prompt_token = 25, + prompt_tokens = 25, + completion_tokens = 12, total_tokens = 37, + cost = 0.00037, }, + cache = {} }, } @@ -250,7 +252,9 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then options = { max_tokens = 256, temperature = 1.0, - upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/good" + upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/good", + input_cost = 10.0, + output_cost = 10.0, }, }, }, diff --git a/spec/03-plugins/39-ai-request-transformer/02-integration_spec.lua b/spec/03-plugins/39-ai-request-transformer/02-integration_spec.lua index 25351787ec2c..0e8014dc5fee 100644 --- a/spec/03-plugins/39-ai-request-transformer/02-integration_spec.lua +++ b/spec/03-plugins/39-ai-request-transformer/02-integration_spec.lua @@ -43,7 +43,9 @@ local OPENAI_FLAT_RESPONSE = { options = { max_tokens = 512, temperature = 0.5, - upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat" + upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat", + input_cost = 10.0, + output_cost = 10.0, }, }, auth = { @@ -124,10 +126,12 @@ local _EXPECTED_CHAT_STATS = { response_model = 'gpt-3.5-turbo-0613', }, usage = { - completion_token = 12, - prompt_token = 25, + prompt_tokens = 25, + completion_tokens = 12, total_tokens = 37, + cost = 0.00037, }, + cache = {} }, } diff --git a/spec/03-plugins/40-ai-response-transformer/02-integration_spec.lua b/spec/03-plugins/40-ai-response-transformer/02-integration_spec.lua index 47072bb39a06..34f5afab3b6c 100644 --- a/spec/03-plugins/40-ai-response-transformer/02-integration_spec.lua +++ b/spec/03-plugins/40-ai-response-transformer/02-integration_spec.lua @@ -60,7 +60,9 @@ local OPENAI_FLAT_RESPONSE = { options = { max_tokens = 512, temperature = 0.5, - upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat" + upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat", + input_cost = 10.0, + output_cost = 10.0, }, }, auth = { @@ -181,10 +183,12 @@ local _EXPECTED_CHAT_STATS = { response_model = 'gpt-3.5-turbo-0613', }, usage = { - completion_token = 12, - prompt_token = 25, + prompt_tokens = 25, + completion_tokens = 12, total_tokens = 37, + cost = 0.00037, }, + cache = {} }, }
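Note on the expected cost values: the `0.00037` per request (and `0.00074` after two requests) asserted in the specs above follows directly from the pricing fields this change introduces. `input_cost` and `output_cost` are prices per 1M tokens, so the driver multiplies them by the prompt and completion token counts and divides by one million. A minimal standalone sketch of that arithmetic, assuming a plain Lua 5.1+ interpreter; `llm_cost` is an illustrative helper for this note, not a function defined in the patch:

-- Sketch of the cost formula added in kong/llm/drivers/shared.lua:
-- input_cost/output_cost are priced per 1M tokens.
local function llm_cost(usage, options)
  -- the patch only records a cost when both token counts and both prices are set
  if not (usage.prompt_tokens and usage.completion_tokens
          and options.input_cost and options.output_cost) then
    return nil
  end
  return (usage.prompt_tokens * options.input_cost
          + usage.completion_tokens * options.output_cost) / 1000000
end

-- Values used by the specs above: 25 prompt + 12 completion tokens,
-- both priced at 10.0 per 1M tokens -> (25*10 + 12*10) / 1e6 = 0.00037.
local cost = llm_cost({ prompt_tokens = 25, completion_tokens = 12 },
                      { input_cost = 10.0, output_cost = 10.0 })
assert(math.abs(cost - 0.00037) < 1e-12)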