TencentBlueKing · wklken · Oct 23, 2023 · Oct 20, 2023 · Oct 23, 2023 · Oct 23, 2023
diff --git a/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua b/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua
@@ -91,6 +91,7 @@ local function redis_cli(conf)
     local red = redis_new()
     local timeout = conf.redis_timeout or 1000 -- 1sec
 
+    -- set connect, send, and read to 1000ms, 1s
     red:set_timeouts(timeout, timeout, timeout)
 
     local ok, connect_err = red:connect(conf.redis_host, conf.redis_port or 6379)
@@ -115,7 +116,7 @@ local function redis_cli(conf)
             end
         end
     elseif check_err then
-        return nil, check_err
+        return nil, "failed to check reused times, err: " .. check_err
     end
     return red, nil
 end
@@ -161,15 +162,19 @@ function _M.incoming(self, key, limit, window)
 
     local ttl = 0
     res, err = red:eval(script, 1, key, limit, window)
-
     if err then
         return nil, "failed to eval script, err: " .. err, ttl
     end
 
     local remaining = res[1]
     ttl = res[2]
 
-    local ok, set_err = red:set_keepalive(10000, 100)
+    -- max_idle_timeout: ms, here set 5s
+    -- pool_size: 75
+    --   if 8 pods, 4 workers each; 8 * 4 * 75 = 2400 (currently)
+    --   if 10 pods, 4 workers each; 10 * 4 * 75 = 3000
+    --   if 12 pods, 4 workers each; 12 * 4 * 75 = 3600
+    local ok, set_err = red:set_keepalive(5000, 75)
     if not ok then
         return nil, "failed to set keepalive, err: " .. set_err, ttl
     end

diff --git a/src/apisix/plugins/bk-response-check.lua b/src/apisix/plugins/bk-response-check.lua
@@ -62,13 +62,9 @@ function _M.init()
     metric_api_requests_total = prometheus_registry:counter(
         "apigateway_api_requests_total",
         "How many HTTP requests processed, partitioned by status code, method and HTTP path.", {
-            "gateway",
             "api_name",
             "stage_name",
             "resource_name",
-            "service_name",
-            "method",
-            "matched_uri",
             "status",
             "proxy_phase",
             "proxy_error",
@@ -78,13 +74,9 @@ function _M.init()
     metric_api_request_duration = prometheus_registry:histogram(
         "apigateway_api_request_duration_milliseconds",
         "How long it took to process the request, partitioned by status code, method and HTTP path.", {
-            "gateway",
             "api_name",
             "stage_name",
             "resource_name",
-            "service_name",
-            "method",
-            "matched_uri",
         }, {
             100,
             300,
@@ -95,12 +87,10 @@ function _M.init()
 
     metric_app_requests_total = prometheus_registry:counter(
         "apigateway_app_requests_total", "How many HTTP requests per app_code/api/resource.", {
-            "gateway",
             "app_code",
             "api_name",
             "stage_name",
             "resource_name",
-            "service_name",
         }
     )
 end
@@ -116,19 +106,18 @@ function _M.log(conf, ctx)
     local api_name = ctx.var.bk_gateway_name or ""
     local stage_name = ctx.var.bk_stage_name or ""
     local resource_name = ctx.var.bk_resource_name or ""
-    local service_name = ctx.var.bk_service_name or ""
-    local instance = ctx.var.instance_id or ""
-    local method = ctx.var.method
     local proxy_phase = ctx.var.proxy_phase or ""
     local status = ctx.var.status
     local proxy_error = ctx.var.proxy_error or "0"
 
-    -- NOTE: change from path to matched_uri, to decrease the metrics(use /a/{id} instead of /a/123)
-    -- local path = ctx.var.uri
-    local matched_uri = ""
-    if ctx.curr_req_matched then
-        matched_uri = ctx.curr_req_matched._path or ""
-    end
+    -- 2023-10-18
+    -- remove unused labels: service_name/method/matched_uri
+    -- remove gateway=instance label, use cluster_id and namespace to identify the gateway instance
+
+    -- TODO:
+    -- 1. api_name to gateway_name
+    -- 2. all *_name to *_id
+    -- 3. make the name shorter `bk_apigateway_apigateway_api_request_duration_milliseconds_bucket`
 
     local status_label = ""
     if status then
@@ -137,13 +126,9 @@ function _M.log(conf, ctx)
 
     metric_api_requests_total:inc(
         1, {
-            instance,
             api_name,
             stage_name,
             resource_name,
-            service_name,
-            method,
-            matched_uri,
             status_label,
             proxy_phase,
             proxy_error,
@@ -153,26 +138,20 @@ function _M.log(conf, ctx)
     if ctx.var.request_time then
         metric_api_request_duration:observe(
             ctx.var.request_time * 1000, {
-                instance,
                 api_name,
                 stage_name,
                 resource_name,
-                service_name,
-                method,
-                matched_uri,
             }
         )
     end
 
     if ctx.var.bk_app_code then
         metric_app_requests_total:inc(
             1, {
-                instance,
                 ctx.var.bk_app_code,
                 api_name,
                 stage_name,
                 resource_name,
-                service_name,
             }
         )
     end

diff --git a/src/apisix/tests/test-bk-response-check.lua b/src/apisix/tests/test-bk-response-check.lua
@@ -62,13 +62,9 @@ describe(
                     "should log the metrics", function()
                         ctx = {
                             var = {
-                                gateway = "gateway",
                                 api_name = "api_name",
                                 stage_name = "stage_name",
                                 resource_name = "resource_name",
-                                service_name = "service_name",
-                                method = "method",
-                                matched_uri = "matched_uri",
                                 status = 200,
                                 proxy_phase = "proxy_phase",
                                 proxy_error = "proxy_error",
@@ -86,19 +82,15 @@ describe(
 
                         local api_requests_total = prometheus.registry["apigateway_api_requests_total"]
                         local expected_label_names = {
-                            'gateway',
                             'api_name',
                             'stage_name',
                             'resource_name',
-                            'service_name',
-                            'method',
-                            'matched_uri',
                             'status',
                             'proxy_phase',
                             'proxy_error' ,
                         }
-                        local expected_key = 'apigateway_api_requests_total{gateway="",api_name="",stage_name="",' ..
-                        'resource_name="",service_name="",method="method",matched_uri="matched_uri",status="200",' ..
+                        local expected_key = 'apigateway_api_requests_total{api_name="",stage_name="",' ..
+                        'resource_name="",status="200",' ..
                         'proxy_phase="proxy_phase",proxy_error="proxy_error"}'
 
                         assert.is_same(expected_label_names, api_requests_total["label_names"])