diff --git a/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua b/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua index 1649366..a2887b2 100644 --- a/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua +++ b/src/apisix/plugins/bk-rate-limit/rate-limit-redis.lua @@ -91,6 +91,7 @@ local function redis_cli(conf) local red = redis_new() local timeout = conf.redis_timeout or 1000 -- 1sec + -- set the connect, send, and read timeouts to conf.redis_timeout (default 1000ms, i.e. 1s) red:set_timeouts(timeout, timeout, timeout) local ok, connect_err = red:connect(conf.redis_host, conf.redis_port or 6379) @@ -115,7 +116,7 @@ local function redis_cli(conf) end end elseif check_err then - return nil, check_err + return nil, "failed to check reused times, err: " .. check_err end return red, nil end @@ -161,7 +162,6 @@ function _M.incoming(self, key, limit, window) local ttl = 0 res, err = red:eval(script, 1, key, limit, window) - if err then return nil, "failed to eval script, err: " .. err, ttl end @@ -169,7 +169,12 @@ function _M.incoming(self, key, limit, window) local remaining = res[1] ttl = res[2] - local ok, set_err = red:set_keepalive(10000, 100) + -- max_idle_timeout: in ms, set to 5000 (5s) here + -- pool_size: 75 + -- if 8 pods, 4 workers each; 8 * 4 * 75 = 2400 (currently) + -- if 10 pods, 4 workers each; 10 * 4 * 75 = 3000 + -- if 12 pods, 4 workers each; 12 * 4 * 75 = 3600 + local ok, set_err = red:set_keepalive(5000, 75) if not ok then return nil, "failed to set keepalive, err: " .. 
set_err, ttl end diff --git a/src/apisix/plugins/bk-response-check.lua b/src/apisix/plugins/bk-response-check.lua index 5ad13f5..4a89206 100644 --- a/src/apisix/plugins/bk-response-check.lua +++ b/src/apisix/plugins/bk-response-check.lua @@ -62,13 +62,9 @@ function _M.init() metric_api_requests_total = prometheus_registry:counter( "apigateway_api_requests_total", "How many HTTP requests processed, partitioned by status code, method and HTTP path.", { - "gateway", "api_name", "stage_name", "resource_name", - "service_name", - "method", - "matched_uri", "status", "proxy_phase", "proxy_error", @@ -78,13 +74,9 @@ function _M.init() metric_api_request_duration = prometheus_registry:histogram( "apigateway_api_request_duration_milliseconds", "How long it took to process the request, partitioned by status code, method and HTTP path.", { - "gateway", "api_name", "stage_name", "resource_name", - "service_name", - "method", - "matched_uri", }, { 100, 300, @@ -95,12 +87,10 @@ function _M.init() metric_app_requests_total = prometheus_registry:counter( "apigateway_app_requests_total", "How many HTTP requests per app_code/api/resource.", { - "gateway", "app_code", "api_name", "stage_name", "resource_name", - "service_name", } ) end @@ -116,19 +106,18 @@ function _M.log(conf, ctx) local api_name = ctx.var.bk_gateway_name or "" local stage_name = ctx.var.bk_stage_name or "" local resource_name = ctx.var.bk_resource_name or "" - local service_name = ctx.var.bk_service_name or "" - local instance = ctx.var.instance_id or "" - local method = ctx.var.method local proxy_phase = ctx.var.proxy_phase or "" local status = ctx.var.status local proxy_error = ctx.var.proxy_error or "0" - -- NOTE: change from path to matched_uri, to decrease the metrics(use /a/{id} instead of /a/123) - -- local path = ctx.var.uri - local matched_uri = "" - if ctx.curr_req_matched then - matched_uri = ctx.curr_req_matched._path or "" - end + -- 2023-10-18 + -- remove unused labels: 
service_name/method/matched_uri + -- remove gateway=instance label, use cluster_id and namespace to identify the gateway instance + + -- TODO: + -- 1. api_name to gateway_name + -- 2. all *_name to *_id + -- 3. make the name shorter `bk_apigateway_apigateway_api_request_duration_milliseconds_bucket` local status_label = "" if status then @@ -137,13 +126,9 @@ function _M.log(conf, ctx) metric_api_requests_total:inc( 1, { - instance, api_name, stage_name, resource_name, - service_name, - method, - matched_uri, status_label, proxy_phase, proxy_error, @@ -153,13 +138,9 @@ function _M.log(conf, ctx) if ctx.var.request_time then metric_api_request_duration:observe( ctx.var.request_time * 1000, { - instance, api_name, stage_name, resource_name, - service_name, - method, - matched_uri, } ) end @@ -167,12 +148,10 @@ function _M.log(conf, ctx) if ctx.var.bk_app_code then metric_app_requests_total:inc( 1, { - instance, ctx.var.bk_app_code, api_name, stage_name, resource_name, - service_name, } ) end diff --git a/src/apisix/tests/test-bk-response-check.lua b/src/apisix/tests/test-bk-response-check.lua index 7b4107f..51eb7b4 100644 --- a/src/apisix/tests/test-bk-response-check.lua +++ b/src/apisix/tests/test-bk-response-check.lua @@ -62,13 +62,9 @@ describe( "should log the metrics", function() ctx = { var = { - gateway = "gateway", api_name = "api_name", stage_name = "stage_name", resource_name = "resource_name", - service_name = "service_name", - method = "method", - matched_uri = "matched_uri", status = 200, proxy_phase = "proxy_phase", proxy_error = "proxy_error", @@ -86,19 +82,15 @@ describe( local api_requests_total = prometheus.registry["apigateway_api_requests_total"] local expected_label_names = { - 'gateway', 'api_name', 'stage_name', 'resource_name', - 'service_name', - 'method', - 'matched_uri', 'status', 'proxy_phase', 'proxy_error' , } - local expected_key = 'apigateway_api_requests_total{gateway="",api_name="",stage_name="",' .. 
- 'resource_name="",service_name="",method="method",matched_uri="matched_uri",status="200",' .. + local expected_key = 'apigateway_api_requests_total{api_name="",stage_name="",' .. + 'resource_name="",status="200",' .. 'proxy_phase="proxy_phase",proxy_error="proxy_error"}' assert.is_same(expected_label_names, api_requests_total["label_names"])