Skip to content

Commit

Permalink
refactor(tools/string): speed up strip (whitespace) (#13168)
Browse files Browse the repository at this point in the history
### Summary

With a simple microbenchmark:
```lua
-- Micro-benchmark: time 100000 calls to strip() on a short string that is
-- padded on both sides with mixed whitespace (spaces, \t, \n, \f, \r).
-- ngx.update_time() is called before each ngx.now() so the sampled timestamp
-- is freshly updated rather than a cached value.
ngx.update_time()
local s = ngx.now()
for i = 1, 100000 do
    -- the result is discarded; only the cost of the call is being measured
    local a = strip("           \t \ndogestr   \f\t\r ")
end
ngx.update_time()
local e = ngx.now()

-- both timestamps are scaled by 1000 and the difference is reported as ms
print("took: ", (e * 1000) - (s * 1000), " ms")
```

I get these results:
Current: `took: 57 ms`
     PR: `took: 7 ms`

Signed-off-by: Aapo Talvensaari <[email protected]>
  • Loading branch information
bungle authored Jun 12, 2024
1 parent 5c34759 commit 582d5ac
Showing 1 changed file with 57 additions and 15 deletions.
72 changes: 57 additions & 15 deletions kong/tools/string.lua
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
local pl_stringx = require "pl.stringx"


local type = type
local ipairs = ipairs
local tostring = tostring
local lower = string.lower
local fmt = string.format
local find = string.find
local gsub = string.gsub
local type = type
local ipairs = ipairs
local tostring = tostring
local lower = string.lower
local sub = string.sub
local fmt = string.format
local find = string.find
local gsub = string.gsub
local byte = string.byte


-- Byte values used by strip() to classify whitespace without patterns.
-- In ASCII, "\t" (9), "\n" (10), "\v" (11), "\f" (12) and "\r" (13) are
-- contiguous, so a single TAB_BYTE..CR_BYTE range check covers all five;
-- the space character (32) is compared separately.
local SPACE_BYTE = byte(" ")
local TAB_BYTE = byte("\t")
local CR_BYTE = byte("\r")


local _M = {}
Expand All @@ -24,16 +31,52 @@ _M.split = pl_stringx.split

--- Strips leading and trailing whitespace from a string.
-- Whitespace here means the space character and the bytes from "\t" through
-- "\r" (tab, newline, vertical tab, form feed, carriage return).
-- A nil argument yields "". Non-string values are converted with tostring
-- before stripping. When there is nothing to strip, the input string itself
-- is returned without creating a substring.
-- @function strip
-- @param value the value to strip (nil, string, or anything tostring accepts)
-- @return the stripped string ("" when value is nil or all whitespace)
--
-- NOTE(review): this is a diff hunk rendered without +/- markers. The lines
-- operating on `str` appear to be the removed pattern-based implementation
-- and the lines operating on `value` the byte-scanning replacement --
-- confirm against the commit before copying this text as-is.
_M.strip = function(str)
if str == nil then
_M.strip = function(value)
if value == nil then
return ""
end
str = tostring(str)
if #str > 200 then
return str:gsub("^%s+", ""):reverse():gsub("^%s+", ""):reverse()
else
return str:match("^%s*(.-)%s*$")

-- TODO: do we want to operate on non-string values (kept for backward compatibility)?
if type(value) ~= "string" then
value = tostring(value) or ""
end

-- fast path: nothing to scan
if value == "" then
return ""
end

-- Forward scan: advance s past every leading whitespace byte.
local len = #value
local s = 1 -- position of the leftmost non-whitespace char
for i = 1, len do
local b = byte(value, i)
-- space, or any byte in the contiguous "\t".."\r" range
if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then
s = s + 1
else
break
end
end

-- s moved past the last byte: the whole string was whitespace
if s > len then
return ""
end

-- Backward scan: pull e back past every trailing whitespace byte.
-- Skipped when s == e, because the single remaining byte at s is already
-- known to be non-whitespace.
local e = len -- position of the rightmost non-whitespace char
if s < e then
-- the byte at position s is non-whitespace, so this loop breaks at or
-- before reaching s and e never drops below s
for i = e, 1, -1 do
local b = byte(value, i)
if b == SPACE_BYTE or (b >= TAB_BYTE and b <= CR_BYTE) then
e = e - 1
else
break
end
end
end

-- only cut a substring when something was actually stripped
if s ~= 1 or e ~= len then
value = sub(value, s, e)
end

return value
end


Expand Down Expand Up @@ -180,4 +223,3 @@ _M.replace_dashes_lower = replace_dashes_lower


return _M

1 comment on commit 582d5ac

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bazel Build

Docker image available kong/kong:582d5acf5953330c6e04b5b8c0c108127abe207b
Artifacts available https://github.com/Kong/kong/actions/runs/9479525499

Please sign in to comment.