From 1824a8f67aad4eef67b06e5f8e5cdb5dbaf82f7c Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 13 Sep 2017 18:04:01 +0200 Subject: [PATCH 1/2] Use binary search for all lookups --- src/lib/ctable.lua | 50 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/lib/ctable.lua b/src/lib/ctable.lua index 61a1c7ab41..c944a91861 100644 --- a/src/lib/ctable.lua +++ b/src/lib/ctable.lua @@ -147,6 +147,7 @@ function new(params) ctab.size = 0 ctab.max_displacement = 0 ctab.occupancy = 0 + ctab.lookup_helpers = {} ctab.max_occupancy_rate = params.max_occupancy_rate ctab.min_occupancy_rate = params.min_occupancy_rate ctab = setmetatable(ctab, { __index = CTable }) @@ -221,6 +222,7 @@ function CTable:resize(size) self.scale = self.size / HASH_MAX self.occupancy = 0 self.max_displacement = 0 + self.lookup_helper = self:make_lookup_helper() self.occupancy_hi = ceil(self.size * self.max_occupancy_rate) self.occupancy_lo = floor(self.size * self.min_occupancy_rate) for i=0,self.size*2-1 do self.entries[i].hash = HASH_MAX end @@ -260,7 +262,7 @@ function load(stream, params) params_copy.max_occupancy_rate = header.max_occupancy_rate local ctab = new(params_copy) ctab.occupancy = header.occupancy - ctab.max_displacement = header.max_displacement + ctab:maybe_increase_max_displacement(header.max_displacement) local entry_count = ctab.size + ctab.max_displacement -- Slurp the entries directly into the ctable's backing store. @@ -282,6 +284,22 @@ function CTable:save(stream) self.size + self.max_displacement) end +function CTable:make_lookup_helper() + local entries_per_lookup = self.max_displacement + 1 + local search = self.lookup_helpers[entries_per_lookup] + if search == nil then + search = binary_search.gen(entries_per_lookup, self.entry_type) + self.lookup_helpers[entries_per_lookup] = search + end + return search +end + +function CTable:maybe_increase_max_displacement(displacement) + if displacement <= self.max_displacement then return end + self.max_displacement = displacement + self.lookup_helper = self:make_lookup_helper() +end + function CTable:add(key, value, updates_allowed) if self.occupancy + 1 > self.occupancy_hi then -- Note that resizing will invalidate all hash keys, so we need @@ -324,7 +342,7 @@ function CTable:add(key, value, updates_allowed) assert(updates_allowed ~= 'required', "key not found in ctable") - self.max_displacement = max(self.max_displacement, index - start_index) + self:maybe_increase_max_displacement(index - start_index) if entries[index].hash ~= HASH_MAX then -- In a robin hood hash, we seek to spread the wealth around among @@ -340,7 +358,7 @@ function CTable:add(key, value, updates_allowed) while empty > index do entries[empty] = entries[empty - 1] local displacement = empty - hash_to_index(entries[empty].hash, scale) - self.max_displacement = max(self.max_displacement, displacement) + self:maybe_increase_max_displacement(displacement) empty = empty - 1; end end @@ -359,22 +377,24 @@ end function CTable:lookup_ptr(key) local hash = self.hash_fn(key) local entry = self.entries + hash_to_index(hash, self.scale) + entry = self.lookup_helper(entry, hash) - -- Fast path in case we find it directly. - if hash == entry.hash and self.equal_fn(key, entry.key) then - return entry - end - - while entry.hash < hash do entry = entry + 1 end - - while entry.hash == hash do + if hash == entry.hash then + -- Peel the first iteration of the loop; collisions will be rare. if self.equal_fn(key, entry.key) then return entry end - -- Otherwise possibly a collision. entry = entry + 1 + if entry.hash ~= hash then return nil end + while entry.hash == hash do + if self.equal_fn(key, entry.key) then return entry end + -- Otherwise possibly a collision. + entry = entry + 1 + end + -- Not found. + return nil + else + -- Not found. + return nil end - - -- Not found. - return nil end function CTable:lookup_and_copy(key, entry) From a403450be3bdf51a1d79f0a1d809c2d80f1a5a3b Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 13 Sep 2017 18:12:03 +0200 Subject: [PATCH 2/2] Add a type cache, in case that's important --- src/lib/binary_search.dasl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib/binary_search.dasl b/src/lib/binary_search.dasl index 214b254736..3f06fd61ce 100644 --- a/src/lib/binary_search.dasl +++ b/src/lib/binary_search.dasl @@ -35,6 +35,8 @@ local function assemble (name, prototype, generator) return ffi.cast(prototype, mcode) end +local ffi_type_cache = {} + function gen(count, entry_type) local function gen_binary_search(Dst) if count == 1 then @@ -80,8 +82,11 @@ function gen(count, entry_type) | mov rax, rdi | ret end - return assemble("binary_search_"..count, - ffi.typeof("$*(*)($*, uint32_t)", entry_type, entry_type), + if not ffi_type_cache[entry_type] then + ffi_type_cache[entry_type] = ffi.typeof( + "$*(*)($*, uint32_t)", entry_type, entry_type) + end + return assemble("binary_search_"..count, ffi_type_cache[entry_type], gen_binary_search) end