Skip to content

Commit

Permalink
Merge pull request #1 from farhadi/optimizations
Browse files (browse the repository at this point in the history)
Optimizations
  • Loading branch information
farhadi authored Jan 11, 2024
2 parents: d9ae923 + 6a94ddf; commit 260135b
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 29 deletions.
16 changes: 10 additions & 6 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,22 @@ jobs:

strategy:
matrix:
otp: [21, 22, 23, 24]
otp: [25, 26]
rebar3: [3.22.1]
rust: [stable]

container:
image: erlang:${{matrix.otp}}

steps:
- name: Checkout sources
uses: actions/checkout@v1
uses: actions/checkout@v4

- name: Install Erlang OTP
uses: erlef/setup-beam@v1
with:
otp-version: ${{matrix.otp}}
rebar3-version: ${{matrix.rebar3}}

- name: Install Rust ${{matrix.rust}} toolchain
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@stable
with:
toolchain: ${{matrix.rust}}
override: true
Expand Down
2 changes: 1 addition & 1 deletion rebar.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{erl_opts, [debug_info]}.

{profiles, [
{test, [{deps, [{xxh3, "0.3.2"}]}]}
{test, [{deps, [{xxh3, "0.3.5"}]}]}
]}.

{project_plugins, [erlfmt]}.
Expand Down
37 changes: 21 additions & 16 deletions src/cuckoo_filter.erl
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ add_hash(
ok ->
ok;
{error, full} ->
{Rand, RState} = rand:uniform_s(2, rand:seed_s(exsplus)),
RState = rand:mwc59_seed(),
Rand = rand:mwc59_value32(RState) bsr 31 + 1,
RandIndex = element(Rand, {Index, AltIndex}),
try_insert(Filter, RandIndex, Fingerprint, RState, LockTimeout)
end
Expand Down Expand Up @@ -289,10 +290,8 @@ hash(
hash_function = HashFunction
},
Element
) when is_binary(Element) ->
) ->
HashFunction(Element) band MaxHash;
hash(Filter = #cuckoo_filter{}, Element) ->
hash(Filter, term_to_binary(Element));
hash(FilterName, Element) ->
hash(?FILTER(FilterName), Element).

Expand Down Expand Up @@ -354,10 +353,16 @@ import(FilterName, Data) ->
%% Internal functions
%%%-------------------------------------------------------------------

-dialyzer({nowarn_function, default_hash_function/1}).

default_hash_function(Size) when Size > 64 ->
fun xxh3:hash128/1;
fun(Element) -> xxh3:hash128(term_to_binary(Element)) end;
default_hash_function(Size) when Size > 32 ->
fun(Element) -> xxh3:hash64(term_to_binary(Element)) end;
default_hash_function(Size) when Size > 27 ->
fun(Element) -> erlang:phash2(Element, 4294967296) end;
default_hash_function(_Size) ->
fun xxh3:hash64/1.
fun erlang:phash2/1.

import(_Buckets, <<>>, _Index) ->
ok;
Expand Down Expand Up @@ -404,10 +409,10 @@ index_and_fingerprint(Hash, FingerprintSize) ->
{Index, Fingerprint}.

alt_index(Index, Fingerprint, NumBuckets, HashFunction) ->
Index bxor HashFunction(binary:encode_unsigned(Fingerprint)) rem NumBuckets.
Index bxor HashFunction(Fingerprint) band (NumBuckets - 1).

atomic_index(BitIndex) ->
BitIndex div 64 + 3.
BitIndex bsr 6 + 3.

insert_at_index(Filter, Index, Fingerprint) ->
Bucket = read_bucket(Index, Filter),
Expand Down Expand Up @@ -450,9 +455,9 @@ release_write_lock(#cuckoo_filter{buckets = Buckets}) ->
try_insert(Filter = #cuckoo_filter{bucket_size = BucketSize}, Index, Fingerprint, RState, force) ->
Filter#cuckoo_filter.max_evictions == 0 orelse error(badarg),
Bucket = read_bucket(Index, Filter),
{Rand, UpdatedRState} = rand:uniform_s(BucketSize, RState),
SubIndex = Rand - 1,
case lists:nth(Rand, Bucket) of
UpdatedRState = rand:mwc59(RState),
SubIndex = (rand:mwc59_value32(UpdatedRState) * BucketSize) bsr 32,
case lists:nth(SubIndex + 1, Bucket) of
0 ->
case update_in_bucket(Filter, Index, SubIndex, 0, Fingerprint) of
ok -> ok;
Expand Down Expand Up @@ -520,9 +525,9 @@ try_insert(
try_insert(Filter, Index, Fingerprint, RState, Evictions, EvictionsList, Retry)
end;
{error, not_found} ->
{Rand, UpdatedRState} = rand:uniform_s(BucketSize, RState),
SubIndex = Rand - 1,
Evicted = lists:nth(Rand, Bucket),
UpdatedRState = rand:mwc59(RState),
SubIndex = (rand:mwc59_value32(UpdatedRState) * BucketSize) bsr 32,
Evicted = lists:nth(SubIndex + 1, Bucket),
Key = {Index, SubIndex},
if
Fingerprint == Evicted orelse is_map_key(Key, Evictions) ->
Expand Down Expand Up @@ -582,7 +587,7 @@ read_bucket(
BucketBitSize = BucketSize * FingerprintSize,
BitIndex = Index * BucketBitSize,
AtomicIndex = atomic_index(BitIndex),
SkipBits = BitIndex rem 64,
SkipBits = BitIndex band 63,
EndIndex = atomic_index(BitIndex + BucketBitSize - 1),
<<_:SkipBits, Bucket:BucketBitSize/bitstring, _/bitstring>> = <<
<<(atomics:get(Buckets, I)):64/big-unsigned-integer>>
Expand All @@ -603,7 +608,7 @@ update_in_bucket(
) ->
BitIndex = Index * BucketSize * FingerprintSize + SubIndex * FingerprintSize,
AtomicIndex = atomic_index(BitIndex),
SkipBits = BitIndex rem 64,
SkipBits = BitIndex band 63,
AtomicValue = atomics:get(Buckets, AtomicIndex),
case <<AtomicValue:64/big-unsigned-integer>> of
<<Prefix:SkipBits/bitstring, OldValue:FingerprintSize/big-unsigned-integer,
Expand Down
2 changes: 1 addition & 1 deletion src/cuckoo_filter.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
bucket_size :: pos_integer(),
fingerprint_size :: 4 | 8 | 16 | 32 | 64,
max_evictions :: non_neg_integer(),
hash_function :: fun((binary()) -> non_neg_integer())
hash_function :: fun((any()) -> non_neg_integer())
}).
10 changes: 5 additions & 5 deletions test/cuckoo_filter_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ new(_Config) ->
RealCapacity = cuckoo_filter:capacity(Filter),
NumBuckets = RealCapacity div 4,
MaxHash = NumBuckets bsl 16 - 1,
HashFunction = fun xxh3:hash64/1,
HashFunction = fun erlang:phash2/1,
?assert(RealCapacity >= Capacity),
?assertMatch(
#cuckoo_filter{
Expand Down Expand Up @@ -77,19 +77,19 @@ new_with_hash128(_Config) ->
RealCapacity = cuckoo_filter:capacity(Filter),
NumBuckets = RealCapacity div 4,
MaxHash = NumBuckets bsl 64 - 1,
HashFunction = fun xxh3:hash128/1,
?assert(RealCapacity >= Capacity),
?assertMatch(
#cuckoo_filter{
bucket_size = 4,
num_buckets = NumBuckets,
max_hash = MaxHash,
fingerprint_size = 64,
max_evictions = 100,
hash_function = HashFunction
max_evictions = 100
},
Filter
).
),
HashFunction = Filter#cuckoo_filter.hash_function,
?assertEqual(HashFunction(123), xxh3:hash128(term_to_binary(123))).

new_with_args(_Config) ->
Capacity = rand:uniform(1000),
Expand Down

0 comments on commit 260135b

Please sign in to comment.