diff --git a/R/RadixTree_search_helpers.R b/R/RadixTree_search_helpers.R
old mode 100644
new mode 100755
diff --git a/R/pairwise.R b/R/pairwise.R
old mode 100644
new mode 100755
diff --git a/R/utils.R b/R/utils.R
old mode 100644
new mode 100755
diff --git a/R/zzz.R b/R/zzz.R
old mode 100644
new mode 100755
diff --git a/inst/extra_tests/benchmark.r b/inst/extra_tests/benchmark.r
index 049b944..b4b5552 100755
--- a/inst/extra_tests/benchmark.r
+++ b/inst/extra_tests/benchmark.r
@@ -60,12 +60,12 @@ run_og <- function(query, target, max_distance, show_progress = F) {
   results %>% arrange(query, target)
 }
 
-run_dnatree <- function(query, target, max_distance=NULL, max_fraction=NULL, mode = "levenshtein", show_progress = FALSE, nthreads = 8) {
-  x <- treedist::DNATree$new()
-  x$insert(target)
-  x$search(query, max_distance = max_distance, max_fraction = max_fraction, mode = mode, show_progress=show_progress, nthreads=nthreads) %>%
-    arrange(query, target)
-}
+# run_dnatree <- function(query, target, max_distance=NULL, max_fraction=NULL, mode = "levenshtein", show_progress = FALSE, nthreads = 8) {
+#   x <- treedist::DNATree$new()
+#   x$insert(target)
+#   x$search(query, max_distance = max_distance, max_fraction = max_fraction, mode = mode, show_progress=show_progress, nthreads=nthreads) %>%
+#     arrange(query, target)
+# }
 
 run_radixtree <- function(query, target, max_distance=NULL, max_fraction=NULL, mode = "levenshtein", show_progress = FALSE, nthreads = 8) {
   x <- seqtrie::RadixTree$new()
@@ -81,12 +81,12 @@ run_radixforest <- function(query, target, max_distance=NULL, max_fraction=NULL,
     arrange(query, target)
 }
 
-run_prefixtree <- function(query, target, max_distance=NULL, max_fraction=NULL, mode = "levenshtein", show_progress = FALSE, nthreads = 8) {
-  x <- treedist::PrefixTree$new()
-  x$insert(target)
-  x$search(query, max_distance = max_distance, max_fraction = max_fraction, mode = mode, show_progress=show_progress, nthreads=nthreads) %>%
-    arrange(query, target)
-}
+# run_prefixtree <- function(query, target, max_distance=NULL, max_fraction=NULL, mode = "levenshtein", show_progress = FALSE, nthreads = 8) {
+#   x <- treedist::PrefixTree$new()
+#   x$insert(target)
+#   x$search(query, max_distance = max_distance, max_fraction = max_fraction, mode = mode, show_progress=show_progress, nthreads=nthreads) %>%
+#     arrange(query, target)
+# }
 
 run_stringdist <- function(query, target, max_distance=NULL, max_fraction=NULL, nthreads = 8, show_progress = F) {
   results <- stringdist::stringdistmatrix(query, target, method = "lv", nthread=nthreads)
@@ -103,29 +103,32 @@ run_stringdist <- function(query, target, max_distance=NULL, max_fraction=NULL,
   dplyr::arrange(results, query, target)
 }
 
-methods <- list(run_dnatree, run_radixtree, run_radixforest, run_prefixtree, run_stringdist, run_og)
-names(methods) <- c("DNATree", "RadixTree", "RadixForest", "PrefixTree", "stringdist", "OG")
+# methods <- list(run_dnatree, run_radixtree, run_radixforest, run_prefixtree, run_stringdist, run_og)
+# names(methods) <- c("DNATree", "RadixTree", "RadixForest", "PrefixTree", "stringdist", "OG")
+
+methods <- list(run_radixtree, run_radixforest, run_og)
+names(methods) <- c("RadixTree", "RadixForest", "OG")
 
-data("covid_cdr3")
+# data("covid_cdr3")
 cc3_subset <- sample(covid_cdr3, size = 1000)
 
-sd_results <- run_stringdist(cc3_subset, cc3_subset, 2)
-og_results <- run_og(cc3_subset, cc3_subset, 2)
+# sd_results <- run_stringdist(cc3_subset, cc3_subset, 2)
+# og_results <- run_og(cc3_subset, cc3_subset, 2)
 # dt_results <- run_dnatree(cc3_subset, cc3_subset, max_distance = 2)
-rt_results <- run_radixtree(cc3_subset, cc3_subset, max_distance = 2)
-rf_results <- run_radixforest(cc3_subset, cc3_subset, max_distance = 2)
+# rt_results <- run_radixtree(cc3_subset, cc3_subset, max_distance = 2)
+# rf_results <- run_radixforest(cc3_subset, cc3_subset, max_distance = 2)
 # pt_results <- run_prefixtree(cc3_subset, cc3_subset, max_distance = 2)
 
-stopifnot(identical(sd_results, og_results))
+# stopifnot(identical(sd_results, og_results))
 # stopifnot(identical(sd_results, dt_results))
-stopifnot(identical(sd_results, rt_results))
-stopifnot(identical(sd_results, rf_results))
+# stopifnot(identical(sd_results, rt_results))
+# stopifnot(identical(sd_results, rf_results))
 # stopifnot(identical(sd_results, pt_results))
 
 ################################################################################
 
-grid <- expand.grid(nseqs = c(10000), maxdist = c(2,3), iter = 1:NITER, method = names(methods)) %>% sample_n(nrow(.))
-grid <- filter(grid, nseqs <= 1000 | method %in% c("DNATree", "RadixTree", "RadixForest", "PrefixTree"))
+grid <- expand.grid(nseqs = c(100,300,1000,3000,10000), maxdist = c(2,3), iter = 1:NITER, method = names(methods)) %>% sample_n(nrow(.))
+# grid <- filter(grid, nseqs <= 1000 | method %in% c("DNATree", "RadixTree", "RadixForest", "PrefixTree"))
 grid$time <- rep(0, nrow(grid))
 for(i in 1:nrow(grid)) {
   print(grid[i,])
@@ -138,7 +141,7 @@ for(i in 1:nrow(grid)) {
 }
 maxdist_results <- grid
 
-grid <- expand.grid(nseqs = c(100,300,1000,3000,10000,30000), maxfrac = c(0.035,0.15), iter = 1:NITER, method = c("DNATree", "RadixTree", "RadixForest", "PrefixTree")) %>% sample_n(nrow(.))
+grid <- expand.grid(nseqs = c(100,300,1000,3000,10000,30000), maxfrac = c(0.035,0.15), iter = 1:NITER, method = c("RadixTree", "RadixForest")) %>% sample_n(nrow(.))
 grid$time <- rep(0, nrow(grid))
 for(i in 1:nrow(grid)) {
   print(grid[i,])
@@ -154,9 +157,13 @@ maxfrac_results <- grid
 maxdist_results %>% group_by(nseqs, method, maxdist) %>% summarize(time = mean(time)) %>% as.data.frame %>% print
 maxfrac_results %>% group_by(nseqs, method, maxfrac) %>% summarize(time = mean(time)) %>% as.data.frame %>% print
 
-g <- ggplot(grid, aes(x = nseqs, y = time, color = method)) + geom_point() + geom_smooth(fill = NA) +
+ggplot(maxfrac_results, aes(x = nseqs, y = time, color = method)) + geom_point() + geom_smooth(fill = NA) +
+  scale_x_log10() +
+  facet_wrap(~maxfrac, scales = "free") + 
+  theme_bw(base_size = 16)
+
+ggplot(maxdist_results, aes(x = nseqs, y = time, color = method)) + geom_point() + geom_smooth(fill = NA) +
   scale_x_log10() +
-  # scale_y_log10() +
-  facet_wrap(~maxfrac) + 
+  facet_wrap(~maxdist, scales = "free") + # maxdist_results comes from the maxdist grid; it has no maxfrac column
   theme_bw(base_size = 16)
-ggsave(g, file = "benchmark_plot.png", width = 6, height = 4)
+# ggsave(g, file = "benchmark_plot.png", width = 6, height = 4)
diff --git a/inst/extra_tests/benchmark_plot.png b/inst/extra_tests/benchmark_plot.png
old mode 100644
new mode 100755
diff --git a/inst/extra_tests/simple_benchmark.R b/inst/extra_tests/simple_benchmark.R
new file mode 100755
index 0000000..09291a7
--- /dev/null
+++ b/inst/extra_tests/simple_benchmark.R
@@ -0,0 +1,27 @@
+# Minimal timing benchmark: repeatedly runs seqtrie::dist_search() on a random
+# sample of covid_cdr3 searched against itself and prints the mean wall-clock
+# time in seconds. small_array_size_bench.R runs this script through Rscript
+# and parses the printed mean from the captured output.
+suppressPackageStartupMessages({
+  library(seqtrie)
+  library(dplyr)
+})
+data(covid_cdr3)
+set.seed(314156)
+NITER <- 5 # number of timed repetitions per grid row
+NT <- 4    # thread count handed to dist_search()
+
+grid <- expand.grid(nseqs = c(30000), maxfrac = c(0.05), iter = 1:NITER, method = c("RadixForest")) %>% sample_n(nrow(.))
+grid$time <- rep(0, nrow(grid))
+for(i in seq_len(nrow(grid))) {
+  x <- sample(covid_cdr3, size = grid$nseqs[i])
+  time <- Sys.time()
+  # expand.grid() stores `method` as a factor, so pass it on as a plain string.
+  # Argument names are spelled out in full instead of relying on partial matching.
+  r <- seqtrie::dist_search(x, x, max_fraction = grid$maxfrac[i], show_progress = FALSE, tree_class = as.character(grid$method[i]), nthreads = NT)
+  grid$time[i] <- as.numeric(Sys.time() - time, units = "secs")
+  rm(x, r)
+  gc(full=TRUE)
+}
+
+cat(mean(grid$time), "\n")
diff --git a/inst/extra_tests/small_array_size_bench.R b/inst/extra_tests/small_array_size_bench.R
new file mode 100755
index 0000000..be9c33c
--- /dev/null
+++ b/inst/extra_tests/small_array_size_bench.R
@@ -0,0 +1,49 @@
+# Sweeps the SEQTRIE_SMALL_ARRAY_SIZE environment variable: for each value the
+# variable is set, the package is reinstalled with `make install`, and
+# simple_benchmark.R is run under `/usr/bin/time -v`. Time and memory are then plotted.
+library(dplyr)
+library(data.table)
+library(ggplot2)
+library(patchwork)
+library(this.path)
+setwd(dirname(this.path())) # ...seqtrie/inst/extra_tests
+
+# Three repetitions of every array size, in shuffled order.
+array_sizes <- c(seq(0,96,by = 8),1000) %>% rep(each = 3) %>% sample
+results <- lapply(array_sizes, function(AS) {
+  Sys.setenv("SEQTRIE_SMALL_ARRAY_SIZE"=AS)
+  # Stop if the rebuild fails; otherwise the previously installed build would be timed.
+  status <- system("cd ../../ && make install")
+  if (status != 0) stop("make install failed for SEQTRIE_SMALL_ARRAY_SIZE=", AS, call. = FALSE)
+  res <- system2("/usr/bin/time",  args=c("-v", "Rscript simple_benchmark.R"), stdout=TRUE, stderr=TRUE)
+  # Keep the number after the colon on the "Maximum resident set size" line of the captured output.
+  mem <- grep("Maximum resident set size", res, value=TRUE) %>%
+    gsub(".+:", "", .) %>%
+    as.numeric
+  # res[1] is expected to be the mean time printed by simple_benchmark.R.
+  data.frame(array_size=AS, time = as.numeric(res[1]), mem_usage = mem)
+}) %>% rbindlist
+
+# The array_size == 1000 runs are dropped before summarising and plotting.
+results <- filter(results, array_size != 1000)
+
+results2 <- arrange(results, array_size) %>%
+  group_by(array_size) %>%
+  summarize(mem_usage = mean(mem_usage), time = mean(time))
+
+g1 <- ggplot(results, aes(x = array_size, y = time)) + 
+  geom_point(pch=21, color = "chartreuse") + 
+  geom_line(data=results2, color = "chartreuse") + 
+  scale_x_continuous(breaks = array_sizes) + 
+  theme_bw(base_size=14)
+
+g2 <- ggplot(results, aes(x = array_size, y = mem_usage)) + 
+  geom_point(pch=21, color = "darkorange") + 
+  geom_line(data=results2, color = "darkorange") + 
+  scale_x_continuous(breaks = array_sizes) + 
+  theme_bw(base_size=14)
+
+g1 + g2 + plot_layout(ncol=1)
+
+
+
diff --git a/inst/include/ankerl/unordered_dense.h b/inst/include/ankerl/unordered_dense.h
new file mode 100755
index 0000000..2aaacd6
--- /dev/null
+++ b/inst/include/ankerl/unordered_dense.h
@@ -0,0 +1,2032 @@
+///////////////////////// ankerl::unordered_dense::{map, set} /////////////////////////
+
+// A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion.
+// Version 4.4.0
+// https://github.com/martinus/unordered_dense
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2022-2023 Martin Leitner-Ankerl <martin.ankerl@gmail.com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef ANKERL_UNORDERED_DENSE_H
+#define ANKERL_UNORDERED_DENSE_H
+
+// see https://semver.org/spec/v2.0.0.html
+#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 4 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes
+#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 4 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality
+#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes
+
+// API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/
+
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) v##major##_##minor##_##patch
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT(major, minor, patch) ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch)
+#define ANKERL_UNORDERED_DENSE_NAMESPACE   \
+    ANKERL_UNORDERED_DENSE_VERSION_CONCAT( \
+        ANKERL_UNORDERED_DENSE_VERSION_MAJOR, ANKERL_UNORDERED_DENSE_VERSION_MINOR, ANKERL_UNORDERED_DENSE_VERSION_PATCH)
+
+#if defined(_MSVC_LANG)
+#    define ANKERL_UNORDERED_DENSE_CPP_VERSION _MSVC_LANG
+#else
+#    define ANKERL_UNORDERED_DENSE_CPP_VERSION __cplusplus
+#endif
+
+#if defined(__GNUC__)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_PACK(decl) decl __attribute__((__packed__))
+#elif defined(_MSC_VER)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_PACK(decl) __pragma(pack(push, 1)) decl __pragma(pack(pop))
+#endif
+
+// exceptions
+#if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+#    define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 1 // NOLINT(cppcoreguidelines-macro-usage)
+#else
+#    define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 0 // NOLINT(cppcoreguidelines-macro-usage)
+#endif
+#ifdef _MSC_VER
+#    define ANKERL_UNORDERED_DENSE_NOINLINE __declspec(noinline)
+#else
+#    define ANKERL_UNORDERED_DENSE_NOINLINE __attribute__((noinline))
+#endif
+
+// defined in unordered_dense.cpp
+#if !defined(ANKERL_UNORDERED_DENSE_EXPORT)
+#    define ANKERL_UNORDERED_DENSE_EXPORT
+#endif
+
+#if ANKERL_UNORDERED_DENSE_CPP_VERSION < 201703L
+#    error ankerl::unordered_dense requires C++17 or higher
+#else
+#    include <array>            // for array
+#    include <cstdint>          // for uint64_t, uint32_t, uint8_t, UINT64_C
+#    include <cstring>          // for size_t, memcpy, memset
+#    include <functional>       // for equal_to, hash
+#    include <initializer_list> // for initializer_list
+#    include <iterator>         // for pair, distance
+#    include <limits>           // for numeric_limits
+#    include <memory>           // for allocator, allocator_traits, shared_ptr
+#    include <optional>         // for optional
+#    include <stdexcept>        // for out_of_range
+#    include <string>           // for basic_string
+#    include <string_view>      // for basic_string_view, hash
+#    include <tuple>            // for forward_as_tuple
+#    include <type_traits>      // for enable_if_t, declval, conditional_t, ena...
+#    include <utility>          // for forward, exchange, pair, as_const, piece...
+#    include <vector>           // for vector
+#    if ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() == 0
+#        include <cstdlib> // for abort
+#    endif
+
+#    if defined(__has_include)
+#        if __has_include(<memory_resource>)
+#            define ANKERL_UNORDERED_DENSE_PMR std::pmr // NOLINT(cppcoreguidelines-macro-usage)
+#            include <memory_resource>                  // for polymorphic_allocator
+#        elif __has_include(<experimental/memory_resource>)
+#            define ANKERL_UNORDERED_DENSE_PMR std::experimental::pmr // NOLINT(cppcoreguidelines-macro-usage)
+#            include <experimental/memory_resource>                   // for polymorphic_allocator
+#        endif
+#    endif
+
+#    if defined(_MSC_VER) && defined(_M_X64)
+#        include <intrin.h>
+#        pragma intrinsic(_umul128)
+#    endif
+
+#    if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+#        define ANKERL_UNORDERED_DENSE_LIKELY(x) __builtin_expect(x, 1)   // NOLINT(cppcoreguidelines-macro-usage)
+#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) __builtin_expect(x, 0) // NOLINT(cppcoreguidelines-macro-usage)
+#    else
+#        define ANKERL_UNORDERED_DENSE_LIKELY(x) (x)   // NOLINT(cppcoreguidelines-macro-usage)
+#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) (x) // NOLINT(cppcoreguidelines-macro-usage)
+#    endif
+
+namespace ankerl::unordered_dense {
+inline namespace ANKERL_UNORDERED_DENSE_NAMESPACE {
+
+namespace detail {
+
+#    if ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS()
+
+// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
+// inlinings more difficult. Throws are also generally the slow path.
+[[noreturn]] inline ANKERL_UNORDERED_DENSE_NOINLINE void on_error_key_not_found() {
+    throw std::out_of_range("ankerl::unordered_dense::map::at(): key not found");
+}
+[[noreturn]] inline ANKERL_UNORDERED_DENSE_NOINLINE void on_error_bucket_overflow() {
+    throw std::overflow_error("ankerl::unordered_dense: reached max bucket size, cannot increase size");
+}
+[[noreturn]] inline ANKERL_UNORDERED_DENSE_NOINLINE void on_error_too_many_elements() {
+    throw std::out_of_range("ankerl::unordered_dense::map::replace(): too many elements");
+}
+
+#    else
+
+[[noreturn]] inline void on_error_key_not_found() {
+    abort();
+}
+[[noreturn]] inline void on_error_bucket_overflow() {
+    abort();
+}
+[[noreturn]] inline void on_error_too_many_elements() {
+    abort();
+}
+
+#    endif
+
+} // namespace detail
+
+// hash ///////////////////////////////////////////////////////////////////////
+
+// This is a stripped-down implementation of wyhash: https://github.com/wangyi-fudan/wyhash
+// No big-endian support (because different values on different machines don't matter),
+// hardcodes seed and the secret, reformats the code, and clang-tidy fixes.
+namespace detail::wyhash {
+
+inline void mum(uint64_t* a, uint64_t* b) { // 64x64 -> 128-bit multiply: *a receives the low 64 bits, *b the high 64 bits
+#    if defined(__SIZEOF_INT128__)
+    __uint128_t r = *a;
+    r *= *b;
+    *a = static_cast<uint64_t>(r);
+    *b = static_cast<uint64_t>(r >> 64U);
+#    elif defined(_MSC_VER) && defined(_M_X64)
+    *a = _umul128(*a, *b, b);
+#    else
+    uint64_t ha = *a >> 32U;
+    uint64_t hb = *b >> 32U;
+    uint64_t la = static_cast<uint32_t>(*a);
+    uint64_t lb = static_cast<uint32_t>(*b);
+    uint64_t hi{};
+    uint64_t lo{};
+    uint64_t rh = ha * hb;
+    uint64_t rm0 = ha * lb;
+    uint64_t rm1 = hb * la;
+    uint64_t rl = la * lb;
+    uint64_t t = rl + (rm0 << 32U);
+    auto c = static_cast<uint64_t>(t < rl);
+    lo = t + (rm1 << 32U);
+    c += static_cast<uint64_t>(lo < t);
+    hi = rh + (rm0 >> 32U) + (rm1 >> 32U) + c;
+    *a = lo;
+    *b = hi;
+#    endif
+}
+
+// multiply and xor mix function, aka MUM
+[[nodiscard]] inline auto mix(uint64_t a, uint64_t b) -> uint64_t {
+    mum(&a, &b);
+    return a ^ b;
+}
+
+// read functions. WARNING: we don't care about endianness, so results are different on big endian!
+[[nodiscard]] inline auto r8(const uint8_t* p) -> uint64_t {
+    uint64_t v{};
+    std::memcpy(&v, p, 8U);
+    return v;
+}
+
+[[nodiscard]] inline auto r4(const uint8_t* p) -> uint64_t {
+    uint32_t v{};
+    std::memcpy(&v, p, 4);
+    return v;
+}
+
+// reads 1, 2, or 3 bytes (k = byte count): p[0] -> bits 16-23, p[k >> 1] -> bits 8-15, p[k - 1] -> bits 0-7
+[[nodiscard]] inline auto r3(const uint8_t* p, size_t k) -> uint64_t {
+    return (static_cast<uint64_t>(p[0]) << 16U) | (static_cast<uint64_t>(p[k >> 1U]) << 8U) | p[k - 1];
+}
+
+[[maybe_unused]] [[nodiscard]] inline auto hash(void const* key, size_t len) -> uint64_t {
+    static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f),
+                                              UINT64_C(0xe7037ed1a0b428db),
+                                              UINT64_C(0x8ebc6af09c88c6e3),
+                                              UINT64_C(0x589965cc75374cc3)};
+
+    auto const* p = static_cast<uint8_t const*>(key);
+    uint64_t seed = secret[0];
+    uint64_t a{};
+    uint64_t b{};
+    if (ANKERL_UNORDERED_DENSE_LIKELY(len <= 16)) {
+        if (ANKERL_UNORDERED_DENSE_LIKELY(len >= 4)) {
+            a = (r4(p) << 32U) | r4(p + ((len >> 3U) << 2U));
+            b = (r4(p + len - 4) << 32U) | r4(p + len - 4 - ((len >> 3U) << 2U));
+        } else if (ANKERL_UNORDERED_DENSE_LIKELY(len > 0)) {
+            a = r3(p, len);
+            b = 0;
+        } else {
+            a = 0;
+            b = 0;
+        }
+    } else {
+        size_t i = len;
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(i > 48)) {
+            uint64_t see1 = seed;
+            uint64_t see2 = seed;
+            do {
+                seed = mix(r8(p) ^ secret[1], r8(p + 8) ^ seed);
+                see1 = mix(r8(p + 16) ^ secret[2], r8(p + 24) ^ see1);
+                see2 = mix(r8(p + 32) ^ secret[3], r8(p + 40) ^ see2);
+                p += 48;
+                i -= 48;
+            } while (ANKERL_UNORDERED_DENSE_LIKELY(i > 48));
+            seed ^= see1 ^ see2;
+        }
+        while (ANKERL_UNORDERED_DENSE_UNLIKELY(i > 16)) {
+            seed = mix(r8(p) ^ secret[1], r8(p + 8) ^ seed);
+            i -= 16;
+            p += 16;
+        }
+        a = r8(p + i - 16);
+        b = r8(p + i - 8);
+    }
+
+    return mix(secret[1] ^ len, mix(a ^ secret[1], b ^ seed));
+}
+
+[[nodiscard]] inline auto hash(uint64_t x) -> uint64_t {
+    return detail::wyhash::mix(x, UINT64_C(0x9E3779B97F4A7C15));
+}
+
+} // namespace detail::wyhash
+
+ANKERL_UNORDERED_DENSE_EXPORT template <typename T, typename Enable = void>
+struct hash {
+    auto operator()(T const& obj) const noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>())))
+        -> uint64_t {
+        return std::hash<T>{}(obj);
+    }
+};
+
+template <typename CharT>
+struct hash<std::basic_string<CharT>> {
+    using is_avalanching = void;
+    auto operator()(std::basic_string<CharT> const& str) const noexcept -> uint64_t {
+        return detail::wyhash::hash(str.data(), sizeof(CharT) * str.size());
+    }
+};
+
+template <typename CharT>
+struct hash<std::basic_string_view<CharT>> {
+    using is_avalanching = void;
+    auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> uint64_t {
+        return detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size());
+    }
+};
+
+template <class T>
+struct hash<T*> {
+    using is_avalanching = void;
+    auto operator()(T* ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr));
+    }
+};
+
+template <class T>
+struct hash<std::unique_ptr<T>> {
+    using is_avalanching = void;
+    auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
+    }
+};
+
+template <class T>
+struct hash<std::shared_ptr<T>> {
+    using is_avalanching = void;
+    auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
+    }
+};
+
+template <typename Enum>
+struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
+    using is_avalanching = void;
+    auto operator()(Enum e) const noexcept -> uint64_t {
+        using underlying = typename std::underlying_type_t<Enum>;
+        return detail::wyhash::hash(static_cast<underlying>(e));
+    }
+};
+
+template <typename... Args>
+struct tuple_hash_helper {
+    // Converts the value into 64bit. If it is an integral type, just cast it. Mixing is doing the rest.
+    // If it isn't an integral we need to hash it.
+    template <typename Arg>
+    [[nodiscard]] constexpr static auto to64(Arg const& arg) -> uint64_t {
+        if constexpr (std::is_integral_v<Arg> || std::is_enum_v<Arg>) {
+            return static_cast<uint64_t>(arg);
+        } else {
+            return hash<Arg>{}(arg);
+        }
+    }
+
+    [[nodiscard]] static auto mix64(uint64_t state, uint64_t v) -> uint64_t {
+        return detail::wyhash::mix(state + v, uint64_t{0x9ddfea08eb382d69});
+    }
+
+    // Folds every element of the tuple into a single 64-bit state, in index order, using a fold expression: each
+    // element is converted with to64() (integral and enum values are cast, everything else is hashed) and then
+    // combined into the running state with mix64(). The final state is returned as the hash of the whole tuple.
+    template <typename T, std::size_t... Idx>
+    [[nodiscard]] static auto calc_hash(T const& t, std::index_sequence<Idx...>) noexcept -> uint64_t {
+        auto h = uint64_t{};
+        ((h = mix64(h, to64(std::get<Idx>(t)))), ...);
+        return h;
+    }
+};
+
+template <typename... Args>
+struct hash<std::tuple<Args...>> : tuple_hash_helper<Args...> {
+    using is_avalanching = void;
+    auto operator()(std::tuple<Args...> const& t) const noexcept -> uint64_t {
+        return tuple_hash_helper<Args...>::calc_hash(t, std::index_sequence_for<Args...>{});
+    }
+};
+
+template <typename A, typename B>
+struct hash<std::pair<A, B>> : tuple_hash_helper<A, B> {
+    using is_avalanching = void;
+    auto operator()(std::pair<A, B> const& t) const noexcept -> uint64_t {
+        return tuple_hash_helper<A, B>::calc_hash(t, std::index_sequence_for<A, B>{});
+    }
+};
+
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T)                    \
+        template <>                                                      \
+        struct hash<T> {                                                 \
+            using is_avalanching = void;                                 \
+            auto operator()(T const& obj) const noexcept -> uint64_t {   \
+                return detail::wyhash::hash(static_cast<uint64_t>(obj)); \
+            }                                                            \
+        }
+
+#    if defined(__GNUC__) && !defined(__clang__)
+#        pragma GCC diagnostic push
+#        pragma GCC diagnostic ignored "-Wuseless-cast"
+#    endif
+// see https://en.cppreference.com/w/cpp/utility/hash
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(bool);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(signed char);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned char);
+#    if ANKERL_UNORDERED_DENSE_CPP_VERSION >= 202002L && defined(__cpp_char8_t)
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char8_t);
+#    endif
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char16_t);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char32_t);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(wchar_t);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(short);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned short);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(int);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned int);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(long);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(long long);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned long);
+ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned long long);
+
+#    if defined(__GNUC__) && !defined(__clang__)
+#        pragma GCC diagnostic pop
+#    endif
+
+// bucket_type //////////////////////////////////////////////////////////
+
+namespace bucket_type {
+
+struct standard {
+    static constexpr uint32_t dist_inc = 1U << 8U;             // skip 1 byte fingerprint
+    static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+    uint32_t m_dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
+    uint32_t m_value_idx;            // index into the m_values vector.
+};
+
+ANKERL_UNORDERED_DENSE_PACK(struct big {
+    static constexpr uint32_t dist_inc = 1U << 8U;             // skip 1 byte fingerprint
+    static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+    uint32_t m_dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
+    size_t m_value_idx;              // index into the m_values vector.
+});
+
+} // namespace bucket_type
+
+namespace detail {
+
+struct nonesuch {};
+
+template <class Default, class AlwaysVoid, template <class...> class Op, class... Args>
+struct detector {
+    using value_t = std::false_type;
+    using type = Default;
+};
+
+template <class Default, template <class...> class Op, class... Args>
+struct detector<Default, std::void_t<Op<Args...>>, Op, Args...> {
+    using value_t = std::true_type;
+    using type = Op<Args...>;
+};
+
+template <template <class...> class Op, class... Args>
+using is_detected = typename detail::detector<detail::nonesuch, void, Op, Args...>::value_t;
+
+template <template <class...> class Op, class... Args>
+constexpr bool is_detected_v = is_detected<Op, Args...>::value;
+
+template <typename T>
+using detect_avalanching = typename T::is_avalanching;
+
+template <typename T>
+using detect_is_transparent = typename T::is_transparent;
+
+template <typename T>
+using detect_iterator = typename T::iterator;
+
+template <typename T>
+using detect_reserve = decltype(std::declval<T&>().reserve(size_t{}));
+
+// enable_if helpers
+
+template <typename Mapped>
+constexpr bool is_map_v = !std::is_void_v<Mapped>;
+
+// clang-format off
+template <typename Hash, typename KeyEqual>
+constexpr bool is_transparent_v = is_detected_v<detect_is_transparent, Hash> && is_detected_v<detect_is_transparent, KeyEqual>;
+// clang-format on
+
+template <typename From, typename To1, typename To2>
+constexpr bool is_neither_convertible_v = !std::is_convertible_v<From, To1> && !std::is_convertible_v<From, To2>;
+
+template <typename T>
+constexpr bool has_reserve = is_detected_v<detect_reserve, T>;
+
+// base type for map has mapped_type
+template <class T>
+struct base_table_type_map {
+    using mapped_type = T;
+};
+
+// base type for set doesn't have mapped_type
+struct base_table_type_set {};
+
+} // namespace detail
+
+// Very much like std::deque, but faster for indexing (in most cases). As of now this doesn't implement the full std::vector
+// API, but merely what's necessary to work as an underlying container for ankerl::unordered_dense::{map, set}.
+// It allocates blocks of equal size and puts them into the m_blocks vector. That means it can grow simply by adding a new
+// block to the back of m_blocks, and doesn't double its size like an std::vector. The disadvantage is that memory is not
+// linear and thus there is one more indirection necessary for indexing.
+template <typename T, typename Allocator = std::allocator<T>, size_t MaxSegmentSizeBytes = 4096>
+class segmented_vector {
+    template <bool IsConst>
+    class iter_t;
+
+public:
+    using allocator_type = Allocator;
+    using pointer = typename std::allocator_traits<allocator_type>::pointer;
+    using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer;
+    using difference_type = typename std::allocator_traits<allocator_type>::difference_type;
+    using value_type = T;
+    using size_type = std::size_t;
+    using reference = T&;
+    using const_reference = T const&;
+    using iterator = iter_t<false>;
+    using const_iterator = iter_t<true>;
+
+private:
+    using vec_alloc = typename std::allocator_traits<Allocator>::template rebind_alloc<pointer>;
+    std::vector<pointer, vec_alloc> m_blocks{};
+    size_t m_size{};
+
+    // Calculates the maximum number for x in  (s << x) <= max_val
+    static constexpr auto num_bits_closest(size_t max_val, size_t s) -> size_t {
+        auto f = size_t{0};
+        while (s << (f + 1) <= max_val) {
+            ++f;
+        }
+        return f;
+    }
+
+    using self_t = segmented_vector<T, Allocator, MaxSegmentSizeBytes>;
+    static constexpr auto num_bits = num_bits_closest(MaxSegmentSizeBytes, sizeof(T));
+    static constexpr auto num_elements_in_block = 1U << num_bits;
+    static constexpr auto mask = num_elements_in_block - 1U;
+
+    /**
+     * Iterator class doubles as const_iterator and iterator.
+     *
+     * Holds a pointer to the array of block pointers plus a flat element index. Dereferencing splits the
+     * index into a block number (idx >> num_bits) and an offset inside that block (idx & mask).
+     */
+    template <bool IsConst>
+    class iter_t {
+        using ptr_t = typename std::conditional_t<IsConst, segmented_vector::const_pointer const*, segmented_vector::pointer*>;
+        ptr_t m_data{};
+        size_t m_idx{};
+
+        // Both instantiations are friends so the converting operations can read the other's members.
+        template <bool B>
+        friend class iter_t;
+
+    public:
+        using difference_type = segmented_vector::difference_type;
+        using value_type = T;
+        using reference = typename std::conditional_t<IsConst, value_type const&, value_type&>;
+        using pointer = typename std::conditional_t<IsConst, segmented_vector::const_pointer, segmented_vector::pointer>;
+        using iterator_category = std::forward_iterator_tag;
+
+        iter_t() noexcept = default;
+
+        // Converting constructor; only enabled for iterator -> const_iterator.
+        template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+        // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
+        constexpr iter_t(iter_t<OtherIsConst> const& other) noexcept
+            : m_data(other.m_data)
+            , m_idx(other.m_idx) {}
+
+        // data points to the array of block pointers, idx is the flat element index.
+        constexpr iter_t(ptr_t data, size_t idx) noexcept
+            : m_data(data)
+            , m_idx(idx) {}
+
+        // Converting assignment; only enabled for iterator -> const_iterator.
+        template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+        constexpr auto operator=(iter_t<OtherIsConst> const& other) noexcept -> iter_t& {
+            m_data = other.m_data;
+            m_idx = other.m_idx;
+            return *this;
+        }
+
+        constexpr auto operator++() noexcept -> iter_t& {
+            ++m_idx;
+            return *this;
+        }
+
+        // Post-increment, required of forward iterators: advances and returns the previous position.
+        constexpr auto operator++(int) noexcept -> iter_t {
+            auto prev = *this;
+            ++m_idx;
+            return prev;
+        }
+
+        // Returns an iterator diff elements further; const so it can be used on const iterator objects.
+        constexpr auto operator+(difference_type diff) const noexcept -> iter_t {
+            return {m_data, static_cast<size_t>(static_cast<difference_type>(m_idx) + diff)};
+        }
+
+        // Distance in elements between the two positions; only the indices are compared.
+        template <bool OtherIsConst>
+        constexpr auto operator-(iter_t<OtherIsConst> const& other) const noexcept -> difference_type {
+            return static_cast<difference_type>(m_idx) - static_cast<difference_type>(other.m_idx);
+        }
+
+        constexpr auto operator*() const noexcept -> reference {
+            return m_data[m_idx >> num_bits][m_idx & mask];
+        }
+
+        constexpr auto operator->() const noexcept -> pointer {
+            return &m_data[m_idx >> num_bits][m_idx & mask];
+        }
+
+        // Equality compares the element index only, not the block array the iterator refers to.
+        template <bool O>
+        constexpr auto operator==(iter_t<O> const& o) const noexcept -> bool {
+            return m_idx == o.m_idx;
+        }
+
+        template <bool O>
+        constexpr auto operator!=(iter_t<O> const& o) const noexcept -> bool {
+            return !(*this == o);
+        }
+    };
+
+    // Slow path: allocates one additional block of num_elements_in_block slots and records it in m_blocks.
+    void increase_capacity() {
+        using block_traits = std::allocator_traits<Allocator>;
+        auto block_alloc = Allocator(m_blocks.get_allocator());
+        pointer fresh_block = block_traits::allocate(block_alloc, num_elements_in_block);
+        m_blocks.push_back(fresh_block);
+    }
+
+    // Appends every element of other by move-constructing it at the end of this vector.
+    // other keeps its (moved-from) elements; its size is not changed here.
+    void append_everything_from(segmented_vector&& other) {
+        reserve(size() + other.size());
+        for (auto it = other.begin(), last = other.end(); it != last; ++it) {
+            emplace_back(std::move(*it));
+        }
+    }
+
+    // Appends a copy of every element of other to the end of this vector.
+    void append_everything_from(segmented_vector const& other) {
+        reserve(size() + other.size());
+        for (auto it = other.cbegin(), last = other.cend(); it != last; ++it) {
+            emplace_back(*it);
+        }
+    }
+
+    // Hands every block back to the allocator. Elements are not destroyed here and the entries of
+    // m_blocks are left in place, so callers have to replace or discard m_blocks afterwards.
+    void dealloc() {
+        using block_traits = std::allocator_traits<Allocator>;
+        auto block_alloc = Allocator(m_blocks.get_allocator());
+        for (auto it = m_blocks.begin(); it != m_blocks.end(); ++it) {
+            block_traits::deallocate(block_alloc, *it, num_elements_in_block);
+        }
+    }
+
+    // Number of blocks needed to hold `capacity` elements (division by the block length, rounded up).
+    [[nodiscard]] static constexpr auto calc_num_blocks_for_capacity(size_t capacity) {
+        auto const rounded_up = capacity + num_elements_in_block - 1U;
+        return rounded_up / num_elements_in_block;
+    }
+
+public:
+    // Creates an empty vector: no blocks, size 0, default-constructed allocator.
+    segmented_vector() = default;
+
+    // Creates an empty vector whose block list (and therefore get_allocator()) is based on alloc.
+    // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
+    segmented_vector(Allocator alloc)
+        : m_blocks(vec_alloc(alloc)) {}
+
+    // Allocator-extended move constructor: starts empty with alloc, then move-assigns other into *this.
+    // Whether blocks are taken over or elements are moved one by one is decided by the move assignment.
+    segmented_vector(segmented_vector&& other, Allocator alloc)
+        : segmented_vector(alloc) {
+        *this = std::move(other);
+    }
+
+    // Allocator-extended copy constructor: starts empty with alloc, then copies all elements of other.
+    segmented_vector(segmented_vector const& other, Allocator alloc)
+        : m_blocks(vec_alloc(alloc)) {
+        append_everything_from(other);
+    }
+
+    // Move constructor. Uses other's allocator: calling get_allocator() on *this here would read
+    // m_blocks before it has been constructed. With equal allocators the delegated move assignment
+    // takes over other's blocks instead of moving element by element.
+    segmented_vector(segmented_vector&& other) noexcept
+        : segmented_vector(std::move(other), other.get_allocator()) {}
+
+    // Copy constructor: copies all elements of other. m_blocks is default-initialized here, so the
+    // copy uses a default-constructed allocator rather than a copy of other's allocator.
+    segmented_vector(segmented_vector const& other) {
+        append_everything_from(other);
+    }
+
+    // Copy assignment: destroys the current elements and copies other's elements. Already allocated
+    // blocks are kept and reused. Self-assignment is a no-op.
+    auto operator=(segmented_vector const& other) -> segmented_vector& {
+        if (this != &other) {
+            clear();
+            append_everything_from(other);
+        }
+        return *this;
+    }
+
+    // Move assignment. Destroys the current elements and releases the current blocks first. When both
+    // allocators compare equal, other's blocks are taken over and other is left with size 0; otherwise
+    // the elements are move-constructed one by one and other keeps its (moved-from) elements.
+    auto operator=(segmented_vector&& other) noexcept -> segmented_vector& {
+        if (this == &other) {
+            // Self-move must be a no-op: the code below would free our own blocks and then depend on
+            // the unspecified state std::vector's self-move-assignment leaves m_blocks in.
+            return *this;
+        }
+        clear();
+        dealloc();
+        if (other.get_allocator() == get_allocator()) {
+            m_blocks = std::move(other.m_blocks);
+            m_size = std::exchange(other.m_size, {});
+        } else {
+            // make sure to construct with other's allocator!
+            m_blocks = std::vector<pointer, vec_alloc>(vec_alloc(other.get_allocator()));
+            append_everything_from(std::move(other));
+        }
+        return *this;
+    }
+
+    // Destroys all elements first (clear), then returns all blocks to the allocator (dealloc).
+    ~segmented_vector() {
+        clear();
+        dealloc();
+    }
+
+    // Number of constructed elements.
+    [[nodiscard]] constexpr auto size() const -> size_t {
+        return m_size;
+    }
+
+    // Number of element slots available in the currently allocated blocks.
+    [[nodiscard]] constexpr auto capacity() const -> size_t {
+        return m_blocks.size() * num_elements_in_block;
+    }
+
+    // Indexing is highly performance critical
+    // The upper bits of i select the block, the lower num_bits bits the slot inside it. No bounds check.
+    [[nodiscard]] constexpr auto operator[](size_t i) const noexcept -> T const& {
+        return m_blocks[i >> num_bits][i & mask];
+    }
+
+    // Mutable counterpart of the const overload; same index split, no bounds check.
+    [[nodiscard]] constexpr auto operator[](size_t i) noexcept -> T& {
+        return m_blocks[i >> num_bits][i & mask];
+    }
+
+    // Iterators to the first element: block pointer array plus flat index 0.
+    [[nodiscard]] constexpr auto begin() -> iterator {
+        return {m_blocks.data(), 0U};
+    }
+    [[nodiscard]] constexpr auto begin() const -> const_iterator {
+        return {m_blocks.data(), 0U};
+    }
+    [[nodiscard]] constexpr auto cbegin() const -> const_iterator {
+        return {m_blocks.data(), 0U};
+    }
+
+    // Past-the-end iterators: block pointer array plus flat index m_size.
+    [[nodiscard]] constexpr auto end() -> iterator {
+        return {m_blocks.data(), m_size};
+    }
+    [[nodiscard]] constexpr auto end() const -> const_iterator {
+        return {m_blocks.data(), m_size};
+    }
+    [[nodiscard]] constexpr auto cend() const -> const_iterator {
+        return {m_blocks.data(), m_size};
+    }
+
+    // Reference to the last element. No emptiness check: with m_size == 0 the index m_size - 1 wraps around.
+    [[nodiscard]] constexpr auto back() -> reference {
+        return operator[](m_size - 1);
+    }
+    [[nodiscard]] constexpr auto back() const -> const_reference {
+        return operator[](m_size - 1);
+    }
+
+    // Destroys the last element and decrements the size. The block that held it stays allocated.
+    void pop_back() {
+        back().~T();
+        --m_size;
+    }
+
+    // True when the vector holds no elements.
+    [[nodiscard]] auto empty() const {
+        return m_size == 0;
+    }
+
+    // Makes sure capacity() is at least new_capacity by allocating additional blocks as needed.
+    // Never releases blocks.
+    void reserve(size_t new_capacity) {
+        auto const blocks_needed = calc_num_blocks_for_capacity(new_capacity);
+        m_blocks.reserve(blocks_needed);
+        while (capacity() < new_capacity) {
+            increase_capacity();
+        }
+    }
+
+    // Returns an element allocator constructed from the allocator held by the block list.
+    [[nodiscard]] auto get_allocator() const -> allocator_type {
+        return allocator_type{m_blocks.get_allocator()};
+    }
+
+    // Constructs a new element in place at the end from args and returns a reference to it.
+    template <class... Args>
+    auto emplace_back(Args&&... args) -> reference {
+        // all allocated slots are in use => add one more block first
+        if (m_size == capacity()) {
+            increase_capacity();
+        }
+        // address of the first unused slot; no T object lives there yet
+        auto* ptr = static_cast<void*>(&operator[](m_size));
+        auto& ref = *new (ptr) T(std::forward<Args>(args)...);
+        // the size is only bumped after the constructor has returned
+        ++m_size;
+        return ref;
+    }
+
+    // Destroys all elements and resets the size to 0. Allocated blocks are kept for reuse.
+    void clear() {
+        if constexpr (!std::is_trivially_destructible_v<T>) {
+            auto const count = size();
+            for (size_t i = 0; i != count; ++i) {
+                (*this)[i].~T();
+            }
+        }
+        m_size = 0;
+    }
+
+    // Releases all trailing blocks that are not needed to hold the current m_size elements, then
+    // asks the block list itself to shrink.
+    void shrink_to_fit() {
+        using block_traits = std::allocator_traits<Allocator>;
+        auto block_alloc = Allocator(m_blocks.get_allocator());
+        auto const blocks_to_keep = calc_num_blocks_for_capacity(m_size);
+        for (; m_blocks.size() > blocks_to_keep; m_blocks.pop_back()) {
+            block_traits::deallocate(block_alloc, m_blocks.back(), num_elements_in_block);
+        }
+        m_blocks.shrink_to_fit();
+    }
+};
+
+namespace detail {
+
+// This is it, the table. Doubles as map and set, and uses `void` for T when it's used as a set.
+template <class Key,
+          class T, // when void, treat it as a set.
+          class Hash,
+          class KeyEqual,
+          class AllocatorOrContainer,
+          class Bucket,
+          bool IsSegmented>
+class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, base_table_type_set> {
+    // Element type of the value container: a key/value pair for maps, just the key for sets.
+    using underlying_value_type = typename std::conditional_t<is_map_v<T>, std::pair<Key, T>, Key>;
+    // Container built when AllocatorOrContainer is used as an allocator: segmented_vector if IsSegmented,
+    // std::vector otherwise.
+    using underlying_container_type = std::conditional_t<IsSegmented,
+                                                         segmented_vector<underlying_value_type, AllocatorOrContainer>,
+                                                         std::vector<underlying_value_type, AllocatorOrContainer>>;
+
+public:
+    // AllocatorOrContainer itself when detect_iterator is detected on it (it is then used as the
+    // container), otherwise underlying_container_type.
+    using value_container_type = std::
+        conditional_t<is_detected_v<detect_iterator, AllocatorOrContainer>, AllocatorOrContainer, underlying_container_type>;
+
+private:
+    // Allocator for the bucket array, rebound from the value container's allocator.
+    using bucket_alloc =
+        typename std::allocator_traits<typename value_container_type::allocator_type>::template rebind_alloc<Bucket>;
+    using bucket_alloc_traits = std::allocator_traits<bucket_alloc>;
+
+    // Shift applied to a 64bit hash to get a bucket index; 64 - 2 gives 2^2 = 4 initial buckets.
+    static constexpr uint8_t initial_shifts = 64 - 2; // 2^(64-m_shift) number of buckets
+    static constexpr float default_max_load_factor = 0.8F;
+
+public:
+    // Member types; most are forwarded from the value container.
+    using key_type = Key;
+    using value_type = typename value_container_type::value_type;
+    using size_type = typename value_container_type::size_type;
+    using difference_type = typename value_container_type::difference_type;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using allocator_type = typename value_container_type::allocator_type;
+    using reference = typename value_container_type::reference;
+    using const_reference = typename value_container_type::const_reference;
+    using pointer = typename value_container_type::pointer;
+    using const_pointer = typename value_container_type::const_pointer;
+    using const_iterator = typename value_container_type::const_iterator;
+    // For sets (T is not a map type) iterator is the const iterator.
+    using iterator = std::conditional_t<is_map_v<T>, typename value_container_type::iterator, const_iterator>;
+    using bucket_type = Bucket;
+
+private:
+    // Integer types of the two Bucket fields.
+    using value_idx_type = decltype(Bucket::m_value_idx);
+    using dist_and_fingerprint_type = decltype(Bucket::m_dist_and_fingerprint);
+
+    // Buckets are cleared with memset and copied with memcpy below, which these assertions justify.
+    static_assert(std::is_trivially_destructible_v<Bucket>, "assert there's no need to call destructor / std::destroy");
+    static_assert(std::is_trivially_copyable_v<Bucket>, "assert we can just memset / memcpy");
+
+    value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
+    using bucket_pointer = typename std::allocator_traits<bucket_alloc>::pointer;
+    // Bucket array of m_num_buckets entries; each bucket refers to an element of m_values by index.
+    bucket_pointer m_buckets{};
+    size_t m_num_buckets = 0;
+    // Number of elements the current bucket array may hold before it has to grow (see is_full).
+    size_t m_max_bucket_capacity = 0;
+    float m_max_load_factor = default_max_load_factor;
+    Hash m_hash{};
+    KeyEqual m_equal{};
+    // Right shift that turns a 64bit hash into a bucket index.
+    uint8_t m_shifts = initial_shifts;
+
+    // Index of the bucket following bucket_idx, wrapping around to 0 after the last bucket.
+    [[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets)) {
+            return 0;
+        }
+        return static_cast<value_idx_type>(bucket_idx + 1U);
+    }
+
+    // Helper to access bucket through pointer types
+    // Returns the bucket `offset` entries after bucket_ptr; the offset is cast to the allocator's
+    // difference_type before the pointer arithmetic. No bounds check.
+    [[nodiscard]] static constexpr auto at(bucket_pointer bucket_ptr, size_t offset) -> Bucket& {
+        return *(bucket_ptr + static_cast<typename std::allocator_traits<bucket_alloc>::difference_type>(offset));
+    }
+
+    // use the dist_inc and dist_dec functions so that uint16_t types work without warning
+    // Adds one distance step (Bucket::dist_inc) to x and casts the result back to the field type.
+    [[nodiscard]] static constexpr auto dist_inc(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+        return static_cast<dist_and_fingerprint_type>(x + Bucket::dist_inc);
+    }
+
+    // Subtracts one distance step (Bucket::dist_inc) from x and casts the result back to the field type.
+    [[nodiscard]] static constexpr auto dist_dec(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+        return static_cast<dist_and_fingerprint_type>(x - Bucket::dist_inc);
+    }
+
+    // The goal of mixed_hash is to always produce a high quality 64bit hash.
+    // The branch is chosen at compile time from whether detect_avalanching is detected on Hash and
+    // from the size of the type m_hash(key) returns.
+    template <typename K>
+    [[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
+        if constexpr (is_detected_v<detect_avalanching, Hash>) {
+            // we know that the hash is good because is_avalanching.
+            if constexpr (sizeof(decltype(m_hash(key))) < sizeof(uint64_t)) {
+                // 32bit hash and is_avalanching => multiply with a constant to avalanche bits upwards
+                return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
+            } else {
+                // 64bit and is_avalanching => only use the hash itself.
+                return m_hash(key);
+            }
+        } else {
+            // not is_avalanching => apply wyhash
+            return wyhash::hash(m_hash(key));
+        }
+    }
+
+    // Initial bucket metadata for a hash: one distance step (Bucket::dist_inc) combined with the
+    // bits of the hash selected by Bucket::fingerprint_mask.
+    [[nodiscard]] constexpr auto dist_and_fingerprint_from_hash(uint64_t hash) const -> dist_and_fingerprint_type {
+        return Bucket::dist_inc | (static_cast<dist_and_fingerprint_type>(hash) & Bucket::fingerprint_mask);
+    }
+
+    // Home bucket of a hash: its upper bits, obtained by shifting right by m_shifts.
+    [[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> value_idx_type {
+        return static_cast<value_idx_type>(hash >> m_shifts);
+    }
+
+    // Extracts the key from a stored value: the pair's first member for maps, the value itself for sets.
+    [[nodiscard]] static constexpr auto get_key(value_type const& vt) -> key_type const& {
+        if constexpr (is_map_v<T>) {
+            return vt.first;
+        } else {
+            return vt;
+        }
+    }
+
+    // Starts at key's home bucket and walks forward while the stored dist_and_fingerprint is larger
+    // than the probe's. Returns the probe's final dist_and_fingerprint together with the bucket index
+    // where the walk stopped (packed into a Bucket: the second member carries the bucket index here,
+    // not a value index).
+    template <typename K>
+    [[nodiscard]] auto next_while_less(K const& key) const -> Bucket {
+        auto hash = mixed_hash(key);
+        auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
+
+        while (dist_and_fingerprint < at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+        }
+        return {dist_and_fingerprint, bucket_idx};
+    }
+
+    // Stores bucket at index place. Every occupied bucket from there on is pushed one slot further
+    // (its distance increased by one step) until an empty bucket (m_dist_and_fingerprint == 0) is found.
+    void place_and_shift_up(Bucket bucket, value_idx_type place) {
+        while (0 != at(m_buckets, place).m_dist_and_fingerprint) {
+            // swap: write the carried bucket, continue with the one that was stored here
+            bucket = std::exchange(at(m_buckets, place), bucket);
+            bucket.m_dist_and_fingerprint = dist_inc(bucket.m_dist_and_fingerprint);
+            place = next(place);
+        }
+        at(m_buckets, place) = bucket;
+    }
+
+    // Number of buckets for a given shift: 2^(64 - shifts), capped at max_bucket_count().
+    [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t {
+        return (std::min)(max_bucket_count(), size_t{1} << (64U - shifts));
+    }
+
+    // Finds the shift to use for s elements: starting from initial_shifts, the shift is lowered (more
+    // buckets) until bucket count times max_load_factor() is at least s, or the shift reaches 0.
+    [[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t {
+        auto shifts = initial_shifts;
+        for (; shifts > 0; --shifts) {
+            auto const usable = static_cast<size_t>(static_cast<float>(calc_num_buckets(shifts)) * max_load_factor());
+            if (usable >= s) {
+                break;
+            }
+        }
+        return shifts;
+    }
+
+    // Allocates this table's bucket array and fills it to match other.
+    // Assumes m_values already holds the copied data and that no bucket array is currently owned
+    // (m_buckets is overwritten by the allocation without being freed).
+    void copy_buckets(table const& other) {
+        // assumes m_values has already the correct data copied over.
+        if (empty()) {
+            // when empty, at least allocate an initial buckets and clear them.
+            // (the bucket count follows from the current value of m_shifts)
+            allocate_buckets_from_shift();
+            clear_buckets();
+        } else {
+            // same shift as other => same bucket count, so the bucket array can be copied verbatim
+            m_shifts = other.m_shifts;
+            allocate_buckets_from_shift();
+            std::memcpy(m_buckets, other.m_buckets, sizeof(Bucket) * bucket_count());
+        }
+    }
+
+    /**
+     * True when no element can be added any more without increasing the size.
+     * do_place_element checks this after the new value has already been appended to m_values, so
+     * size() includes that value.
+     */
+    [[nodiscard]] auto is_full() const -> bool {
+        return size() > m_max_bucket_capacity;
+    }
+
+    // Frees the bucket array (if any) with an allocator rebound from m_values' current allocator and
+    // resets m_buckets, m_num_buckets and m_max_bucket_capacity. m_shifts is left untouched.
+    void deallocate_buckets() {
+        auto ba = bucket_alloc(m_values.get_allocator());
+        if (nullptr != m_buckets) {
+            bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
+            m_buckets = nullptr;
+        }
+        m_num_buckets = 0;
+        m_max_bucket_capacity = 0;
+    }
+
+    // Allocates a bucket array sized from m_shifts and updates m_num_buckets / m_max_bucket_capacity.
+    // The buckets are left uninitialized, and a previously held array is not freed here.
+    void allocate_buckets_from_shift() {
+        auto ba = bucket_alloc(m_values.get_allocator());
+        m_num_buckets = calc_num_buckets(m_shifts);
+        m_buckets = bucket_alloc_traits::allocate(ba, m_num_buckets);
+        if (m_num_buckets == max_bucket_count()) {
+            // reached the maximum, make sure we can use each bucket
+            m_max_bucket_capacity = max_bucket_count();
+        } else {
+            // otherwise only the max_load_factor() fraction of the buckets may be used
+            m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
+        }
+    }
+
+    // Zeroes the whole bucket array, which marks every bucket as empty. Does nothing when no array
+    // is allocated.
+    void clear_buckets() {
+        if (m_buckets == nullptr) {
+            return;
+        }
+        std::memset(&*m_buckets, 0, sizeof(Bucket) * bucket_count());
+    }
+
+    // Rebuilds the bucket array from scratch: clears all buckets, then inserts a bucket for every
+    // element of m_values in index order.
+    void clear_and_fill_buckets_from_values() {
+        clear_buckets();
+        for (value_idx_type value_idx = 0, end_idx = static_cast<value_idx_type>(m_values.size()); value_idx < end_idx;
+             ++value_idx) {
+            auto const& key = get_key(m_values[value_idx]);
+            // `bucket` is the bucket index at which the probe for key stopped
+            auto [dist_and_fingerprint, bucket] = next_while_less(key);
+
+            // we know for certain that key has not yet been inserted, so no need to check it.
+            place_and_shift_up({dist_and_fingerprint, value_idx}, bucket);
+        }
+    }
+
+    // Grows the bucket array: decrementing m_shifts yields a larger bucket count, and all buckets
+    // are then rebuilt from m_values.
+    void increase_size() {
+        if (m_max_bucket_capacity == max_bucket_count()) {
+            // remove the value again, we can't add it!
+            m_values.pop_back();
+            // NOTE(review): presumably does not return (throws or aborts) - confirm at its definition
+            on_error_bucket_overflow();
+        }
+        --m_shifts;
+        deallocate_buckets();
+        allocate_buckets_from_shift();
+        clear_and_fill_buckets_from_values();
+    }
+
+    // Erases the element referenced by the bucket at bucket_idx. handle_erased_value is called with
+    // the value as an rvalue before its slot in m_values is reused or popped.
+    template <typename Op>
+    void do_erase(value_idx_type bucket_idx, Op handle_erased_value) {
+        auto const value_idx_to_remove = at(m_buckets, bucket_idx).m_value_idx;
+
+        // shift down until either empty or an element with correct spot is found
+        // (a dist_and_fingerprint below 2 * dist_inc means the bucket is empty or at distance one step)
+        auto next_bucket_idx = next(bucket_idx);
+        while (at(m_buckets, next_bucket_idx).m_dist_and_fingerprint >= Bucket::dist_inc * 2) {
+            at(m_buckets, bucket_idx) = {dist_dec(at(m_buckets, next_bucket_idx).m_dist_and_fingerprint),
+                                         at(m_buckets, next_bucket_idx).m_value_idx};
+            bucket_idx = std::exchange(next_bucket_idx, next(next_bucket_idx));
+        }
+        // the last bucket of the shifted run becomes empty
+        at(m_buckets, bucket_idx) = {};
+        handle_erased_value(std::move(m_values[value_idx_to_remove]));
+
+        // update m_values
+        if (value_idx_to_remove != m_values.size() - 1) {
+            // no luck, we'll have to replace the value with the last one and update the index accordingly
+            auto& val = m_values[value_idx_to_remove];
+            val = std::move(m_values.back());
+
+            // update the values_idx of the moved entry. No need to play the info game, just look until we find the values_idx
+            auto mh = mixed_hash(get_key(val));
+            bucket_idx = bucket_idx_from_hash(mh);
+
+            auto const values_idx_back = static_cast<value_idx_type>(m_values.size() - 1);
+            while (values_idx_back != at(m_buckets, bucket_idx).m_value_idx) {
+                bucket_idx = next(bucket_idx);
+            }
+            at(m_buckets, bucket_idx).m_value_idx = value_idx_to_remove;
+        }
+        // the last slot is now either the erased value or a moved-from value
+        m_values.pop_back();
+    }
+
+    // Erases the element equal to key, if present. Returns the number of erased elements (0 or 1).
+    template <typename K, typename Op>
+    auto do_erase_key(K&& key, Op handle_erased_value) -> size_t {
+        if (empty()) {
+            return 0;
+        }
+
+        auto [dist_and_fingerprint, bucket_idx] = next_while_less(key);
+
+        // walk over buckets with matching dist_and_fingerprint until the key compares equal
+        while (dist_and_fingerprint == at(m_buckets, bucket_idx).m_dist_and_fingerprint &&
+               !m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).m_value_idx]))) {
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+        }
+
+        // the loop ended on a mismatching bucket => key is not in the table
+        if (dist_and_fingerprint != at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+            return 0;
+        }
+        do_erase(bucket_idx, handle_erased_value);
+        return 1;
+    }
+
+    // Inserts key/mapped via try_emplace; when the key already existed, assigns mapped to the existing
+    // element instead. Returns the iterator to the element and whether an insertion took place.
+    template <class K, class M>
+    auto do_insert_or_assign(K&& key, M&& mapped) -> std::pair<iterator, bool> {
+        auto it_isinserted = try_emplace(std::forward<K>(key), std::forward<M>(mapped));
+        if (!it_isinserted.second) {
+            it_isinserted.first->second = std::forward<M>(mapped);
+        }
+        return it_isinserted;
+    }
+
+    // Appends a new value constructed from args to m_values and links it into the bucket array.
+    // dist_and_fingerprint / bucket_idx describe the spot the caller's probe found for it.
+    // Always returns {iterator to the new value, true}.
+    template <typename... Args>
+    auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, value_idx_type bucket_idx, Args&&... args)
+        -> std::pair<iterator, bool> {
+
+        // emplace the new value. If that throws an exception, no harm done; index is still in a valid state
+        m_values.emplace_back(std::forward<Args>(args)...);
+
+        auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(is_full())) {
+            // growing rebuilds all buckets from m_values, which already contains the new value
+            increase_size();
+        } else {
+            // place element and shift up until we find an empty spot
+            place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
+        }
+
+        return {begin() + static_cast<difference_type>(value_idx), true};
+    }
+
+    // Looks up key; when found returns {iterator to it, false} and leaves key/args untouched.
+    // Otherwise a new value is constructed piecewise from key and args and {iterator, true} is returned.
+    template <typename K, typename... Args>
+    auto do_try_emplace(K&& key, Args&&... args) -> std::pair<iterator, bool> {
+        auto hash = mixed_hash(key);
+        auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
+
+        while (true) {
+            auto* bucket = &at(m_buckets, bucket_idx);
+            if (dist_and_fingerprint == bucket->m_dist_and_fingerprint) {
+                // distance and fingerprint match => compare the actual keys
+                if (m_equal(key, get_key(m_values[bucket->m_value_idx]))) {
+                    return {begin() + static_cast<difference_type>(bucket->m_value_idx), false};
+                }
+            } else if (dist_and_fingerprint > bucket->m_dist_and_fingerprint) {
+                // the probe compares greater than the stored entry => key is absent, insert here
+                return do_place_element(dist_and_fingerprint,
+                                        bucket_idx,
+                                        std::piecewise_construct,
+                                        std::forward_as_tuple(std::forward<K>(key)),
+                                        std::forward_as_tuple(std::forward<Args>(args)...));
+            }
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+        }
+    }
+
+    // Returns an iterator to the element equal to key, or end() when there is none.
+    template <typename K>
+    auto do_find(K const& key) -> iterator {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(empty())) {
+            return end();
+        }
+
+        auto mh = mixed_hash(key);
+        auto dist_and_fingerprint = dist_and_fingerprint_from_hash(mh);
+        auto bucket_idx = bucket_idx_from_hash(mh);
+        auto* bucket = &at(m_buckets, bucket_idx);
+
+        // unrolled loop. *Always* check a few directly, then enter the loop. This is faster.
+        if (dist_and_fingerprint == bucket->m_dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->m_value_idx]))) {
+            return begin() + static_cast<difference_type>(bucket->m_value_idx);
+        }
+        dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+        bucket_idx = next(bucket_idx);
+        bucket = &at(m_buckets, bucket_idx);
+
+        // second unrolled probe
+        if (dist_and_fingerprint == bucket->m_dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->m_value_idx]))) {
+            return begin() + static_cast<difference_type>(bucket->m_value_idx);
+        }
+        dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+        bucket_idx = next(bucket_idx);
+        bucket = &at(m_buckets, bucket_idx);
+
+        // general loop; unlike the unrolled probes above it also has the "not found" exit
+        while (true) {
+            if (dist_and_fingerprint == bucket->m_dist_and_fingerprint) {
+                if (m_equal(key, get_key(m_values[bucket->m_value_idx]))) {
+                    return begin() + static_cast<difference_type>(bucket->m_value_idx);
+                }
+            } else if (dist_and_fingerprint > bucket->m_dist_and_fingerprint) {
+                // the probe compares greater than the stored entry => key cannot be further along
+                return end();
+            }
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+            bucket = &at(m_buckets, bucket_idx);
+        }
+    }
+
+    // Const lookup: forwards to the non-const do_find and converts the result to const_iterator.
+    template <typename K>
+    auto do_find(K const& key) const -> const_iterator {
+        return const_cast<table*>(this)->do_find(key); // NOLINT(cppcoreguidelines-pro-type-const-cast)
+    }
+
+    // Map-only: returns a reference to the mapped value of key, or reports the error via
+    // on_error_key_not_found() when the key is absent.
+    template <typename K, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto do_at(K const& key) -> Q& {
+        if (auto it = find(key); ANKERL_UNORDERED_DENSE_LIKELY(end() != it)) {
+            return it->second;
+        }
+        // NOTE(review): no return follows, so this presumably never returns - confirm at its definition
+        on_error_key_not_found();
+    }
+
+    // Map-only const access: forwards to the non-const at(key) and returns the result as const reference.
+    template <typename K, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto do_at(K const& key) const -> Q const& {
+        return const_cast<table*>(this)->at(key); // NOLINT(cppcoreguidelines-pro-type-const-cast)
+    }
+
+public:
+    // Main constructor. With a non-zero bucket_count the table calls reserve(bucket_count); otherwise
+    // it allocates and clears the initial bucket array so the table is immediately usable.
+    explicit table(size_t bucket_count,
+                   Hash const& hash = Hash(),
+                   KeyEqual const& equal = KeyEqual(),
+                   allocator_type const& alloc_or_container = allocator_type())
+        : m_values(alloc_or_container)
+        , m_hash(hash)
+        , m_equal(equal) {
+        if (0 != bucket_count) {
+            reserve(bucket_count);
+        } else {
+            allocate_buckets_from_shift();
+            clear_buckets();
+        }
+    }
+
+    // Convenience constructors; all delegate to the main constructor and default-construct the
+    // arguments that are not given.
+    table()
+        : table(0) {}
+
+    table(size_t bucket_count, allocator_type const& alloc)
+        : table(bucket_count, Hash(), KeyEqual(), alloc) {}
+
+    table(size_t bucket_count, Hash const& hash, allocator_type const& alloc)
+        : table(bucket_count, hash, KeyEqual(), alloc) {}
+
+    explicit table(allocator_type const& alloc)
+        : table(0, Hash(), KeyEqual(), alloc) {}
+
+    // Range constructors: build an empty table as configured, then insert every element of [first, last).
+    template <class InputIt>
+    table(InputIt first,
+          InputIt last,
+          size_type bucket_count = 0,
+          Hash const& hash = Hash(),
+          KeyEqual const& equal = KeyEqual(),
+          allocator_type const& alloc = allocator_type())
+        : table(bucket_count, hash, equal, alloc) {
+        insert(first, last);
+    }
+
+    template <class InputIt>
+    table(InputIt first, InputIt last, size_type bucket_count, allocator_type const& alloc)
+        : table(first, last, bucket_count, Hash(), KeyEqual(), alloc) {}
+
+    template <class InputIt>
+    table(InputIt first, InputIt last, size_type bucket_count, Hash const& hash, allocator_type const& alloc)
+        : table(first, last, bucket_count, hash, KeyEqual(), alloc) {}
+
+    // Copy constructor: delegates to the allocator-extended version with other's allocator.
+    table(table const& other)
+        : table(other, other.m_values.get_allocator()) {}
+
+    // Allocator-extended copy constructor: copies values, load factor, hasher and key comparison, then
+    // allocates a bucket array and fills it via copy_buckets.
+    table(table const& other, allocator_type const& alloc)
+        : m_values(other.m_values, alloc)
+        , m_max_load_factor(other.m_max_load_factor)
+        , m_hash(other.m_hash)
+        , m_equal(other.m_equal) {
+        copy_buckets(other);
+    }
+
+    // Move constructor: delegates to the allocator-extended version with other's allocator.
+    table(table&& other) noexcept
+        : table(std::move(other), other.m_values.get_allocator()) {}
+
+    // Allocator-extended move constructor: starts with an empty value container using alloc and no
+    // bucket array, then move-assigns other into *this.
+    table(table&& other, allocator_type const& alloc) noexcept
+        : m_values(alloc) {
+        *this = std::move(other);
+    }
+
+    // Initializer-list constructors: build an empty table as configured, then insert all list elements.
+    table(std::initializer_list<value_type> ilist,
+          size_t bucket_count = 0,
+          Hash const& hash = Hash(),
+          KeyEqual const& equal = KeyEqual(),
+          allocator_type const& alloc = allocator_type())
+        : table(bucket_count, hash, equal, alloc) {
+        insert(ilist);
+    }
+
+    table(std::initializer_list<value_type> ilist, size_type bucket_count, allocator_type const& alloc)
+        : table(ilist, bucket_count, Hash(), KeyEqual(), alloc) {}
+
+    table(std::initializer_list<value_type> init, size_type bucket_count, Hash const& hash, allocator_type const& alloc)
+        : table(init, bucket_count, hash, KeyEqual(), alloc) {}
+
+    // Frees the bucket array if one is allocated. m_values is destroyed afterwards as a member.
+    ~table() {
+        if (nullptr != m_buckets) {
+            auto ba = bucket_alloc(m_values.get_allocator());
+            bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
+        }
+    }
+
+    // Copy assignment: replaces values, load factor, hasher and key comparison with copies of other's
+    // and rebuilds the bucket array via copy_buckets. Self-assignment is a no-op.
+    auto operator=(table const& other) -> table& {
+        if (&other != this) {
+            deallocate_buckets(); // deallocate before m_values is set (might have another allocator)
+            m_values = other.m_values;
+            m_max_load_factor = other.m_max_load_factor;
+            m_hash = other.m_hash;
+            m_equal = other.m_equal;
+            // reset the shift so that an empty result gets the initial bucket count in copy_buckets
+            m_shifts = initial_shifts;
+            copy_buckets(other);
+        }
+        return *this;
+    }
+
+    // Move assignment. other is left empty but usable. With equal allocators the bucket array is taken
+    // over from other; otherwise a new bucket array is allocated and other's buckets are copied.
+    //
+    // The exception specification uses the traits directly. Wrapping them in a second noexcept(...)
+    // would only ask whether evaluating a boolean constant can throw, which is always "no".
+    auto operator=(table&& other) noexcept(std::is_nothrow_move_assignable_v<value_container_type> &&
+                                           std::is_nothrow_move_assignable_v<Hash> &&
+                                           std::is_nothrow_move_assignable_v<KeyEqual>) -> table& {
+        if (&other != this) {
+            deallocate_buckets(); // deallocate before m_values is set (might have another allocator)
+            m_values = std::move(other.m_values);
+            other.m_values.clear();
+
+            // we can only reuse m_buckets when both maps have the same allocator!
+            if (get_allocator() == other.get_allocator()) {
+                m_buckets = std::exchange(other.m_buckets, nullptr);
+                m_num_buckets = std::exchange(other.m_num_buckets, 0);
+                m_max_bucket_capacity = std::exchange(other.m_max_bucket_capacity, 0);
+                m_shifts = std::exchange(other.m_shifts, initial_shifts);
+                m_max_load_factor = std::exchange(other.m_max_load_factor, default_max_load_factor);
+                m_hash = std::exchange(other.m_hash, {});
+                m_equal = std::exchange(other.m_equal, {});
+                // give other a fresh, empty initial bucket array again
+                other.allocate_buckets_from_shift();
+                other.clear_buckets();
+            } else {
+                // set max_load_factor *before* copying the other's buckets, so we have the same
+                // behavior
+                m_max_load_factor = other.m_max_load_factor;
+
+                // copy_buckets sets m_buckets, m_num_buckets, m_max_bucket_capacity, m_shifts
+                copy_buckets(other);
+                // clear's the other's buckets so other is now already usable.
+                other.clear_buckets();
+                m_hash = other.m_hash;
+                m_equal = other.m_equal;
+            }
+            // map "other" is now already usable, it's empty.
+        }
+        return *this;
+    }
+
+    // Replaces the contents with the elements of ilist.
+    auto operator=(std::initializer_list<value_type> ilist) -> table& {
+        clear();
+        insert(ilist);
+        return *this;
+    }
+
+    // Returns the allocator of the value container; bucket allocators are rebound from it.
+    auto get_allocator() const noexcept -> allocator_type {
+        return m_values.get_allocator();
+    }
+
+    // iterators //////////////////////////////////////////////////////////////
+
+    // Iteration runs directly over the value container, so begin is the container's begin.
+    auto begin() noexcept -> iterator {
+        return m_values.begin();
+    }
+
+    auto begin() const noexcept -> const_iterator {
+        return m_values.begin();
+    }
+
+    auto cbegin() const noexcept -> const_iterator {
+        return m_values.cbegin();
+    }
+
+    // Past-the-end iterators of the value container.
+    auto end() noexcept -> iterator {
+        return m_values.end();
+    }
+
+    auto cend() const noexcept -> const_iterator {
+        return m_values.cend();
+    }
+
+    auto end() const noexcept -> const_iterator {
+        return m_values.end();
+    }
+
+    // capacity ///////////////////////////////////////////////////////////////
+
+    // True when the table holds no elements (forwarded to the value container).
+    [[nodiscard]] auto empty() const noexcept -> bool {
+        return m_values.empty();
+    }
+
+    // Number of elements in the table (forwarded to the value container).
+    [[nodiscard]] auto size() const noexcept -> size_t {
+        return m_values.size();
+    }
+
+    // Upper bound on the element count, derived from the width of value_idx_type: 2^bits, or
+    // 2^(bits - 1) when value_idx_type has the same maximum as size_t (2^bits would not fit then).
+    [[nodiscard]] static constexpr auto max_size() noexcept -> size_t {
+        if constexpr ((std::numeric_limits<value_idx_type>::max)() == (std::numeric_limits<size_t>::max)()) {
+            return size_t{1} << (sizeof(value_idx_type) * 8 - 1);
+        } else {
+            return size_t{1} << (sizeof(value_idx_type) * 8);
+        }
+    }
+
+    // modifiers //////////////////////////////////////////////////////////////
+
+    // Removes all elements: empties the value container and then resets the buckets via clear_buckets().
+    void clear() {
+        m_values.clear();
+        clear_buckets();
+    }
+
+    // Single-element insert overloads. All of them forward to emplace(); the pair overloads return
+    // emplace()'s {iterator, inserted} result unchanged.
+
+    // Inserts a copy of `value`.
+    auto insert(value_type const& value) -> std::pair<iterator, bool> {
+        return emplace(value);
+    }
+
+    // Inserts `value` by moving from it.
+    auto insert(value_type&& value) -> std::pair<iterator, bool> {
+        return emplace(std::move(value));
+    }
+
+    // Inserts from any argument that value_type is constructible from; `value` is perfectly forwarded.
+    template <class P, std::enable_if_t<std::is_constructible_v<value_type, P&&>, bool> = true>
+    auto insert(P&& value) -> std::pair<iterator, bool> {
+        return emplace(std::forward<P>(value));
+    }
+
+    // Hinted overloads: the hint is ignored, only the iterator part of the result is returned.
+    auto insert(const_iterator /*hint*/, value_type const& value) -> iterator {
+        return insert(value).first;
+    }
+
+    auto insert(const_iterator /*hint*/, value_type&& value) -> iterator {
+        return insert(std::move(value)).first;
+    }
+
+    template <class P, std::enable_if_t<std::is_constructible_v<value_type, P&&>, bool> = true>
+    auto insert(const_iterator /*hint*/, P&& value) -> iterator {
+        return insert(std::forward<P>(value)).first;
+    }
+
+    // Inserts every element of the range [first, last), one at a time and in iteration order, through
+    // the single-element insert().
+    template <class InputIt>
+    void insert(InputIt first, InputIt last) {
+        for (; first != last; ++first) {
+            insert(*first);
+        }
+    }
+
+    // Inserts all elements of `ilist` by delegating to the iterator-range overload.
+    void insert(std::initializer_list<value_type> ilist) {
+        insert(ilist.begin(), ilist.end());
+    }
+
+    // nonstandard API: *this is emptied.
+    // Moves the whole value container out of the table and returns it. The function is &&-qualified, so
+    // it can only be called on an rvalue table, e.g. std::move(table).extract(). Only m_values is moved
+    // from here; nothing else is touched by this function.
+    // Also see "A Standard flat_map" https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p0429r9.pdf
+    auto extract() && -> value_container_type {
+        return std::move(m_values);
+    }
+
+    // nonstandard API:
+    // Discards the internally held container and replaces it with the one passed. Erases non-unique elements.
+    //
+    // Steps:
+    //  1. call on_error_too_many_elements() when the container holds more than max_size() elements,
+    //  2. reallocate the bucket array when there is none yet, when the shift value calculated for the
+    //     container is smaller than m_shifts, or when the container's allocator differs from ours,
+    //  3. clear the buckets and take ownership of the container,
+    //  4. walk over the values and register each one in the buckets, dropping duplicates.
+    // Because a duplicate is overwritten with the last element, the element order of the passed
+    // container is not necessarily preserved.
+    auto replace(value_container_type&& container) {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(container.size() > max_size())) {
+            on_error_too_many_elements();
+        }
+        auto shifts = calc_shifts_for_size(container.size());
+        if (0 == m_num_buckets || shifts < m_shifts || container.get_allocator() != m_values.get_allocator()) {
+            m_shifts = shifts;
+            deallocate_buckets();
+            allocate_buckets_from_shift();
+        }
+        clear_buckets();
+
+        m_values = std::move(container);
+
+        // can't use clear_and_fill_buckets_from_values() because container elements might not be unique
+        auto value_idx = value_idx_type{};
+
+        // loop until we reach the end of the container. duplicated entries will be replaced with back().
+        // m_values.size() is re-read on every iteration because it shrinks whenever a duplicate is dropped.
+        while (value_idx != static_cast<value_idx_type>(m_values.size())) {
+            auto const& key = get_key(m_values[value_idx]);
+
+            auto hash = mixed_hash(key);
+            auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
+            auto bucket_idx = bucket_idx_from_hash(hash);
+
+            // probe the buckets: stop either at the position where the new entry belongs, or at an
+            // already registered element with an equal key.
+            bool key_found = false;
+            while (true) {
+                auto const& bucket = at(m_buckets, bucket_idx);
+                if (dist_and_fingerprint > bucket.m_dist_and_fingerprint) {
+                    break;
+                }
+                if (dist_and_fingerprint == bucket.m_dist_and_fingerprint &&
+                    m_equal(key, get_key(m_values[bucket.m_value_idx]))) {
+                    key_found = true;
+                    break;
+                }
+                dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+                bucket_idx = next(bucket_idx);
+            }
+
+            if (key_found) {
+                // duplicate: overwrite it with the last element (unless it is the last element) and
+                // shrink the container. value_idx is not advanced, so the moved-in element is examined next.
+                if (value_idx != static_cast<value_idx_type>(m_values.size() - 1)) {
+                    m_values[value_idx] = std::move(m_values.back());
+                }
+                m_values.pop_back();
+            } else {
+                // new key: register it in the buckets and continue with the next value
+                place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
+                ++value_idx;
+            }
+        }
+    }
+
+    // insert_or_assign overloads. They are only enabled for maps (is_map_v<Q>) and all forward to
+    // do_insert_or_assign(); the pair overloads return its {iterator, bool} result unchanged.
+
+    // Key passed by const reference.
+    template <class M, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto insert_or_assign(Key const& key, M&& mapped) -> std::pair<iterator, bool> {
+        return do_insert_or_assign(key, std::forward<M>(mapped));
+    }
+
+    // Key passed as rvalue; it is moved into do_insert_or_assign().
+    template <class M, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto insert_or_assign(Key&& key, M&& mapped) -> std::pair<iterator, bool> {
+        return do_insert_or_assign(std::move(key), std::forward<M>(mapped));
+    }
+
+    // Heterogeneous key; additionally requires is_transparent_v<H, KE>.
+    template <typename K,
+              typename M,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto insert_or_assign(K&& key, M&& mapped) -> std::pair<iterator, bool> {
+        return do_insert_or_assign(std::forward<K>(key), std::forward<M>(mapped));
+    }
+
+    // Hinted variants: the hint is ignored and only the iterator of the result is returned.
+    template <class M, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto insert_or_assign(const_iterator /*hint*/, Key const& key, M&& mapped) -> iterator {
+        return do_insert_or_assign(key, std::forward<M>(mapped)).first;
+    }
+
+    template <class M, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto insert_or_assign(const_iterator /*hint*/, Key&& key, M&& mapped) -> iterator {
+        return do_insert_or_assign(std::move(key), std::forward<M>(mapped)).first;
+    }
+
+    template <typename K,
+              typename M,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto insert_or_assign(const_iterator /*hint*/, K&& key, M&& mapped) -> iterator {
+        return do_insert_or_assign(std::forward<K>(key), std::forward<M>(mapped)).first;
+    }
+
+    // Single arguments for unordered_set can be used without having to construct the value_type
+    // Only enabled for sets (!is_map_v<Q>) with transparent hash and equality. The lookup is done with
+    // `key` itself; an element is only created (via do_place_element) when no equal element exists.
+    // Returns {iterator to the existing element, false} when found, otherwise do_place_element()'s result.
+    template <class K,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<!is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto emplace(K&& key) -> std::pair<iterator, bool> {
+        auto hash = mixed_hash(key);
+        auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
+
+        // probe while the stored dist_and_fingerprint is at least as large as ours
+        while (dist_and_fingerprint <= at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+            if (dist_and_fingerprint == at(m_buckets, bucket_idx).m_dist_and_fingerprint &&
+                m_equal(key, m_values[at(m_buckets, bucket_idx).m_value_idx])) {
+                // found it, return without ever actually creating anything
+                return {begin() + static_cast<difference_type>(at(m_buckets, bucket_idx).m_value_idx), false};
+            }
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+        }
+
+        // value is new, insert element first, so when exception happens we are in a valid state
+        return do_place_element(dist_and_fingerprint, bucket_idx, std::forward<K>(key));
+    }
+
+    // Constructs a value_type from `args` and inserts it unless an element with an equal key exists.
+    // Returns {iterator to the new element, true} on insertion, or {iterator to the already present
+    // element, false} otherwise. Note that the element is always constructed at the back of m_values
+    // first and removed again when the key turns out to be a duplicate.
+    template <class... Args>
+    auto emplace(Args&&... args) -> std::pair<iterator, bool> {
+        // we have to instantiate the value_type to be able to access the key.
+        // 1. emplace_back the object so it is constructed. 2. If the key is already there, pop it later in the loop.
+        auto& key = get_key(m_values.emplace_back(std::forward<Args>(args)...));
+        auto hash = mixed_hash(key);
+        auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
+
+        // probe while the stored dist_and_fingerprint is at least as large as ours
+        while (dist_and_fingerprint <= at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+            if (dist_and_fingerprint == at(m_buckets, bucket_idx).m_dist_and_fingerprint &&
+                m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).m_value_idx]))) {
+                m_values.pop_back(); // value was already there, so get rid of it
+                return {begin() + static_cast<difference_type>(at(m_buckets, bucket_idx).m_value_idx), false};
+            }
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
+        }
+
+        // value is new, place the bucket and shift up until we find an empty spot
+        // the new element is the last one in m_values, so its index is size() - 1
+        auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(is_full())) {
+            // increase_size just rehashes all the data we have in m_values
+            increase_size();
+        } else {
+            // place element and shift up until we find an empty spot
+            place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
+        }
+        return {begin() + static_cast<difference_type>(value_idx), true};
+    }
+
+    // Same as emplace(); the hint is ignored and only the iterator of the result is returned.
+    template <class... Args>
+    auto emplace_hint(const_iterator /*hint*/, Args&&... args) -> iterator {
+        return emplace(std::forward<Args>(args)...).first;
+    }
+
+    // try_emplace overloads. They are only enabled for maps (is_map_v<Q>) and all forward to
+    // do_try_emplace(); the pair overloads return its {iterator, bool} result unchanged, the hinted
+    // overloads ignore the hint and return only the iterator.
+
+    // Key passed by const reference.
+    template <class... Args, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto try_emplace(Key const& key, Args&&... args) -> std::pair<iterator, bool> {
+        return do_try_emplace(key, std::forward<Args>(args)...);
+    }
+
+    // Key passed as rvalue; it is moved into do_try_emplace().
+    template <class... Args, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto try_emplace(Key&& key, Args&&... args) -> std::pair<iterator, bool> {
+        return do_try_emplace(std::move(key), std::forward<Args>(args)...);
+    }
+
+    template <class... Args, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto try_emplace(const_iterator /*hint*/, Key const& key, Args&&... args) -> iterator {
+        return do_try_emplace(key, std::forward<Args>(args)...).first;
+    }
+
+    template <class... Args, typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto try_emplace(const_iterator /*hint*/, Key&& key, Args&&... args) -> iterator {
+        return do_try_emplace(std::move(key), std::forward<Args>(args)...).first;
+    }
+
+    // Heterogeneous key. Requires is_transparent_v<H, KE>; K&& must additionally be convertible to
+    // neither iterator nor const_iterator (is_neither_convertible_v), presumably so that a call with a
+    // leading iterator argument selects a hinted overload instead.
+    template <
+        typename K,
+        typename... Args,
+        typename Q = T,
+        typename H = Hash,
+        typename KE = KeyEqual,
+        std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE> && is_neither_convertible_v<K&&, iterator, const_iterator>,
+                         bool> = true>
+    auto try_emplace(K&& key, Args&&... args) -> std::pair<iterator, bool> {
+        return do_try_emplace(std::forward<K>(key), std::forward<Args>(args)...);
+    }
+
+    // Hinted heterogeneous variant with the same constraints as above.
+    template <
+        typename K,
+        typename... Args,
+        typename Q = T,
+        typename H = Hash,
+        typename KE = KeyEqual,
+        std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE> && is_neither_convertible_v<K&&, iterator, const_iterator>,
+                         bool> = true>
+    auto try_emplace(const_iterator /*hint*/, K&& key, Args&&... args) -> iterator {
+        return do_try_emplace(std::forward<K>(key), std::forward<Args>(args)...).first;
+    }
+
+    // Erases the element `it` refers to and returns an iterator to the same index in the value
+    // container (which equals end() when the erased element was the last one).
+    auto erase(iterator it) -> iterator {
+        auto const value_idx_to_remove = static_cast<value_idx_type>(it - cbegin());
+
+        // begin at the bucket calculated from the key's hash and walk forward until the bucket that
+        // refers to this value index is reached
+        auto bucket_idx = bucket_idx_from_hash(mixed_hash(get_key(*it)));
+        for (; at(m_buckets, bucket_idx).m_value_idx != value_idx_to_remove; bucket_idx = next(bucket_idx)) {
+        }
+
+        // the removed value is not needed, so the handler does nothing with it
+        do_erase(bucket_idx, [](value_type&& /*unused*/) {
+        });
+        return begin() + static_cast<difference_type>(value_idx_to_remove);
+    }
+
+    // Removes the element `it` refers to from the table and returns it by value.
+    auto extract(iterator it) -> value_type {
+        auto const value_idx_to_remove = static_cast<value_idx_type>(it - cbegin());
+
+        // begin at the bucket calculated from the key's hash and walk forward until the bucket that
+        // refers to this value index is reached
+        auto bucket_idx = bucket_idx_from_hash(mixed_hash(get_key(*it)));
+        for (; at(m_buckets, bucket_idx).m_value_idx != value_idx_to_remove; bucket_idx = next(bucket_idx)) {
+        }
+
+        // do_erase hands the removed value to the handler, which stores it for the caller
+        auto extracted = std::optional<value_type>{};
+        do_erase(bucket_idx, [&extracted](value_type&& val) {
+            extracted = std::move(val);
+        });
+        return std::move(extracted).value();
+    }
+
+    // const_iterator variants of erase()/extract(), enabled for maps only. The const_iterator is
+    // converted to a mutable iterator at the same offset from the beginning and the call is forwarded
+    // to the iterator overload.
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto erase(const_iterator it) -> iterator {
+        return erase(begin() + (it - cbegin()));
+    }
+
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto extract(const_iterator it) -> value_type {
+        return extract(begin() + (it - cbegin()));
+    }
+
+    // Erases all elements in [first, last) and returns an iterator to the index where `first` was.
+    // The work is split in two phases around `mid`, which is idx_first plus the smaller of
+    // "number of elements to erase" and "number of elements behind the range".
+    auto erase(const_iterator first, const_iterator last) -> iterator {
+        auto const idx_first = first - cbegin();
+        auto const idx_last = last - cbegin();
+        auto const first_to_last = std::distance(first, last);
+        auto const last_to_end = std::distance(last, cend());
+
+        // remove elements from left to right which moves elements from the end back
+        auto const mid = idx_first + (std::min)(first_to_last, last_to_end);
+        auto idx = idx_first;
+        while (idx != mid) {
+            erase(begin() + idx);
+            ++idx;
+        }
+
+        // all elements from the right are moved, now remove the last element until all done
+        // walks backwards from idx_last down to mid
+        idx = idx_last;
+        while (idx != mid) {
+            --idx;
+            erase(begin() + idx);
+        }
+
+        return begin() + idx_first;
+    }
+
+    // Key based erase()/extract(). All four overloads forward to do_erase_key() and differ only in the
+    // handler passed for the removed value: erase() ignores it, extract() stores it in an optional.
+
+    // Erases the element with the given key; returns do_erase_key()'s result.
+    auto erase(Key const& key) -> size_t {
+        return do_erase_key(key, [](value_type&& /*unused*/) {
+        });
+    }
+
+    // Removes the element with the given key and returns it. The returned optional stays empty when
+    // the handler is never invoked.
+    auto extract(Key const& key) -> std::optional<value_type> {
+        auto tmp = std::optional<value_type>{};
+        do_erase_key(key, [&tmp](value_type&& val) {
+            tmp = std::move(val);
+        });
+        return tmp;
+    }
+
+    // Heterogeneous erase(); requires is_transparent_v<H, KE>.
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto erase(K&& key) -> size_t {
+        return do_erase_key(std::forward<K>(key), [](value_type&& /*unused*/) {
+        });
+    }
+
+    // Heterogeneous extract(); requires is_transparent_v<H, KE>.
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto extract(K&& key) -> std::optional<value_type> {
+        auto tmp = std::optional<value_type>{};
+        do_erase_key(std::forward<K>(key), [&tmp](value_type&& val) {
+            tmp = std::move(val);
+        });
+        return tmp;
+    }
+
+    // Exchanges the complete state of *this and `other` through an unqualified swap() call with
+    // std::swap brought into scope.
+    //
+    // The function is noexcept exactly when the value container, Hash and KeyEqual are all nothrow
+    // swappable. The traits are already bool constants, so they go into a single noexcept(...)
+    // specifier. Wrapping them in an additional noexcept operator only asks whether evaluating that
+    // constant can throw, which is never the case, and would make swap() unconditionally noexcept.
+    void swap(table& other) noexcept(std::is_nothrow_swappable_v<value_container_type> &&
+                                     std::is_nothrow_swappable_v<Hash> && std::is_nothrow_swappable_v<KeyEqual>) {
+        using std::swap;
+        swap(other, *this);
+    }
+
+    // lookup /////////////////////////////////////////////////////////////////
+
+    // at() overloads, enabled for maps only. They return a reference to the mapped value by
+    // forwarding to do_at(). The overloads taking an arbitrary K additionally require
+    // is_transparent_v<H, KE>.
+
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto at(key_type const& key) -> Q& {
+        return do_at(key);
+    }
+
+    template <typename K,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto at(K const& key) -> Q& {
+        return do_at(key);
+    }
+
+    // const overloads, returning a const reference
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto at(key_type const& key) const -> Q const& {
+        return do_at(key);
+    }
+
+    template <typename K,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto at(K const& key) const -> Q const& {
+        return do_at(key);
+    }
+
+    // operator[] overloads, enabled for maps only. Each one calls try_emplace() with just the key (no
+    // arguments for the mapped value) and returns a reference to the mapped value of the resulting element.
+
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto operator[](Key const& key) -> Q& {
+        return try_emplace(key).first->second;
+    }
+
+    // rvalue key: moved into try_emplace()
+    template <typename Q = T, std::enable_if_t<is_map_v<Q>, bool> = true>
+    auto operator[](Key&& key) -> Q& {
+        return try_emplace(std::move(key)).first->second;
+    }
+
+    // heterogeneous key; requires is_transparent_v<H, KE>
+    template <typename K,
+              typename Q = T,
+              typename H = Hash,
+              typename KE = KeyEqual,
+              std::enable_if_t<is_map_v<Q> && is_transparent_v<H, KE>, bool> = true>
+    auto operator[](K&& key) -> Q& {
+        return try_emplace(std::forward<K>(key)).first->second;
+    }
+
+    // Number of elements with the given key: 1 when find() locates it, 0 otherwise.
+    auto count(Key const& key) const -> size_t {
+        if (find(key) == end()) {
+            return 0;
+        }
+        return 1;
+    }
+
+    // Heterogeneous variant of count(); requires is_transparent_v<H, KE>.
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto count(K const& key) const -> size_t {
+        if (find(key) == end()) {
+            return 0;
+        }
+        return 1;
+    }
+
+    // find() overloads. All of them forward to do_find() and return its result. The overloads taking
+    // an arbitrary K require is_transparent_v<H, KE>.
+
+    auto find(Key const& key) -> iterator {
+        return do_find(key);
+    }
+
+    auto find(Key const& key) const -> const_iterator {
+        return do_find(key);
+    }
+
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto find(K const& key) -> iterator {
+        return do_find(key);
+    }
+
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto find(K const& key) const -> const_iterator {
+        return do_find(key);
+    }
+
+    // True when find() returns something other than end() for the key.
+    auto contains(Key const& key) const -> bool {
+        return find(key) != end();
+    }
+
+    // Heterogeneous variant of contains(); requires is_transparent_v<H, KE>.
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto contains(K const& key) const -> bool {
+        return find(key) != end();
+    }
+
+    // equal_range() overloads. The result is the empty range {end(), end()} when the key is not found,
+    // otherwise the one-element range {it, it + 1} around the element located by do_find().
+    // The overloads taking an arbitrary K require is_transparent_v<H, KE>.
+
+    auto equal_range(Key const& key) -> std::pair<iterator, iterator> {
+        auto it = do_find(key);
+        if (it == end()) {
+            return {it, it};
+        }
+        return {it, it + 1};
+    }
+
+    auto equal_range(const Key& key) const -> std::pair<const_iterator, const_iterator> {
+        auto it = do_find(key);
+        if (it == end()) {
+            return {it, it};
+        }
+        return {it, it + 1};
+    }
+
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto equal_range(K const& key) -> std::pair<iterator, iterator> {
+        auto it = do_find(key);
+        if (it == end()) {
+            return {it, it};
+        }
+        return {it, it + 1};
+    }
+
+    template <class K, class H = Hash, class KE = KeyEqual, std::enable_if_t<is_transparent_v<H, KE>, bool> = true>
+    auto equal_range(K const& key) const -> std::pair<const_iterator, const_iterator> {
+        auto it = do_find(key);
+        if (it == end()) {
+            return {it, it};
+        }
+        return {it, it + 1};
+    }
+
+    // bucket interface ///////////////////////////////////////////////////////
+
+    // Current number of buckets (m_num_buckets).
+    auto bucket_count() const noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
+        return m_num_buckets;
+    }
+
+    // Maximum number of buckets; identical to max_size().
+    static constexpr auto max_bucket_count() noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
+        return max_size();
+    }
+
+    // hash policy ////////////////////////////////////////////////////////////
+
+    // Ratio size() / bucket_count() as float; 0 when there are no buckets, which avoids a division by zero.
+    [[nodiscard]] auto load_factor() const -> float {
+        return bucket_count() ? static_cast<float>(size()) / static_cast<float>(bucket_count()) : 0.0F;
+    }
+
+    // Currently configured maximum load factor.
+    [[nodiscard]] auto max_load_factor() const -> float {
+        return m_max_load_factor;
+    }
+
+    // Sets the maximum load factor to `ml` (stored as given, no validation is done here). Unless the
+    // bucket count already equals max_bucket_count(), m_max_bucket_capacity is recalculated as
+    // bucket_count() * ml, converted to value_idx_type.
+    void max_load_factor(float ml) {
+        m_max_load_factor = ml;
+        if (m_num_buckets != max_bucket_count()) {
+            m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
+        }
+    }
+
+    // Rebuilds the bucket array for `count` elements. `count` is clamped to max_size() and never
+    // treated as less than size(). Nothing happens when the resulting shift value equals m_shifts;
+    // otherwise the buckets are reallocated and refilled from the stored values, and the value
+    // container is asked to shrink_to_fit().
+    void rehash(size_t count) {
+        count = (std::min)(count, max_size());
+        auto shifts = calc_shifts_for_size((std::max)(count, size()));
+        if (shifts != m_shifts) {
+            m_shifts = shifts;
+            deallocate_buckets();
+            m_values.shrink_to_fit();
+            allocate_buckets_from_shift();
+            clear_and_fill_buckets_from_values();
+        }
+    }
+
+    // Prepares the table for `capa` elements. `capa` is clamped to max_size(). The value container's
+    // reserve() is called when it has one, and the buckets are reallocated and refilled only when none
+    // exist yet or when the calculated shift value is smaller than m_shifts.
+    void reserve(size_t capa) {
+        capa = (std::min)(capa, max_size());
+        if constexpr (has_reserve<value_container_type>) {
+            // std::deque doesn't have reserve(). Make sure we only call when available
+            m_values.reserve(capa);
+        }
+        auto shifts = calc_shifts_for_size((std::max)(capa, size()));
+        if (0 == m_num_buckets || shifts < m_shifts) {
+            m_shifts = shifts;
+            deallocate_buckets();
+            allocate_buckets_from_shift();
+            clear_and_fill_buckets_from_values();
+        }
+    }
+
+    // observers //////////////////////////////////////////////////////////////
+
+    // Returns a copy of the hash function object m_hash.
+    auto hash_function() const -> hasher {
+        return m_hash;
+    }
+
+    // Returns a copy of the key equality predicate m_equal.
+    auto key_eq() const -> key_equal {
+        return m_equal;
+    }
+
+    // nonstandard API: expose the underlying values container
+    // The container is returned by const reference, so it cannot be modified through this accessor.
+    [[nodiscard]] auto values() const noexcept -> value_container_type const& {
+        return m_values;
+    }
+
+    // non-member functions ///////////////////////////////////////////////////
+
+    // Two tables compare equal when they hold the same number of elements and every element of `b`
+    // has a counterpart in `a`: for maps the key must be found and the mapped values must compare
+    // equal, for sets finding the key is sufficient. The storage order of the elements is irrelevant.
+    friend auto operator==(table const& a, table const& b) -> bool {
+        if (&a == &b) {
+            // comparing an object with itself
+            return true;
+        }
+        if (a.size() != b.size()) {
+            return false;
+        }
+        for (auto const& b_entry : b) {
+            auto pos = a.find(get_key(b_entry));
+            if (a.end() == pos) {
+                // map and set: the key has to be present
+                return false;
+            }
+            if constexpr (is_map_v<T>) {
+                // map: the key is here, now also check that the value is the same
+                if (!(b_entry.second == pos->second)) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    // Negation of operator==.
+    friend auto operator!=(table const& a, table const& b) -> bool {
+        return !(a == b);
+    }
+};
+
+} // namespace detail
+
+// Public container aliases. All four are instantiations of detail::table and differ only in the
+// mapped type (T for maps, void for sets) and in the trailing bool template argument
+// (false for map/set, true for segmented_map/segmented_set).
+
+// Map from Key to T; the default allocator allocates std::pair<Key, T>.
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class T,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class AllocatorOrContainer = std::allocator<std::pair<Key, T>>,
+                                        class Bucket = bucket_type::standard>
+using map = detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, false>;
+
+// Same as map, but with the trailing bool argument set to true.
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class T,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class AllocatorOrContainer = std::allocator<std::pair<Key, T>>,
+                                        class Bucket = bucket_type::standard>
+using segmented_map = detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, true>;
+
+// Set of Key; the mapped type is void and the default allocator allocates Key.
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class AllocatorOrContainer = std::allocator<Key>,
+                                        class Bucket = bucket_type::standard>
+using set = detail::table<Key, void, Hash, KeyEqual, AllocatorOrContainer, Bucket, false>;
+
+// Same as set, but with the trailing bool argument set to true.
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class AllocatorOrContainer = std::allocator<Key>,
+                                        class Bucket = bucket_type::standard>
+using segmented_set = detail::table<Key, void, Hash, KeyEqual, AllocatorOrContainer, Bucket, true>;
+
+#    if defined(ANKERL_UNORDERED_DENSE_PMR)
+
+// Variants of map/segmented_map/set/segmented_set whose allocator is fixed to
+// ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator. In contrast to the aliases outside of this
+// namespace they have no AllocatorOrContainer template parameter.
+namespace pmr {
+
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class T,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class Bucket = bucket_type::standard>
+using map =
+    detail::table<Key, T, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<std::pair<Key, T>>, Bucket, false>;
+
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class T,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class Bucket = bucket_type::standard>
+using segmented_map =
+    detail::table<Key, T, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<std::pair<Key, T>>, Bucket, true>;
+
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class Bucket = bucket_type::standard>
+using set = detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<Key>, Bucket, false>;
+
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class Hash = hash<Key>,
+                                        class KeyEqual = std::equal_to<Key>,
+                                        class Bucket = bucket_type::standard>
+using segmented_set =
+    detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<Key>, Bucket, true>;
+
+} // namespace pmr
+
+#    endif
+
+// deduction guides ///////////////////////////////////////////////////////////
+
+// deduction guides for alias templates are only possible since C++20
+// see https://en.cppreference.com/w/cpp/language/class_template_argument_deduction
+
+} // namespace ANKERL_UNORDERED_DENSE_NAMESPACE
+} // namespace ankerl::unordered_dense
+
+// std extensions /////////////////////////////////////////////////////////////
+
+namespace std { // NOLINT(cert-dcl58-cpp)
+
+// Erases every element of `map` for which `pred` returns true and returns the number of erased
+// elements. Works for all detail::table instantiations, i.e. maps as well as sets.
+ANKERL_UNORDERED_DENSE_EXPORT template <class Key,
+                                        class T,
+                                        class Hash,
+                                        class KeyEqual,
+                                        class AllocatorOrContainer,
+                                        class Bucket,
+                                        class Pred,
+                                        bool IsSegmented>
+// NOLINTNEXTLINE(cert-dcl58-cpp)
+auto erase_if(ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, IsSegmented>& map,
+              Pred pred) -> size_t {
+    using map_t = ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, IsSegmented>;
+    using diff_t = typename map_t::difference_type;
+
+    // going back to front because erase() invalidates the end iterator
+    auto const old_size = map.size();
+    for (auto idx = old_size; idx != 0;) {
+        --idx;
+        auto it = map.begin() + static_cast<diff_t>(idx);
+        if (pred(*it)) {
+            map.erase(it);
+        }
+    }
+
+    // the size difference is the number of elements that were erased
+    return old_size - map.size();
+}
+
+} // namespace std
+
+#endif
+#endif
diff --git a/inst/include/pairwise/pairwise.h b/inst/include/seqtrie/pairwise.h
old mode 100644
new mode 100755
similarity index 88%
rename from inst/include/pairwise/pairwise.h
rename to inst/include/seqtrie/pairwise.h
index 91960a3..6285337
--- a/inst/include/pairwise/pairwise.h
+++ b/inst/include/seqtrie/pairwise.h
@@ -11,8 +11,8 @@
 // tuple anchored_distance_linear(query, target, cost_map) // Anchored alignment, gap penalty is linear
 // tuple anchored_distance_affine(query, target, cost_map) // Anchored alignment, gap penalty is affine
 
-#ifndef pairwise_PAIRWISE_H
-#define pairwise_PAIRWISE_H
+#ifndef seqtrie_PAIRWISE_H
+#define seqtrie_PAIRWISE_H
 
 #include <set>
 #include <memory>
@@ -21,11 +21,13 @@
 #include <nonstd/span.hpp>
 #include <limits>
 #include <boost/numeric/ublas/matrix.hpp>
+// #include <boost/functional/hash.hpp>
 
 namespace pairwise {
 using IMatrix = boost::numeric::ublas::matrix<int>;
 using pairchar_type = std::pair<char, char>;
-using pairchar_map_type = std::unordered_map<pairchar_type, int>;
+// make this template'd
+// using pairchar_map_type = std::unordered_map<pairchar_type, int, boost::hash<pairchar_type>>;
 using cspan = nonstd::span<const char>;
 
 constexpr int R_NA_INTEGER = std::numeric_limits<int>::min();
@@ -34,6 +36,7 @@ constexpr char GAP_CHAR = '\0'; // any gap cost for non-affine
 constexpr char GAP_OPEN_CHAR = std::numeric_limits<char>::min(); // gap open cost for affine
 constexpr char GAP_EXTN_CHAR = '\0'; // extension for affine
 constexpr int NO_ALIGN = std::numeric_limits<int>::max() / 2; // used to represent impossible affine positions; use half INT_MAX so we dont overflow
+
 // GAP_COST = GAP_OPEN_COST + GAP_EXTN_COST * (LENGTH - 1)
 
 // void print_matrix(IMatrix & mat) {
@@ -56,7 +59,7 @@ constexpr int NO_ALIGN = std::numeric_limits<int>::max() / 2; // used to represe
 // }
 
 int hamming_distance(cspan query, cspan target) {
-  if(query.size() != target.size()) return NA_INTEGER;
+  if(query.size() != target.size()) return R_NA_INTEGER;
   int distance = 0;
   for(size_t i=0; i<query.size(); ++i) {
     if(query[i] != target[i]) distance++;
@@ -82,7 +85,7 @@ IMatrix get_dprog_matrix(cspan query, cspan target) {
   return mat;
 }
 
-IMatrix get_dprog_matrix_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> IMatrix get_dprog_matrix_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
   IMatrix mat(query.size()+1, target.size()+1);
   mat(0,0) = 0;
   for(size_t j=1; j<mat.size2(); ++j) mat(0,j) = mat(0,j-1) + cost_map.at(pairchar_type(GAP_CHAR, target[j-1])); // gap in query
@@ -100,7 +103,7 @@ IMatrix get_dprog_matrix_linear(cspan query, cspan target, pairchar_map_type & c
   return mat;
 }
 
-std::tuple<IMatrix, IMatrix, IMatrix> get_dprog_matrix_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> std::tuple<IMatrix, IMatrix, IMatrix> get_dprog_matrix_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
   size_t size1 = query.size()+1;
   size_t size2 = target.size()+1;
   std::tuple<IMatrix, IMatrix, IMatrix> mats = std::make_tuple(IMatrix(size1, size2), IMatrix(size1, size2), IMatrix(size1, size2));
@@ -157,12 +160,12 @@ int global_distance(cspan query, cspan target) {
 }
 const auto& levenshtein_distance = global_distance; // alias
 
-int global_distance_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> int global_distance_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
   IMatrix mat = get_dprog_matrix_linear(query, target, cost_map);
   return mat(mat.size1()-1, mat.size2()-1);
 }
 
-int global_distance_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> int global_distance_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
   // print_pairchar_map(cost_map);
   auto mats = get_dprog_matrix_affine(query, target, cost_map);
   IMatrix & M = std::get<0>(mats);
@@ -204,7 +207,7 @@ std::tuple<int, int, int> anchored_distance(cspan query, cspan target) {
   return std::tuple<int, int, int>(distance, query_size, target_size);
 }
 
-std::tuple<int, int, int> anchored_distance_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> std::tuple<int, int, int> anchored_distance_linear(cspan query, cspan target, pairchar_map_type & cost_map) {
   IMatrix mat = get_dprog_matrix_linear(query, target, cost_map);
   int distance = NO_ALIGN;
   int query_size = 0;
@@ -228,7 +231,7 @@ std::tuple<int, int, int> anchored_distance_linear(cspan query, cspan target, pa
   return std::tuple<int, int, int>(distance, query_size, target_size);
 }
 
-std::tuple<int, int, int> anchored_distance_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
+template <typename pairchar_map_type> std::tuple<int, int, int> anchored_distance_affine(cspan query, cspan target, pairchar_map_type & cost_map) {
   auto mats = get_dprog_matrix_affine(query, target, cost_map);
   IMatrix & M = std::get<0>(mats);
   IMatrix & X = std::get<1>(mats);
diff --git a/inst/include/seqtrie/radixmap.h b/inst/include/seqtrie/radixmap.h
index 78d3cee..8a987a7 100755
--- a/inst/include/seqtrie/radixmap.h
+++ b/inst/include/seqtrie/radixmap.h
@@ -5,16 +5,7 @@
 
 namespace seqtrie {
 
-// inline void print_vec(const std::vector<int> & v) {
-//   for(auto & x : v) {
-//     if(x > 100000) {
-//       std::cout << "Z ";
-//     } else {
-//       std::cout << x << " ";
-//     }
-//   }
-//   std::cout << "\t";
-// }
+
 
 #define TEMPLATE_LIST template <class A, template<typename...> class M, template<typename...> class B, class I>
 #define RADIXMAP_T RadixMap<A,M,B,I>
@@ -93,7 +84,6 @@ class RadixMap {
 
   // search using a custom edit distance cost matrix
   // MT is a map type, e.g. std::map<pairchar_type, int>
-  // (pairchar_type is defined as std::pair<char, char> in utility.h)
   // the map key is the pair of characters (query, target) to compare
   // All possibile pairs of characters must be included in the map, or else it return an error
   // The map value is the cost of the edit operation and must be non-negative
@@ -209,6 +199,8 @@ TEMPLATE_LIST std::string RADIXMAP_T::print_impl(size_t depth) const {
     result += ":";
     result += std::to_string(terminal_idx);
   }
+  // result += ",";
+  // result += ptr_tostring(this);
   result += "\n";
   std::vector<atomic_type> child_node_keys;
   for(auto & ch : child_nodes) {
diff --git a/inst/include/seqtrie/utility.h b/inst/include/seqtrie/utility.h
index 620ae13..e2ff49b 100755
--- a/inst/include/seqtrie/utility.h
+++ b/inst/include/seqtrie/utility.h
@@ -80,7 +80,7 @@ template <> inline char * array_data(std::string & x) { return &x[0]; }
 // template <> inline uint8_t * array_data(std::basic_string<uint8_t> & x) { return &x[0]; }
   
 // subvector
-template <typename T, typename F> T subvector(const F & x, const size_t start, const size_t len = -1) {
+template <typename T, typename F> inline T subvector(const F & x, const size_t start, const size_t len = -1) {
   size_t rlen = std::min(len, x.size() - start);
   T result(rlen);
   std::copy(x.data() + start, x.data() + start + rlen, result.data());
@@ -88,14 +88,14 @@ template <typename T, typename F> T subvector(const F & x, const size_t start, c
 }
 
 // appendspan -- append span to vector
-template <typename T, typename S> void appendspan(T & x, const S & y) {
+template <typename T, typename S> inline void appendspan(T & x, const S & y) {
   static_assert(std::is_same<typename T::value_type, typename S::value_type>::value, "appendspan x and y value_type must be the same");
   size_t xs = x.size();
   x.resize(xs + y.size());
   std::copy(y.data(), y.data() + y.size(), x.data() + xs);
 }
 
-template <typename T> T iota_range(const typename T::value_type value, const size_t len) {
+template <typename T> inline T iota_range(const typename T::value_type value, const size_t len) {
   T result(len);
   std::iota(result.begin(), result.end(), value);
   return result;
@@ -115,14 +115,10 @@ template <typename T> T iota_range(const typename T::value_type value, const siz
 // }
 }
 
-// hash for unordered_map with std::pair<char, char> as key
-namespace std {
-  template <>
-  struct hash<std::pair<char, char>> {
-    size_t operator()(const std::pair<char, char> & p) const {
-      return ((p.first + 128) << 8) + (p.second + 128);
-    }
-  };
+inline std::string ptr_tostring(const void * ptr) {
+  std::stringstream ss;
+  ss << ptr;
+  return ss.str();
 }
 
 #endif // include guard
diff --git a/man/RadixForest.Rd b/man/RadixForest.Rd
old mode 100644
new mode 100755
diff --git a/man/covid_cdr3.Rd b/man/covid_cdr3.Rd
index 26ef8bb..3146ed0 100755
--- a/man/covid_cdr3.Rd
+++ b/man/covid_cdr3.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/covid_cdr3.R
+% Please edit documentation in R/covid_cdr3.r
 \docType{data}
 \name{covid_cdr3}
 \alias{covid_cdr3}
diff --git a/man/dist_matrix.Rd b/man/dist_matrix.Rd
old mode 100644
new mode 100755
diff --git a/man/dist_pairwise.Rd b/man/dist_pairwise.Rd
old mode 100644
new mode 100755
diff --git a/man/dist_search.Rd b/man/dist_search.Rd
old mode 100644
new mode 100755
diff --git a/man/generate_cost_matrix.Rd b/man/generate_cost_matrix.Rd
old mode 100644
new mode 100755
diff --git a/man/split_search.Rd b/man/split_search.Rd
old mode 100644
new mode 100755
diff --git a/src/CharCounter.cpp b/src/CharCounter.cpp
index a826a53..4594f16 100644
--- a/src/CharCounter.cpp
+++ b/src/CharCounter.cpp
@@ -1,9 +1,4 @@
-#include <set>
-#include <Rcpp.h>
 #include "seqtrie_types.h"
-#include "seqtrie_utils.h"
-
-using namespace Rcpp;
 
 // [[Rcpp::export(rng = false)]]
 CharCounterXPtr CharCounter_create() {
diff --git a/src/Makevars b/src/Makevars
index 4c1712f..a9e485f 100644
--- a/src/Makevars
+++ b/src/Makevars
@@ -1,3 +1,5 @@
-PKG_CPPFLAGS = -DRCPP_USE_UNWIND_PROTECT -I. -I../inst/include
-PKG_LIBS     += $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()")
+CXX_STD=CXX17
 
+PKG_CPPFLAGS = -DRCPP_USE_UNWIND_PROTECT -DSEQTRIE_SMALL_ARRAY_SIZE=$(shell ${R_HOME}/bin/Rscript -e "cat(Sys.getenv('SEQTRIE_SMALL_ARRAY_SIZE',unset=32))")
+PKG_CPPFLAGS += -I. -I../inst/include
+PKG_LIBS += $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()")
diff --git a/src/Makevars.win b/src/Makevars.win
index f6a7e34..031b3b4 100644
--- a/src/Makevars.win
+++ b/src/Makevars.win
@@ -1,3 +1,4 @@
+CXX_STD=CXX17
 PKG_CPPFLAGS = -DRCPP_USE_UNWIND_PROTECT -DRCPP_PARALLEL_USE_TBB=1 -I. -I../inst/include
 PKG_CXXFLAGS += -DRCPP_PARALLEL_USE_TBB=1
 PKG_LIBS     += $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()")
diff --git a/src/RadixForest.cpp b/src/RadixForest.cpp
index 60e07b1..d34ec82 100644
--- a/src/RadixForest.cpp
+++ b/src/RadixForest.cpp
@@ -1,17 +1,6 @@
-#include <Rcpp.h>
-#include <RcppParallel.h>
-
-#include <set>
-#include <memory>
-#include <tuple>
 #include "seqtrie_types.h"
-#include "seqtrie_utils.h"
-
 #include "simple_progress/simple_progress.h"
 
-using namespace Rcpp;
-using namespace RcppParallel;
-
 ////////////////////////////////////////////////////////////////////////////////
 // RadixForest
 
@@ -121,7 +110,7 @@ DataFrame RadixForest_prefix_search(RadixForestRXPtr xp, CharacterVector sequenc
     auto & targets = output[i];
     for(size_t j=0; j<targets.size(); ++j) {
       SET_STRING_ELT(query_results, q, STRING_ELT(sequences, queries[i]));
-      auto s = targets[j]->template sequence<trqwe::small_array<char>>();
+      auto s = targets[j]->template sequence<SeqTrie::array_r<char>>();
       SET_STRING_ELT(target_results, q, to_charsxp(s));
       q++;
     }
@@ -178,7 +167,7 @@ CharacterVector RadixForest_to_vector(RadixForestRXPtr xp) {
   }
   CharacterVector sequence(seqs.size());
   for(size_t i=0; i<seqs.size(); ++i) {
-    auto s = seqs[i]->template sequence<trqwe::small_array<char>>();
+    auto s = seqs[i]->template sequence<SeqTrie::array_r<char>>();
     SET_STRING_ELT(sequence, i, to_charsxp(s));
   }
   return sequence;
diff --git a/src/RadixTree.cpp b/src/RadixTree.cpp
index 1b579c1..b3459b7 100644
--- a/src/RadixTree.cpp
+++ b/src/RadixTree.cpp
@@ -1,17 +1,6 @@
-#include <Rcpp.h>
-#include <RcppParallel.h>
-
-#include <set>
-#include <memory>
-#include <tuple>
 #include "seqtrie_types.h"
-#include "seqtrie_utils.h"
-
 #include "simple_progress/simple_progress.h"
 
-using namespace Rcpp;
-using namespace RcppParallel;
-
 ////////////////////////////////////////////////////////////////////////////////
 // RadixTree R functions
 
@@ -91,7 +80,7 @@ DataFrame RadixTree_prefix_search(RadixTreeRXPtr xp, CharacterVector sequences)
     auto & targets = output[i];
     for(size_t j=0; j<targets.size(); ++j) {
       SET_STRING_ELT(query_results, q, STRING_ELT(sequences, i));
-      auto s = targets[j]->template sequence<trqwe::small_array<char>>();
+      auto s = targets[j]->template sequence<SeqTrie::array_r<char>>();
       SET_STRING_ELT(target_results, q, to_charsxp(s));
       q++;
     }
@@ -134,7 +123,7 @@ CharacterVector RadixTree_to_vector(RadixTreeRXPtr xp) {
   auto seqs = root.all();
   CharacterVector sequence(seqs.size());
   for(size_t i=0; i<seqs.size(); ++i) {
-    auto s = seqs[i]->template sequence<trqwe::small_array<char>>();
+    auto s = seqs[i]->template sequence<SeqTrie::array_r<char>>();
     SET_STRING_ELT(sequence, i, to_charsxp(s));
   }
   return sequence;
diff --git a/src/pairwise.cpp b/src/pairwise.cpp
index 706432e..17c6a1d 100644
--- a/src/pairwise.cpp
+++ b/src/pairwise.cpp
@@ -1,22 +1,7 @@
-#include <Rcpp.h>
-#include <RcppParallel.h>
-
-#include <set>
-#include <memory>
-#include <tuple>
-#include <unordered_map>
 #include "seqtrie_types.h"
-#include "seqtrie_utils.h"
-
-#include "pairwise/pairwise.h"
+#include "seqtrie/pairwise.h"
 #include "simple_progress/simple_progress.h"
 
-#include <boost/numeric/ublas/matrix.hpp>
-using IMatrix = boost::numeric::ublas::matrix<int>;
-
-using namespace Rcpp;
-using namespace RcppParallel;
-
 // All input parameters should be checked in R, so any error thrown here is an internal error
 // [[Rcpp::export(rng = false)]]
 IntegerMatrix c_dist_matrix(CharacterVector query, CharacterVector target, 
diff --git a/src/seqtrie_types.h b/src/seqtrie_types.h
index d9a345e..7827003 100644
--- a/src/seqtrie_types.h
+++ b/src/seqtrie_types.h
@@ -2,7 +2,7 @@
 #define seqtrie_TYPES_H
 
 #include <Rcpp.h>
-
+#include <RcppParallel.h>
 #include <unordered_map>
 #include <map>
 #include <cstring>
@@ -10,47 +10,78 @@
 #include <set>
 #include <memory>
 #include <tuple>
+#include <boost/functional/hash.hpp>
+
+using namespace Rcpp;
+using namespace RcppParallel;
 
+#include "ankerl/unordered_dense.h"
 #include "seqtrie/radixmap.h"
 #include "simple_array/small_array.h"
-#include "simple_progress/simple_progress.h"
 
 using namespace Rcpp;
 
+#if __cplusplus >= 201703L
+  #define USE_ANKERL 1
+#endif
+
+#define USE_SEQTRIE_SMALL_ARRAY_SIZE SEQTRIE_SMALL_ARRAY_SIZE
+
 // constants and types
 using pairchar_type = std::pair<char, char>;
-using pairchar_map_type = std::unordered_map<pairchar_type, int>;
+#ifdef USE_ANKERL
+using pairchar_map_type = ankerl::unordered_dense::map<pairchar_type, int>;
+#else
+using pairchar_map_type = std::unordered_map<pairchar_type, int, boost::hash<pairchar_type>>;
+#endif
 using cspan = nonstd::span<const char>;
 constexpr char GAP_CHAR = '\0';                                     // '\0' any gap cost for non-affine
 constexpr char GAP_OPEN_CHAR = std::numeric_limits<char>::min();    // '\255' gap open cost for affine
 constexpr char GAP_EXTN_CHAR = '\0';    
 
 // used in utils.cpp, a map for counting chars, to make sure input cost_matrix contains all chars in a trie
+#ifdef USE_ANKERL
+using CharCounter = ankerl::unordered_dense::map<char, size_t>;
+#else
 using CharCounter = std::unordered_map<char, size_t>;
+#endif
+
 using CharCounterXPtr = Rcpp::XPtr<CharCounter>;
 
-// defined in utils.cpp
+// defined inline further down in this header (old declaration kept below for reference)
 // Convert a string to a SEXP
 // Be careful about R protection / GC
-SEXP to_charsxp(const trqwe::small_array<char> & x);
+// SEXP to_charsxp(const SeqTrie::array_r<char> & x);
 
-// defined in utils.cpp
+// defined inline further down in this header (old declaration kept below for reference)
 // Define a span of const char from a SEXP
-cspan charsxp_to_cspan(SEXP x);
+// cspan charsxp_to_cspan(SEXP x);
 
-// defined in utils.cpp
+// defined inline further down in this header (old declaration kept below for reference)
 // CharacterVector to cspan vector
-std::vector<cspan> strsxp_to_cspan(CharacterVector x);
+// std::vector<cspan> strsxp_to_cspan(CharacterVector x);
 
-// defined in utils.cpp
 // Input: cost_matrix
 // a NxN matrix where column/row names are the characters to use for pairchar_map_type keys
 // The special column "gap" is recoded as '\0'
 // Output: pairchar_map_type
-pairchar_map_type convert_cost_matrix(IntegerMatrix cost_matrix);
+// pairchar_map_type convert_cost_matrix(IntegerMatrix cost_matrix);
 
 namespace SeqTrie {
-  using RadixTreeR = seqtrie::RadixMap<char, std::map, trqwe::small_array, size_t>;
+  // If using ankerl, unordered_dense map does not have stable addresses on insert/delete
+  // due to data being stored in a plain std::vector. Cannot use for RadixForest.
+  // Not necessary for RadixTree as all nodes use unique_ptr
+  // To do: convert RadixForest to use unique_ptr
+#if USE_SEQTRIE_SMALL_ARRAY_SIZE > 0
+  template <typename T> using array_r = trqwe::small_array<T, std::allocator<T>, size_t, std::integral_constant<size_t, USE_SEQTRIE_SMALL_ARRAY_SIZE>>;
+#else
+  template <typename T> using array_r = std::vector<T>;
+#endif
+#ifdef USE_ANKERL
+  using RadixTreeR = seqtrie::RadixMap<char, ankerl::unordered_dense::map, array_r, size_t>;
+#else
+  using RadixTreeR = seqtrie::RadixMap<char, std::map, array_r, size_t>;
+#endif
   using RadixForestR = std::unordered_map<size_t, RadixTreeR>;
   typedef typename RadixTreeR::self_type node_type;
   typedef typename RadixTreeR::pointer_type pointer_type;
@@ -63,4 +94,103 @@ using RadixTreeRXPtr = Rcpp::XPtr<SeqTrie::RadixTreeR>;
 using RadixForestRXPtr = Rcpp::XPtr<SeqTrie::RadixForestR>;
 using CharCounterXPtr = Rcpp::XPtr<CharCounter>;
 
+// Parallel-for helper: adapts any callable taking (std::size_t begin, std::size_t end),
+// e.g. a lambda, to the RcppParallel::Worker interface so it can be handed to parallelFor.
+template <typename Func>
+struct DoParallelFor : public RcppParallel::Worker {
+  Func f; // stored copy of the callable
+  DoParallelFor(Func f) : f(f) {}
+  void operator()(std::size_t begin, std::size_t end) { // forwards the sub-range it is called with to f
+    f(begin, end);
+  }
+  
+};
+template <typename Func> // wraps f in a DoParallelFor worker and runs parallelFor over begin..end with it
+inline void do_parallel_for(Func f, std::size_t begin, std::size_t end, std::size_t grainSize = 1, int numThreads = -1) {
+  DoParallelFor<Func> w(f);
+  parallelFor(begin, end, w, grainSize, numThreads); // grainSize and numThreads are passed through unchanged
+}
+
+// Convert a char array to an R CHARSXP; the length is passed explicitly to Rf_mkCharLen, so x need not be NUL-terminated.
+// The returned SEXP is not protected here: the caller must store it (e.g. SET_STRING_ELT) or PROTECT it right away.
+inline SEXP to_charsxp(const SeqTrie::array_r<char> & x) {
+  return Rf_mkCharLen(x.data(), x.size());
+}
+
+// Build a cspan over a CHARSXP: pointer from CHAR(x), length from Rf_xlength(x). This is a view, no bytes are copied.
+inline cspan charsxp_to_cspan(SEXP x) {
+  return cspan(CHAR(x), Rf_xlength(x));
+}
+
+inline std::vector<cspan> strsxp_to_cspan(CharacterVector x) { // CharacterVector -> one cspan per element (views, not copies)
+  size_t n = Rf_xlength(x);
+  const SEXP * xp = STRING_PTR_RO(x); // read-only accessor; the writable STRING_PTR is not part of R's public C API
+  std::vector<cspan> out(n);
+  for(size_t i=0; i<n; ++i) {
+    out[i] = charsxp_to_cspan(xp[i]); // span over the CHARSXP's bytes; only valid while x keeps its elements alive
+  }
+  return out;
+}
+
+// Input: cost_matrix, an NxN integer matrix whose row names give the characters used as pairchar_map_type keys
+// (only the first character of each name is used; column names are assumed to match the row names).
+// The special names "gap" and "gap_open" are recoded as GAP_CHAR ('\0') and GAP_OPEN_CHAR respectively.
+// Output: pairchar_map_type mapping (row char, column char) -> cost_matrix(row, col); gap-vs-gap pairs are left out
+inline pairchar_map_type convert_cost_matrix(IntegerMatrix cost_matrix) {
+  pairchar_map_type cost_map;
+  std::vector<char> map_elements;
+  {
+    List dimnames = cost_matrix.attr("dimnames");
+    CharacterVector rownames = dimnames[0];
+    map_elements.resize(rownames.size());
+    for(size_t i=0; i<map_elements.size(); ++i) {
+      if(rownames[i] == "gap") {
+        map_elements[i] = GAP_CHAR; // '\0' same symbol as GAP_EXTN_CHAR
+      } else if(rownames[i] == "gap_open") {
+        map_elements[i] = GAP_OPEN_CHAR; // std::numeric_limits<char>::min(); NOTE(review): equals GAP_CHAR where plain char is unsigned
+      } else {
+        Rcpp::String s = rownames[i];
+        map_elements[i] = s.get_cstring()[0]; // first byte only; an empty name yields '\0'
+      }
+    }
+  }
+  size_t N = map_elements.size();
+  for(size_t i=0; i<N; ++i) {
+    for(size_t j=0; j<N; ++j) {
+      // skip any combination of gap_open, gap
+      if((map_elements[i] == GAP_CHAR || map_elements[i] == GAP_OPEN_CHAR) &&
+         (map_elements[j] == GAP_CHAR || map_elements[j] == GAP_OPEN_CHAR)) continue;
+      cost_map[pairchar_type(map_elements[i], map_elements[j])] = cost_matrix(i,j);
+    }
+  }
+  return cost_map;
+}
+
+// Flattens per-query seqtrie search results into a data.frame with columns query, target, distance (one row per match)
+inline DataFrame seqtrie_results_to_dataframe(CharacterVector query, std::vector<SeqTrie::search_context> & output) {
+  size_t nresults = 0;
+  size_t nseqs = output.size();
+  for(size_t i=0; i<nseqs; ++i) { nresults += output[i].match.size(); } // first pass: count rows so the columns are sized once
+  CharacterVector query_results(nresults);
+  CharacterVector target_results(nresults);
+  IntegerVector distance_results(nresults);
+  int * distance_results_ptr = INTEGER(distance_results);
+  size_t q = 0; // next output row to fill
+  for(size_t i=0; i<nseqs; ++i) { // output[i] is paired with query element i
+    auto & targets = output[i].match;
+    auto & distances = output[i].distance;
+    for(size_t j=0; j<targets.size(); ++j) {
+      SET_STRING_ELT(query_results, q, STRING_ELT(query, i)); // reuses the query's existing CHARSXP
+      auto s = targets[j]->template sequence<SeqTrie::array_r<char>>(); // presumably the matched target's full sequence -- confirm in radixmap.h
+      SET_STRING_ELT(target_results, q, to_charsxp(s)); // new CHARSXP is stored straight into the result vector
+      distance_results_ptr[q] = distances[j];
+      q++;
+    }
+  }
+  return DataFrame::create(_["query"] = query_results,
+                           _["target"] = target_results,
+                           _["distance"] = distance_results,
+                           _["stringsAsFactors"] = false);
+}
+
 #endif
diff --git a/src/seqtrie_utils.h b/src/seqtrie_utils.h
deleted file mode 100644
index cd7d183..0000000
--- a/src/seqtrie_utils.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef seqtrie_UTILS_H
-#define seqtrie_UTILS_H
-
-#include <Rcpp.h>
-#include <RcppParallel.h>
-#include "seqtrie_types.h"
-#include "seqtrie/radixmap.h"
-#include "simple_array/small_array.h"
-
-using namespace Rcpp;
-using namespace RcppParallel;
-
-// parallel for helper function
-// accepts functors (lambdas) with std::size_t begin, std::size_t end
-template <typename Func>
-struct DoParallelFor : public RcppParallel::Worker {
-  Func f;
-  DoParallelFor(Func f) : f(f) {}
-  void operator()(std::size_t begin, std::size_t end) {
-    f(begin, end);
-  }
-
-};
-template <typename Func>
-inline void do_parallel_for(Func f, std::size_t begin, std::size_t end, std::size_t grainSize = 1, int numThreads = -1) {
-  DoParallelFor<Func> w(f);
-  parallelFor(begin, end, w, grainSize, numThreads);
-}
-
-// Convert a string to a SEXP
-// Be careful about R protection / GC
-inline SEXP to_charsxp(const trqwe::small_array<char> & x) {
-    return Rf_mkCharLen(x.data(), x.size());
-}
-
-// Define a span of const char from a SEXP
-inline cspan charsxp_to_cspan(SEXP x) {
-    return cspan(CHAR(x), Rf_xlength(x));
-}
-
-inline std::vector<cspan> strsxp_to_cspan(CharacterVector x) {
-    size_t n = Rf_xlength(x);
-    SEXP * xp = STRING_PTR(x);
-    std::vector<cspan> out(n);
-    for(size_t i=0; i<n; ++i) {
-        out[i] = charsxp_to_cspan(xp[i]);
-    }
-    return out;
-}
-
-// Input: cost_matrix
-// a NxN matrix where column/row names are the characters to use for pairchar_map_type keys
-// The special column "gap" is recoded as '\0'
-// Output: pairchar_map_type
-inline pairchar_map_type convert_cost_matrix(IntegerMatrix cost_matrix) {
-  pairchar_map_type cost_map;
-  std::vector<char> map_elements;
-  {
-    List dimnames = cost_matrix.attr("dimnames");
-    CharacterVector rownames = dimnames[0];
-    map_elements.resize(rownames.size());
-    for(size_t i=0; i<map_elements.size(); ++i) {
-      if(rownames[i] == "gap") {
-        map_elements[i] = GAP_CHAR; // '\0' same symbol as GAP_EXTN_CHAR
-      } else if(rownames[i] == "gap_open") {
-        map_elements[i] = GAP_OPEN_CHAR; // '\255'
-      } else {
-        Rcpp::String s = rownames[i];
-        map_elements[i] = s.get_cstring()[0];
-      }
-    }
-  }
-  size_t N = map_elements.size();
-  for(size_t i=0; i<N; ++i) {
-    for(size_t j=0; j<N; ++j) {
-      // skip any combination of gap_open, gap
-      if((map_elements[i] == GAP_CHAR || map_elements[i] == GAP_OPEN_CHAR) &&
-         (map_elements[j] == GAP_CHAR || map_elements[j] == GAP_OPEN_CHAR)) continue;
-      cost_map[pairchar_type(map_elements[i], map_elements[j])] = cost_matrix(i,j);
-    }
-  }
-  return cost_map;
-}
-
-// converts seqtrie search results to a data.frame
-inline DataFrame seqtrie_results_to_dataframe(CharacterVector query, std::vector<SeqTrie::search_context> & output) {
-  size_t nresults = 0;
-  size_t nseqs = output.size();
-  for(size_t i=0; i<nseqs; ++i) { nresults += output[i].match.size(); }
-  CharacterVector query_results(nresults);
-  CharacterVector target_results(nresults);
-  IntegerVector distance_results(nresults);
-  int * distance_results_ptr = INTEGER(distance_results);
-  size_t q = 0;
-  for(size_t i=0; i<nseqs; ++i) {
-    auto & targets = output[i].match;
-    auto & distances = output[i].distance;
-    for(size_t j=0; j<targets.size(); ++j) {
-      SET_STRING_ELT(query_results, q, STRING_ELT(query, i));
-      auto s = targets[j]->template sequence<trqwe::small_array<char>>();
-      SET_STRING_ELT(target_results, q, to_charsxp(s));
-      distance_results_ptr[q] = distances[j];
-      q++;
-    }
-  }
-  return DataFrame::create(_["query"] = query_results,
-                           _["target"] = target_results,
-                           _["distance"] = distance_results,
-                           _["stringsAsFactors"] = false);
-}
-#endif
\ No newline at end of file
diff --git a/vignettes/simple_tree.png b/vignettes/simple_tree.png
old mode 100644
new mode 100755
diff --git a/vignettes/vignette.html b/vignettes/vignette.html
old mode 100644
new mode 100755
diff --git a/vignettes/vignette.rmd b/vignettes/vignette.rmd
old mode 100644
new mode 100755