diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
new file mode 100644
index 0000000..903da37
--- /dev/null
+++ b/.JuliaFormatter.toml
@@ -0,0 +1,8 @@
+always_for_in = true
+whitespace_typedefs = true
+whitespace_ops_in_indices = true
+remove_extra_newlines = true
+import_to_using = true
+normalize_line_endings = "unix"
+separate_kwargs_with_semicolon = true
+whitespace_in_kwargs = false
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
index 35d4097..9bf03bd 100644
--- a/.github/workflows/Documentation.yml
+++ b/.github/workflows/Documentation.yml
@@ -10,17 +10,13 @@ on:
   pull_request:
 
 jobs:
-  build:
+  Documenter:
+    name: Documentation
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
-        with:
-          version: '1'
-      - name: Install dependencies
-        run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
-      - name: Build and deploy
+      - uses: julia-actions/julia-buildpkg@latest
+      - uses: julia-actions/julia-docdeploy@latest
         env:
-          # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
-          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
-        run: julia --color=yes --project=docs/ docs/make.jl
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml
index cff1346..d8ee908 100644
--- a/.github/workflows/UnitTests.yml
+++ b/.github/workflows/UnitTests.yml
@@ -11,31 +11,30 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        julia-version:
-          - '1.6' # LTS
-          - '1'
-        julia-arch: [x86]
-        os: [ubuntu-latest, windows-latest, macOS-latest]
+        julia-version: ['1', '1.10']
+        os: [ubuntu-latest, macOS-latest, windows-latest]
         experimental: [false]
         include:
+          # Include nightly, but experimental, so it's allowed to fail without
+          # failing CI.
           - julia-version: nightly
-            julia-arch: x86
             os: ubuntu-latest
             experimental: true
+            fail_ci_if_error: false
 
     steps:
       - name: Checkout Repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Julia
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@latest
         with:
           version: ${{ matrix.julia-version }}
       - name: Run Tests
         uses: julia-actions/julia-runtest@latest
       - name: Create CodeCov
-        uses: julia-actions/julia-processcoverage@v1
+        uses: julia-actions/julia-processcoverage@latest
       - name: Upload CodeCov
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v4
         with:
           file: ./lcov.info
           flags: unittests
diff --git a/.gitignore b/.gitignore
index 02e1207..37956a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,6 @@
 *.jl.*.cov
 *.jl.mem
 .DS_Store
-Manifest.toml
\ No newline at end of file
+Manifest.toml
+TODO.md
+docs/build
diff --git a/Project.toml b/Project.toml
index 28dacf5..5b81b21 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,17 +1,33 @@
 name = "Kmers"
 uuid = "445028e4-d31f-4f27-89ad-17affd83fc22"
-authors = ["Sabrina Jaye Ward <sabrinajward@protonmail.com>"]
-version = "0.1.0"
+authors = [
+    "Jakob Nybo Nissen <jakobnybonissen@gmail.com>",
+    "Sabrina Jaye Ward <sabrinajward@protonmail.com>"
+]
+version = "1.0.0"
+
+[weakdeps]
+StringViews = "354b36f9-a18e-4713-926e-db85100087ba"
 
 [deps]
 BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
+BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
+
+[extensions]
+StringViewsExt = "StringViews"
 
+# Note: We intentionally have strict compat on BioSequences because Kmers
+# reaches into the internals of BioSequences.
 [compat]
-BioSequences = "3.1.3"
-julia = "1.5"
+BioSequences = "~3.4.1"
+Random = "1.10"
+julia = "1.10"
+StringViews = "1"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StringViews = "354b36f9-a18e-4713-926e-db85100087ba"
 
 [targets]
-test = ["Test"]
+test = ["Test", "Random", "StringViews"]
diff --git a/README.md b/README.md
index ed51eff..2dd7595 100644
--- a/README.md
+++ b/README.md
@@ -3,57 +3,57 @@
 [![Latest Release](https://img.shields.io/github/release/BioJulia/Kmers.jl.svg)](https://github.com/BioJulia/Kmers.jl/releases/latest)
 [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/Kmers.jl/blob/master/LICENSE)
 [![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/Kmers.jl/stable)
-[![Pkg Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
-
 
 ## Description
+Kmers.jl provide the `Kmer <: BioSequence` type which implement the concept of a
+[k-mer](https://en.wikipedia.org/wiki/K-mer), a biological sequence of exactly length `k`.
 
-Kmers provides a specialised concrete `BioSequence` subtype, optimised for
-representing short immutable sequences called kmers: contiguous sub-strings of k
-nucleotides of some reference sequence.
 
-They are used extensively in bioinformatic analyses as an informational unit.
-This concept was popularised by short read assemblers. 
-Analyses within the kmer space benefit from a simple formulation of the sampling
-problem and direct in-hash comparisons.
+K-mers are used frequently in bioinformatics because, when k is small and known at
+compile time, these sequences can be efficiently represented as integers and stored
+directly in CPU registers, allowing for much more efficient computation than arbitrary-length sequences.
 
-Kmers provides the type representing kmers as well as the implementations of
-the APIs specified by the
-[`BioSequences.jl`](https://github.com/BioJulia/BioSequences.jl) package.
+In Kmers.jl, the `Kmer` type is psrameterized by its length, and its data is stored in an `NTuple`. This makes `Kmers` bitstypes and highly efficient.
 
-## Installation
+Conceptually, one may use the following analogy:
+* `BioSequence` is like `AbstractVector`
+* `LongSequence` is like `Vector`
+* `Kmer` is like [`SVector`](https://github.com/JuliaArrays/StaticArrays.jl) from `StaticArrays`
+
+Kmers.jl is tightly coupled to the
+[`BioSequences.jl`](https://github.com/BioJulia/BioSequences.jl) package,
+and relies on its internals.
+Hence, you should expect strict compat bounds on BioSequences.jl.
+
+## Usage
+### ⚠️ WARNING ⚠️
+`Kmer`s are parameterized by their length. That means any operation on `Kmer`s that change their length, such as `push`, `pop`, slicing, or masking (logical indexing) will be **type unstable** and hence slow and memory inefficient, unless you write your code in such as way that the compiler can use constant folding.
 
+Further, as `Kmer`s are immutable and their operations are aggressively inlined and unrolled,
+they become inefficent as they get longer.
+For example, reverse-complementing a 32-mer takes 26 ns, compared to 102 ns for the equivalent `LongSequence`. However, for 512-mers, the `LongSequence` takes 126 ns, and the `Kmer` 16 μs!
+
+Kmers.jl is intended for high-performance computing. If you do not need the extra performance that register-stored sequences provide, you might consider using `LongSequence` from BioSequences.jl instead
+
+## Installation
 You can install BioSequences from the julia
 REPL. Press `]` to enter pkg mode, and enter the following:
 
 ```julia
-add Kmers
+pkg> add Kmers
 ```
 
-If you are interested in the cutting edge of the development, please check out
+If you are interested in the cutting edge of development, please check out
 the master branch to try new features before release.
 
-
-## Testing
-
-Kmers is tested against Julia `1.X` on Linux, OS X, and Windows.
-
-[![Unit tests](https://github.com/BioJulia/Kmers.jl/workflows/Unit%20tests/badge.svg?branch=master)](https://github.com/BioJulia/Kmers.jl/actions?query=workflow%3A%22Unit+tests%22+branch%3Amaster)
-[![Documentation](https://github.com/BioJulia/Kmers.jl/workflows/Documentation/badge.svg?branch=master)](https://github.com/BioJulia/BioKmers.jl/actions?query=workflow%3ADocumentation+branch%3Amaster)
-[![](https://codecov.io/gh/BioJulia/Kmers.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/BioJulia/Kmers.jl)
-
-
 ## Contributing
-
 We appreciate contributions from users including reporting bugs, fixing
 issues, improving performance and adding new features.
 
 Take a look at the [contributing files](https://github.com/BioJulia/Contributing)
 detailed contributor and maintainer guidelines, and code of conduct.
 
-
 ## Questions?
-
 If you have a question about contributing or using BioJulia software, come
-on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the
+on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the
 [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
diff --git a/docs/Project.toml b/docs/Project.toml
index e064fd1..2d21498 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,9 @@
 [deps]
+BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
+Kmers = "445028e4-d31f-4f27-89ad-17affd83fc22"
+MinHash = "4b3c9753-2685-44e9-8a29-365b96c023ed"
 
 [compat]
-Documenter = "0.24"
\ No newline at end of file
+Documenter = "1"
diff --git a/docs/make.jl b/docs/make.jl
index 61b4844..32919be 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,29 +1,33 @@
 using Documenter, Kmers
 
-makedocs(
-    format = Documenter.HTML(),
-    sitename = "Kmers.jl",
-    pages = [
-        "Home"                           => "index.md",
-        "Kmer types"                     => "kmer_types.md",
-        "Constructing kmers"             => "construction.md",
-        "Indexing & modifying kmers"     => "transforms.md",
-        "Predicates"                     => "predicates.md",
-        "Random kmers"                   => "random.md",
-        "Iterating over Kmers"           => "iteration.md",
-        "Translation"                    => "translate.md",
-        #"Pattern matching and searching" => "sequence_search.md",
-        #"Iteration"                      => "iteration.md",
-        #"Counting"                       => "counting.md",
-        #"I/O"                            => "io.md",
-        #"Interfaces"                     => "interfaces.md"
+DocMeta.setdocmeta!(
+    Kmers,
+    :DocTestSetup,
+    :(using BioSequences, Kmers, Test);
+    recursive=true,
+)
+
+makedocs(;
+    modules=[Kmers],
+    format=Documenter.HTML(; prettyurls=get(ENV, "CI", nothing) == "true"),
+    sitename="Kmers.jl",
+    pages=[
+        "Home" => "index.md",
+        "The Kmer type" => "kmers.md",
+        "Iteration" => "iteration.md",
+        "Translation" => "translation.md",
+        "Hashing" => "hashing.md",
+        "K-mer replacements" => "replacements.md",
+        "FAQ" => "faq.md",
+        "Cookbook" => ["MinHash" => "minhash.md", "Kmer composition" => "composition.md"],
     ],
-    authors = "Ben J. Ward, The BioJulia Organisation and other contributors."
+    authors="Jakob Nybo Nissen, Sabrina J. Ward, The BioJulia Organisation and other contributors.",
+    checkdocs=:exports,
 )
 
-deploydocs(
-    repo = "github.com/BioJulia/Kmers.jl.git",
-    push_preview = true,
-    deps = nothing,
-    make = nothing
+deploydocs(;
+    repo="github.com/BioJulia/Kmers.jl.git",
+    push_preview=true,
+    deps=nothing,
+    make=nothing,
 )
diff --git a/docs/src/composition.md b/docs/src/composition.md
new file mode 100644
index 0000000..ac9cc4f
--- /dev/null
+++ b/docs/src/composition.md
@@ -0,0 +1,52 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+end
+```
+## Kmer composition
+In metagenomics, sequences are often summarized by counting the occurrence of
+all k-mers of a given length in a sequence.
+For example, for K=4, there are 4^4 = 256 possible DNA 4-mers.
+If these counts are ordered, the composition can be represented by a length 256
+vector.
+
+Vector similarity operations (e.g. cosine distance) can then be used as an
+approximate proxy for phylogenetic distance.
+
+In the example below, we exploit that:
+* A `DNAKmer{4}`'s data is a single-element tuple, which
+  stores the sequence in the 8 lower bits.
+* The `encoded_data` function will return this tuple.
+
+```jldoctest; output=false
+using BioSequences, FASTX, Kmers
+using BioSequences: encoded_data
+
+function composition(record::FASTARecord)
+    counts = zeros(UInt32, 256)
+    frequencies = zeros(Float32, 256)
+    for kmer in FwDNAMers{4}(sequence(record))
+        @inbounds counts[only(encoded_data(kmer)) + 1] += 1
+    end
+    factor = 1 / sum(counts; init=zero(eltype(counts)))
+    for i in eachindex(counts, frequencies)
+        frequencies[i] = counts[i] * factor
+    end
+    frequencies
+end
+
+# Make two FASTA records - could be from an assembly
+recs = [FASTARecord(string(i), randdnaseq(10000)) for i in "AB"]
+
+# Compute the 2-norm difference and verify it's in [0, 2].
+(comp_a, comp_b) = map(composition, recs)
+comp_distance = sum((comp_a .- comp_b).^2)
+println(0.0 ≤ comp_distance ≤ 2.0)
+
+# output
+true
+
+```
\ No newline at end of file
diff --git a/docs/src/construction.md b/docs/src/construction.md
deleted file mode 100644
index 6b300a9..0000000
--- a/docs/src/construction.md
+++ /dev/null
@@ -1,12 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Construction & conversion
-
-```@docs
-Kmer{A,K,N}(itr)
-```
diff --git a/docs/src/faq.md b/docs/src/faq.md
new file mode 100644
index 0000000..970e095
--- /dev/null
+++ b/docs/src/faq.md
@@ -0,0 +1,40 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+end
+```
+## FAQ
+### Why can kmers not be compared to biosequences?
+It may be surprising that kmers cannot be compared to other biosequences:
+
+```jldoctest
+julia> dna"TAG" == mer"TAG"d
+ERROR: MethodError
+[...]
+```
+
+In fact, this is implemented by a manually thrown `MethodError`; the generic case `Base.:==(::BioSequence, ::BioSequence)` is defined.
+
+The reason for this is the consequence of the following limitations:
+* `isequal(x, y)` implies `hash(x) == hash(y)`
+* `isequal(x, y)` and `x == y` ought to be identical for well-defined elements (i.e. in the absence of `missing`s and `NaN`s etc.)
+* `hash(::Kmer)` must be absolutely maximally efficient
+
+If kmers were to be comparable to `BioSequence`, then the hashing of `BioSequence` should follow `Kmer`, which practically speaking would mean that all biosequences would need to be recoded to `Kmer`s before hashing.
+
+### Why isn't there an iterator of unambiguous, canonical kmers or spaced, canonical kmers?
+Any iterator of nucleotide kmers can be made into a canonical kmer iterator by simply calling `canonical` on its output kers.
+
+The `CanonicalKmers` iterator is special cased, because with a step size of 1, it is generally faster to build the next kmer by storing both the reverse and forward kmer, then creating the next kmer by prepending/append the next symbol.
+
+However, with a larger step size, it becomes more efficient to build the forward kmer, then reverse-complement the whole kmer.
+
+### Why isn't there an iterator of skipmers/minimizers/k-min-mers, etc?
+The concept of kmers have turned out to be remarkably flexible and useful in bioinformatics, and have spawned a neverending stream of variations.
+We simply can't implement them all.
+
+However, see the section [Building kmer replacements](@ref replacements) on how to implement them
+as a user of Kmers.jl yourself.
diff --git a/docs/src/hashing.md b/docs/src/hashing.md
new file mode 100644
index 0000000..1ac8125
--- /dev/null
+++ b/docs/src/hashing.md
@@ -0,0 +1,60 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+end
+```
+
+!!! warning
+    The value of hashes are guaranteed to be reproducible for a given version
+    of Kmers.jl and Julia, but may __change__ in new minor versions of Julia
+    or Kmers.jl
+
+## Hashing
+Kmers.jl implements `Base.hash`, yielding a `UInt` value:
+
+```jldoctest; filter = r"^0x[0-9a-fA-F]+$"
+julia> hash(mer"UGCUGUAC"r)
+0xe5057d38c8907b22
+```
+
+The implementation of `Base.hash` for kmers strikes a compromise between providing a high-quality (non-cryptographic) hash, while being reasonably fast.
+While hash collisions can easily be found, they are unlikely to occur at random.
+When kmers are of the same (or compatible) alphabets, different kmers hash to different values
+(not counting the occational hash collision), even when they have the same underlying bitpattern:
+
+```jldoctest
+julia> using BioSequences: encoded_data
+
+julia> a = mer"TAG"d; b = mer"AAAAAAATAG"d;
+
+julia> encoded_data(a) === encoded_data(b)
+true
+
+julia> hash(a) == hash(b)
+false
+```
+
+When they are of compatible alphabets, and have the same content, they hash to the same value.
+Currently, only DNA and RNA of the alphabets `DNAAlphabet` and `RNAAlphabet` are compatible:
+
+```jldoctest
+julia> a = mer"UUGU"r; b = mer"TTGT"d;
+
+julia> a == b # equal
+true
+
+julia> a === b # not egal
+false
+
+julia> hash(a) === hash(b)
+true
+```
+
+For some applications, fast hashing is absolutely crucial. For these cases, Kmers.jl provides [`fx_hash`](@ref), which trades off hash quality for speed:
+
+```@docs
+fx_hash
+```
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index 11c4fe9..b3ca804 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,59 +1,53 @@
-# Kmers
+# Kmers.jl
+Kmers.jl provides the `Kmer <: BioSequence` type which implement the concept of a
+[k-mer](https://en.wikipedia.org/wiki/K-mer), a biological sequence of exactly length `k`.
 
-[![Latest Release](https://img.shields.io/github/release/BioJulia/Kmers.jl.svg)](https://github.com/BioJulia/Kmers.jl/releases/latest)
-[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/Kmers.jl/blob/master/LICENSE)
-[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/Kmers.jl/stable)
-[![Pkg Status](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
+Compared to other `BioSequence`s, `Kmer`s are characterized by:
+* Being immutable bitstypes
+* Being parameterized by its length, such that the length can be known at compile time.
 
+When kmers are short, and their length is known at compile time,
+these characteristics allow k-mers to be stack allocated, or stored in registers,
+allowing for much more efficient computation than arbitrary-length sequences.
 
-## Description
+Conceptually, one may use the following analogy:
+* `BioSequence` is like `AbstractVector`
+* `LongSequence` is like `Vector`
+* `Kmer` is like [`SVector`](https://github.com/JuliaArrays/StaticArrays.jl) from `StaticArrays`
 
-Kmers provides a specialised concrete `BioSequence` subtype, optimised for
-representing short immutable sequences called kmers: contiguous sub-strings of k
-nucleotides of some reference sequence.
+Kmers.jl is tightly coupled to the
+[`BioSequences.jl`](https://github.com/BioJulia/BioSequences.jl) package,
+and relies on its internals.
+Hence, you should expect strict compat bounds on BioSequences.jl.
 
-They are used extensively in bioinformatic analyses as an informational unit.
-This concept was popularised by short read assemblers. 
-Analyses within the kmer space benefit from a simple formulation of the sampling
-problem and direct in-hash comparisons.
+!!! warning
+    `Kmer`s are parameterized by their length.
+    That means any operation on `Kmer`s that change their length, such as `push`,
+    `pop`, slicing, or masking (logical indexing) will be **type unstable**
+    and hence slow and memory inefficient,
+    unless you write your code in such as way that the compiler can use constant folding.
 
-Kmers provides the type representing kmers as well as the implementations of
-the APIs specified by the
-[`BioSequences.jl`](https://github.com/BioJulia/BioSequences.jl) package.
+Kmers.jl is intended for high-performance computing. If you do not need the extra performance that register-stored sequences provide, you might consider using `LongSequence` from BioSequences.jl instead
 
 ## Installation
-
 You can install BioSequences from the julia
 REPL. Press `]` to enter pkg mode, and enter the following:
 
 ```julia
-add Kmers
+pkg> add Kmers
 ```
 
-If you are interested in the cutting edge of the development, please check out
+If you are interested in the cutting edge of development, please check out
 the master branch to try new features before release.
 
-
-## Testing
-
-Kmers is tested against Julia `1.X` on Linux, OS X, and Windows.
-
-[![Unit tests](https://github.com/BioJulia/Kmers.jl/workflows/Unit%20tests/badge.svg?branch=master)](https://github.com/BioJulia/Kmers.jl/actions?query=workflow%3A%22Unit+tests%22+branch%3Amaster)
-[![Documentation](https://github.com/BioJulia/Kmers.jl/workflows/Documentation/badge.svg?branch=master)](https://github.com/BioJulia/BioKmers.jl/actions?query=workflow%3ADocumentation+branch%3Amaster)
-[![](https://codecov.io/gh/BioJulia/Kmers.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/BioJulia/Kmers.jl)
-
-
 ## Contributing
-
 We appreciate contributions from users including reporting bugs, fixing
 issues, improving performance and adding new features.
 
 Take a look at the [contributing files](https://github.com/BioJulia/Contributing)
 detailed contributor and maintainer guidelines, and code of conduct.
 
-
 ## Questions?
-
 If you have a question about contributing or using BioJulia software, come
-on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the
+on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the
 [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
diff --git a/docs/src/iteration.md b/docs/src/iteration.md
index 3b36181..bf37d76 100644
--- a/docs/src/iteration.md
+++ b/docs/src/iteration.md
@@ -1,31 +1,105 @@
 ```@meta
 CurrentModule = Kmers
 DocTestSetup = quote
+    using BioSequences
+    using Test
     using Kmers
 end
 ```
+## Iteration
+Most applications of kmers extract multiple kmers from an underlying sequence.
+To facilitate this, Kmers.jl implements a few basic kmer iterators, most of which are subtypes of `AbstractKmerIterator`.
 
-# Iterating over kmers
+The underlying sequence can be a `BioSequence`, `AbstractString`, or `AbstractVector{UInt8}`.
+In the latter case, if the alphabet of the element type implements `BioSequences.AsciiAlphabet`, the vector will be treated as a vector of ASCII characters.
 
-When introducing the `Kmer` type we described kmers as contiguous sub-strings of
-k nucleotides of some reference sequence.
+Similarly to the rules when constructing kmers directly, DNA and RNA is treated interchangeably when the underlying sequence is a `BioSequence`, but when the underlying sequence is a string or bytevector, `U` and `T` are considered different, and e.g. uracil cannot be constructed from a sequence containing `T`:
 
-This package therefore contains functionality for iterating over all the valid
-`Kmers{A,K,N}` in a longer `BioSequence`.
+```jldoctest
+julia> only(FwDNAMers{3}(rna"UGU"))
+DNA 3-mer:
+TGT
+
+julia> only(FwDNAMers{3}("UGU"))
+ERROR:
+[...]
+```
+
+The following kmer iterators are implemented:
+
+### `FwKmers`
+The most basic kmer iterator is `FwKmers`, which simply iterates every kmer, in order:
+
+```@docs
+FwKmers
+FwDNAMers
+FwRNAMers
+FwAAMers
+```
+
+### `FwRvIterator`
+This iterates over a nucleic acid sequence. For every kmer it encounters, it outputs the kmer and its reverse complement.
+
+```@docs
+FwRvIterator
+```
+
+### `CanonicalKmers`
+This iterator is similar to [`FwKmers`](@ref), however, for each `Kmer` encountered, it returns the _canonical_ kmer.
+
+The canonical kmer is defined as the lexographically smaller of a kmer and its reverse complement.
+That is, if [`FwKmers`](@ref) would iterate `TCAC`, then [`CanonicalKmers`](@ref) would return `GTGA`, as this is the reverse complement of `TCAC`, and is before `TCAC` in the alphabet.
+
+[`CanonicalKmers`](@ref) is useful for summarizing the kmer composition of sequences whose strandedness is unknown.
+
+```@docs
+CanonicalKmers
+CanonicalDNAMers
+CanonicalRNAMers
+```
+
+### `UnambiguousKmers`
+[`UnambiguousKmers`](@ref) iterates unambiguous nucleotides (that is, kmers of the alphabets `DNAAlphabet{2}` or `RNAAlphabet{2}`).
+Any kmers containing [ambiguous nucleotides](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC341218/) such as `W` or `N` are skipped.
+
+```@docs
+UnambiguousKmers
+UnambiguousDNAMers
+UnambiguousRNAMers
+```
+
+### `SpacedKmers`
+The [`SpacedKmers`](@ref) iterator iterates kmers with a fixed step size between k-mers.
+For example, for a K of 4, and a step size of 3, the output kmers would overlap with a single nucleotide, like so:
+
+```
+seq: TGATGCGTAGTG
+     TGCT
+        TGCG
+           GTAG
+```
+
+Hence, if `FwKmers` are analogous to `UnitRange`, `SpacedKmers` is analogous to `StepRange`.
 
 ```@docs
-EveryKmer
-EveryKmer{T,S}(seq::S, start::Int = firstindex(seq), stop::Int = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-EveryKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
 SpacedKmers
-SpacedKmers{T,S}(seq::S, step::Int, start::Int, stop::Int) where {T<:Kmer,S<:BioSequence}
-SpacedKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-EveryCanonicalKmer
-EveryCanonicalKmer{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-EveryCanonicalKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-SpacedCanonicalKmers
-SpacedCanonicalKmers{T}(seq::S, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-SpacedCanonicalKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
+SpacedDNAMers
+SpacedRNAMers
+SpacedAAMers
+```
+
+The convenience functions [`each_codon`](@ref) return `SpacedKmers` with a K value of 3 and step size of 3:
+
+```@docs
+each_codon
 ```
 
+## The `AbstractKmerIterator` interface
+It's very likely that users of Kmers.jl need to implement their own custom kmer iterators, in which case they should subtype [`AbstractKmerIterator`](@ref).
+
+```@docs
+AbstractKmerIterator
+```
 
+At the moment, there is no real interface implemented for this abstract type,
+other than that `AbstractKmerIterator{A, K}` needs to iterate `Kmer{A, K}`.
diff --git a/docs/src/kmer_types.md b/docs/src/kmer_types.md
deleted file mode 100644
index aab9330..0000000
--- a/docs/src/kmer_types.md
+++ /dev/null
@@ -1,92 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Kmer types
-
-Bioinformatic analyses make extensive use of kmers.
-Kmers are contiguous sub-strings of k nucleotides of some reference sequence. 
-
-They are used extensively in bioinformatic analyses as an informational unit.
-This concept popularised by short read assemblers. 
-Analyses within the kmer space benefit from a simple formulation of the sampling
-problem and direct in-hash comparisons.
-
-BioSequences provides the following types to represent Kmers.
-
-```@docs
-Kmer
-```
-
-The following aliases are also defined:
-
-```@docs
-DNAKmer
-DNA27mer
-DNA31mer
-DNA63mer
-RNAKmer
-RNA27mer
-RNA31mer
-RNA63mer
-AAKmer
-DNACodon
-RNACodon
-```
-
-
-### Skipmers
-
-For some analyses, the contiguous nature of kmers imposes limitations.
-A single base difference, due to real biological variation or a sequencing error,
-affects all k-mers crossing that position thus impeding direct analyses by identity.
-Also, given the strong interdependence of local sequence, contiguous sections
-capture less information about genome structure, and so they are more affected by
-sequence repetition. 
-
-Skipmers are a generalisation of the concept of a kmer.
-They are created using a cyclic pattern of used-and-skipped positions which
-achieves increased entropy and tolerance to nucleotide substitution differences
-by following some simple rules.
-
-Skipmers preserve many of the elegant properties of kmers such as reverse
-complementability and existence of a canonical representation.
-Also, using cycles of three greatly increases the power of direct intersection
-between the genomes of different organisms by grouping together the more conserved 
-nucleotides of protein-coding regions.
-
-BioSequences currently does not provide a separate type for skipmers, they are
-represented using `Mer` and `BigMer` as their representation as a short immutable
-sequence encoded in an unsigned integer is the same.
-The distinction lies in how they are generated.
-
-#### Skipmer generation
-
-A skipmer is a simple cyclic q-gram that includes _m_ out of every _n_ bases
-until a total of _k_ bases is reached. 
-
-This is illustrated in the figure below (from
-[this paper](https://www.biorxiv.org/content/biorxiv/early/2017/08/23/179960.full.pdf).):
-
-![skipmer-fig](skipmers.png)
-
-To maintain cyclic properties and the existence of the reverse-complement as a
-skipmer defined by the same function, _k_ should be a multiple of _m_.
-
-This also enables the existence of a canonical representation for each skipmer,
-defined as the lexicographically smaller of the forward and reverse-complement 
-representations.
-
-Defining _m_, _n_ and _k_ fixes a value for _S_, the total span of the skipmer,
-given by: 
-
-```math
-S = n * (\frac{k}{m} - 1) + m
-```
-
-To see how to iterate over skipmers cf. kmers, see the Iteration section
-of the manual.
-
diff --git a/docs/src/kmers.md b/docs/src/kmers.md
new file mode 100644
index 0000000..d2d9dab
--- /dev/null
+++ b/docs/src/kmers.md
@@ -0,0 +1,201 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+end
+```
+
+## The `Kmer` type
+The central type of Kmers.jl is the `Kmer`.
+A `Kmer` is an immutable, bitstype `BioSequence`, with a length known at compile
+time. Compared to `LongSequence` in BioSequences.jl,
+this gives to one advantage, and comes with two disadvantages:
+* Kmers are much faster than `LongSequence`, as they can be stored in registers.
+* As kmers gets longer, the code gets increasingly inefficient, as the unrolling
+  and inlining of the immutable operations breaks down.
+* Since their length is part of their type, any operation that results in a kmer
+  whose length cannot be determined at compile time will be type unstable.
+  This includes slicing a kmer, pushing and popping it, and other operations.
+
+The `Kmer` type is (roughly) defined as
+```julia
+struct Kmer{A <: Alphabet, K, N} <: BioSequence{A}
+    x::NTuple{N, UInt}
+end
+```
+Where:
+* `A` is the `Alphabet` as defined in BioSequences.jl.
+* `K` is the length.
+* `N` is an extra type parameter derived from the first two used for the
+  length of the tuple,
+  which exists only because Julia does not allow computed type parameters.
+
+Given a `Kmer{A, K}`, the `N` parameter may be computed using the function
+[`derive_type`](@ref):
+
+```jldoctest
+julia> derive_type(Kmer{AminoAcidAlphabet, 6})
+Kmer{AminoAcidAlphabet, 6, 1}
+```
+
+### Construction
+Kmers can be constructed from a `BioSequence` or `AbstractString` by explicitly
+specifying the length of the sequence:
+
+```jldoctest
+julia> Kmer{DNAAlphabet{2}, 5, 1}("TAGCT")
+DNA 5-mer:
+TAGCT
+```
+
+The final type parameter `N` can be elided, in which case it will be inferred:
+
+```jldoctest
+julia> Kmer{DNAAlphabet{2}, 5}("TAGCT")
+DNA 5-mer:
+TAGCT
+```
+
+Kmers with alphabets `DNAAlphabet{2}`, `RNAAlphabet{2}` and `AminoAcidAlphabet`
+can be created with the type aliases `DNAKmer`, `RNAKmer` and `AAKmer`:
+
+```jldoctest
+julia> DNAKmer{3}("tag")
+DNA 3-mer:
+TAG
+
+julia> AAKmer{5}("PWYSK")
+AminoAcid 5-mer:
+PWYSK
+```
+
+For kmers with an `Alphabet` that implement the trait `BioSequences.AsciiAlphabet`, they can also be constructed from `AbstractVector{UInt8}`, in which case the vector is interpreted as being bytes of ASCII text:
+
+```jldoctest
+julia> AAKmer{3}([0x65, 0x67, 0x7a])
+AminoAcid 3-mer:
+EGZ
+```
+
+When constructing from an `AbstractString` (or byte vector), uracil (`U`) and thymine `T` are treated differently - a `U` cannot be read as thymine:
+
+```jldoctest
+julia> DNAKmer{3}("UAG")
+ERROR: cannot encode 0x55 (Char 'U') in DNAAlphabet{2}
+[...]
+```
+
+However, when constructing from a `BioSequence`, these nucleotides are considered
+interchangeable:
+
+```jldoctest
+julia> RNAKmer{4}(dna"TATC")
+RNA 4-mer:
+UAUC
+```
+
+Finally, kmers can be constructed with a string literal `@mer_str`, where the string must be appended with `d` for DNA, `r` for RNA, or `a` for amino acid:
+
+```jldoctest
+julia> mer"UGCUGA"r
+RNA 6-mer:
+UGCUGA
+
+julia> mer"EDEHL"a
+AminoAcid 5-mer:
+EDEHL
+```
+
+Since the literals produce the kmer at parse time and inserts the kmer directly into the abstract syntax tree, this will always be type stable,
+and the overhead related to parsing the string will not be paid.
+
+In the following example, each iteration takes less than 1 nanosecond,
+which implies that parsing the string literal `mer"AAA"d` is not done
+in the loop at runtime:
+
+```jldoctest; filter = [r"^\s*0\.\d+ seconds.+"s, r"^\d+"s]
+julia> function count_aaas(dna)
+           x = 0
+           for kmer in FwDNAMers{3}(dna)
+               # The parsing happens once here, when the
+               # code is parsed, and is fine to have in the loop
+               x += kmer == mer"AAA"d
+           end
+           x
+       end;
+
+julia> seq = randseq(DNAAlphabet{2}(), 100_000_000);
+
+julia> count_aaas(seq); # compile
+
+julia> @time count_aaas(seq) # about 1ns per iteration
+  0.088556 seconds (1 allocation: 16 bytes)
+1561361
+```
+
+### Indexing
+Kmers support most normal indexing, such as scalar indexing:
+
+```jldoctest
+julia> mer"CAGCU"r[3]
+RNA_G
+```
+
+Slicing
+
+```jldoctest
+julia> mer"AGGCTA"d[2:5]
+DNA 4-mer:
+GGCT
+```
+
+And indexing with boolean vectors, and vectors of indices:
+
+```jldoctest
+julia> m = mer"MDGKRY"a;
+
+julia> m[[true, false, true, true, false, true]]
+AminoAcid 4-mer:
+MGKY
+
+julia> m[[4,2]]
+AminoAcid 2-mer:
+KD
+```
+
+### A note on type stability
+Except scalar indexing which always returns a single symbol, all the operations
+above are _type unstable_, since the length (and thus type) of the resulting 
+kmer depends on the input value, not its type.
+
+However, type unstable functions may be type-stable, if the indexing value is
+known at compile time, and the Julia compiler uses constant folding:
+
+```jldoctest
+julia> f(x) = x[2:5]; # 2:5 is a compile time constant
+
+julia> Test.@inferred f(mer"UCGUAGC"r)
+RNA 4-mer:
+CGUA
+```
+
+### Reference
+```@docs
+Kmer
+derive_type
+Mer
+@mer_str
+DNAKmer
+RNAKmer
+AAKmer
+DNACodon
+RNACodon
+pop
+pop_first
+push
+push_first
+shift
+shift_first
+```
\ No newline at end of file
diff --git a/docs/src/minhash.md b/docs/src/minhash.md
new file mode 100644
index 0000000..0ee41d5
--- /dev/null
+++ b/docs/src/minhash.md
@@ -0,0 +1,42 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+    using FASTX
+    using MinHash
+end
+```
+## MinHash
+The MinHash algorithm is used in tools such as
+[Mash](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0997-x)
+and [sourmash](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6720031/)
+to quickly compute approximate similarities of genomes, collections of genomes, or collections of reads.
+
+```jldoctest; filter = r"^\d+ MB/s$" => s"***"
+using BioSequences, MinHash, FASTX, Kmers
+
+# Write 25 sequences of length 20 to a buffer.
+# Try changing this to length 4 million!
+buffer = IOBuffer()
+writer = FASTAWriter(buffer)
+n_bytes = sum(1:25) do genome
+    rec = FASTARecord("seq_$(genome)", randdnaseq(20))
+    write(writer, rec)
+end
+flush(writer)
+
+# Time minhashing the 50 genomes
+timing = @timed FASTAReader(seekstart(buffer); copy=false) do reader
+    map(reader) do record
+        seq = codeunits(sequence(record))
+        sketch(fx_hash, CanonicalDNAMers{16}(sequence(record)), 1000)
+    end
+end
+println(round(Int, n_bytes / (timing.time * 1e6)), " MB/s")
+
+# output
+
+200 MB/s
+```
diff --git a/docs/src/predicates.md b/docs/src/predicates.md
deleted file mode 100644
index 2aebc49..0000000
--- a/docs/src/predicates.md
+++ /dev/null
@@ -1,18 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Predicates
-
-The following predicate functions from BioSequences.jl are compatible with `Kmer`s.
-Some have an optimised method defined in Kmers.jl.
-
-```@docs
-isrepetitive
-ispalindromic
-hasambiguity
-iscanonical
-```
\ No newline at end of file
diff --git a/docs/src/random.md b/docs/src/random.md
deleted file mode 100644
index 8a1990b..0000000
--- a/docs/src/random.md
+++ /dev/null
@@ -1,14 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Generating random sequences
-
-You can generate random kmers using `Base.rand` function.
-
-```@docs
-Base.rand(::Type{<:Kmer})
-```
\ No newline at end of file
diff --git a/docs/src/replacements.md b/docs/src/replacements.md
new file mode 100644
index 0000000..bdefb8c
--- /dev/null
+++ b/docs/src/replacements.md
@@ -0,0 +1,59 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+    using Random
+end
+```
+
+# [Building kmer replacements](@id replacements)
+_Kmer replacements_ is the general term for sequences that can be represented
+computationally like kmers, but are sampled differently. Examples include minimizers, strobemers, syncmers and k-min-mers.
+
+Since there is no end to the variations of kmer replacements, Kmers.jl does not try to implement all of them.
+Instead, Kmers.jl implements the base kmer type, and exposes some efficient primitives to allow downstream users to build kmer replacements.
+
+These functions are:
+
+```@docs
+unsafe_extract
+shift_encoding
+unsafe_shift_from
+```
+
+# Example: Minimizers
+Minimizers are currently the most common kmer replacement.
+They are defined as the minimum of W consecutive kmers, as ordered by some ordering O.
+
+If we use [`fx_hash`](@ref) as the ordering function, and assume K and W are known at compile time, we can implement it reasonably efficiently like so:
+
+```jldoctest
+function unsafe_extract_minimizer(
+    seq::LongDNA{2},
+    i::Int,
+    ::Val{K},
+    ::Val{W},
+) where {K, W}
+    T = derive_type(Kmer{DNAAlphabet{2}, K})
+    kmer = Kmers.unsafe_extract(Kmers.Copyable(), T, seq, i)
+    hash = fx_hash(kmer)
+    for offset in 0:W-2
+        new_kmer = Kmers.unsafe_shift_from(Kmers.Copyable(), kmer, seq, i+K+offset, Val(1))
+        new_hash = fx_hash(new_kmer)
+        if new_hash < hash
+            hash = new_hash
+            kmer = new_kmer
+        end
+    end
+    kmer
+end
+
+rng = Random.Xoshiro(1)
+unsafe_extract_minimizer(randseq(rng, DNAAlphabet{2}(), 100), 1, Val(5), Val(9))
+
+# output
+DNA 5-mer:
+TATCA
+```
diff --git a/docs/src/skipmers.png b/docs/src/skipmers.png
deleted file mode 100644
index 31edbc6..0000000
Binary files a/docs/src/skipmers.png and /dev/null differ
diff --git a/docs/src/transforms.md b/docs/src/transforms.md
deleted file mode 100644
index 853b10c..0000000
--- a/docs/src/transforms.md
+++ /dev/null
@@ -1,52 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Indexing & modifying kmers
-
-## Indexing
-
-As `BioSequence` concrete subtypes, kmers can be indexed using integers
-
-```jldoctest
-julia> kmer = Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C)
-DNA 5-mer:
-TTAGC
-
-julia> kmer[3]
-DNA_A
-```
-
-You can also slice Kmers using UnitRanges:
-
-```jldoctest
-julia> kmer = Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C)
-DNA 5-mer:
-TTAGC
-
-julia> kmer[1:3]
-DNA 3-mer:
-TTA
-```
-
-!!! warning
-    Using slicing will introduce performance penalties in your code if
-    you pass values of `i` that are not constants that can be propagated.
-
-## Modifying sequences
-
-Many modifying operations that are possible for some `BioSequences` such as
-`LongSequence` are not possible for `Kmer`s, this is primarily due to the fact
-`Kmer`s are an immutable struct.
-
-However some non-mutating transformations are available:
-
-```@docs
-BioSequences.complement(::Kmer)
-Base.reverse(::Kmer)
-BioSequences.reverse_complement(::Kmer)
-canonical
-```
\ No newline at end of file
diff --git a/docs/src/translate.md b/docs/src/translate.md
deleted file mode 100644
index d518556..0000000
--- a/docs/src/translate.md
+++ /dev/null
@@ -1,62 +0,0 @@
-```@meta
-CurrentModule = Kmers
-DocTestSetup = quote
-    using Kmers
-end
-```
-
-# Translating and reverse translating
-
-## Translating
-Just like other `BioSequence`s, `Kmer`s of RNA or DNA alphabets can be efficiently translated to amino acids:
-
-```
-julia> kmer = RNAKmer("AUGGGCCACUGA");
-
-julia> translate(kmer)
-AminoAcid 4-mer:
-MGH*
-```
-
-For more information on translation and different genetic codes, see the documentation of BioSequences.jl.
-
-## Reverse translation
-Reverse translation (or "revtrans", for short) refers to the mapping from amino acids back to the set of RNA codons that code for the given amino acid, under a given genetic code.
-There is no known natural process of revtrans, but it can be useful to do _in silico_.
-
-In Kmers.jl, revtrans is done through the `reverse_translate` function.
-This takes an amino acid sequence and produces a `Vector{CodonSet}`, where `CodonSet <: AbstractSet{RNACodon}`.
-Alternatively, it takes an amino acid and produces a `CodonSet`.
-
-A reverse genetic code can optionally be specified as the second argument.
-If not provided, it default to the reverse standard genetic code.
-
-### Example of reverse translation
-```julia
-julia> reverse_translate(AA_W) # default to standard genetic code
-Kmers.CodonSet with 1 element:
-  UGG
-
-julia> code = ReverseGeneticCode(BioSequences.trematode_mitochondrial_genetic_code);
-
-julia> reverse_translate(AA_W, code)
-Kmers.CodonSet with 2 elements:
-  UGA
-  UGG
-```
-
-### Important notes on reverse translation
-* `AA_Gap` cannot be reverse translated. Attempting so throws an error
-* In cells, `AA_O` and `AA_U` are encoded by dynamic overloading of the codons `UAG` and `UGA`, respectively.
-  Because these codons normally code for `AA_Term`, the forward genetic code returns `AA_Term` for these codons.
-  However, we can unambiguously reverse translate them, so these amino acids translate to codonsets with these
-  precise codons.
-* Ambiguous amino acids translate to the union of the possible amino acids. For example, if `AA_L` translate to set `S1`,
-  and `AA_I` translate to `S2`, then `AA_J` translate to `union(S1, S2)`.
-
-```@docs
-Kmers.CodonSet
-Kmers.ReverseGeneticCode
-reverse_translate
-reverse_translate!
-```
diff --git a/docs/src/translation.md b/docs/src/translation.md
new file mode 100644
index 0000000..d4397d1
--- /dev/null
+++ b/docs/src/translation.md
@@ -0,0 +1,67 @@
+```@meta
+CurrentModule = Kmers
+DocTestSetup = quote
+    using BioSequences
+    using Test
+    using Kmers
+end
+```
+
+## Translation
+`Kmer`s can be translated using the `translate` function exported by `BioSequences`:
+
+```jldoctest
+julia> translate(mer"UGCUUGAUC"r)
+AminoAcid 3-mer:
+CLI
+```
+
+Since `Kmer`s are immutable, the in-place `translate!` function is not implemented for `Kmers`.
+Also, remember that `Kmer`s are only efficient when short (at most a few hundred symbols). Hence, entire exons or genes should probably not ever be represented by a `Kmer`, but rather as a `LongSequence` or `LongSubSeq` from BioSequences.jl.
+
+### Reverse translation
+Kmers.jl implements reverse translation, which maps an amino acid sequence to one or more RNA sequences.
+While this process doesn't occur naturally (as far as we know), it is still useful for some analyses.
+
+Since genetic codes are degenerate, i.e. multiple codons code for the same amino acid, reverse translating a sequence does not return a nucleic acid sequence, but a vector of `CodonSet`:
+
+```@docs
+reverse_translate
+CodonSet
+```
+
+`CodonSet` is an efficiently implemented `AbstractSet{RNACodon}` (and remember, `RNACodon` is an alias for `RNAKmer{3, 1}`).
+
+To avoid allocating a new `Vector`, you can use `reverse_translate!`:
+
+```@docs
+reverse_translate!
+```
+
+Both functions take a genetic code as a keyword argument of the type `ReverseGeneticCode`. This object determines the mapping from amino acid to `CodonSet` - by default the [standard genetic code](https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables#Standard_RNA_codon_table) is used - this mapping is used by nearly all organisms.
+
+Only the reverse standard genetic code is defined in Kmers.jl.
+To use another genetic code, build a `ReverseGeneticCode` object from an existing
+`BioSequences.GeneticCode`:
+
+```jldoctest
+julia> code = BioSequences.pterobrachia_mitochondrial_genetic_code;
+
+julia> rv_code = ReverseGeneticCode(code);
+
+julia> seq = aa"KWLP";
+
+julia> codonsets = reverse_translate(seq, rv_code)
+4-element Vector{CodonSet}:
+ CodonSet(0x0000000000000405)
+ CodonSet(0x0500000000000000)
+ CodonSet(0x50000000f0000000)
+ CodonSet(0x0000000000f00000)
+
+julia> codonsets == reverse_translate(seq) # default standard code
+false
+```
+
+```@docs
+ReverseGeneticCode
+```
diff --git a/ext/StringViewsExt.jl b/ext/StringViewsExt.jl
new file mode 100644
index 0000000..f674427
--- /dev/null
+++ b/ext/StringViewsExt.jl
@@ -0,0 +1,11 @@
+module StringViewsExt
+
+using StringViews: StringView
+using Kmers: Kmers
+
+# This extension is important because FASTX uses string views.
+# The documentation of StringViews promises that the underlying
+# string is UTF-8 encoded.
+Kmers.is_ascii(::Type{<:StringView}) = true
+
+end # module
diff --git a/src/Kmers.jl b/src/Kmers.jl
index be1a2c5..3e46ab9 100644
--- a/src/Kmers.jl
+++ b/src/Kmers.jl
@@ -5,82 +5,61 @@
 #
 # This file is a part of the Kmers.jl, a package in the BioJulia ecosystem.
 # License is MIT: https://github.com/BioJulia/Kmers.jl/blob/master/LICENSE
+module Kmers
+
+export Kmer,
+    Mer,
+    DNAKmer,
+    RNAKmer,
+    AAKmer,
+    DNACodon,
+    RNACodon,
+    ReverseGeneticCode,
+    reverse_translate,
+    reverse_translate!,
+    @mer_str,
+    fx_hash,
+    derive_type,
+
+    # Immutable operations
+    push,
+    push_first,
+    shift,
+    shift_first,
+    pop,
+    pop_first,
 
-__precompile__()
+    # Iterators
+    FwKmers,
+    FwDNAMers,
+    FwRNAMers,
+    FwAAMers,
+    FwRvIterator,
+    CanonicalKmers,
+    CanonicalDNAMers,
+    CanonicalRNAMers,
+    UnambiguousKmers,
+    UnambiguousDNAMers,
+    UnambiguousRNAMers,
+    SpacedKmers,
+    SpacedDNAMers,
+    SpacedRNAMers,
+    SpacedAAMers,
+    each_codon,
 
-module Kmers
+    # Reverse translation
+    CodonSet,
+    delete, # push already exported
 
-export
-    # BioSymbols re-exports.
+    ##################
+    # Re-exports
+    ##################
+    # BioSymbols re-exports
     NucleicAcid,
     DNA,
     RNA,
-    DNA_A,
-    DNA_C,
-    DNA_G,
-    DNA_T,
-    DNA_M,
-    DNA_R,
-    DNA_W,
-    DNA_S,
-    DNA_Y,
-    DNA_K,
-    DNA_V,
-    DNA_H,
-    DNA_D,
-    DNA_B,
-    DNA_N,
-    DNA_Gap,
-    ACGT,
-    ACGTN,
-    RNA_A,
-    RNA_C,
-    RNA_G,
-    RNA_U,
-    RNA_M,
-    RNA_R,
-    RNA_W,
-    RNA_S,
-    RNA_Y,
-    RNA_K,
-    RNA_V,
-    RNA_H,
-    RNA_D,
-    RNA_B,
-    RNA_N,
-    RNA_Gap,
-    ACGU,
-    ACGUN,
     AminoAcid,
-    AA_A,
-    AA_R,
-    AA_N,
-    AA_D,
-    AA_C,
-    AA_Q,
-    AA_E,
-    AA_G,
-    AA_H,
-    AA_I,
-    AA_L,
-    AA_K,
-    AA_M,
-    AA_F,
-    AA_P,
-    AA_S,
-    AA_T,
-    AA_W,
-    AA_Y,
-    AA_V,
-    AA_O,
-    AA_U,
-    AA_B,
-    AA_J,
-    AA_Z,
-    AA_X,
-    AA_Term,
-    AA_Gap,
-    
+
     # BioSequences re-exports
     Alphabet,
     BioSequence,
@@ -89,65 +68,65 @@ export
     DNAAlphabet,
     RNAAlphabet,
     translate,
-    
-    
-    ###
-    ### Mers
-    ###
-
-    # Type & aliases
-    Kmer,
-    DNAKmer,
-    DNA27mer,
-    DNA31mer,
-    DNA63mer,
-    RNAKmer,
-    RNA27mer,
-    RNA31mer,
-    RNA63mer,
-    AAKmer,
-    DNACodon,
-    RNACodon,
+    complement,
+    reverse_complement,
+    canonical,
+    iscanonical
 
-    # Iteration
-    EveryKmer,
-    SpacedKmers,
-    EveryCanonicalKmer,
-    SpacedCanonicalKmers,
-    fw_neighbors,
-    bw_neighbors,
+# TODO: Remove this ugly hack when 1.11 becomes LTS
+if VERSION >= v"1.11.0-DEV.469"
+    let str = """
+        public unsafe_shift_from,
+            shift_encoding,
+            unsafe_extract,
+            RecodingScheme,
+            Copyable,
+            TwoToFour,
+            FourToTwo,
+            AsciiEncode,
+            GenericRecoding
+        """
+        eval(Meta.parse(str))
+    end
+end
 
-    # Immutable operators
-    push,
-    delete,
+# Kmers.jl is tightly coupled to BioSequences and relies on much of its internals.
+# Hence, we do not care about carefully importing specific symbols
+using BioSequences
+using BioSymbols: BioSymbol
 
-    # Translation
-    reverse_translate,
-    reverse_translate!,
-    ReverseGeneticCode,
-    
-    ###
-    ### Sequence literals
-    ###
-    
-    @mer_str,
-    @bigmer_str
+# This is a documented method, not internals
+using Base: tail
 
-using BioSequences
+"""
+    Kmers.Unsafe
+
+Internal trait object used to access unsafe methods of functions.
+`unsafe` is the singleton of `Unsafe`.
+"""
+struct Unsafe end
+const unsafe = Unsafe()
 
-ispermitted(::DNAAlphabet{2}, nt::DNA) = count_ones(nt) == 1 && isvalid(nt)
-ispermitted(::DNAAlphabet{2}, data::UInt) = data < UInt(4)
-ispermitted(::DNAAlphabet{4}, nt::DNA) = isvalid(nt)
-ispermitted(::DNAAlphabet{4}, data::UInt) = isvalid(DNA, data)
-ispermitted(::AminoAcidAlphabet, aa::AminoAcid) = reinterpret(UInt8, aa) <= reinterpret(UInt8, AA_Gap)
-ispermitted(::AminoAcidAlphabet, data::UInt) = data <= 0x1b
+const FourBit = Union{DNAAlphabet{4}, RNAAlphabet{4}}
+const TwoBit = Union{DNAAlphabet{2}, RNAAlphabet{2}}
+const BitInteger =
+    Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128}
 
+include("tuple_bitflipping.jl")
 include("kmer.jl")
+include("construction.jl")
+include("indexing.jl")
+include("transformations.jl")
+include("revtrans.jl")
+
+include("iterators/common.jl")
+include("iterators/FwKmers.jl")
+include("iterators/CanonicalKmers.jl")
+include("iterators/UnambiguousKmers.jl")
+include("iterators/SpacedKmers.jl")
 
-include("kmer_iteration/AbstractKmerIterator.jl")
-include("kmer_iteration/EveryKmer.jl")
-include("kmer_iteration/SpacedKmers.jl")
-include("kmer_iteration/EveryCanonicalKmer.jl")
-include("kmer_iteration/SpacedCanonicalKmers.jl")
+if !isdefined(Base, :get_extension)
+    include("../ext/StringViewsExt.jl")
+end
 
 end # module
diff --git a/src/construction.jl b/src/construction.jl
new file mode 100644
index 0000000..032c3aa
--- /dev/null
+++ b/src/construction.jl
@@ -0,0 +1,359 @@
+################################################
+# Trait dispatch
+################################################
+
+"""
+    RecodingScheme
+
+Trait object which determines the methods used to recode
+data from one sequence into a `BioSequence`.
+Any given construction of a kmer from a source sequence will
+dispatch to one of the concrete subtypes of `RecodingScheme`
+to determine how the data is copied most effectively.
+"""
+abstract type RecodingScheme end
+
+"""
+    Copyable <: RecodingScheme
+
+Trait object that signifies that two sequences share identical encodings,
+such that the encoded sequence can be copied directly.
+This is the case for e.g. sequences of the same alphabet, or two
+sequences of the alphabets `DNAAlphabet{2}` and `RNAAlphabet{2}`
+"""
+struct Copyable <: RecodingScheme end
+
+"""
+    TwoToFour <: RecodingScheme
+
+Trait object that signifies a recoding from a 4-bit nucleotide sequence
+to a 2-bit nucleotide sequence.
+This can be more efficient than using `GenericRecoding`
+"""
+struct TwoToFour <: RecodingScheme end
+
+"""
+    FourToTwo <: RecodingScheme
+
+Trait object that signifies a recoding from a 2-bit nucleotide sequence
+to a 4-bit nucleotide sequence.
+This can be more efficient than using `GenericRecoding`
+"""
+struct FourToTwo <: RecodingScheme end
+
+"""
+    AsciiEncode <: RecodingScheme
+
+Trait object that signifies a recoding from an ASCII-encoded
+`AbstractVector{UInt8}` to an `Alphabet` that implements the
+`BioSequences.AsciiAlphabet` trait.
+The validity of the bytes are checked during the recoding, including
+checking that the sequence is actually ASCII encoded.
+"""
+struct AsciiEncode <: RecodingScheme end
+
+"""
+    GenericRecoding <: RecodingScheme
+
+Trait object that signifies that none of the other recoding schemes
+apply, that the fallback implementations must be used instead.
+"""
+struct GenericRecoding <: RecodingScheme end
+
+"""
+    is_ascii(::Type{T})::Bool
+
+Trait function. Should return `true` for `AbstractVector{UInt8}`, or for
+string types for which `codeunits(s)` returns an `AbstractVector{UInt8}`, where
+every ASCII byte in the string is perserved in the vector.
+This is true for all UTF8, latin1 and ASCII encoded string types.
+"""
+is_ascii(::Type) = false
+is_ascii(::Type{<:Union{String, SubString{String}}}) = true
+is_ascii(::Type{<:AbstractVector{UInt8}}) = true
+
+function RecodingScheme(A::Alphabet, source_type::Type)::RecodingScheme
+    return if source_type <: BioSequence
+        if BioSequences.encoded_data_eltype(source_type) <: BitInteger
+            As = Alphabet(source_type)
+            if As == A
+                Copyable()
+            elseif As isa TwoBit && A isa TwoBit
+                Copyable()
+            elseif As isa FourBit && A isa FourBit
+                Copyable()
+            elseif As isa FourBit && A isa TwoBit
+                FourToTwo()
+            elseif As isa TwoBit && A isa FourBit
+                TwoToFour()
+            else
+                GenericRecoding()
+            end
+        else
+            GenericRecoding()
+        end
+    elseif is_ascii(source_type) && BioSequences.codetype(A) isa BioSequences.AsciiAlphabet
+        AsciiEncode()
+    else
+        GenericRecoding()
+    end
+end
+
+include("construction_utils.jl")
+
+################################################
+# Unsafe extract
+################################################
+
+@noinline function throw_uncertain(A::Alphabet, T::Type{<:BioSymbol}, enc::Unsigned)
+    throw(BioSequences.EncodeError(A, reinterpret(T, enc % UInt8)))
+end
+
+#=
+"Extract a full kmer at a given index of a sequence.
+Note: These methods don't do any bounds checking"
+function unsafe_extract end
+
+@inline function unsafe_extract(
+    ::TwoToFour,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = left_shift(UInt(1), UInt(BioSequences.extract_encoded_element(seq, i)))
+        (_, data) = leftshift_carry(data, 4, encoding)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::FourToTwo,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, i))::UInt
+        isone(count_ones(encoding)) || throw_uncertain(Alphabet(T), eltype(seq), encoding)
+        (_, data) = leftshift_carry(data, 2, trailing_zeros(encoding) % UInt)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::Copyable,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(seq))
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, i))::UInt
+        (_, data) = leftshift_carry(data, bps, encoding)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::AsciiEncode,
+    ::Type{T},
+    seq::AbstractVector{UInt8},
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(T))
+    @inbounds for i in from_index:(from_index + ksize(T) - 1)
+        byte = seq[i]
+        encoding = BioSequences.ascii_encode(Alphabet(T), byte)
+        if encoding > 0x7f
+            throw(BioSequences.EncodeError(Alphabet(T), byte))
+        end
+        (_, data) = leftshift_carry(data, bps, encoding % UInt)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::GenericRecoding,
+    ::Type{T},
+    seq,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(T))
+    @inbounds for i in 1:ksize(T)
+        symbol = convert(eltype(T), seq[i])
+        encoding = UInt(BioSequences.encode(Alphabet(T), symbol))
+        (_, data) = leftshift_carry(data, bps, encoding)
+    end
+    T(unsafe, data)
+end
+=#
+
+################################################
+# Constructors with full parameterisation
+################################################
+
+function Kmer{A, K, N}(x) where {A, K, N}
+    check_kmer(Kmer{A, K, N})
+    build_kmer(RecodingScheme(A(), typeof(x)), Kmer{A, K, N}, x)
+end
+
+# BioSequences support indexing and fast length checks
+@inline function build_kmer(R::RecodingScheme, ::Type{T}, s::BioSequence) where {T}
+    length(s) == ksize(T) || error("Length of sequence must be K elements to build Kmer")
+    unsafe_extract(R, T, s, 1)
+end
+
+# LongSequence with compatible alphabet: Extract whole coding elements
+@inline function build_kmer(::Copyable, ::Type{T}, s::LongSequence) where {T}
+    length(s) == ksize(T) || error("Length of sequence must be K elements to build Kmer")
+    bps = BioSequences.BitsPerSymbol(Alphabet(T))
+    data = ntuple(i -> BioSequences.reversebits(@inbounds(s.data[i]), bps), Val{nsize(T)}())
+    (_, data) = rightshift_carry(data, bits_unused(T), zero(UInt))
+    T(unsafe, data)
+end
+
+# TODO: LongSubSeq with compatible alphabet
+# Note: LongSequence may be UInt64 whereas kmers use UInt32
+
+# For UTF8-strings combined with an ASCII kmer alphabet, we convert to byte vector
+@inline function build_kmer(
+    R::AsciiEncode,
+    ::Type{T},
+    s::Union{String, SubString{String}},
+) where {T}
+    build_kmer(R, T, codeunits(s))
+end
+
+# For byte vectors, we can build a kmer iff the kmer alphabet is AsciiAlphabet
+@inline function build_kmer(R::AsciiEncode, ::Type{T}, s::AbstractVector{UInt8}) where {T}
+    length(s) == ksize(T) || error("Length of sequence must be K elements to build Kmer")
+    unsafe_extract(R, T, s, 1)
+end
+
+# The generic fallback - dispatch on whether we can check length once
+@inline function build_kmer(R::RecodingScheme, T::Type, s)
+    build_kmer(Base.IteratorSize(typeof(s)), R, T, s)
+end
+
+@inline function build_kmer(::Base.SizeUnknown, ::RecodingScheme, T::Type, s)
+    data = zero_tuple(T)
+    A = Alphabet(T)
+    bps = BioSequences.bits_per_symbol(A)
+    i = 0
+    for element in s
+        i += 1
+        i > ksize(T) && error("Length of sequence must be K elements to build Kmer")
+        symbol = convert(eltype(A), element)
+        carry = UInt(BioSequences.encode(A, symbol))
+        (_, data) = leftshift_carry(data, bps, carry)
+    end
+    i == ksize(T) || error("Length of sequence must be K elements to build Kmer")
+    T(unsafe, data)
+end
+
+@inline function build_kmer(
+    ::Union{Base.HasLength, Base.HasShape},
+    ::RecodingScheme,
+    T::Type,
+    s,
+)
+    length(s) == ksize(T) || error("Length of sequence must be K elements to build Kmer")
+    data = zero_tuple(T)
+    A = Alphabet(T)
+    bps = BioSequences.bits_per_symbol(A)
+    for element in s
+        symbol = convert(eltype(A), element)
+        carry = UInt(BioSequences.encode(A, symbol))
+        (_, data) = leftshift_carry(data, bps, carry)
+    end
+    T(unsafe, data)
+end
+
+################################################
+# Derived constructors
+################################################
+
+Kmer{A, K}(x) where {A, K} = derive_type(Kmer{A, K})(x)
+Kmer{A1}(x::Kmer{A2, K, N}) where {A1, A2, K, N} = Kmer{A1, K, N}(x)
+
+################################################
+# Construct other types from Kmers
+################################################
+
+function BioSequences.LongSequence{A}(kmer::Kmer{A}) where {A <: Alphabet}
+    ratio = div(sizeof(UInt64), sizeof(UInt))
+    data = zeros(UInt64, cld(n_coding_elements(typeof(kmer)), ratio))
+    @inbounds for i in 1:n_coding_elements(typeof(kmer))
+        target = cld(i, ratio)
+        src = kmer.data[i] % UInt64
+        if ratio == 2 && iseven(i)
+            src <<= 32
+        end
+        data[target] |= src
+    end
+    bps = BioSequences.BitsPerSymbol(A())
+    @inbounds for i in eachindex(data)
+        data[i] = BioSequences.reversebits(data[i], bps)
+    end
+    bu = bits_unused(typeof(kmer))
+    @inbounds if !iszero(bu)
+        data[end] = data[end] >> bu
+    end
+    LongSequence{A}(data, length(kmer) % UInt)
+end
+
+# TODO: Do we want specialized constructors to contruct cross-alphabet longseqs
+# from kmers?
+
+################################################
+# String literals
+################################################
+
+"""
+    @mer_str -> Kmer
+
+Construct a `Kmer` from the given string. The macro must be used with a flag
+after the input string, e.g. `d` in `mer"TAG"d` or `a` in `mer"PCW"a`, signifying
+the alphabet of the kmer.
+The flags `d = DNAAlphabet{2}`, `r = RNAAlphabet{2}` and `a = AminoAcidAlphabet`
+are recognized.
+
+Because the macro is resolved and the kmer is created at parse time,
+the macro is type stable, and may be used in high performance code.
+
+# Examples
+```jldoctest
+julia> mer"UGCUA"r
+RNA 5-mer:
+UGCUA
+
+julia> mer"YKVSTEDLLKKR"a
+AminoAcid 12-mer:
+YKVSTEDLLKKR
+
+julia> mer"TATTAGCA"d
+DNA 8-mer:
+TATTAGCA
+```
+"""
+macro mer_str(seq, flag)
+    trimmed = BioSequences.remove_newlines(seq)
+    ncu = ncodeunits(trimmed)
+    # Unlike @dna_str, we default to 2-bit alphabets, because kmers
+    # by convention are usually 2-bit only
+    if flag == "dna" || flag == "d"
+        Kmer{DNAAlphabet{2}, ncu}(trimmed)
+    elseif flag == "rna" || flag == "r"
+        Kmer{RNAAlphabet{2}, ncu}(trimmed)
+    elseif flag == "aa" || flag == "a"
+        Kmer{AminoAcidAlphabet, ncu}(trimmed)
+    else
+        error("Invalid type flag: '$(flag)'")
+    end
+end
diff --git a/src/construction_utils.jl b/src/construction_utils.jl
new file mode 100644
index 0000000..3b6463d
--- /dev/null
+++ b/src/construction_utils.jl
@@ -0,0 +1,236 @@
+# This file contains construction utilities that are public, allowing users
+# to create custom K-mer types such as syncmers, strobemers, minimizers etc.
+
+# unsafe_extract
+# shift_encoding
+# shift_seq
+
+"""
+    unsafe_extract(::RecodingScheme, T::Type{<:Kmer}, seq, from::Int) -> T
+
+Extract a `Kmer` of type `T` from `seq` beginning at index `from`.
+This function is useful to create kmer or kmer-like types.
+
+This function does not do any bounds checking, so the user must know
+that `from:from+K-1` is inbounds in `seq`.
+The validity of the data in the `seq` is validated by this function.
+
+# Examples
+```jldoctest
+julia> seq = b"TAGCTAGA";
+
+julia> Kmers.unsafe_extract(Kmers.AsciiEncode(), DNAKmer{4, 1}, seq, 2)
+DNA 4-mer:
+AGCT
+```
+"""
+@inline function unsafe_extract(
+    ::TwoToFour,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = left_shift(UInt(1), UInt(BioSequences.extract_encoded_element(seq, i)))
+        (_, data) = leftshift_carry(data, 4, encoding)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::FourToTwo,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, i))::UInt
+        isone(count_ones(encoding)) || throw_uncertain(Alphabet(T), eltype(seq), encoding)
+        (_, data) = leftshift_carry(data, 2, trailing_zeros(encoding) % UInt)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::Copyable,
+    ::Type{T},
+    seq::BioSequence,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(seq))
+    for i in from_index:(from_index + ksize(T) - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, i))::UInt
+        (_, data) = leftshift_carry(data, bps, encoding)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::AsciiEncode,
+    ::Type{T},
+    seq::AbstractVector{UInt8},
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(T))
+    @inbounds for i in from_index:(from_index + ksize(T) - 1)
+        byte = seq[i]
+        encoding = BioSequences.ascii_encode(Alphabet(T), byte)
+        if encoding > 0x7f
+            throw(BioSequences.EncodeError(Alphabet(T), byte))
+        end
+        (_, data) = leftshift_carry(data, bps, encoding % UInt)
+    end
+    T(unsafe, data)
+end
+
+@inline function unsafe_extract(
+    ::GenericRecoding,
+    ::Type{T},
+    seq,
+    from_index,
+) where {T <: Kmer}
+    data = zero_tuple(T)
+    bps = BioSequences.bits_per_symbol(Alphabet(T))
+    @inbounds for i in 0:(ksize(T) - 1)
+        symbol = convert(eltype(T), seq[from_index + i])
+        encoding = UInt(BioSequences.encode(Alphabet(T), symbol))
+        (_, data) = leftshift_carry(data, bps, encoding)
+    end
+    T(unsafe, data)
+end
+
+##########################
+# Shift encoding
+##########################
+
+"""
+    shift_encoding(kmer::T, encoding::UInt) where {T <: Kmer} -> T
+
+Add `encoding`, a valid encoding in the alphabet of the `kmer`,
+to the end of `kmer` and discarding the first symbol in `kmer`.
+
+It is the user's responsibility to ensure that `encoding` is valid.
+
+# Examples
+```jldoctest
+julia> enc = UInt(0x0a); # encoding of DNA_Y in 4-bit alphabets
+
+julia> kmer = Kmer{DNAAlphabet{4}, 4}("TAGA");
+
+julia> Kmers.shift_encoding(kmer, enc)
+DNA 4-mer:
+AGAY
+```
+"""
+@inline function shift_encoding(kmer::Kmer, encoding::UInt)
+    isempty(kmer) && return kmer
+    bps = BioSequences.bits_per_symbol(kmer)
+    (_, new_data) = leftshift_carry(kmer.data, bps, encoding)
+    typeof(kmer)(unsafe, (first(new_data) & get_mask(typeof(kmer)), Base.tail(new_data)...))
+end
+
+###########################
+
+"""
+    unsafe_shift_from(::RecodingScheme, kmer::T, seq, from::Int, ::Val{S}) -> T
+
+Extract `S::Int` symbols from sequence `seq` at positions `from:from+S-1`,
+and shift them into `kmer`.
+
+This function does not do any bounds checking, so it is the user's
+responsibility to ensure that `from` is inbounds, and the recoding scheme
+valid.
+It is assumed that `S < K`, where `K == length(kmer)`. If `S ≥ K`, use
+[`unsafe_extract`](@ref) instead.
+
+# Examples
+```jldoctest
+julia> seq = dna"TAGCGGA";
+
+julia> kmer = mer"GGTG"d;
+
+julia> Kmers.unsafe_shift_from(Kmers.FourToTwo(), kmer, seq, 3, Val(2))
+DNA 4-mer:
+TGGC
+```
+"""
+@inline function unsafe_shift_from(
+    ::GenericRecoding,
+    kmer::Kmer,
+    seq,
+    from::Int,
+    ::Val{S},
+) where {S}
+    for i in 0:(S - 1)
+        symbol = @inbounds seq[from + i]
+        kmer = shift(kmer, convert(eltype(kmer), symbol))
+    end
+    kmer
+end
+
+@inline function unsafe_shift_from(
+    ::Copyable,
+    kmer::Kmer,
+    seq::BioSequence,
+    from::Int,
+    ::Val{S},
+) where {S}
+    for i in 0:(S - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, from + i))
+        kmer = shift_encoding(kmer, encoding)
+    end
+    kmer
+end
+
+@inline function unsafe_shift_from(
+    ::TwoToFour,
+    kmer::Kmer{<:NucleicAcidAlphabet{4}},
+    seq::BioSequence{<:NucleicAcidAlphabet{2}},
+    from::Int,
+    ::Val{S},
+) where {S}
+    for i in 0:(S - 1)
+        encoding =
+            left_shift(UInt(1), UInt(BioSequences.extract_encoded_element(seq, from + i)))
+        kmer = shift_encoding(kmer, encoding)
+    end
+    kmer
+end
+
+@inline function unsafe_shift_from(
+    ::FourToTwo,
+    kmer::Kmer{<:NucleicAcidAlphabet{2}},
+    seq::BioSequence{<:NucleicAcidAlphabet{4}},
+    from::Int,
+    ::Val{S},
+) where {S}
+    for i in 0:(S - 1)
+        encoding = UInt(BioSequences.extract_encoded_element(seq, from + i))::UInt
+        isone(count_ones(encoding)) ||
+            throw_uncertain(Alphabet(kmer), eltype(seq), encoding)
+        kmer = shift_encoding(kmer, trailing_zeros(encoding) % UInt)
+    end
+    kmer
+end
+
+@inline function unsafe_shift_from(
+    ::AsciiEncode,
+    kmer::Kmer,
+    seq::AbstractVector{UInt8},
+    from::Int,
+    ::Val{S},
+) where {S}
+    for i in 0:(S - 1)
+        byte = @inbounds seq[from + i]
+        encoding = BioSequences.ascii_encode(Alphabet(typeof(kmer)), byte)
+        if encoding > 0x7f
+            throw(BioSequences.EncodeError(Alphabet(typeof(kmer)), byte))
+        end
+        kmer = shift_encoding(kmer, encoding % UInt)
+    end
+    kmer
+end
diff --git a/src/counting.jl b/src/counting.jl
deleted file mode 100644
index cbd9340..0000000
--- a/src/counting.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-###
-### Mer specific specializations of src/biosequence/counting.jl
-###
-
-for i in [(:_count_a, :a_bitcount), (:_count_c, :c_bitcount), (:_count_g, :g_bitcount), (:_count_t, :t_bitcount)]
-    @eval begin
-        @inline function $(i[1])(alph::A, head::UInt64, tail...) where {A<:NucleicAcidAlphabet}
-            return $BioSequences.$(i[2])(head, alph) + $(i[1])(alph, tail...)
-        end
-        @inline $(i[1])(alph::A) where {A<:NucleicAcidAlphabet} = 0
-    end
-end
-
-@inline function _count_gc(alph::A, head::UInt64, tail...) where {A<:NucleicAcidAlphabet}
-    return BioSequences.gc_bitcount(head, alph) + _count_gc(alph, tail...)
-end
-@inline _count_gc(::A) where {A<:NucleicAcidAlphabet} = 0
-
-count_a(x::Kmer{A,K,N})  where {A<:NucleicAcidAlphabet,K,N} = _count_a(A(), x.data...) - n_unused(x)
-count_c(x::Kmer{A,K,N})  where {A<:NucleicAcidAlphabet,K,N} = _count_c(A(), x.data...)
-count_g(x::Kmer{A,K,N})  where {A<:NucleicAcidAlphabet,K,N} = _count_g(A(), x.data...)
-count_t(x::Kmer{A,K,N})  where {A<:NucleicAcidAlphabet,K,N} = _count_t(A(), x.data...)
-
-count_gc(x::Kmer{A,K,N}) where {A<:NucleicAcidAlphabet,K,N} = _count_gc(A(), x.data...)
-Base.count(::typeof(isGC), x::Kmer{A,K,N}) where {A<:NucleicAcidAlphabet,K,N} = count_gc(x)
-
-# TODO: Expand to Amino Acid Kmers as well...
-@inline function Base.count(::typeof(!=), a::Kmer{A,K,N}, b::Kmer{A,K,N}) where {A<:NucleicAcidAlphabet,K,N}
-    ad = a.data
-    bd = b.data
-    sum = 0
-    @inbounds for i in 1:N
-        sum += BioSequences.mismatch_bitcount(ad[i], bd[i], A())
-    end
-    return sum
-end
-
-# TODO: Expand to Amino Acid Kmers as well...
-@inline function Base.count(::typeof(==), a::Kmer{A,K,N}, b::Kmer{A,K,N}) where {A<:NucleicAcidAlphabet,K,N}
-    ad = a.data
-    bd = b.data
-    sum = 0
-    @inbounds for i in 1:N
-        sum += BioSequences.match_bitcount(ad[i], bd[i], A())
-    end
-    return sum - n_unused(a)
-end
\ No newline at end of file
diff --git a/src/indexing.jl b/src/indexing.jl
index 021926a..daa28dc 100644
--- a/src/indexing.jl
+++ b/src/indexing.jl
@@ -1,35 +1,86 @@
-@inline BioSequences.encoded_data_eltype(::Type{<:Kmer}) = UInt64
-
 @inline function BioSequences.extract_encoded_element(seq::Kmer, i::Integer)
-    bi = BioSequences.bitindex(seq, i % UInt)
-    return BioSequences.extract_encoded_element(bi, seq.data)
+    T = typeof(seq)
+    bps = BioSequences.bits_per_symbol(Alphabet(seq)) % UInt
+    index = div((i + n_unused(T) - 1) % UInt, per_word_capacity(T) % UInt) + 1
+    offset = mod(((elements_in_head(T) - i) * bps) % UInt, 8 * sizeof(UInt))
+    mask = UInt(1) << bps - 1
+    right_shift(@inbounds(seq.data[index]), offset) & mask
 end
 
-@inline Base.copy(seq::Kmer) = typeof(seq)(seq.data)
-
-@inline encoded_data(x::Kmer) = x.data
+# This is usually type unstable, but in user code, users may use constant-folded ranges,
+# e.g. f(x) = x[2:4]. In this case, we need it to compile to very efficient code.
+# Hence, it MUST use @inline
+@inline function Base.getindex(kmer::Kmer{A}, range::AbstractUnitRange{<:Integer}) where {A}
+    @boundscheck checkbounds(kmer, range)
+    K = length(range)
+    iszero(K) && return Kmer{A, 0, 0}(unsafe, ())
+    (i1, _) = BioSequences.bitindex(kmer, first(range))
+    (i2, o2) = BioSequences.bitindex(kmer, last(range))
+    data = kmer.data[i1:i2]
+    (_, data) = rightshift_carry(data, o2, zero(UInt))
+    T = derive_type(Kmer{A, K})
+    N = nsize(T)
+    # After the shift, the first coding element may be unused
+    new_data = if N < length(data)
+        tail(data)
+    else
+        data
+    end
+    T(unsafe, (first(new_data) & get_mask(T), tail(new_data)...))
+end
 
-@inline BioSequences.bitindex(seq::Kmer, i::Integer) = BioSequences.bitindex(BioSequences.BitsPerSymbol(seq), BioSequences.encoded_data_eltype(typeof(seq)), i + n_unused(seq))
+# Same as above: This needs to be able to inline if the indices are known statically
+@inline function Base.getindex(kmer::Kmer{A}, indices::AbstractVector{Bool}) where {A}
+    @boundscheck checkbounds(eachindex(kmer), indices)
+    K = sum(indices)
+    N = n_coding_elements(Kmer{A, K})
+    T = Kmer{A, K, N}
+    data = zero_tuple(T)
+    nbits = BioSequences.bits_per_symbol(A())
+    for (i, bool) in enumerate(indices)
+        bool || continue
+        (_, data) =
+            leftshift_carry(data, nbits, BioSequences.extract_encoded_element(kmer, i))
+    end
+    T(unsafe, data)
+end
 
+function Base.getindex(kmer::Kmer{A}, indices::AbstractVector{<:Integer}) where {A}
+    K = length(indices)
+    N = n_coding_elements(Kmer{A, K})
+    T = Kmer{A, K, N}
+    data = zero_tuple(T)
+    nbits = BioSequences.bits_per_symbol(A())
+    for i in indices
+        checkbounds(kmer, i)
+        (_, data) =
+            leftshift_carry(data, nbits, BioSequences.extract_encoded_element(kmer, i))
+    end
+    T(unsafe, data)
+end
 
-"""
-Base.getindex(seq::Kmer, i::UnitRange)
+@inline function BioSequences.bitindex(kmer::Kmer, i::Integer)
+    BioSequences.bitindex(kmer, UInt(i)::UInt)
+end
 
-Slice a Kmer by a UnitRange.
+@inline function BioSequences.bitindex(kmer::Kmer, i::UInt)::Tuple{UInt, UInt}
+    bps = BioSequences.bits_per_symbol(kmer) % UInt
+    bpe = (8 * sizeof(UInt)) % UInt
+    num = (UInt(i) - UInt(1) + n_unused(typeof(kmer)) % UInt) * bps
+    (i, o) = divrem(num, bpe)
+    o = bpe - o - bps
+    i + 1, o
+end
 
-!!! warning
-    Using this function will introduce performance penalties in your code if
-    you pass values of `i` that are not constants that can be propagated.
-"""
-@inline function Base.getindex(seq::Kmer{A}, i::UnitRange) where A
-    @boundscheck Base.checkbounds(seq, i)
-    ind(s, i) = BioSequences.index(BioSequences.bitindex(s, i))
-    off(s, i) = BioSequences.offset(BioSequences.bitindex(s, i))
-    isempty(i) && return Kmer{A, 0, 0}(())
-    rshift = (64 - off(seq, last(i) + 1)) & 63
-    stop = ind(seq, last(i))
-    start = BioSequences.index(BioSequences.bitindex(seq, first(i)) + rshift)
-    data = Kmers.rightshift_carry(seq.data, rshift)
-    T = Kmers.kmertype(Kmer{A, length(i)})
-    return T(data[start:stop])
-end
\ No newline at end of file
+@inline function Base.setindex(kmer::Kmer, i::Integer, s)
+    @boundscheck checkbounds(kmer, i)
+    bps = BioSequences.bits_per_symbol(kmer)
+    symbol = convert(eltype(kmer), s)
+    encoding = UInt(BioSequences.encode(Alphabet(kmer), symbol))
+    (i, o) = BioSequences.bitindex(kmer, i % UInt)
+    element = @inbounds kmer.data[i]
+    mask = left_shift(UInt(1) << bps - 1, o)
+    element &= ~mask
+    element |= left_shift(encoding, o)
+    typeof(kmer)(unsafe, @inbounds Base.setindex(kmer.data, element, i))
+end
diff --git a/src/iterators/CanonicalKmers.jl b/src/iterators/CanonicalKmers.jl
new file mode 100644
index 0000000..d972ced
--- /dev/null
+++ b/src/iterators/CanonicalKmers.jl
@@ -0,0 +1,225 @@
+"""
+    FwRvIterator{A <: NucleicAcidAlphabet, K, S}
+
+Iterates 2-tuples of `(forward, reverse_complement)` of every kmer of type
+`Kmer{A, K}` from the underlying sequence, in order.
+`S` signifies the type of the underlying sequence.
+This is typically more efficient than iterating over a `FwKmers` and
+computing `reverse_complement` on every element.
+
+See also: [`FwKmers`](@ref), [`CanonicalKmers`](@ref)
+
+# Examples:
+```jldoctest
+julia> collect(FwRvIterator{DNAAlphabet{4}, 3}("AGCGT"))
+3-element Vector{Tuple{Mer{3, DNAAlphabet{4}, 1}, Mer{3, DNAAlphabet{4}, 1}}}:
+ (AGC, GCT)
+ (GCG, CGC)
+ (CGT, ACG)
+
+julia> collect(FwRvIterator{DNAAlphabet{2}, 3}("AGNGT"))
+ERROR: cannot encode 0x4e (Char 'N') in DNAAlphabet{2}
+[...]
+```
+"""
+struct FwRvIterator{A <: NucleicAcidAlphabet, K, S}
+    seq::S
+
+    function FwRvIterator{A, K, S}(seq::S) where {A, K, S}
+        K isa Int || error("K must be an Int")
+        K > 0 || error("K must be at least 1")
+        new{A, K, S}(seq)
+    end
+end
+
+"`FwRvDNAIterator{K, S}`: Alias for `FwRvIterator{DNAAlphabet{2}, K, S}`"
+const FwRvDNAIterator{K, S} = FwRvIterator{DNAAlphabet{2}, K, S}
+
+"`FwRvRNAIterator{K, S}`: Alias for `FwRvIterator{RNAAlphabet{2}, K, S}`"
+const FwRvRNAIterator{K, S} = FwRvIterator{RNAAlphabet{2}, K, S}
+
+source_type(::Type{FwRvIterator{A, K, S}}) where {A, K, S} = S
+kmertype(::Type{<:FwRvIterator{A, K}}) where {A, K} = derive_type(Kmer{A, K})
+kmertype(it::FwRvIterator) = kmertype(typeof(it))
+Base.eltype(T::Type{<:FwRvIterator{A, K}}) where {A, K} =
+    Tuple{K, K} where {K <: kmertype(T)}
+
+@inline function Base.length(it::FwRvIterator{A, K, S}) where {A, K, S}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    max(0, length(src) - K + 1)
+end
+
+FwRvIterator{A, K}(s) where {A <: Alphabet, K} = FwRvIterator{A, K, typeof(s)}(s)
+
+@inline function Base.iterate(it::FwRvIterator{A, K, S}, state...) where {A, K, S}
+    iterate_kmer(RecodingScheme(A(), S), it, state...)
+end
+
+# For the first kmer, we extract it, then reverse complement.
+# When it's not done incrementally, it's faster to RC the whole
+# kmer at once.
+@inline function iterate_kmer(R::RecodingScheme, it::FwRvIterator{A, K}) where {A, K}
+    length(it.seq) < K && return nothing
+    fw = unsafe_extract(R, kmertype(it), it.seq, 1)
+    rv = reverse_complement(fw)
+    ((fw, rv), (fw, rv, K + 1))
+end
+
+# Here, we need to convert to an abstractvector
+@inline function iterate_kmer(
+    R::AsciiEncode,
+    it::FwRvIterator{A, K, S},
+) where {A <: NucleicAcidAlphabet, K, S}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    Base.require_one_based_indexing(src)
+    length(src) < K && return nothing
+    fw = unsafe_extract(R, kmertype(it), src, 1)
+    rv = reverse_complement(fw)
+    ((fw, rv), (fw, rv, K + 1))
+end
+
+@inline function iterate_kmer(
+    ::GenericRecoding,
+    it::FwRvIterator,
+    state::Tuple{Kmer, Kmer, Int},
+)
+    (fw, rv, i) = state
+    i > length(it.seq) && return nothing
+    symbol = convert(eltype(fw), @inbounds it.seq[i])
+    fw = shift(fw, symbol)
+    rv = shift_first(rv, complement(symbol))
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+@inline function iterate_kmer(
+    ::Copyable,
+    it::FwRvIterator{<:TwoBit, K, <:BioSequence{<:TwoBit}},
+    state::Tuple{Kmer, Kmer, Int},
+) where {K}
+    (fw, rv, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))
+    fw = shift_encoding(fw, encoding)
+    rv = shift_first_encoding(rv, encoding ⊻ 0x03)
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+@inline function iterate_kmer(
+    ::Copyable,
+    it::FwRvIterator{<:FourBit, K, <:BioSequence{<:FourBit}},
+    state::Tuple{Kmer, Kmer, Int},
+) where {K}
+    (fw, rv, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))
+    fw = shift_encoding(fw, encoding)
+    rc_encoding =
+        reinterpret(UInt8, complement(reinterpret(eltype(rv), encoding % UInt8))) % UInt
+    rv = shift_first_encoding(rv, rc_encoding)
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+@inline function iterate_kmer(::TwoToFour, it::FwRvIterator, state::Tuple{Kmer, Kmer, Int})
+    (fw, rv, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))
+    fw = shift_encoding(fw, left_shift(UInt(1), encoding))
+    rv = shift_first_encoding(rv, left_shift(UInt(1), encoding ⊻ 0x03))
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+@inline function iterate_kmer(
+    ::FourToTwo,
+    it::FwRvIterator{A, K, <:BioSequence},
+    state::Tuple{Kmer, Kmer, Int},
+) where {A, K}
+    (fw, rv, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))::UInt
+    isone(count_ones(encoding)) || throw_uncertain(Alphabet(fw), eltype(it.seq), encoding)
+    enc = trailing_zeros(encoding) % UInt
+    fw = shift_encoding(fw, enc)
+    rv = shift_first_encoding(rv, enc ⊻ 0x03)
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+@inline function iterate_kmer(
+    ::AsciiEncode,
+    it::FwRvIterator{A},
+    state::Tuple{Kmer, Kmer, Int},
+) where {A}
+    src = used_source(RecodingScheme(A(), source_type(typeof(it))), it.seq)
+    Base.require_one_based_indexing(src)
+    (fw, rv, i) = state
+    i > length(src) && return nothing
+    byte = @inbounds src[i]
+    encoding = BioSequences.ascii_encode(A(), byte)
+    if encoding > 0x7f
+        throw(BioSequences.EncodeError(A(), repr(byte)))
+    end
+    # Hopefully this branch is eliminated at compile time...
+    rc_encoding = if Alphabet(fw) isa FourBit
+        reinterpret(UInt8, complement(reinterpret(DNA, encoding)))
+    elseif Alphabet(fw) isa TwoBit
+        encoding ⊻ 0x03
+    else
+        error(
+            "Complementing encoding of a Nucleotide AsciiAlphabet which is neither 2 or 4 " *
+            "bits have not been implemented yet.",
+        )
+    end
+    fw = shift_encoding(fw, encoding % UInt)
+    rv = shift_first_encoding(rv, rc_encoding % UInt)
+    ((fw, rv), (fw, rv, i + 1))
+end
+
+"""
+    CanonicalKmers{A <: NucleicAcidAlphabet, K, S} <: AbstractKmerIterator{A, K}
+
+Iterator of canonical nucleic acid kmers. The result of this iterator is equivalent
+to calling `canonical` on each value of a `FwKmers` iterator, but may be more
+efficient.
+
+!!! note
+    When counting small kmers, it may be more efficient to count `FwKmers`,
+    then call `canonical` only once per unique kmer.
+
+Can be constructed more conventiently with the constructors `CanonicalDNAMers{K}(s)`
+`CanonicalRNAMers{K}(s)`
+
+# Examples:
+```jldoctest
+julia> collect(CanonicalRNAMers{3}("AGCGA"))
+3-element Vector{Kmer{RNAAlphabet{2}, 3, 1}}:
+ AGC
+ CGC
+ CGA
+```
+"""
+struct CanonicalKmers{A <: NucleicAcidAlphabet, K, S} <: AbstractKmerIterator{A, K}
+    it::FwRvIterator{A, K, S}
+end
+
+source_type(::Type{CanonicalKmers{A, K, S}}) where {A, K, S} = S
+@inline Base.length(it::CanonicalKmers) = length(it.it)
+
+# Constructors
+function CanonicalKmers{A, K}(s::S) where {S, A <: NucleicAcidAlphabet, K}
+    CanonicalKmers{A, K, S}(FwRvIterator{A, K}(s))
+end
+function CanonicalKmers{A, K, S}(s::S) where {S, A <: NucleicAcidAlphabet, K}
+    CanonicalKmers{A, K, S}(FwRvIterator{A, K}(s))
+end
+
+"`CanonicalDNAMers{K, S}`: Alias for `CanonicalKmers{DNAAlphabet{2}, K, S}`"
+const CanonicalDNAMers{K, S} = CanonicalKmers{DNAAlphabet{2}, K, S}
+
+"`CanonicalRNAMers{K, S}`: Alias for `CanonicalKmers{RNAAlphabet{2}, K, S}`"
+const CanonicalRNAMers{K, S} = CanonicalKmers{RNAAlphabet{2}, K, S}
+
+@inline function Base.iterate(it::CanonicalKmers{A, K, S}, state...) where {A, K, S}
+    it = iterate(it.it, state...)
+    isnothing(it) && return nothing
+    ((fw, rv), state) = it
+    (fw < rv ? fw : rv, state)
+end
diff --git a/src/iterators/FwKmers.jl b/src/iterators/FwKmers.jl
new file mode 100644
index 0000000..eb6af9e
--- /dev/null
+++ b/src/iterators/FwKmers.jl
@@ -0,0 +1,129 @@
+"""
+    FwKmers{A <: Alphabet, K, S} <: AbstractKmerIterator{A, K}
+
+Iterator of forward kmers. `S` signifies the type of the underlying sequence,
+and the eltype of the iterator is `Kmer{A, K, N}` with the appropriate `N`.
+The elements in a `FwKmers{A, K, S}(s::S)` correspond to all the `Kmer{A, K}`
+in `s`, in order. 
+
+Can be constructed more conventiently with the constructors `FwDNAMers{K}(s)`
+and similar also for `FwRNAMers` and `FwAAMers`.
+
+# Examples:
+```jldoctest
+julia> s = "AGCGTATA";
+
+julia> v = collect(FwDNAMers{3}(s));
+
+julia> v == [DNAKmer{3}(s[i:i+2]) for i in 1:length(s)-2]
+true
+
+julia> eltype(v), length(v)
+(Kmer{DNAAlphabet{2}, 3, 1}, 6)
+
+julia> collect(FwRNAMers{3}(rna"UGCDUGAVC"))
+ERROR: cannot encode D in RNAAlphabet{2}
+```
+"""
+struct FwKmers{A <: Alphabet, K, S} <: AbstractKmerIterator{A, K}
+    seq::S
+
+    function FwKmers{A, K, S}(seq::S) where {A, K, S}
+        K isa Int || error("K must be an Int")
+        K > 0 || error("K must be at least 1")
+        new{A, K, S}(seq)
+    end
+end
+
+source_type(::Type{FwKmers{A, K, S}}) where {A, K, S} = S
+
+@inline function Base.length(it::FwKmers{A, K, S}) where {A, K, S}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    max(0, length(src) - K + 1)
+end
+
+# Constructors
+FwKmers{A, K}(s) where {A <: Alphabet, K} = FwKmers{A, K, typeof(s)}(s)
+
+"`FwDNAMers{K, S}`: Alias for `FwKmers{DNAAlphabet{2}, K, S}`"
+const FwDNAMers{K, S} = FwKmers{DNAAlphabet{2}, K, S}
+
+"`FwRNAMers{K, S}`: Alias for `FwKmers{RNAAlphabet{2}, K, S}`"
+const FwRNAMers{K, S} = FwKmers{RNAAlphabet{2}, K, S}
+
+"`FwAAMers{K, S}`: Alias for `FwKmers{AminoAcidAlphabet, K, S}`"
+const FwAAMers{K, S} = FwKmers{AminoAcidAlphabet, K, S}
+
+@inline function Base.iterate(it::FwKmers{A, K, S}, state...) where {A, K, S}
+    iterate_kmer(RecodingScheme(A(), S), it, state...)
+end
+
+# For the first kmer, we just forward to `unsafe_extract`
+@inline function iterate_kmer(R::RecodingScheme, it::FwKmers)
+    length(it.seq) < ksize(eltype(it)) && return nothing
+    kmer = unsafe_extract(R, eltype(it), it.seq, 1)
+    (kmer, (kmer, ksize(eltype(it)) + 1))
+end
+
+# Here, we need to convert to an abstractvector
+@inline function iterate_kmer(
+    R::AsciiEncode,
+    it::FwKmers{A, K, S},
+) where {A <: Alphabet, K, S}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    Base.require_one_based_indexing(src)
+    length(src) < K && return nothing
+    kmer = unsafe_extract(R, eltype(it), src, 1)
+    (kmer, (kmer, K + 1))
+end
+
+@inline function iterate_kmer(::GenericRecoding, it::FwKmers, state::Tuple{Kmer, Int})
+    (kmer, i) = state
+    i > length(it.seq) && return nothing
+    symbol = @inbounds it.seq[i]
+    new_kmer = shift(kmer, convert(eltype(kmer), symbol))
+    (new_kmer, (new_kmer, nextind(it.seq, i)))
+end
+
+@inline function iterate_kmer(::Copyable, it::FwKmers, state::Tuple{Kmer, Int})
+    (kmer, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))
+    new_kmer = shift_encoding(kmer, encoding)
+    (new_kmer, (new_kmer, nextind(it.seq, i)))
+end
+
+@inline function iterate_kmer(::TwoToFour, it::FwKmers, state::Tuple{Kmer, Int})
+    (kmer, i) = state
+    i > length(it.seq) && return nothing
+    encoding = left_shift(UInt(1), UInt(BioSequences.extract_encoded_element(it.seq, i)))
+    new_kmer = shift_encoding(kmer, encoding)
+    (new_kmer, (new_kmer, nextind(it.seq, i)))
+end
+
+@inline function iterate_kmer(
+    ::FourToTwo,
+    it::FwKmers{A, K, <:BioSequence},
+    state::Tuple{Kmer, Int},
+) where {A, K}
+    (kmer, i) = state
+    i > length(it.seq) && return nothing
+    encoding = UInt(BioSequences.extract_encoded_element(it.seq, i))::UInt
+    isone(count_ones(encoding)) || throw_uncertain(Alphabet(kmer), eltype(it.seq), encoding)
+    kmer = shift_encoding(kmer, trailing_zeros(encoding) % UInt)
+    return (kmer, (kmer, nextind(it.seq, i)))
+end
+
+@inline function iterate_kmer(::AsciiEncode, it::FwKmers, state::Tuple{Kmer, Int})
+    src = used_source(RecodingScheme(Alphabet(eltype(it)), source_type(typeof(it))), it.seq)
+    Base.require_one_based_indexing(src)
+    (kmer, i) = state
+    i > length(src) && return nothing
+    byte = @inbounds src[i]
+    encoding = BioSequences.ascii_encode(Alphabet(eltype(it)), byte)
+    if encoding > 0x7f
+        throw(BioSequences.EncodeError(Alphabet(eltype(it)), repr(byte)))
+    end
+    kmer = shift_encoding(kmer, encoding % UInt)
+    return (kmer, (kmer, nextind(src, i)))
+end
diff --git a/src/iterators/SpacedKmers.jl b/src/iterators/SpacedKmers.jl
new file mode 100644
index 0000000..26fbfae
--- /dev/null
+++ b/src/iterators/SpacedKmers.jl
@@ -0,0 +1,139 @@
+"""
+    SpacedKmers{A <: Alphabet, K, J, S} <: AbstractKmerIterator{A, K}
+
+Iterator of kmers with step size. `J` signifies the step size, `S`
+the type of the underlying sequence, and the eltype of the iterator
+is `Kmer{A, K, N}` with the appropriate `N`.
+
+For example, a `SpacedKmers{AminoAcidAlphabet, 3, 5, Vector{UInt8}}` sampling
+over `seq::Vector{UInt8}` will sample all kmers corresponding to
+`seq[1:3], seq[6:8], seq[11:13]` etc.
+
+See also: [`each_codon`](@ref), [`FwKmers`](@ref)
+
+# Examples:
+```jldoctest
+julia> collect(SpacedDNAMers{3, 2}("AGCGTATA"))
+3-element Vector{Kmer{DNAAlphabet{2}, 3, 1}}:
+ AGC
+ CGT
+ TAT
+```
+"""
+struct SpacedKmers{A <: Alphabet, K, J, S} <: AbstractKmerIterator{A, K}
+    seq::S
+
+    function SpacedKmers{A, K, J, S}(seq::S) where {A, K, J, S}
+        K isa Int || error("K must be an Int")
+        K > 0 || error("K must be at least 1")
+        J isa Int || error("J must be an Int")
+        J > 0 || error("J must be at least 1")
+        new{A, K, J, S}(seq)
+    end
+end
+
+source_type(::Type{SpacedKmers{A, K, J, S}}) where {A, K, J, S} = S
+stepsize(::SpacedKmers{A, K, J}) where {A, K, J} = J
+
+@inline function Base.length(it::SpacedKmers{A, K, J}) where {A, K, J}
+    src = used_source(RecodingScheme(A(), source_type(typeof(it))), it.seq)
+    L = length(src)
+    L < K ? 0 : div((L - K), J) + 1
+end
+
+SpacedKmers{A, K, J}(s) where {A <: Alphabet, K, J} = SpacedKmers{A, K, J, typeof(s)}(s)
+
+"`SpacedDNAMers{K, J, S}`: Alias for `SpacedKmers{DNAAlphabet{2}, K, J, S}`"
+const SpacedDNAMers{K, J, S} = SpacedKmers{DNAAlphabet{2}, K, J, S}
+
+"`SpacedRNAMers{K, J, S}`: Alias for `SpacedKmers{RNAAlphabet{2}, K, J, S}`"
+const SpacedRNAMers{K, J, S} = SpacedKmers{RNAAlphabet{2}, K, J, S}
+
+"`SpacedAAMers{K, J, S}`: Alias for `SpacedKmers{AminoAcidAlphabet, K, J, S}`"
+const SpacedAAMers{K, J, S} = SpacedKmers{AminoAcidAlphabet, K, J, S}
+
+"""
+    each_codon(s::BioSequence{<:Union{DNAAlphabet, RNAAlphabet}})
+    each_codon(::Type{<:Union{DNA, RNA}}, s)
+
+Construct an iterator of nucleotide 3-mers with step size 3 from `s`.
+The sequence `s` may be an RNA or DNA biosequence, in which case the element
+type is inferred, or the element type may be specified explicitly, in which
+case `s` may be a byte-like sequence such as a `String` or `Vector{UInt8}`.
+
+This function returns [`SpacedKmers`](@ref) iterator.
+
+See also: [`SpacedKmers`](@ref)
+
+Examples:
+```jldoctest
+julia> collect(each_codon(DNA, "TGACGATCGAC"))
+3-element Vector{Kmer{DNAAlphabet{2}, 3, 1}}:
+ TGA
+ CGA
+ TCG
+```
+"""
+each_codon(::Type{DNA}, s) = SpacedDNAMers{3, 3}(s)
+each_codon(::Type{RNA}, s) = SpacedRNAMers{3, 3}(s)
+
+each_codon(s::BioSequence{<:DNAAlphabet}) = SpacedDNAMers{3, 3}(s)
+each_codon(s::BioSequence{<:RNAAlphabet}) = SpacedRNAMers{3, 3}(s)
+
+@inline function Base.iterate(it::SpacedKmers{A}, state...) where {A}
+    iterate_kmer(RecodingScheme(A(), source_type(typeof(it))), it, state...)
+end
+
+# TODO: Maybe in all kmer iterators, instantiate it with the source type,
+# so we don't have to get the source type in functions (and thus
+# it is allwoed to be a costly operation).
+# However, this means we instantiate e.g. a FwKmers{A, K, S} and change S
+# in the source type in the constructor
+@inline function iterate_kmer(
+    R::RecodingScheme,
+    it::SpacedKmers{A, K},
+) where {A <: Alphabet, K}
+    length(it.seq) < K && return nothing
+    kmer = unsafe_extract(
+        R,
+        eltype(it),
+        used_source(RecodingScheme(A(), source_type(typeof(it))), it.seq),
+        1,
+    )
+    next_index = 1 + max(stepsize(it), K)
+    (kmer, (kmer, next_index))
+end
+
+# Here, we need to convert to an abstractvector
+# TODO: This function and the one above can be merged with the FwKmers one?
+@inline function iterate_kmer(
+    R::AsciiEncode,
+    it::SpacedKmers{A, K, J, S},
+) where {A <: Alphabet, K, J, S}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    Base.require_one_based_indexing(src)
+    length(src) < K && return nothing
+    kmer = unsafe_extract(R, eltype(it), src, 1)
+    next_index = 1 + max(stepsize(it), K)
+    (kmer, (kmer, next_index))
+end
+
+@inline function iterate_kmer(
+    ::RecodingScheme,
+    it::SpacedKmers{A, K, J, S},
+    state,
+) where {A, K, S, J}
+    src = used_source(RecodingScheme(A(), S), it.seq)
+    R = RecodingScheme(A(), S)
+    Base.require_one_based_indexing(src)
+    (kmer, i) = state
+    i > lastindex(src) - min(K, J) + 1 && return nothing
+    next_i = i + J
+    # This branch should be resolved statically
+    if J ≥ K
+        kmer = unsafe_extract(R, eltype(it), src, i)
+    else
+        kmer = unsafe_shift_from(R, kmer, src, i, Val{J}())
+    end
+    (kmer, (kmer, next_i))
+end
diff --git a/src/iterators/UnambiguousKmers.jl b/src/iterators/UnambiguousKmers.jl
new file mode 100644
index 0000000..b386881
--- /dev/null
+++ b/src/iterators/UnambiguousKmers.jl
@@ -0,0 +1,148 @@
+"""
+    UnambiguousKmers{A <: TwoBit, K, S}
+
+Iterator of `(kmer, index)`, where `kmer` are 2-bit nucleic acid kmers in the
+underlying sequence, and `index::Int` the starting position of the kmer in the
+sequence.
+The extracted kmers differ from those of `FwKmers` in that any kmers
+containing ambiguous nucleotides are skipped, whereas using `FwKmers`, encountering
+unambiguous nucleotides result in an error.
+
+This iterator can be constructed more conventiently with the constructors
+`UnambiguousDNAMers{K}(s)` and `UnambiguousRNAMers{K}(s)`.
+
+!!! note
+    To obtain canonical unambiguous kmers, simply call `canonical` on each kmer output
+    by `UnambiguousKmers`.
+
+# Examples:
+```jldoctest
+julia> it = UnambiguousRNAMers{4}(dna"TGAGCWKCATC");
+
+julia> collect(it)
+3-element Vector{Tuple{Kmer{RNAAlphabet{2}, 4, 1}, Int64}}:
+ (UGAG, 1)
+ (GAGC, 2)
+ (CAUC, 8)
+```
+"""
+struct UnambiguousKmers{A <: TwoBit, K, S}
+    it::FwKmers{A, K, S}
+end
+
+Base.IteratorSize(::Type{<:UnambiguousKmers}) = Base.SizeUnknown()
+Base.IteratorSize(::Type{<:UnambiguousKmers{A, K, <:NucSeq{2}}}) where {A <: TwoBit, K} =
+    Base.HasLength()
+
+Base.length(it::UnambiguousKmers{A, K, <:NucSeq{2}}) where {A, K} = length(it.it)
+
+function Base.eltype(::Type{<:UnambiguousKmers{A, K}}) where {A, K}
+    Tuple{derive_type(Kmer{A, K}), Int}
+end
+
+source_type(::Type{UnambiguousKmers{A, K, S}}) where {A, K, S} = S
+
+# Constructors
+function UnambiguousKmers{A, K}(s::S) where {S, A <: TwoBit, K}
+    UnambiguousKmers{A, K, S}(FwKmers{A, K}(s))
+end
+function UnambiguousKmers{A, K, S}(s::S) where {S, A <: TwoBit, K}
+    UnambiguousKmers{A, K, S}(FwKmers{A, K}(s))
+end
+
+"`UnambiguousDNAMers{K, S}`: Alias for `UnambiguousKmers{DNAAlphabet{2}, K, S}`"
+const UnambiguousDNAMers{K, S} = UnambiguousKmers{DNAAlphabet{2}, K, S}
+
+"`UnambiguousRNAMers{K, S}`: Alias for `UnambiguousKmers{RNAAlphabet{2}, K, S}`"
+const UnambiguousRNAMers{K, S} = UnambiguousKmers{RNAAlphabet{2}, K, S}
+
+@inline function Base.iterate(it::UnambiguousKmers{A, K, S}, state...) where {A, K, S}
+    R = RecodingScheme(A(), S)
+    iterate_kmer(R, it, state...)
+end
+
+@inline function iterate_kmer(::Copyable, it::UnambiguousKmers)
+    itval = iterate(it.it)
+    isnothing(itval) && return nothing
+    (kmer, state) = itval
+    ((kmer, 1), state)
+end
+
+@inline function iterate_kmer(::Copyable, it::UnambiguousKmers, state::Tuple{Kmer, Integer})
+    itval = iterate(it.it, state)
+    isnothing(itval) && return nothing
+    (_, i) = state
+    (kmer, state) = itval
+    ((kmer, i - ksize(typeof(kmer)) + 1), state)
+end
+
+@inline function iterate_kmer(
+    ::RecodingScheme,
+    it::UnambiguousKmers{A, K, S},
+) where {A, K, S}
+    T = derive_type(Kmer{A, K})
+    state = (T(unsafe, zero_tuple(T)), K, 1)
+    iterate_kmer(RecodingScheme(A(), S), it, state)
+end
+
+@inline function iterate_kmer(
+    ::RecodingScheme,
+    it::UnambiguousKmers,
+    state::Tuple{Kmer, Int, Int},
+)
+    (kmer, remaining, index) = state
+    K = ksize(typeof(kmer))
+    while !iszero(remaining)
+        index > lastindex(it.it.seq) && return nothing
+        symbol = convert(eltype(kmer), it.it.seq[index])
+        index += 1
+        if isambiguous(symbol)
+            remaining = K
+        else
+            remaining -= 1
+            kmer = shift(kmer, symbol)
+        end
+    end
+    ((kmer, index - K), (kmer, 1, index))
+end
+
+@inline function iterate_kmer(
+    ::AsciiEncode,
+    it::UnambiguousKmers{A, K, S},
+    state::Tuple{Kmer, Int, Int},
+) where {A <: TwoBit, K, S}
+    src = used_source(RecodingScheme(A(), S), it.it.seq)
+    Base.require_one_based_indexing(src)
+    (kmer, remaining, index) = state
+    while !iszero(remaining)
+        index > lastindex(src) && return nothing
+        byte = @inbounds src[index]
+        index += 1
+        encoding = @inbounds ASCII_SKIPPING_LUT[(byte + 0x01) % Int]
+        if encoding == 0xff
+            throw(BioSequences.EncodeError(Alphabet(kmer), repr(byte)))
+        elseif encoding == 0xf0
+            remaining = K
+        else
+            remaining -= 1
+            kmer = shift_encoding(kmer, encoding % UInt)
+        end
+    end
+    ((kmer, index - K), (kmer, 1, index))
+end
+
+@inline function iterate_kmer(
+    ::FourToTwo,
+    it::UnambiguousKmers{A, K, S},
+    state::Tuple{Kmer, Int, Int},
+) where {A <: TwoBit, K, S}
+    (kmer, remaining, index) = state
+    while !iszero(remaining)
+        index > lastindex(it.it.seq) && return nothing
+        encoding = UInt(BioSequences.extract_encoded_element(it.it.seq, index))::UInt
+        kmer = shift_encoding(kmer, (trailing_zeros(encoding)) % UInt)
+        index += 1
+        remaining = isone(count_ones(encoding)) ? remaining - 1 : K
+    end
+    ((kmer, index - K), (kmer, 1, index))
+end
diff --git a/src/iterators/common.jl b/src/iterators/common.jl
new file mode 100644
index 0000000..dfbbbfc
--- /dev/null
+++ b/src/iterators/common.jl
@@ -0,0 +1,32 @@
+"""
+    AbstractKmerIterator{A <: Alphabet, K}
+
+Abstract type for kmer iterators. The element type is `Kmer{A, K, N}`,
+with the appropriately derived N.
+
+Functions to implement:
+* `Base.iterate`
+* `Base.length` or `Base.IteratorSize` if not `HasLength`
+"""
+abstract type AbstractKmerIterator{A <: Alphabet, K} end
+
+function Base.eltype(::Type{<:AbstractKmerIterator{A, K}}) where {A, K}
+    Kmer{A, K, n_coding_elements(Kmer{A, K})}
+end
+
+function used_source(::AsciiEncode, s::AbstractString)
+    is_ascii(typeof(s)) ? codeunits(s) : s
+end
+used_source(::RecodingScheme, s) = s
+
+const ASCII_SKIPPING_LUT = let
+    v = fill(0xff, 256)
+    for (i, s) in [(0, "Aa"), (1, "cC"), (2, "gG"), (3, "TtUu")], c in s
+        v[UInt8(c) + 1] = i
+    end
+    for c in "-MRSVWYHKDBN"
+        v[UInt8(c) + 1] = 0xf0
+        v[UInt8(lowercase(c)) + 1] = 0xf0
+    end
+    Tuple(v)
+end
diff --git a/src/kmer.jl b/src/kmer.jl
index b71a57a..246b6ef 100644
--- a/src/kmer.jl
+++ b/src/kmer.jl
@@ -1,590 +1,478 @@
-###
-### Kmer Type definition
-###
-
-# Include some basic tuple bitflipping ops - the secret sauce to efficiently
-# manipping Kmer's static data. 
-include("tuple_bitflipping.jl")
-
-"""
-    Kmers.Unsafe
-
-Trait object used to access unsafe methods of functions.
-`unsafe` is the singleton of `Unsafe`.
-"""
-struct Unsafe end
-const unsafe = Unsafe()
-
 """
     Kmer{A<:Alphabet,K,N} <: BioSequence{A}
 
-A parametric, immutable, bitstype for representing Kmers - short sequences.
-Given the number of Kmers generated from raw sequencing reads, avoiding
-repetetive memory allocation and triggering of garbage collection is important,
-as is the ability to effectively pack Kmers into arrays and similar collections.
-
-In practice that means we an immutable bitstype as the internal representation
-of these sequences. Thankfully, this is not much of a limitation - kmers are
-rarely manipulated and so by and large don't have to be mutable.
-
-Excepting their immutability, they fulfill the rest of the API and behaviours
-expected from a concrete `BioSequence` type, and non-mutating transformations
-of the type are still defined.
-
-!!! warning
-    Given their immutability, `setindex` and mutating sequence transformations
-    are not implemented for Kmers e.g. `reverse_complement!`. 
-!!! tip
-    Note that some sequence transformations that are not mutating are
-    available, since they can return a new kmer value as a result e.g.
-    `reverse_complement`. 
-"""
-struct Kmer{A<:Alphabet,K,N} <: BioSequence{A}
-    data::NTuple{N,UInt64}
-    
-    # This unsafe method do not clip the head
-    Kmer{A,K,N}(::Unsafe, data::NTuple{N,UInt64}) where {A<:Alphabet,K,N} = new{A,K,N}(data)
+An immutable bitstype for representing k-mers - short `BioSequences`
+of a fixed length `K`.
+Since they can be stored directly in registers, `Kmer`s are generally the most
+efficient type of `BioSequence`, when `K` is small and known at compile time.
 
-    function Kmer{A,K,N}(data::NTuple{N,UInt64}) where {A<:Alphabet,K,N}
-        checkmer(Kmer{A,K,N})
-        x = n_unused(Kmer{A,K,N}) * BioSequences.bits_per_symbol(A()) 
-        return new(_cliphead(x, data...))
-    end
-end
+The `N` parameter is derived from `A` and `K` and is not a free parameter.
 
-BioSequences.encoded_data(seq::Kmer{A,K,N}) where {A,K,N} = seq.data
+See also: [`DNAKmer`](@ref), [`RNAKmer`](@ref), [`AAKmer`](@ref), [`AbstractKmerIterator`](@ref)
 
-# Create a blank ntuple of appropriate length for a given Kmer with N.
-@inline blank_ntuple(::Type{Kmer{A,K,N}}) where {A,K,N} = ntuple(x -> zero(UInt64), Val{N}())
+# Examples
+```jldoctest
+julia> RNAKmer{5}("ACGUC")
+RNA 5-mer:
+ACGUC
 
-###
-### _build_kmer_data
-###
+julia> Kmer{DNAAlphabet{4}, 6}(dna"TGCTTA")
+DNA 6-mer:
+TGCTTA
 
-#=
-These are (hopefully!) very optimised kernel functions for building kmer internal
-data from individual elements or from sequences. Kmers themselves are static,
-tuple-based structs, and so I really didn't want these functions to create memory
-allocations or GC activity through use of vectors an such, for what should be
-the creation of a single, rather simple value.
-=#
+julia> AAKmer{5}((lowercase(i) for i in "KLWYR"))
+AminoAcid 5-mer:
+KLWYR
 
+julia> RNAKmer{3}("UAUC") # wrong length
+ERROR:
+[...]
+```
 """
-    _build_kmer_data(::Type{Kmer{A,K,N}}, seq::LongSequence{A}, from::Int = 1) where {A,K,N}
-
-Construct a ntuple of the bits data for an instance of a Kmer{A,K,N}.
+struct Kmer{A <: Alphabet, K, N} <: BioSequence{A}
+    # The number of UInt is always exactly the number needed, no less, no more.
+    # The first symbols pack into the first UInts
+    # An UInt with N elements pack into the lowest bits of the UInt, with the
+    # first symbols in the higher parts of the UInt.
+    # Hence, a sequence A-G of 16-bit elements would pack like:
+    # ( ABC, DEFG)
+    #  ^ 16 unused bits, the unused bits are always top bits of first UInt
+    # Unused bits are always zero
+
+    # This layout complicates some Kmer construction code, but simplifies comparison
+    # operators, and we really want Kmers to be efficient.
+    data::NTuple{N, UInt}
 
-This particular method is specialised for LongSequences, and for when the Kmer
-and LongSequence types used, share the same alphabet, since a lot of encoding /
-decoding can be skipped, and the problem is mostly one of shunting bits around.
-"""
-@inline function _build_kmer_data(::Type{Kmer{A,K,N}}, seq::LongSequence{A}, from::Int = 1) where {A,K,N}
-    checkmer(Kmer{A,K,N})
-    
-    bits_per_sym = BioSequences.bits_per_symbol(A()) # Based on alphabet type, should constant fold.
-    n_head = elements_in_head(Kmer{A,K,N}) # Based on kmer type, should constant fold.
-    n_per_chunk = per_word_capacity(Kmer{A,K,N}) # Based on kmer type, should constant fold.
-    
-    if from + K - 1 > length(seq)
-        return nothing
-    end
-    
-    # Construct the head.
-    head = zero(UInt64)
-    @inbounds for i in from:(from + n_head - 1)
-        bits = UInt64(BioSequences.extract_encoded_element(seq, i))
-        head = (head << bits_per_sym) | bits
-    end
-    
-    # And the rest of the sequence
-    idx = Ref(from + n_head)
-    tail = ntuple(Val{N - 1}()) do i
-        Base.@_inline_meta
-        body = zero(UInt64)
-        @inbounds for _ in 1:n_per_chunk
-            bits = UInt64(BioSequences.extract_encoded_element(seq, idx[]))
-            body = (body << bits_per_sym) | bits
-            idx[] += 1
-        end
-        return body
+    # This unsafe method do not clip the head
+    function Kmer{A, K, N}(::Unsafe, data::NTuple{N, UInt}) where {A <: Alphabet, K, N}
+        check_kmer(Kmer{A, K, N})
+        new{A, K, N}(data)
     end
-    
-    # Put head and tail together
-    return (head, tail...)
 end
 
+# Useful to do e.g. `mer"TAG"d isa Mer{3}`
+"""
+    Mer{K}
 
+Alias for `Kmer{<:Alphabet, K}`. Useful to dispatch on `K-mers` without regard
+for the alphabat
 
-###
-### Constructors
-###
+# Example
+```jldoctest
+julia> mer"DEKR"a isa Mer{4}
+true
 
+julia> DNAKmer{6}("TGATCA") isa Mer{6}
+true
+
+julia> RNACodon <: Mer{3}
+true
+```
 """
-    Kmer{A,K,N}(itr) where {A,K,N}
+const Mer{K} = Kmer{<:Alphabet, K}
 
-Construct a `Kmer{A,K,N}` from an iterable.
+# Aliases
+"Alias for `Kmer{DNAAlphabet{2},K,N}`"
+const DNAKmer{K, N} = Kmer{DNAAlphabet{2}, K, N}
 
-The most generic constructor.
+"Alias for `Kmer{RNAAlphabet{2},K,N}`"
+const RNAKmer{K, N} = Kmer{RNAAlphabet{2}, K, N}
 
-Currently the iterable must have `length` & support `getindex` with integers.
+"Alias for `Kmer{AminoAcidAlphabet,K,N}`"
+const AAKmer{K, N} = Kmer{AminoAcidAlphabet, K, N}
 
-# Examples
+"Alias for `DNAKmer{3,1}`"
+const DNACodon = DNAKmer{3, 1}
 
-```jldoctest
-julia> ntseq = LongSequence("TTAGC") # 4-bit DNA alphabet
-5nt DNA Sequence:
-TTAGC
+"Alias for `RNAKmer{3,1}`"
+const RNACodon = RNAKmer{3, 1}
 
-julia> DNAKmer{5}(ntseq) # 2-Bit DNA alphabet
-DNA 5-mer:
-TTAGC
-```
 """
-function Kmer{A,K,N}(itr) where {A,K,N}
-    checkmer(Kmer{A,K,N})
-    
-    seqlen = length(itr)
-    if seqlen != K
-        throw(ArgumentError("itr does not contain enough elements ($seqlen ≠ $K)"))
-    end
-    
-    ## All based on alphabet type of Kmer, so should constant fold.
-    bits_per_sym = BioSequences.bits_per_symbol(A())
-    n_head = elements_in_head(Kmer{A,K,N})
-    n_per_chunk = per_word_capacity(Kmer{A,K,N})
-    
-    # Construct the head.
-    head = zero(UInt64)
-    @inbounds for i in 1:n_head
-        (x, next_i) = iterate(itr, i)
-        sym = convert(eltype(Kmer{A,K,N}), x)
-        # Encode will throw if it cant encode an element.
-        head = (head << bits_per_sym) | UInt64(BioSequences.encode(A(), sym))
+    check_kmer(::Type{Kmer{A,K,N}}) where {A,K,N}
+
+Internal methods that checks that the type parameters are good.
+
+This function should compile to a noop in case the parameterization is good.
+"""
+@inline function check_kmer(::Type{Kmer{A, K, N}}) where {A, K, N}
+    if !(K isa Int)
+        throw(ArgumentError("K must be an Int"))
+    elseif K < 0
+        throw(ArgumentError("Bad kmer parameterisation. K must be greater than 0."))
     end
-    
-    # And the rest of the sequence
-    idx = Ref(n_head + 1)
-    tail = ntuple(Val{N - 1}()) do i
-        Base.@_inline_meta
-        body = zero(UInt64)
-        @inbounds for i in 1:n_per_chunk
-            (x, next_idx) = iterate(itr, idx[])
-            sym = convert(eltype(Kmer{A,K,N}), x)
-            # Encode will throw  if it cant encode an element.
-            body = (body << bits_per_sym) | UInt64(BioSequences.encode(A(), sym))
-            idx[] += 1
-        end
-        return body
+    n = cld((K * BioSequences.bits_per_symbol(A())) % UInt, (sizeof(UInt) * 8) % UInt) % Int
+    if !(N isa Int)
+        throw(ArgumentError("N must be an Int"))
+    elseif n !== N
+        # This has been significantly changed conceptually from before. Now we
+        # don't just check K, but *enforce* the most appropriate N for K.
+        throw(ArgumentError("Bad kmer parameterisation. For K = $K, N should be $n"))
     end
-    
-    data = (head, tail...)
-    
-    return Kmer{A,K,N}(data)
 end
 
-"""
-    Kmer{A,K,N}(seq::BioSequence{A})
+################################################
+# Compile-time functions computed on Kmer types
+################################################
 
-Construct a `Kmer{A,K,N}` from a `BioSequence{A}`.
+@inline ksize(::Type{<:Kmer{A, K}}) where {A, K} = K
+@inline nsize(::Type{<:Kmer{A, K, N}}) where {A, K, N} = N
+@inline n_unused(::Type{<:Kmer{A, K, N}}) where {A, K, N} = capacity(Kmer{A, K, N}) - K
+@inline bits_unused(T::Type{<:Kmer}) =
+    n_unused(T) * BioSequences.bits_per_symbol(Alphabet(T))
 
-This particular method is specialised for BioSequences, and for when the Kmer
-and BioSequence types used, share the same alphabet, since a lot of encoding /
-decoding can be skipped, and the problem is mostly one of shunting bits around.
-In the case where the alphabet of the Kmer and the alphabet of the BioSequence
-differ, dispatch to the more generic constructor occurs instead.
+@inline function n_coding_elements(::Type{<:Kmer{A, K}}) where {A, K}
+    cld(BioSequences.bits_per_symbol(A()) * K, 8 * sizeof(UInt))
+end
 
-# Examples
+@inline function per_word_capacity(::Type{<:Kmer{A}}) where {A}
+    div(8 * sizeof(UInt), BioSequences.bits_per_symbol(A()))
+end
 
-```jldoctest
-julia> ntseq = LongSequence{DNAAlphabet{2}}("TTAGC") # 2-bit DNA alphabet
-5nt DNA Sequence:
-TTAGC
+@inline function capacity(::Type{<:Kmer{A, K, N}}) where {A, K, N}
+    per_word_capacity(Kmer{A, K, N}) * N
+end
 
-julia> DNAKmer{5}(ntseq) # 2-Bit DNA alphabet
-DNA 5-mer:
-TTAGC
-```
-"""
-@inline function Kmer{A,K,N}(seq::BioSequence{A}) where {A,K,N}
-    checkmer(Kmer{A,K,N})
-    
-    seqlen = length(seq)
-    if seqlen != K
-        throw(ArgumentError("seq is not the correct length ($seqlen ≠ $K)"))
-    end
-    
-    ## All based on alphabet type of Kmer, so should constant fold.
-    bits_per_sym = BioSequences.bits_per_symbol(A())
-    n_head = elements_in_head(Kmer{A,K,N})
-    n_per_chunk = per_word_capacity(Kmer{A,K,N})
-    
-    # Construct the head.
-    head = zero(UInt64)
-    @inbounds for i in 1:n_head
-        bits = UInt64(BioSequences.extract_encoded_element(seq, i))
-        head = (head << bits_per_sym) | bits
-    end
-    
-    # And the rest of the sequence
-    idx = Ref(n_head + 1)
-    tail = ntuple(Val{N - 1}()) do i
-        Base.@_inline_meta
-        body = zero(UInt64)
-        @inbounds for _ in 1:n_per_chunk
-            bits = UInt64(BioSequences.extract_encoded_element(seq, idx[]))
-            body = (body << bits_per_sym) | bits
-            idx[] += 1
-        end
-        return body
-    end
-    
-    data = (head, tail...)
-    
-    return Kmer{A,K,N}(data)
+@inline function elements_in_head(::Type{<:Kmer{A, K, N}}) where {A, K, N}
+    per_word_capacity(Kmer{A, K, N}) - n_unused(Kmer{A, K, N})
 end
 
+"""
+    derive_type(::Type{Kmer{A, K}}) -> Type{Kmer{A, K, N}}
 
-# Convenience version of function above so you don't have to work out correct N.
+Compute the fully parameterized kmer type from only the parameters `A` and `K`.
 """
-    Kmer{A,K}(itr) where {A,K}
+@inline derive_type(::Type{Kmer{A, K}}) where {A, K} =
+    Kmer{A, K, n_coding_elements(Kmer{A, K})}
 
-Construct a `Kmer{A,K,N}` from an iterable.
+@inline zero_tuple(T::Type{<:Kmer}) = ntuple(i -> zero(UInt), Val{nsize(T)}())
 
-This is a convenience method which will work out the correct `N` parameter, for
-your given choice of `A` & `K`.
-"""
-@inline function Kmer{A,K}(itr) where {A,K}
-    T = kmertype(Kmer{A,K})
-    return T(itr)
+@inline function zero_kmer(::Type{<:Kmer{A, K}}) where {A, K}
+    T2 = derive_type(Kmer{A, K})
+    T2(unsafe, zero_tuple(T2))
 end
 
-"""
-    Kmer{A}(itr) where {A}
+##################
+# Various methods
+##################
 
-Construct a `Kmer{A,K,N}` from an iterable.
+# BioSequences interface
+Base.length(x::Kmer) = ksize(typeof(x))
+Base.copy(x::Kmer) = x # immutable
+BioSequences.encoded_data_eltype(::Type{<:Kmer}) = UInt
 
-This is a convenience method which will work out K from the length of `itr`, and
-the correct `N` parameter, for your given choice of `A` & `K`.
+# BioSequences helper methods
+BioSequences.encoded_data(seq::Kmer) = seq.data
 
-!!! warning
-    Since this gets K from runtime values, this is gonna be slow!
-"""
-@inline Kmer{A}(itr) where {A} = Kmer{A,length(itr)}(itr)
-@inline Kmer(seq::BioSequence{A}) where A = Kmer{A}(seq)
+# Misc methods
+Base.summary(x::Kmer{A, K, N}) where {A, K, N} = string(eltype(x), ' ', K, "-mer")
 
-function Kmer{A1}(seq::BioSequence{A2}) where {A1 <: NucleicAcidAlphabet, A2 <: NucleicAcidAlphabet}
-    kmertype(Kmer{A1, length(seq)})(seq)
+function Base.show(io::IO, ::MIME"text/plain", s::Kmer)
+    println(io, summary(s), ':')
+    print(io, s)
 end
 
-@inline function Kmer{A}(nts::Vararg{Union{DNA, RNA}, K}) where {A <: NucleicAcidAlphabet, K}
-    return kmertype(Kmer{A, K})(nts)
+# TODO: This is only efficient because the compiler, through Herculean effort,
+# is able to completely unroll and inline the indexing operation.
+@inline function _cmp(x::Kmer{A1, K1}, y::Kmer{A2, K2}) where {A1, A2, K1, K2}
+    if K1 == K2
+        cmp(x.data, y.data)
+    else
+        m = min(K1, K2)
+        a = @inline x[1:m]
+        b = @inline y[1:m]
+        c = cmp(a.data, b.data)
+        if iszero(c)
+            K1 < K2 ? -1 : K2 < K1 ? 1 : 0
+        else
+            c
+        end
+    end
+end
+
+# Here, we don't allow comparing twobit to fourbit sequences. We could do this semantically,
+# but this would open a whole can of worms, be impossible to optimise and defeat the purpose
+# of using Kmers.
+Base.cmp(x::Kmer{A}, y::Kmer{A}) where {A} = _cmp(x, y)
+Base.cmp(x::Kmer{<:FourBit}, y::Kmer{<:FourBit}) = _cmp(x, y)
+Base.cmp(x::Kmer{<:TwoBit}, y::Kmer{<:TwoBit}) = _cmp(x, y)
+Base.cmp(x::Kmer{A}, y::Kmer{B}) where {A, B} = throw(MethodError(cmp, (x, y)))
+
+Base.isless(x::Kmer, y::Kmer) = @inline(cmp(x, y)) == -1
+Base.:(==)(x::Kmer, y::Kmer) = iszero(@inline cmp(x, y))
+
+Base.:(==)(x::Kmer, y::BioSequence) = throw(MethodError(==, (x, y)))
+Base.:(==)(x::BioSequence, y::Kmer) = throw(MethodError(==, (x, y)))
+
+Base.hash(x::Kmer, h::UInt) = hash(x.data, h ⊻ ksize(typeof(x)))
+
+# These constants are from the original implementation
+@static if Sys.WORD_SIZE == 32
+    # typemax(UInt32) / golden ratio
+    const FX_CONSTANT = 0x9e3779b9
+elseif Sys.WORD_SIZE == 64
+    # typemax(UInt64) / pi
+    const FX_CONSTANT = 0x517cc1b727220a95
+else
+    error("Invalid word size")
 end
 
+# This implementation is translated from the Rust compiler source code,
+# licenced under MIT. The original source is the Firefox source code,
+# also freely licensed.
 """
-    Kmer(nts::Vararg{DNA,K}) where {K}
+    fx_hash(x, [h::UInt])::UInt
 
-Construct a Kmer from a variable number `K` of DNA nucleotides.
+An implementation of `FxHash`. This hash function is extremely fast, but the hashes
+are of poor quality compared to Julia's default MurmurHash3. In particular:
+* The hash function does not have a good avalanche effect, e.g. the lower bits
+  of the result depend only on the top few bits of the input
+* The bitpattern zero hashes to zero
 
-# Examples
+However, for many applications, `FxHash` is good enough, if the cost of the
+higher rate of hash collisions are offset by the faster speed.
 
-```jldoctest
-julia> Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C)
-DNA 5-mer:
-TTAGC
-```
-"""
-@inline Kmer(nt::DNA, nts::Vararg{DNA}) = DNAKmer((nt, nts...))
+The precise hash value of a given kmer is not guaranteed to be stable across minor
+releases of Kmers.jl, but _is_ guaranteed to be stable across minor versions of
+Julia.
 
-"""
-    Kmer(nts::Vararg{RNA,K}) where {K}
+# Examples
+```jldoctest
+julia> x = fx_hash(mer"KWQLDE"a);
 
-Construct a Kmer from a variable number `K` of RNA nucleotides.
+julia> y = fx_hash(mer"KWQLDE"a, UInt(1));
 
-# Examples
+julia> x isa UInt
+true
 
-```jldoctest
-julia> Kmer(RNA_U, RNA_U, RNA_A, RNA_G, RNA_C)
-DNA 5-mer:
-UUAGC
+julia> x == y
+false
 ```
 """
-@inline Kmer(nt::RNA, nts::Vararg{RNA}) = RNAKmer((nt, nts...))
-
+function fx_hash(x::Kmer, h::UInt)
+    for i in x.data
+        h = (bitrotate(h, 5) ⊻ i) * FX_CONSTANT
+    end
+    h
+end
+fx_hash(x) = fx_hash(x, zero(UInt))
 
 """
-    Kmer(seq::String)
+    push(kmer::Kmer{A, K}, s)::Kmer{A, K+1}
 
-Construct a DNA or RNA kmer from a string.
+Create a new kmer which is the concatenation of `kmer` and `s`.
+Returns a `K+1`-mer.
 
-!!! warning
-    As a convenience method, this derives the `K`, `Alphabet`, and `N` parameters
-    for the `Kmer{A,K,N}` type from the input string.
+!!! warn
+    Since the output of this function is a `K+1`-mer, use of this function
+    in a loop may result in type-instability.
 
-# Examples
+See also: [`push_first`](@ref), [`pop`](@ref), [`shift`](@ref)
 
+# Examples
 ```jldoctest
-julia> Kmer("TTAGC")
-DNA 5-mer:
-TTAGC
+julia> push(mer"UGCUGA"r, RNA_G)
+RNA 7-mer:
+UGCUGAG
+
+julia> push(mer"W"a, 'E')
+AminoAcid 2-mer:
+WE
 ```
 """
-@inline function Kmer(seq::String)
-    seq′ = BioSequences.remove_newlines(seq)
-    hast = false
-    hasu = false
-    for c in seq′
-        hast |= ((c == 'T') | (c == 't'))
-        hasu |= ((c == 'U') | (c == 'u'))
-    end
-    if (hast & hasu) | (!hast & !hasu)
-        throw(ArgumentError("Can't detect alphabet type from string"))
+function push(kmer::Kmer, s)
+    bps = BioSequences.bits_per_symbol(kmer)
+    A = Alphabet(kmer)
+    newT = derive_type(Kmer{typeof(A), length(kmer) + 1})
+    # If no free space in data, add new tuple
+    new_data = if bits_unused(typeof(kmer)) < bps
+        (zero(UInt), kmer.data...)
+    else
+        kmer.data
     end
-    A = ifelse(hast & !hasu, DNAAlphabet{2}, RNAAlphabet{2})
-    return Kmer{A,length(seq′)}(seq′)
+    # leftshift_carry the new encoding in.
+    encoding = UInt(BioSequences.encode(A, convert(eltype(kmer), s)))
+    (_, new_data) = leftshift_carry(new_data, bps, encoding)
+    newT(unsafe, new_data)
 end
 
-
 """
-    kmertype(::Type{Kmer{A,K}}) where {A,K}
-Resolve and incomplete kmer typing, computing the N parameter of
-`Kmer{A,K,N}`, given only `Kmer{A,K}`.
-## Example
-```julia
-julia> DNAKmer{63}
-Kmer{DNAAlphabet{2},63,N} where N
-julia> kmertype(DNAKmer{63})
-Kmer{DNAAlphabet{2},63,2}
-```
-"""
-@inline function kmertype(::Type{Kmer{A,K}}) where {A,K}
-    return Kmer{A,K,BioSequences.seq_data_len(A, K)}
-end
-@inline kmertype(::Type{Kmer{A,K,N}}) where {A,K,N} = Kmer{A,K,N}
-
-# Aliases
-"Shortcut for the type `Kmer{DNAAlphabet{2},K,N}`"
-const DNAKmer{K,N} = Kmer{DNAAlphabet{2},K,N}
-
-"Shortcut for the type `DNAKmer{27,1}`"
-const DNA27mer = DNAKmer{27,1}
-
-"Shortcut for the type `DNAKmer{31,1}`"
-const DNA31mer = DNAKmer{31,1}
-
-"Shortcut for the type `DNAKmer{63,2}`"
-const DNA63mer = DNAKmer{63,2}
-
-"Shortcut for the type `Kmer{RNAAlphabet{2},K,N}`"
-const RNAKmer{K,N} = Kmer{RNAAlphabet{2},K,N}
+    shift(kmer::Kmer{A, K}, s)::Kmer{A, K}
 
-"Shortcut for the type `RNAKmer{27,1}`"
-const RNA27mer = RNAKmer{27,1}
+Push `symbol` onto the end of `kmer`, and pop the first symbol in `kmer`.
+Unlike `push`, this preserves the input type, and is less likely to result in
+type instability.
 
-"Shortcut for the type `RNAKmer{31,1}`"
-const RNA31mer = RNAKmer{31,1}
+See also: [`shift_first`](@ref), [`push`](@ref)
 
-"Shortcut for the type `RNAKmer{63,2}`"
-const RNA63mer = RNAKmer{63,2}
-
-"Shortcut for the type `Kmer{AminoAcidAlphabet,K,N}`"
-const AAKmer{K,N} = Kmer{AminoAcidAlphabet,K,N}
-
-"Shorthand for `DNAKmer{3,1}`"
-const DNACodon = DNAKmer{3,1}
+# Examples
+```jldoctest
+julia> shift(mer"TACC"d, DNA_A)
+DNA 4-mer:
+ACCA
 
-"Shorthand for `RNAKmer{3,1}`"
-const RNACodon = RNAKmer{3,1}
+julia> shift(mer"WKYMLPIIRS"aa, 'F')
+AminoAcid 10-mer:
+KYMLPIIRSF
+```
+"""
+function shift(kmer::Kmer{A}, s) where {A}
+    encoding = UInt(BioSequences.encode(A(), convert(eltype(kmer), s)))
+    shift_encoding(kmer, encoding)
+end
 
+"""
+    push_first(kmer::Kmer{A, K}, s)::Kmer{A, K+1}
 
-###
-### Base Functions
-###
+Create a new kmer which is the concatenation of `s` and `kmer`.
+Returns a `K+1`-mer. Similar to [`push`](@ref), but places the new symbol `s`
+at the front.
 
-@inline ksize(::Type{Kmer{A,K,N}}) where {A,K,N} = K
-@inline nsize(::Type{Kmer{A,K,N}}) where {A,K,N} = N
-@inline per_word_capacity(::Type{Kmer{A,K,N}}) where {A,K,N} = div(64, BioSequences.bits_per_symbol(A()))
-@inline per_word_capacity(seq::Kmer) = per_word_capacity(typeof(seq))
-@inline capacity(::Type{Kmer{A,K,N}}) where {A,K,N} = per_word_capacity(Kmer{A,K,N}) * N
-@inline capacity(seq::Kmer) = capacity(typeof(seq))
-@inline n_unused(::Type{Kmer{A,K,N}}) where {A,K,N} = capacity(Kmer{A,K,N}) - K
-@inline n_unused(seq::Kmer) = n_unused(typeof(seq))
-@inline elements_in_head(::Type{Kmer{A,K,N}}) where {A,K,N} = per_word_capacity(Kmer{A,K,N}) - n_unused(Kmer{A,K,N})
-@inline elements_in_head(seq::Kmer) = elements_in_head(typeof(seq))
+!!! warn
+    Since the output of this function is a `K+1`-mer, use of this function
+    in a loop may result in type-instability.
 
-"""
-    checkmer(::Type{Kmer{A,K,N}}) where {A,K,N}
+See also: [`push`](@ref),  [`pop`](@ref), [`shift`](@ref)
 
-Internal method - enforces good kmer type parameterisation.
-
-For a given Kmer{A,K,N} of length K, the number of words used to
-represent it (N) should be the minimum needed to contain all K symbols,
-no larger (wasteful) no smaller (just... wrong).
+# Examples
+```jldoctest
+julia> push_first(mer"GCU"r, RNA_G)
+RNA 4-mer:
+GGCU
 
-Because it is used on type parameters / variables, these conditions should be
-checked at compile time, and the branches / error throws eliminated when the
-parameterisation of the Kmer type is good. 
+julia> push_first(mer"W"a, 'E')
+AminoAcid 2-mer:
+EW
+```
 """
-@inline function checkmer(::Type{Kmer{A,K,N}}) where {A,K,N}
-    if K < 1
-        throw(ArgumentError("Bad kmer parameterisation. K must be greater than 0."))
-    end
-    n = BioSequences.seq_data_len(A, K)
-    if n !== N
-        # This has been significantly changed conceptually from before. Now we
-        # don't just check K, but *enforce* the most appropriate N for K.
-        throw(ArgumentError("Bad kmer parameterisation. For K = $K, N should be $n"))
+function push_first(kmer::Kmer{A}, s) where {A}
+    bps = BioSequences.bits_per_symbol(A())
+    newT = derive_type(Kmer{A, length(kmer) + 1})
+    # If no free space in data, add new tuple
+    new_data = if bits_unused(typeof(kmer)) < bps
+        (zero(UInt), kmer.data...)
+    else
+        kmer.data
     end
+    encoding = UInt(BioSequences.encode(A(), convert(eltype(kmer), s)))
+    head = first(new_data) | left_shift(encoding, (elements_in_head(newT) - 1) * bps)
+    newT(unsafe, (head, tail(new_data)...))
 end
 
-@inline Base.length(x::Kmer{A,K,N}) where {A,K,N} = K
-@inline Base.summary(x::Kmer{A,K,N}) where {A,K,N} = string(eltype(x), ' ', K, "-mer")
+"""
+    shift_first(kmer::kmer, symbol)::typeof(kmer)
 
-function Base.typemin(::Type{Kmer{A,K,N}}) where {A,K,N}
-    return Kmer{A,K,N}(unsafe, ntuple(i -> zero(UInt64), N))
-end
+Push `symbol` onto the start of `kmer`, and pop the last symbol in `kmer`.
 
-function Base.typemax(::Type{Kmer{A,K,N}}) where {A,K,N}
-    return Kmer{A,K,N}((typemax(UInt64), ntuple(i -> typemax(UInt64), N - 1)...))
-end
+See also: [`shift`](@ref), [`push`](@ref)
 
-@inline function rand_kmer_data(::Type{Kmer{A,K,N}}, ::Val{true}) where {A,K,N}
-    return Kmer{A,K,N}(ntuple(i -> rand(UInt64), Val{N}()))
-end
+# Examples
+```jldoctest
+julia> shift_first(mer"TACC"d, DNA_A)
+DNA 4-mer:
+ATAC
 
-@inline function rand_kmer_data(::Type{Kmer{A,K,N}}, ::Val{false}) where {A,K,N}
-    ## All based on alphabet type of Kmer, so should constant fold.
-    bits_per_sym = BioSequences.bits_per_symbol(A())
-    n_head = elements_in_head(Kmer{A,K,N})
-    n_per_chunk = per_word_capacity(Kmer{A,K,N})
-    # Construct the head.
-    head = zero(UInt64)
-    @inbounds for i in 1:n_head
-        bits = UInt64(BioSequences.encode(A(), rand(symbols(A()))))
-        head = (head << bits_per_sym) | bits
-    end
-    # And the rest of the sequence
-    tail = ntuple(Val{N - 1}()) do i
-        Base.@_inline_meta
-        body = zero(UInt64)
-        @inbounds for _ in 1:n_per_chunk
-            bits = UInt64(BioSequences.encode(A(), rand(symbols(A()))))
-            body = (body << bits_per_sym) | bits
-        end
-        return body
-    end
-    return (head, tail...)
+julia> shift_first(mer"WKYMLPIIRS"aa, 'F')
+AminoAcid 10-mer:
+FWKYMLPIIR
+```
+"""
+function shift_first(kmer::Kmer{A}, s) where {A}
+    encoding = UInt(BioSequences.encode(A(), convert(eltype(kmer), s)))
+    shift_first_encoding(kmer, encoding)
 end
 
+function shift_first_encoding(kmer::Kmer{A}, encoding::UInt) where {A}
+    isempty(kmer) && return kmer
+    bps = BioSequences.bits_per_symbol(A())
+    (_, new_data) = rightshift_carry(kmer.data, bps, zero(UInt))
+    head =
+        first(new_data) | left_shift(encoding, (elements_in_head(typeof(kmer)) - 1) * bps)
+    typeof(kmer)(unsafe, (head, tail(new_data)...))
+end
 
 """
-    Base.rand(::Type{Kmer{A,K,N}}) where {A,K,N}
-    Base.rand(::Type{Kmer{A,K}}) where {A,K}
+    pop(kmer::Kmer{A, K})::Kmer{A, K-1}
 
-Create a random kmer of a specified alphabet and length
+Returns a new kmer with the last symbol of the input `kmer` removed.
+Throws an `ArgumentError` if `kmer` is empty.
+
+!!! warn
+    Since the output of this function is a `K-1`-mer, use of this function
+    in a loop may result in type-instability.
+
+See also: [`pop_first`](@ref), [`push`](@ref), [`shift`](@ref)
 
 # Examples
-```julia
-julia> rand(Kmer{DNAAlphabet{2}, 3})
-BioSymbols.DNA 3-mer:
-ACT
+```jldoctest
+julia> pop(mer"TCTGTA"d)
+DNA 5-mer:
+TCTGT
 
+julia> pop(mer"QPSY"a)
+AminoAcid 3-mer:
+QPS
+
+julia> pop(mer""a)
+ERROR: ArgumentError:
+[...]
 ```
 """
-@inline function Base.rand(::Type{Kmer{A,K,N}}) where {A,K,N}
-    checkmer(Kmer{A,K,N})
-    return Kmer{A,K,N}(rand_kmer_data(Kmer{A,K,N}, BioSequences.iscomplete(A())))
+function pop(kmer::Kmer{A}) where {A}
+    isempty(kmer) && throw(ArgumentError("Cannot pop 0-mer"))
+    bps = BioSequences.bits_per_symbol(A())
+    newT = derive_type(Kmer{A, length(kmer) - 1})
+    (_, new_data) = rightshift_carry(kmer.data, bps, zero(UInt))
+    new_data = if elements_in_head(typeof(kmer)) == 1
+        tail(new_data)
+    else
+        new_data
+    end
+    newT(unsafe, new_data)
 end
 
-Base.rand(::Type{Kmer{A,K}}) where {A,K} = rand(kmertype(Kmer{A,K}))
+"""
+    pop_first(kmer::Kmer{A, K})::Kmer{A, K-1}
 
-function Base.rand(::Type{T}, size::Integer) where {T<:Kmer}
-    return [rand(T) for _ in 1:size]
-end
+Returns a new kmer with the first symbol of the input `kmer` removed.
+Throws an `ArgumentError` if `kmer` is empty.
 
-###
-### Old Mer Base Functions - not transferred to new type.
-###
-#@inline encoded_data_type(::Type{Mer{A,K}}) where {A,K} = UInt64
-#@inline encoded_data_type(::Type{BigMer{A,K}}) where {A,K} = UInt128
-#@inline encoded_data_type(x::AbstractMer) = encoded_data_type(typeof(x))
-#@inline encoded_data(x::AbstractMer) = reinterpret(encoded_data_type(typeof(x)), x)
-#@inline ksize(::Type{T}) where {A,K,T<:AbstractMer{A,K}} = K
-#@inline Base.unsigned(x::AbstractMer) = encoded_data(x)
-#Base.:-(x::AbstractMer, y::Integer) = typeof(x)(encoded_data(x) - y % encoded_data_type(x))
-#Base.:+(x::AbstractMer, y::Integer) = typeof(x)(encoded_data(x) + y % encoded_data_type(x))
-#Base.:+(x::AbstractMer, y::AbstractMer) = y + x
-#Alphabet(::Type{Mer{A,K} where A<:NucleicAcidAlphabet{2}}) where {K} = Any
-
-include("indexing.jl")
-
-#LongSequence{A}(x::Kmer{A,K,N}) where {A,K,N} = LongSequence{A}([nt for nt in x])
-# Convenience method so as don't need to specify A in LongSequence{A}.
-BioSequences.LongSequence(x::Kmer{A,K,N}) where {A,K,N} = LongSequence{A}(x)
-
-include("predicates.jl")
-include("counting.jl")
-include("transformations.jl")
-
-###
-### Kmer de-bruijn neighbors
-###
-
-# TODO: Decide on this vs. old iterator pattern. I like the terseness of the code vs defining an iterator. Neither should allocate.
-fw_neighbors(kmer::Kmer{A,K,N}) where {A<:DNAAlphabet,K,N} = ntuple(i -> pushlast(kmer, ACGT[i]), Val{4}())
-fw_neighbors(kmer::Kmer{A,K,N}) where {A<:RNAAlphabet,K,N} = ntuple(i -> pushlast(kmer, ACGU[i]), Val{4}())
-bw_neighbors(kmer::Kmer{A,K,N}) where {A<:DNAAlphabet,K,N} = ntuple(i -> pushfirst(kmer, ACGT[i]), Val{4}())
-bw_neighbors(kmer::Kmer{A,K,N}) where {A<:RNAAlphabet,K,N} = ntuple(i -> pushfirst(kmer, ACGU[i]), Val{4}())
-
-#=
-# Neighbors on a de Bruijn graph
-struct KmerNeighborIterator{S<:Kmer}
-    x::S
-end
+!!! warn
+    Since the output of this function is a `K-1`-mer, use of this function
+    in a loop may result in type-instability.
 
-"""
-    neighbors(kmer::S) where {S<:Kmer}
+See also: [`pop`](@ref), [`push`](@ref), [`shift`](@ref)
 
-Return an iterator through skip-mers neighboring `skipmer` on a de Bruijn graph.
-"""
-neighbors(kmer::Kmer) = KmerNeighborIterator{typeof(kmer)}(kmer)
+# Examples
+```jldoctest
+julia> pop_first(mer"TCTGTA"d)
+DNA 5-mer:
+CTGTA
 
-Base.length(::KmerNeighborIterator) = 4
-Base.eltype(::Type{KmerNeighborIterator{S}}) where {S<:Kmer} = S
+julia> pop_first(mer"QPSY"a)
+AminoAcid 3-mer:
+PSY
 
-function Base.iterate(it::KmerNeighborIterator{S}, i::UInt64 = 0) where {S<:Kmer}
-    if i == 4
-        return nothing
-    else
-        #return S((encoded_data(it.x) << 2) | i), i + 1
-        return it.x << 1, i + one(UInt64)
-    end
-end
-=#
-
-###
-### String literals
-###
-
-macro mer_str(seq, flag)
-    seq′ = BioSequences.remove_newlines(seq)
-    if flag == "dna" || flag == "d"
-        T = kmertype(DNAKmer{length(seq′)})
-        return T(seq′)
-    elseif flag == "rna" || flag == "r"
-        T = kmertype(RNAKmer{length(seq′)})
-        return T(seq′)
-    elseif flag == "aa" || flag == "a" || flag == "prot" || flag == "p" 
-        T = kmertype(AAKmer{length(seq′)})
-        return T(seq′)
+julia> pop_first(mer""a)
+ERROR: ArgumentError:
+[...]
+```
+"""
+function pop_first(kmer::Kmer{A}) where {A}
+    isempty(kmer) && throw(ArgumentError("Cannot pop 0-mer"))
+    data = if elements_in_head(typeof(kmer)) == 1
+        tail(kmer.data)
     else
-        error("Invalid type flag: '$(flag)'")
+        bps = BioSequences.bits_per_symbol(A())
+        bits_used = 8 * sizeof(UInt) - (bits_unused(typeof(kmer)) + bps)
+        mask = left_shift(UInt(1), bits_used) - UInt(1)
+        (first(kmer.data) & mask, tail(kmer.data)...)
     end
+    newT = derive_type(Kmer{A, length(kmer) - 1})
+    newT(unsafe, data)
 end
 
-macro mer_str(seq)
-    seq′ = BioSequences.remove_newlines(seq)
-    T = kmertype(DNAKmer{length(seq′)})
-    return T(seq′)
+# Get a mask 0x0001111 ... masking away the unused bits of the head element
+# in the UInt tuple
+@inline function get_mask(T::Type{<:Kmer})
+    UInt(1) << (8 * sizeof(UInt) - bits_unused(T)) - 1
 end
-
-include("revtrans.jl")
\ No newline at end of file
diff --git a/src/kmer_iteration/AbstractKmerIterator.jl b/src/kmer_iteration/AbstractKmerIterator.jl
deleted file mode 100644
index f320476..0000000
--- a/src/kmer_iteration/AbstractKmerIterator.jl
+++ /dev/null
@@ -1,36 +0,0 @@
-###
-### Kmer Iteration
-###
-### Abstract Kmer Iterator type.
-###
-### This file is a part of BioJulia.
-### License is MIT: https://github.com/BioJulia/BioSequences.jl/blob/master/LICENSE.md
-
-### Type for storing the result of Kmer iteration.
-
-abstract type AbstractKmerIterator{T<:Kmer,S<:BioSequence} end
-
-@inline Base.eltype(::Type{<:AbstractKmerIterator{T,S}}) where {T,S} = Tuple{UInt64,T}
-
-@inline Base.IteratorSize(::Type{<:AbstractKmerIterator{Kmer{A,K,N},S}}) where {A,S<:BioSequence{A},K,N} = Base.HasLength()
-@inline Base.IteratorSize(::Type{<:AbstractKmerIterator{Kmer{A,K,N},S}}) where {A,B,S<:BioSequence{B},K,N} = Base.SizeUnknown()
-
-@inline function Base.length(it::AbstractKmerIterator{Kmer{A,K,N},S}) where {A,K,N,S<:BioSequence{A}}
-    return max(0, fld(it.stop - it.start + 1 - K, step(it)) + 1)
-end
-
-# Iteration where the Kmer and Seq alphabets match:
-
-## Initial iteration without state.
-@inline function Base.iterate(it::AbstractKmerIterator{Kmer{A,K,N},LongSequence{A}}) where {A,K,N}
-    fwkmer = _build_kmer_data(Kmer{A,K,N}, it.seq, 1)
-    if isnothing(fwkmer)
-        return nothing
-    else
-        # Get the reverse.
-        alph = Alphabet(Kmer{A,K,N})
-        rshift = n_unused(Kmer{A,K,N}) * BioSequences.bits_per_symbol(alph) # Based on alphabet type, should constant fold.
-        rvkmer = rightshift_carry(_reverse(BioSequences.BitsPerSymbol(alph), _complement_bitpar(alph, fwkmer...)...), rshift)
-        return KmerAt{Kmer{A,K,N}}(1, Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer)), (K, fwkmer, rvkmer)
-    end
-end
\ No newline at end of file
diff --git a/src/kmer_iteration/EveryCanonicalKmer.jl b/src/kmer_iteration/EveryCanonicalKmer.jl
deleted file mode 100644
index 21e883b..0000000
--- a/src/kmer_iteration/EveryCanonicalKmer.jl
+++ /dev/null
@@ -1,122 +0,0 @@
-"""
-    EveryCanonicalKmer{T,S}(seq::S, start::Int = firstindex(seq), stop::Int = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-An iterator over every canonical valid overlapping `T<:Kmer` in a given longer 
-`BioSequence`, between a `start` and `stop` position.
-
-!!! note
-    Typically, the alphabet of the Kmer type matches the alphabet of the input
-    BioSequence. In these cases, the iterator will have `Base.IteratorSize` of
-    `Base.HasLength`, and successive kmers produced by the iterator will overlap
-    by K - 1 bases.
-    
-    However, in the specific case of iterating over kmers in a DNA or RNA sequence, you
-    may iterate over a Kmers where the alphabet is a NucleicAcidAlphabet{2}, but
-    the input BioSequence has a NucleicAcidAlphabet{4}.
-    
-    In this case then the iterator will skip over positions in the BioSequence
-    with characters that are not supported by the Kmer type's NucleicAcidAlphabet{2}.
-    
-    As a result, the overlap between successive kmers may not reliably be K - 1,
-    and the iterator will have `Base.IteratorSize` of `Base.SizeUnknown`.
-"""
-struct EveryCanonicalKmer{T<:Kmer,S<:BioSequence{<:NucleicAcidAlphabet}} <: AbstractKmerIterator{T,S}
-    seq::S
-    start::Int
-    stop::Int
-    
-    function EveryCanonicalKmer{T,S}(seq::S, start::Int = firstindex(seq), stop::Int = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-        T′ = kmertype(T)
-        checkmer(T′) # Should inline and constant fold.
-        return new{T′,S}(seq, start, stop)
-    end
-end
-
-"""
-    EveryCanonicalKmer{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-Convenience outer constructor so you don't have to specify `S` along with `T`.
-
-E.g. Instead of `EveryCanonicalKmer{DNACodon,typeof(s)}(s)`, you can just use `EveryCanonicalKmer{DNACodon}(s)`
-"""
-function EveryCanonicalKmer{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-    return EveryCanonicalKmer{T,S}(seq, start, stop)
-end
-
-"""
-    EveryCanonicalKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-
-Convenience outer constructor so yyou don't have to specify full `Kmer` typing.
-
-In order to deduce `Kmer{A,K,N}`, `A` is taken from the input `seq` type, `K` is
-taken from `::Val{K}`, and `N` is deduced using `A` and `K`.
-
-E.g. Instead of `EveryCanonicalKmer{DNAKmer{3,1}}(s)`, or `EveryCanonicalKmer{DNACodon}(s)`,
-you can use `EveryCanonicalKmer(s, Val(3))`
-"""
-function EveryCanonicalKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-    return EveryCanonicalKmer{Kmer{A,K}}(seq, start, stop)
-end
-
-Base.step(x::EveryCanonicalKmer) = 1
-
-
-
-## Initial iteration without state.
-@inline function Base.iterate(it::EveryCanonicalKmer{Kmer{A,K,N},LongSequence{A}}) where {A,K,N}
-    fwkmer = _build_kmer_data(Kmer{A,K,N}, it.seq, it.start)
-    if isnothing(fwkmer)
-        return nothing
-    else
-        rshift = n_unused(Kmer{A,K,N}) * BioSequences.bits_per_symbol(A()) # Based on alphabet type, should constant fold.
-        rvkmer = rightshift_carry(_reverse(BioSequences.BitsPerSymbol(A()), _complement_bitpar(A(), fwkmer...)...), rshift)
-        return (it.start, Kmer{A,K,N}(min(fwkmer, rvkmer))), (it.start + K - 1, fwkmer, rvkmer)
-    end
-end
-
-@inline function Base.iterate(it::EveryCanonicalKmer{Kmer{A,K,N},LongSequence{A}}, state) where {A,K,N}
-    i, fwkmer, rvkmer = state
-    i += 1
-    if i > it.stop
-        return nothing
-    else
-        bps = BioSequences.bits_per_symbol(A()) # Based on type info, should constant fold.
-        rshift = (64 - (n_unused(Kmer{A,K,N}) + 1) * bps) # Based on type info, should constant fold.
-        mask = (one(UInt64) << bps) - one(UInt64) # Based on type info, should constant fold.
-        
-        fbits = UInt64(BioSequences.extract_encoded_element(it.seq, i))
-        rbits = (BioSequences.complement_bitpar(fbits, A()) & mask) << rshift
-        fwkmer = leftshift_carry(fwkmer, bps, fbits)
-        rvkmer = rightshift_carry(rvkmer, bps, rbits)
-        pos = i - K + 1
-        return (pos, min(Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer))), (i, fwkmer, rvkmer)
-    end
-end
-
-@inline Base.IteratorSize(::Type{<:EveryCanonicalKmer{Kmer{A,N,K},LongSequence{B}}}) where {A<:NucleicAcidAlphabet{2},N,K,B<:NucleicAcidAlphabet{4}} = Base.SizeUnknown()
-
-@inline function Base.iterate(it::EveryCanonicalKmer{Kmer{A,K,N},LongSequence{B}},
-                              state = (it.start - 1, 1, blank_ntuple(Kmer{A,K,N}), blank_ntuple(Kmer{A,K,N}))
-                              ) where {A<:NucleicAcidAlphabet{2},B<:NucleicAcidAlphabet{4},K,N}
-    
-    i, filled, fwkmer, rvkmer = state
-    i += 1
-    filled -= 1
-
-    rshift = (64 - (n_unused(Kmer{A,K,N}) + 1) * 2) # Based on type info, should constant fold.
-    mask = (one(UInt64) << 2) - one(UInt64) # Based on type info, should constant fold.
-
-    while i ≤ it.stop
-        @inbounds nt = reinterpret(UInt8, it.seq[i])
-        @inbounds fbits = kmerbits[nt + 1]
-        rbits = (BioSequences.complement_bitpar(fbits, A()) & mask) << rshift
-        fwkmer = leftshift_carry(fwkmer, 2, fbits)
-        rvkmer = rightshift_carry(rvkmer, 2, rbits)
-        filled = ifelse(fbits == UInt64(0xff), 0, filled + 1)
-        if filled == K
-            return (i - K + 1, min(Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer))), (i, filled, fwkmer, rvkmer)
-        end
-        i += 1
-    end
-    return nothing
-end
\ No newline at end of file
diff --git a/src/kmer_iteration/EveryKmer.jl b/src/kmer_iteration/EveryKmer.jl
deleted file mode 100644
index 7885afb..0000000
--- a/src/kmer_iteration/EveryKmer.jl
+++ /dev/null
@@ -1,124 +0,0 @@
-###
-### Kmer Iteration
-###
-### Iterator type over every kmer in a sequence - overlapping.
-###
-### This file is a part of BioJulia.
-### License is MIT: https://github.com/BioJulia/BioSequences.jl/blob/master/LICENSE.md
-
-"""
-    EveryKmer{T,S}(seq::S, start::Int = firstindex(seq), stop::Int = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-An iterator over every valid overlapping `T<:Kmer` in a given longer
-`BioSequence` between a `start` and `stop` position. 
-
-!!! note
-    Typically, the alphabet of the Kmer type matches the alphabet of the input
-    BioSequence. In these cases, the iterator will have `Base.IteratorSize` of
-    `Base.HasLength`, and successive kmers produced by the iterator will overlap
-    by K - 1 bases.
-    
-    However, in the specific case of iterating over kmers in a DNA or RNA sequence, you
-    may iterate over a Kmers where the alphabet is a NucleicAcidAlphabet{2}, but
-    the input BioSequence has a NucleicAcidAlphabet{4}.
-    
-    In this case then the iterator will skip over positions in the BioSequence
-    with characters that are not supported by the Kmer type's NucleicAcidAlphabet{2}.
-    
-    As a result, the overlap between successive kmers may not reliably be K - 1,
-    and the iterator will have `Base.IteratorSize` of `Base.SizeUnknown`.
-"""
-struct EveryKmer{T<:Kmer,S<:BioSequence} <: AbstractKmerIterator{T,S}
-    seq::S
-    start::Int
-    stop::Int
-    
-    function EveryKmer{T,S}(seq::S, start::Int = firstindex(seq), stop::Int = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-        T′ = kmertype(T)
-        checkmer(T′) # Should inline and constant fold.
-        return new{T′,S}(seq, start, stop)
-    end
-end
-
-"""
-    EveryKmer{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-Convenience outer constructor so you don't have to specify `S` along with `T`.
-
-E.g. Instead of `EveryKmer{DNACodon,typeof(s)}(s)`, you can just use `EveryKmer{DNACodon}(s)`
-"""
-function EveryKmer{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-    return EveryKmer{T,S}(seq, start, stop)
-end
-
-"""
-    EveryKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-
-Convenience outer constructor so yyou don't have to specify full `Kmer` typing.
-
-In order to deduce `Kmer{A,K,N}`, `A` is taken from the input `seq` type, `K` is
-taken from `::Val{K}`, and `N` is deduced using `A` and `K`.
-
-E.g. Instead of `EveryKmer{DNAKmer{3,1}}(s)`, or `EveryKmer{DNACodon}(s)`,
-you can use `EveryKmer(s, Val(3))`
-"""
-function EveryKmer(seq::BioSequence{A}, ::Val{K}, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-    return EveryKmer{Kmer{A,K}}(seq, start, stop)
-end
-
-Base.step(x::EveryKmer) = 1
-
-## Initial iteration without state.
-@inline function Base.iterate(it::EveryKmer{Kmer{A,K,N},LongSequence{A}}) where {A,K,N}
-    kmer = _build_kmer_data(Kmer{A,K,N}, it.seq, 1)
-    if isnothing(kmer)
-        return nothing
-    else
-        return (1, Kmer{A,K,N}(kmer)), (K, kmer)
-    end
-end
-
-@inline function Base.iterate(it::EveryKmer{Kmer{A,K,N},LongSequence{A}}, state) where {A,K,N}
-    i, fwkmer = state
-    i += 1
-    if i > it.stop
-        return nothing
-    else
-        bps = BioSequences.bits_per_symbol(A()) # Based on type info, should constant fold.
-        bits = UInt64(BioSequences.extract_encoded_element(it.seq, i))
-        kmer = leftshift_carry(fwkmer, bps, bits)
-        pos = i - K + 1
-        return (pos, Kmer{A,K,N}(kmer)), (i, kmer)
-    end
-end
-
-## Special case where iterating over 2-Bit encoded kmers in a 4-Bit encoded sequence,
-## behaviour is to produce kmers by skipping over the ambiguous sites.
-
-const kmerbits = (UInt64(0xff), UInt64(0x00), UInt64(0x01), UInt64(0xff),
-                  UInt64(0x02), UInt64(0xff), UInt64(0xff), UInt64(0xff),
-                  UInt64(0x03), UInt64(0xff), UInt64(0xff), UInt64(0xff),
-                  UInt64(0xff), UInt64(0xff), UInt64(0xff), UInt64(0xff))
-
-@inline Base.IteratorSize(::Type{<:EveryKmer{Kmer{A,N,K},S}}) where {A<:NucleicAcidAlphabet{2},N,K,B<:NucleicAcidAlphabet{4},S<:BioSequence{B}} = Base.SizeUnknown()
-
-@inline function Base.iterate(it::EveryKmer{Kmer{A,K,N},S},
-                              state = (it.start - 1, 1, blank_ntuple(Kmer{A,K,N}))
-                              ) where {A<:NucleicAcidAlphabet{2},B<:NucleicAcidAlphabet{4},S<:BioSequence{B},K,N}
-    
-    i, filled, fwkmer = state
-    i += 1
-    filled -= 1
-
-    while i ≤ it.stop
-        @inbounds nt = reinterpret(UInt8, it.seq[i])
-        @inbounds fbits = kmerbits[nt + 1]
-        fwkmer = leftshift_carry(fwkmer, 2, fbits)
-        filled = ifelse(fbits == UInt64(0xff), 0, filled + 1)
-        if filled == K
-            return (i - K + 1, Kmer{A,K,N}(fwkmer)), (i, filled, fwkmer)
-        end
-        i += 1
-    end
-    return nothing
-end
\ No newline at end of file
diff --git a/src/kmer_iteration/SpacedCanonicalKmers.jl b/src/kmer_iteration/SpacedCanonicalKmers.jl
deleted file mode 100644
index c650cf9..0000000
--- a/src/kmer_iteration/SpacedCanonicalKmers.jl
+++ /dev/null
@@ -1,133 +0,0 @@
-
-"""
-    SpacedCanonicalKmers{T,S}(seq::S, step::Int, start::Int, stop::Int) where {T<:Kmer,S<:BioSequence}
-
-An iterator over every valid `T<:Kmer` separated by a `step` parameter, in a given
-longer `BioSequence`, between a `start` and `stop` position.
-
-!!! note
-    Typically, the alphabet of the Kmer type matches the alphabet of the input
-    BioSequence. In these cases, the iterator will have `Base.IteratorSize` of
-    `Base.HasLength`, and successive kmers produced by the iterator will overlap
-    by `max(0, K - step)` bases.
-    
-    However, in the specific case of iterating over kmers in a DNA or RNA sequence, you
-    may iterate over a Kmers where the alphabet is a NucleicAcidAlphabet{2}, but
-    the input BioSequence has a NucleicAcidAlphabet{4}.
-    
-    In this case then the iterator will skip over positions in the BioSequence
-    with characters that are not supported by the Kmer type's NucleicAcidAlphabet{2}.
-    
-    As a result, the overlap between successive kmers may not consistent, but the
-    reading frame will be preserved.
-    In addition, the iterator will have `Base.IteratorSize` of `Base.SizeUnknown`.
-"""
-struct SpacedCanonicalKmers{T<:Kmer,S<:BioSequence} <: AbstractKmerIterator{T,S}
-    seq::S
-    start::Int
-    step::Int
-    stop::Int
-    filled::Int # This is cached for speed
-    increment::Int # This is cached for speed
-    
-    function SpacedCanonicalKmers{T,S}(seq::S, step::Int, start::Int, stop::Int) where {T<:Kmer,S<:BioSequence}
-        T′ = kmertype(T)
-        checkmer(T′) # Should inline and constant fold.
-        if step <= 1
-            throw(ArgumentError("step size must be greater than 1"))
-        end
-        filled = max(0, ksize(T′) - step)
-        increment = max(1, step - ksize(T′) + 1)
-        return new{T′,S}(seq, start, step, stop, filled, increment)
-    end
-end
-
-"""
-    SpacedCanonicalKmers{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-Convenience outer constructor so you don't have to specify `S` along with `T`.
-
-E.g. Instead of `SpacedCanonicalKmers{DNACodon,typeof(s)}(s, 3)`, you can just use `SpacedCanonicalKmers{DNACodon}(s, 3)`
-"""
-function SpacedCanonicalKmers{T}(seq::S, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-    return SpacedCanonicalKmers{T,S}(seq, step, start, stop)
-end
-
-"""
-    SpacedCanonicalKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-
-Convenience outer constructor so yyou don't have to specify full `Kmer` typing.
-
-In order to deduce `Kmer{A,K,N}`, `A` is taken from the input `seq` type, `K` is
-taken from `::Val{K}`, and `N` is deduced using `A` and `K`.
-
-E.g. Instead of `SpacedCanonicalKmers{DNAKmer{3,1}}(s, 3)`, or `SpacedCanonicalKmers{DNACodon}(s, 3)`,
-you can use `SpacedCanonicalKmers(s, Val(3), 3)`
-"""
-function SpacedCanonicalKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-    return SpacedCanonicalKmers{Kmer{A,K}}(seq, step, start, stop)
-end
-
-Base.step(x::SpacedCanonicalKmers) = x.step
-
-@inline function Base.iterate(it::SpacedCanonicalKmers{Kmer{A,K,N},LongSequence{A}}) where {A,K,N}
-    fwkmer = _build_kmer_data(Kmer{A,K,N}, it.seq, 1)
-    if isnothing(fwkmer)
-        return nothing
-    else
-        rshift = n_unused(Kmer{A,K,N}) * BioSequences.bits_per_symbol(A()) # Based on alphabet type, should constant fold.
-        rvkmer = rightshift_carry(_reverse(BioSequences.BitsPerSymbol(A()), _complement_bitpar(A(), fwkmer...)...), rshift)
-        return (1, min(Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer))), (K, fwkmer, rvkmer)
-    end
-end
-
-@inline function Base.iterate(it::SpacedCanonicalKmers{Kmer{A,K,N},LongSequence{A}}, state) where {A,K,N}
-    i, fwkmer, rvkmer = state
-    filled = it.filled
-    i += it.increment
-    
-    for _ in filled:K-1
-        if i > it.stop
-            return nothing
-        else
-            bps = BioSequences.bits_per_symbol(A()) # Based on type info, should constant fold.
-            rshift = (64 - (n_unused(Kmer{A,K,N}) + 1) * bps) # Based on type info, should constant fold.
-            mask = (one(UInt64) << bps) - one(UInt64) # Based on type info, should constant fold.
-            fbits = UInt64(BioSequences.extract_encoded_element(it.seq, i))
-            rbits = (BioSequences.complement_bitpar(fbits, A()) & mask) << rshift
-            fwkmer = leftshift_carry(fwkmer, bps, fbits)
-            rvkmer = rightshift_carry(rvkmer, bps, rbits)
-            i += 1
-        end
-    end
-    pos = i - K + 1
-    return (pos, min(Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer))), (i, fwkmer, rvkmer)
-end
-
-@inline function Base.iterate(it::SpacedCanonicalKmers{Kmer{A,K,N},LongSequence{B}}, state = (it.start - it.increment, 1, 0, blank_ntuple(Kmer{A,K,N}), blank_ntuple(Kmer{A,K,N}))
-    ) where {A<:NucleicAcidAlphabet{2},B<:NucleicAcidAlphabet{4},K,N}
-    i, pos, filled, fwkmer, rvkmer = state
-    i += it.increment
-
-    while i ≤ it.stop
-        nt = reinterpret(UInt8, @inbounds getindex(it.seq, i))
-        @inbounds fbits = UInt64(kmerbits[nt + 1])
-        rbits = ~fbits & typeof(fbits)(0x03)
-        if fbits == 0xff # ambiguous
-            filled = 0
-            # Find the beginning of next possible kmer after i
-            pos = i + it.step - Core.Intrinsics.urem_int(i - pos, it.step)
-            i = pos - 1
-        else
-            filled += 1
-            fwkmer = leftshift_carry(fwkmer, 2, fbits)
-            rvkmer = rightshift_carry(rvkmer, 2, UInt64(rbits) << (62 - (64N - 2K)))
-        end
-        if filled == K
-            state = (i, i - K + 1 + it.step, it.filled, fwkmer, rvkmer)
-            return (pos, min(Kmer{A,K,N}(fwkmer), Kmer{A,K,N}(rvkmer))), state
-        end
-        i += 1
-    end
-    return nothing
-end
\ No newline at end of file
diff --git a/src/kmer_iteration/SpacedKmers.jl b/src/kmer_iteration/SpacedKmers.jl
deleted file mode 100644
index 18461ee..0000000
--- a/src/kmer_iteration/SpacedKmers.jl
+++ /dev/null
@@ -1,127 +0,0 @@
-
-"""
-    SpacedKmers{T,S}(seq::S, step::Int, start::Int, stop::Int) where {T<:Kmer,S<:BioSequence}
-
-An iterator over every valid `T<:Kmer` separated by a `step` parameter, in a given
-longer `BioSequence`, between a `start` and `stop` position.
-
-!!! note
-    Typically, the alphabet of the Kmer type matches the alphabet of the input
-    BioSequence. In these cases, the iterator will have `Base.IteratorSize` of
-    `Base.HasLength`, and successive kmers produced by the iterator will overlap
-    by `max(0, K - step)` bases.
-    
-    However, in the specific case of iterating over kmers in a DNA or RNA sequence, you
-    may iterate over a Kmers where the alphabet is a NucleicAcidAlphabet{2}, but
-    the input BioSequence has a NucleicAcidAlphabet{4}.
-    
-    In this case then the iterator will skip over positions in the BioSequence
-    with characters that are not supported by the Kmer type's NucleicAcidAlphabet{2}.
-    
-    As a result, the overlap between successive kmers may not consistent, but the
-    reading frame will be preserved.
-    In addition, the iterator will have `Base.IteratorSize` of `Base.SizeUnknown`.
-"""
-struct SpacedKmers{T<:Kmer,S<:BioSequence} <: AbstractKmerIterator{T,S}
-    seq::S
-    start::Int
-    step::Int
-    stop::Int
-    filled::Int # This is cached for speed
-    increment::Int # This is cached for speed
-    
-    function SpacedKmers{T,S}(seq::S, step::Int, start::Int, stop::Int) where {T<:Kmer,S<:BioSequence}
-        T′ = kmertype(T)
-        checkmer(T′) # Should inline and constant fold.
-        if step <= 1
-            throw(ArgumentError("step size must be greater than 1"))
-        end
-        filled = max(0, ksize(T′) - step)
-        increment = max(1, step - ksize(T′) + 1)
-        return new{T′,S}(seq, start, step, stop, filled, increment)
-    end
-end
-
-"""
-    SpacedKmers{T}(seq::S, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-
-Convenience outer constructor so you don't have to specify `S` along with `T`.
-
-E.g. Instead of `SpacedKmers{DNACodon,typeof(s)}(s, 3)`, you can just use `SpacedKmers{DNACodon}(s, 3)`
-"""
-function SpacedKmers{T}(seq::S, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {T<:Kmer,S<:BioSequence}
-    return SpacedKmers{T,S}(seq, step, start, stop)
-end
-
-"""
-    SpacedKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-
-Convenience outer constructor so yyou don't have to specify full `Kmer` typing.
-
-In order to deduce `Kmer{A,K,N}`, `A` is taken from the input `seq` type, `K` is
-taken from `::Val{K}`, and `N` is deduced using `A` and `K`.
-
-E.g. Instead of `SpacedKmers{DNAKmer{3,1}}(s, 3)`, or `SpacedKmers{DNACodon}(s, 3)`,
-you can use `SpacedKmers(s, Val(3), 3)`
-"""
-function SpacedKmers(seq::BioSequence{A}, ::Val{K}, step::Int, start = firstindex(seq), stop = lastindex(seq)) where {A,K}
-    return SpacedKmers{Kmer{A,K}}(seq, step, start, stop)
-end
-
-Base.step(x::SpacedKmers) = x.step
-
-@inline function Base.iterate(it::SpacedKmers{Kmer{A,K,N},LongSequence{A}}) where {A,K,N}
-    kmer = _build_kmer_data(Kmer{A,K,N}, it.seq, 1)
-    if isnothing(kmer)
-        return nothing
-    else
-        # Get the reverse.
-        alph = Alphabet(Kmer{A,K,N})
-        return (1, Kmer{A,K,N}(kmer)), (K, kmer)
-    end
-end
-
-@inline function Base.iterate(it::SpacedKmers{Kmer{A,K,N},LongSequence{A}}, state) where {A,K,N}
-    i, kmer = state
-    filled = it.filled
-    i += it.increment
-    
-    for _ in filled:K-1
-        if i > it.stop
-            return nothing
-        else
-            bps = BioSequences.bits_per_symbol(A()) # Based on type info, should constant fold.
-            bits = UInt64(BioSequences.extract_encoded_element(it.seq, i))
-            kmer = leftshift_carry(kmer, bps, bits)
-            i += 1
-        end
-    end
-    pos = i - K + 1
-    return (pos, Kmer{A,K,N}(kmer)), (i, kmer)
-end
-
-@inline function Base.iterate(it::SpacedKmers{Kmer{A,K,N},LongSequence{B}}, state = (it.start - it.increment, 1, 0, blank_ntuple(Kmer{A,K,N}))
-    ) where {A<:NucleicAcidAlphabet{2},B<:NucleicAcidAlphabet{4},K,N}
-    i, pos, filled, kmer = state
-    i += it.increment
-
-    while i ≤ it.stop
-        nt = reinterpret(UInt8, @inbounds getindex(it.seq, i))
-        @inbounds bits = UInt64(kmerbits[nt + 1])
-        if bits == 0xff # ambiguous
-            filled = 0
-            # Find the beginning of next possible kmer after i
-            pos = i + it.step - Core.Intrinsics.urem_int(i - pos, it.step)
-            i = pos - 1
-        else
-            filled += 1
-            kmer = leftshift_carry(kmer, 2, bits)
-        end
-        if filled == K
-            state = (i, i - K + 1 + it.step, it.filled, kmer)
-            return (pos, Kmer{A,K,N}(kmer)), state
-        end
-        i += 1
-    end
-    return nothing
-end
diff --git a/src/predicates.jl b/src/predicates.jl
deleted file mode 100644
index 86258e6..0000000
--- a/src/predicates.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-###
-### Mer specific specializations of src/biosequence/predicates.jl
-###
-
-Base.cmp(x::T, y::T) where {T<:Kmer} = cmp(x.data, y.data)
-Base.:(==)(x::T, y::T) where {T<:Kmer} = x.data == y.data
-Base.isless(x::T, y::T) where {T<:Kmer} = isless(x.data, y.data)
-
-# TODO: Ensure this is the right way to go.
-# See https://github.com/BioJulia/BioSequences.jl/pull/121#discussion_r475234270
-Base.hash(x::Kmer{A,K,N}, h::UInt) where {A,K,N} = hash(x.data, h ⊻ K)
\ No newline at end of file
diff --git a/src/revtrans.jl b/src/revtrans.jl
index e4ebedf..dc8fb1d 100644
--- a/src/revtrans.jl
+++ b/src/revtrans.jl
@@ -18,14 +18,12 @@ julia> Set(CodonSet(v)) == Set(v)
 true
 
 julia> union(CodonSet(v), CodonSet([mer"GAG"r]))
-Kmers.CodonSet with 4 elements:
+CodonSet with 4 elements:
   GAG
   GGA
   UAG
   UUU
 ```
-
-See also: `push`
 """
 struct CodonSet <: AbstractSet{RNACodon}
     x::UInt64
@@ -33,11 +31,11 @@ struct CodonSet <: AbstractSet{RNACodon}
     CodonSet(x::UInt64, ::Unsafe) = new(x)
 end
 CodonSet() = CodonSet(UInt64(0), Unsafe())
-CodonSet(itr) = foldl(push, itr, init=CodonSet())
+CodonSet(itr) = foldl(push, itr; init=CodonSet())
 
 function Base.iterate(x::CodonSet, s::UInt64=x.x)
-    codon = RNACodon((trailing_zeros(s) % UInt64,))
-    iszero(s) ? nothing : (codon, s & (s-1))
+    codon = RNACodon(unsafe, (trailing_zeros(s) % UInt64,))
+    iszero(s) ? nothing : (codon, s & (s - 1))
 end
 
 function push(s::CodonSet, x::RNACodon)
@@ -52,8 +50,8 @@ Base.filter(f, s::CodonSet) = CodonSet(Iterators.filter(f, s))
 Base.setdiff(a::CodonSet, b::Vararg{CodonSet}) = CodonSet(a.x & ~(union(b...).x), Unsafe())
 
 for (name, f) in [(:union, |), (:intersect, &), (:symdiff, ⊻)]
-    @eval function Base.$(name)(a::CodonSet, b::Vararg{CodonSet}) 
-        CodonSet(mapreduce(i -> i.x, $f, b, init=a.x), Unsafe())
+    @eval function Base.$(name)(a::CodonSet, b::Vararg{CodonSet})
+        CodonSet(mapreduce(i -> i.x, $f, b; init=a.x), Unsafe())
     end
 end
 
@@ -76,7 +74,7 @@ inverse of the mapping through `GeneticCode`
 julia> code = ReverseGeneticCode(BioSequences.candidate_division_sr1_genetic_code);
 
 julia> code[AA_E]
-Kmers.CodonSet with 2 elements:
+CodonSet with 2 elements:
   GAA
   GAG
 
@@ -89,17 +87,17 @@ See also: [`reverse_translate`](@ref)
 """
 struct ReverseGeneticCode <: AbstractDict{AminoAcid, CodonSet}
     name::String
-    sets::NTuple{N_AA-1, CodonSet}
+    sets::NTuple{N_AA - 1, CodonSet}
 end
 
 function ReverseGeneticCode(x::BioSequences.GeneticCode)
     ind(aa::AminoAcid) = reinterpret(UInt8, aa) + 1
 
-    sets = fill(CodonSet(), N_AA-1)
+    sets = fill(CodonSet(), N_AA - 1)
     x_set = CodonSet()
     for i in Int64(0):Int64(63)
         aa = x.tbl[i + 1]
-        codon = RNACodon((i % UInt64,))
+        codon = RNACodon(unsafe, (i % UInt64,))
         sets[ind(aa)] = push(sets[ind(aa)], codon)
         if aa !== AA_Term
             x_set = push(x_set, codon)
@@ -122,9 +120,7 @@ function ReverseGeneticCode(x::BioSequences.GeneticCode)
     ReverseGeneticCode(x.name, Tuple(sets))
 end
 
-const rev_standard_genetic_code = ReverseGeneticCode(
-    BioSequences.standard_genetic_code
-)
+const rev_standard_genetic_code = ReverseGeneticCode(BioSequences.standard_genetic_code)
 
 function Base.getindex(s::ReverseGeneticCode, a::AminoAcid)
     if reinterpret(UInt8, a) > (N_AA - 2) # cannot translate gap
@@ -136,21 +132,29 @@ end
 Base.length(c::ReverseGeneticCode) = length(c.sets)
 function Base.iterate(c::ReverseGeneticCode, s=1)
     s > length(c.sets) && return nothing
-    return (reinterpret(AminoAcid, (s-1)%UInt8) => c.sets[s], s+1)
+    return (reinterpret(AminoAcid, (s - 1) % UInt8) => c.sets[s], s + 1)
 end
 
 """
-    reverse_translate!(v::Vector{CodonSet}, s::AASeq code=rev_standard_genetic_code)
+    reverse_translate!(v::Vector{CodonSet}, s::AASeq, code=rev_standard_genetic_code) -> v
 
 Reverse-translates `s` under the reverse genetic code `code`, putting the result in `v`.
 
 See also: [`reverse_translate`](@ref)
+
+# Examples:
+```jldoctest
+julia> v = CodonSet[];
+
+julia> reverse_translate!(v, aa"KWCL")
+4-element Vector{CodonSet}:
+ CodonSet(0x0000000000000005)
+ CodonSet(0x0400000000000000)
+ CodonSet(0x0a00000000000000)
+ CodonSet(0x50000000f0000000)
+```
 """
-function reverse_translate!(
-    v::Vector{CodonSet},
-    seq::AASeq,
-    code=rev_standard_genetic_code
-)
+function reverse_translate!(v::Vector{CodonSet}, seq::AASeq, code=rev_standard_genetic_code)
     resize!(v, length(seq))
     @inbounds for i in eachindex(v)
         v[i] = code[seq[i]]
@@ -168,21 +172,20 @@ If `s` is an `AASeq`, return `Vector{CodonSet}`.
 # Examples
 ```jldoctest
 julia> reverse_translate(AA_W)
-Kmers.CodonSet with 1 element:
+CodonSet with 1 element:
   UGG
 
 julia> v = reverse_translate(aa"MMLVQ");
 
 julia> typeof(v)
-Vector{Kmers.CodonSet}
+Vector{CodonSet} (alias for Array{CodonSet, 1})
 
 julia> v[4]
-Kmers.CodonSet with 4 elements:
+CodonSet with 4 elements:
   GUA
   GUC
   GUG
   GUU
-[...]
 ```
 
 See also: [`reverse_translate!`](@ref), [`ReverseGeneticCode`](@ref)
diff --git a/src/transformations.jl b/src/transformations.jl
index 6aa3456..53a3e3c 100644
--- a/src/transformations.jl
+++ b/src/transformations.jl
@@ -1,221 +1,103 @@
-
-# Bit-parallel element nucleotide complementation
-@inline function _complement_bitpar(a::A, head::UInt64, tail...) where {A<:NucleicAcidAlphabet}
-    return (BioSequences.complement_bitpar(head, A()), _complement_bitpar(a, tail...)...)
-end
-
-@inline _complement_bitpar(a::A) where {A<:NucleicAcidAlphabet} = ()
-
-@inline function pushfirst(x::Kmer{A,K,N}, nt) where {A,K,N}
-    ntbits = UInt64(BioSequences.encode(A(), nt)) << (62 - (64N - 2K))
-    #ntbits = UInt64(@inbounds BioSequences.twobitnucs[reinterpret(UInt8, nt) + 0x01]) << (62 - (64N - 2K))
-    return Kmer{A,K,N}(_rightshift_carry(2, ntbits, x.data...))
-end
-
-@inline function pushlast(x::Kmer{A,K,N}, nt) where {A,K,N}
-    ntbits = UInt64(BioSequences.encode(A(), nt))
-    #ntbits = UInt64(@inbounds BioSequences.twobitnucs[reinterpret(UInt8, nt) + 0x01])
-    _, newbits = _leftshift_carry(2, ntbits, x.data...)
-    return Kmer{A,K,N}(newbits)
-end
-
-
-###
-### Transformation methods
-###
-
-"""
-    complement(seq::T) where {T<:Kmer}
-
-Return a kmer's complement kmer.
-
-# Examples
-
-```jldoctest
-julia> complement(Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C))
-DNA 5-mer:
-AATCG
-```
-"""
-@inline function BioSequences.complement(seq::T) where {T<:Kmer}
-    return T(_complement_bitpar(Alphabet(seq), seq.data...))
-end
-
-"""
-    reverse(seq::Kmer{A,K,N}) where {A,K,N}
-
-Return a kmer that is the reverse of the input kmer.
-
-# Examples
-
-```jldoctest
-julia> reverse(Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C))
-DNA 5-mer:
-CGATT
-```
-"""
-@inline function Base.reverse(seq::Kmer{A,K,N}) where {A,K,N}
-    rdata = _reverse(BioSequences.BitsPerSymbol(seq), seq.data...)
-	# rshift should constant-fold.
-	rshift = n_unused(Kmer{A,K,N}) * BioSequences.bits_per_symbol(A())
-    return Kmer{A,K,N}(rightshift_carry(rdata, rshift)) # based on only 2 bit alphabet.
-end
-
-"""
-    reverse_complement(seq::Kmer)
-
-Return the kmer that is the reverse complement of the input kmer.
-
-# Examples
-
-```jldoctest
-julia> reverse_complement(Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C))
-DNA 5-mer:
-GCTAA
-```
-"""
-@inline function BioSequences.reverse_complement(seq::Kmer{A,K,N}) where {A,K,N}
-    return complement(reverse(seq))
+function Base.reverse(x::Kmer)
+    # ( ABC, DEFG) # reverse each element
+    # (CBA , GFED) # reverse elements
+    # (GFED, CBA ) # rightshift carry a zero
+    # ( GFE, DBCA) # final result
+    Bps = BioSequences.BitsPerSymbol(Alphabet(x))
+    data = map(i -> BioSequences.reversebits(i, Bps), reverse(x.data))
+    (_, data) = rightshift_carry(data, bits_unused(typeof(x)), zero(UInt))
+    typeof(x)(unsafe, data)
 end
 
-#=
-@inline function reverse_complement2(seq::Kmer{A,K,N}) where {A,K,N}
-    f = x -> complement_bitpar(x, A())
-    rdata = _reverse(f, BioSequences.BitsPerSymbol(seq), seq.data...)
-    return Kmer{A,K,N}(rightshift_carry(rdata, 64N - 2K))
+# For this method, we don't need to mask the unused bits, because the complement of
+# 0x0 == DNA_Gap is still DNA_Gap 
+function BioSequences.complement(x::Kmer{<:Union{DNAAlphabet{4}, RNAAlphabet{4}}})
+    isempty(x) && return x
+    data = map(i -> BioSequences.complement_bitpar(i, Alphabet(x)), x.data)
+    typeof(x)(unsafe, data)
 end
-=#
-
-"""
-    BioSequences.canonical(seq::Kmer{A,K,N}) where {A,K,N}
 
-Return the canonical sequence of `seq`.
-
-A canonical sequence is the numerical lesser of a kmer and its reverse complement.
-This is useful in hashing/counting sequences in data that is not strand specific,
-and thus observing the short sequence is equivalent to observing its reverse complement.
-
-# Examples
-
-```jldoctest
-julia> canonical(Kmer(DNA_T, DNA_T, DNA_A, DNA_G, DNA_C))
-DNA 5-mer:
-GCTAA
-```
-"""
-@inline function BioSequences.canonical(seq::Kmer{A,K,N}) where {A,K,N}
-    if N < 4
-		return min(seq, reverse_complement(seq))
-	else
-		return iscanonical(seq) ? seq : reverse_complement(seq)
-	end
+# For this method we do need to mask unused bits, unlike above
+function BioSequences.complement(x::Kmer{<:Union{DNAAlphabet{2}, RNAAlphabet{2}}})
+    isempty(x) && return x
+    data = map(i -> BioSequences.complement_bitpar(i, Alphabet(x)), x.data)
+    typeof(x)(unsafe, ((first(data) & get_mask(typeof(x))), Base.tail(data)...))
 end
 
-###
-### Old Mer specific specializations of src/biosequence/transformations.jl
-### - not currently transferred to new type.
-
-# TODO: Sort this and decide on transferring to new NTuple based kmers or no.
-
-#=
-function swap(x::T, i, j) where {T<:AbstractMer}
-    i = 2 * length(x) - 2i
-    j = 2 * length(x) - 2j
-    b = encoded_data(x)
-    x = ((b >> i) ⊻ (b >> j)) & encoded_data_type(x)(0x03)
-    return T(b ⊻ ((x << i) | (x << j)))
+# Generic fallback
+function BioSequences.complement(x::Kmer{<:NucleicAcidAlphabet})
+    typeof(x)((complement(i) for i in x))
 end
 
-
-
-function Random.shuffle(x::T) where {T<:AbstractMer}
-    # Fisher-Yates shuffle for mers.
-    j = lastindex(x)
-    for i in firstindex(x):(j - 1)
-        j′ = rand(i:j)
-        x = swap(x, i, j′)
-    end
-    return x
+function BioSequences.reverse_complement(x::Kmer)
+    @inline(reverse(@inline(complement(x))))
 end
-=#
 
-throw_translate_err(K) = error("Cannot translate Kmer of size $K not divisible by 3")
-
-@inline function setup_translate(seq::Kmer{<:NucleicAcidAlphabet, K}) where K
-    naa, rem = divrem(K, 3)
-    iszero(rem) || throw_translate_err(K)
-    kmertype(AAKmer{naa})
+function BioSequences.canonical(x::Kmer)
+    rc = reverse_complement(x)
+    ifelse(x < rc, x, rc)
 end
 
-# This sets the first amino acid to methionine, returning the data tuple
-@inline function set_methionine_data(data::Tuple{Vararg{UInt64}}, ::Val{K}) where K
-    offset = ((K - 1) * 8) & 63
-    mask = ~(UInt64(0xff) << offset) # mask off existing AA in pos 1
-    addition = UInt64(0x0c) << offset # 0x0c is encoded methionine
-    chunk, rest... = data
-    chunk = (chunk & mask) | addition
-    return (chunk, rest...)
-end
+BioSequences.iscanonical(x::Kmer) = x <= reverse_complement(x)
 
 function BioSequences.translate(
-    seq::Union{RNAKmer, DNAKmer};
-    code=BioSequences.standard_genetic_code,
-    allow_ambiguous_codons::Bool = true, # a noop for this method
-    alternative_start::Bool = false
-)   
-    T = setup_translate(seq)
-    data = blank_ntuple(T)
-    for i in 1:ksize(T)
-        a = seq[3*i - 2]
-        b = seq[3*i - 1]
-        c = seq[3*i - 0]
+    seq::Kmer{<:Union{DNAAlphabet{2}, RNAAlphabet{2}}};
+    code::BioSequences.GeneticCode=BioSequences.standard_genetic_code,
+    allow_ambiguous_codons::Bool=true, # noop in this method
+    alternative_start::Bool=false,
+)
+    iszero(ksize(typeof(seq))) && return mer""a
+    n_aa, remainder = divrem(length(seq), 3)
+    iszero(remainder) ||
+        error("LongRNA length is not divisible by three. Cannot translate.")
+    N = n_coding_elements(Kmer{AminoAcidAlphabet, n_aa})
+    T = Kmer{AminoAcidAlphabet, n_aa, N}
+    data = zero_tuple(T)
+    # In the next two lines: If alternative_start, we shift in the encoding of M
+    # to first place, then we skip the first 3 nucleotides
+    (_, data) = leftshift_carry(data, 8, UInt(0x0c) * alternative_start)
+    @inbounds for i in (1 + (3 * alternative_start)):n_aa
+        a = seq[3i - 2]
+        b = seq[3i - 1]
+        c = seq[3i - 0]
         codon = BioSequences.unambiguous_codon(a, b, c)
         aa = code[codon]
-        # Next line is equivalent to encode, but without checking.
-        # We assume genetic codes do not code to invalid data.
-        enc_data = reinterpret(UInt8, aa) % UInt64
-        data = leftshift_carry(data, 8, enc_data)
+        carry = UInt(reinterpret(UInt8, aa))
+        (_, data) =
+            leftshift_carry(data, BioSequences.bits_per_symbol(AminoAcidAlphabet()), carry)
     end
-    # This is probably not needed for kmers, but kept for compatibility.
-    # It does slightly slow down translation, even when not taken.
-    if alternative_start && !iszero(ksize(T))
-        data = set_methionine_data(data, Val(ksize(T)))
-    end
-    return T(data)
+    T(unsafe, data)
 end
 
-# See the function above for comments, or the equivalent function
-# in BioSequences
 function BioSequences.translate(
-    seq::Kmer{<:NucleicAcidAlphabet};
-    code=BioSequences.standard_genetic_code,
-    allow_ambiguous_codons::Bool = true,
-    alternative_start::Bool = false
-)    
-    T = setup_translate(seq)
-    data = blank_ntuple(T)
-    for i in 1:ksize(T)
-        a = reinterpret(RNA, seq[3*i - 2])
-        b = reinterpret(RNA, seq[3*i - 1])
-        c = reinterpret(RNA, seq[3*i - 0])
-        aa = if BioSequences.isambiguous(a) | BioSequences.isambiguous(b) | BioSequences.isambiguous(c)
-            aa_ = BioSequences.try_translate_ambiguous_codon(code, a, b, c)
-            if aa_ === nothing
-                if allow_ambiguous_codons
-                    aa_ = AA_X
-                else
-                    error("codon ", a, b, c, " cannot be unambiguously translated")
-                end
-            end
-            aa_
-        else
+    seq::Kmer{<:Union{DNAAlphabet{4}, RNAAlphabet{4}}};
+    code::BioSequences.GeneticCode=BioSequences.standard_genetic_code,
+    allow_ambiguous_codons::Bool=true,
+    alternative_start::Bool=false,
+)
+    n_aa, remainder = divrem(length(seq), 3)
+    iszero(remainder) ||
+        error("LongRNA length is not divisible by three. Cannot translate.")
+    N = n_coding_elements(Kmer{AminoAcidAlphabet, n_aa})
+    T = Kmer{AminoAcidAlphabet, n_aa, N}
+    data = zero_tuple(T)
+    # In the next two lines: If alternative_start, we shift in the encoding of M
+    # to first place, then we skip the first 3 nucleotides
+    (_, data) = leftshift_carry(data, 8, UInt(0x0c) * alternative_start)
+    @inbounds for i in (1 + (3 * alternative_start)):n_aa
+        a = reinterpret(RNA, seq[3i - 2])
+        b = reinterpret(RNA, seq[3i - 1])
+        c = reinterpret(RNA, seq[3i - 0])
+        aa = if isgap(a) | isgap(b) | isgap(c)
+            error("Cannot translate nucleotide sequences with gaps.")
+        elseif iscertain(a) & iscertain(b) & iscertain(c)
             code[BioSequences.unambiguous_codon(a, b, c)]
+        else
+            BioSequences.try_translate_ambiguous_codon(code, a, b, c, allow_ambiguous_codons)
         end
-        enc_data = reinterpret(UInt8, aa) % UInt64
-        data = leftshift_carry(data, 8, enc_data)
-    end
-    if alternative_start && !iszero(ksize(T))
-        data = set_methionine_data(data, Val(ksize(T)))
+        carry = UInt(reinterpret(UInt8, aa))
+        (_, data) =
+            leftshift_carry(data, BioSequences.bits_per_symbol(AminoAcidAlphabet()), carry)
     end
-    return T(data)
+    T(unsafe, data)
 end
diff --git a/src/tuple_bitflipping.jl b/src/tuple_bitflipping.jl
index 4476956..974bf55 100644
--- a/src/tuple_bitflipping.jl
+++ b/src/tuple_bitflipping.jl
@@ -1,72 +1,50 @@
-
-# TODO: this should end up in BioSequences.jl?
-
-"Extract the element stored in a packed bitarray referred to by bidx."
-@inline function BioSequences.extract_encoded_element(bidx::BioSequences.BitIndex{N,W}, data::NTuple{n,W}) where {N,n,W}
-    @inbounds chunk = data[BioSequences.index(bidx)]
-    offchunk = chunk >> (BioSequences.bitwidth(W) - N - BioSequences.offset(bidx))
-    return offchunk & BioSequences.bitmask(bidx)
-end
-
-
-
-"""
-    _cliphead(by::Integer, head::UInt64, tail...)
-
-A method used to mask the first `by` MSB's in `head`, before catting it with
-tail to return a NTuple.
-
-This is used internally to mask the first `by` bits in the first word of a 
-NTuple of UInt64's.
-
-Notably it's used when constructing a Kmer from an existing NTuple of UInt64
-"""
-@inline function _cliphead(by::Integer, head::UInt64, tail...)
-    return (head & (typemax(UInt64) >> by), tail...)
-end
-
-#=
-rightshift_carry & leftshift_carry
-
-These methods are micro-optimised (or should be!!!) for shifting the bits in 
-an NTuple of unsigned integers, carrying the bits "shifted off" one word 
-over to the next word. The carry can also be "seeded" so as other methods like
-pushfirst and pushlast can be efficiently implemented without duplication of code
-or less efficient implementations that first shift and then insert an element.
-=#
-
-@inline function rightshift_carry(x::NTuple{N,UInt64}, nbits::Integer, prevcarry = zero(UInt64)) where {N}
-    return _rightshift_carry(nbits, prevcarry, x...)
+# These compile to raw CPU instructions and are therefore more
+# efficient than simply using << and >>>
+@inline function left_shift(x::Unsigned, n::Integer)
+    x << (n & ((sizeof(x) * 8) - 1))
 end
 
-@inline function _rightshift_carry(nbits::Integer, carry::UInt64, head::UInt64, tail...)
-    return ((head >> nbits) | carry, _rightshift_carry(nbits, (head & ((one(UInt64) << nbits) - 1)) << (64 - nbits), tail...)...)
+@inline function right_shift(x::Unsigned, n::Integer)
+    x >>> (n & ((sizeof(x) * 8) - 1))
 end
 
-@inline _rightshift_carry(nbits::Integer, carry::UInt64) = ()
-
-@inline function leftshift_carry(x::NTuple{N,UInt64}, nbits::Integer, prevcarry::UInt64 = zero(UInt64)) where {N}
-    _, newbits = _leftshift_carry(nbits, prevcarry, x...)
-    return newbits
+# When the UInt is shifted n bits, these are the bits
+# that are shifted away (carried over)
+@inline function left_carry(x::Unsigned, n::Integer)
+    right_shift(x, 8 * sizeof(x) - n)
 end
 
-@inline function _leftshift_carry(nbits::Integer, prevcarry::UInt64, head::UInt64, tail...)
-    carry, newtail = _leftshift_carry(nbits, prevcarry, tail...)
-    return head >> (64 - nbits), ((head << nbits) | carry, newtail...)
+@inline function right_carry(x::Unsigned, n::Integer)
+    left_shift(x, 8 * sizeof(x) - n)
 end
 
-@inline _leftshift_carry(nbits::Integer, prevcarry::UInt64) = prevcarry, ()
-
-@inline function _reverse(bpe::BioSequences.BitsPerSymbol{N}, head::UInt64, tail...) where {N}
-    return (_reverse(bpe, tail...)..., BioSequences.reversebits(head, bpe))
+# Shift a tuple left nbits, carry over bits between tuple elements, and OR
+# the `carry` argument to the right side of the resulting tuple.
+# Returns (new_carry, new_tuple)
+@inline function leftshift_carry(
+    x::Tuple{Vararg{T}},
+    nbits::Integer,
+    carry::T,
+) where {T <: Unsigned}
+    isempty(x) && return x
+    (new_carry, new_tail) = leftshift_carry(tail(x), nbits, carry)
+    new_head = left_shift(first(x), nbits) | new_carry
+    (left_carry(first(x), nbits), (new_head, new_tail...))
 end
 
-@inline _reverse(::BioSequences.BitsPerSymbol{N}) where {N} = ()
-
-#=
-@inline function _reverse(f::F, bpe::BioSequences.BitsPerSymbol{N}, head::UInt64, tail...) where {N,F<:Function}
-    return (_reverse(f, bpe, tail...)..., f(reversebits(head, bpe)))
+@inline function rightshift_carry(
+    x::Tuple{Vararg{T}},
+    nbits::Integer,
+    carry::T,
+) where {T <: Unsigned}
+    isempty(x) && return x
+    new_head = right_shift(first(x), nbits) | right_carry(carry, nbits)
+    mask = left_shift(UInt(1), nbits) - 1
+    tail_carry = first(x) & mask
+    (new_carry, new_tail) = rightshift_carry(tail(x), nbits, tail_carry)
+    (new_carry, (new_head, new_tail...))
 end
 
-@inline _reverse(f::F, ::BioSequences.BitsPerSymbol{N}) where {N,F<:Function} = ()
-=#
\ No newline at end of file
+# Recusion terminator for above
+@inline leftshift_carry(::Tuple{}, nbits::Integer, carry::Unsigned) = (carry, ())
+@inline rightshift_carry(::Tuple{}, nbits::Integer, carry::Unsigned) = (carry, ())
diff --git a/sticker.svg b/sticker.svg
index 1b2ea24..c2c0b89 100644
--- a/sticker.svg
+++ b/sticker.svg
@@ -1,26 +1,169 @@
-<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 136.48101 51.472999">
-  <g id="bubble-helix">
-    <circle cx="5.9792" cy="20.3" r="5.9792" fill="#945bb0"/>
-    <circle cx="21.317" cy="39.5" r="4.9393" fill="#4266d5"/>
-    <circle cx="16.378" cy="11.7" r="4.9393" fill="#c93d39"/>
-    <circle cx="35.095" cy="35.9" r="5.9792" fill="#945bb0"/>
-    <circle cx="47.184" cy="23.8" r="7.409" fill="#4266d5"/>
-    <circle cx="28.986" cy="9.7" r="4.5494" fill="#3b972e"/>
-    <circle cx="89.428" cy="46.5" r="4.9393" fill="#c93d39"/>
-    <circle cx="71.62" cy="5.6" r="5.5892" fill="#945bb0"/>
-    <circle cx="121.01" cy="15.5" r="5.5892" fill="#c93d39"/>
-    <circle cx="59.662" cy="30.3" r="2.4697" fill="#4266d5"/>
-    <circle cx="103.6" cy="19.9" r="3.2496" fill="#c93d39"/>
-    <circle cx="123.61" cy="36.5" r="2.7296" fill="#4266d5"/>
-    <circle cx="127.64" cy="5.2" r="3.8995" fill="#3b972e"/>
-    <circle cx="132.58" cy="39.2" r="3.8995" fill="#945bb0"/>
-    <circle cx="67.591" cy="37.4" r="3.3795" fill="#3b972e"/>
-    <circle cx="95.017" cy="13.9" r="4.2894" fill="#4266d5"/>
-    <circle cx="77.729" cy="42.9" r="3.8995" fill="#945bb0"/>
-    <circle cx="102.56" cy="42.0" r="5.8492" fill="#4266d5"/>
-    <circle cx="113.6" cy="28.8" r="6.7591" fill="#3b972e"/>
-    <circle cx="57.582" cy="11.0" r="6.3691" fill="#c93d39"/>
-    <circle cx="38.735" cy="14.8" r="3.3795" fill="#c93d39"/>
-    <circle cx="84.618" cy="7.9" r="5.3293" fill="#3b972e"/>
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   viewBox="0 0 136.48101 51.472999"
+   version="1.1"
+   id="svg22"
+   sodipodi:docname="sticker.svg"
+   inkscape:version="1.3.2 (091e20ef0f, 2023-11-25, custom)"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <defs
+     id="defs22" />
+  <sodipodi:namedview
+     id="namedview22"
+     pagecolor="#ffffff"
+     bordercolor="#000000"
+     borderopacity="0.25"
+     inkscape:showpageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:deskcolor="#d1d1d1"
+     showgrid="false"
+     inkscape:zoom="8.7777784"
+     inkscape:cx="68.240501"
+     inkscape:cy="25.746834"
+     inkscape:window-width="1920"
+     inkscape:window-height="1043"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="svg22" />
+  <g
+     id="bubble-helix"
+     transform="matrix(1,0,0,-1,0,51.4501)">
+    <circle
+       cx="5.9791999"
+       cy="20.299999"
+       r="5.9791999"
+       fill="#945bb0"
+       id="circle1" />
+    <circle
+       cx="21.316999"
+       cy="39.5"
+       r="4.9393001"
+       fill="#4266d5"
+       id="circle2" />
+    <circle
+       cx="16.378"
+       cy="11.7"
+       r="4.9393001"
+       fill="#c93d39"
+       id="circle3" />
+    <circle
+       cx="35.095001"
+       cy="35.900002"
+       r="5.9791999"
+       fill="#945bb0"
+       id="circle4" />
+    <circle
+       cx="47.183998"
+       cy="23.799999"
+       r="7.4089999"
+       fill="#4266d5"
+       id="circle5" />
+    <circle
+       cx="28.986"
+       cy="9.6999998"
+       r="4.5493999"
+       fill="#3b972e"
+       id="circle6" />
+    <circle
+       cx="89.428001"
+       cy="46.5"
+       r="4.9393001"
+       fill="#c93d39"
+       id="circle7" />
+    <circle
+       cx="71.620003"
+       cy="5.5999999"
+       r="5.5892"
+       fill="#945bb0"
+       id="circle8" />
+    <circle
+       cx="121.01"
+       cy="15.5"
+       r="5.5892"
+       fill="#c93d39"
+       id="circle9" />
+    <circle
+       cx="59.661999"
+       cy="30.299999"
+       r="2.4697001"
+       fill="#4266d5"
+       id="circle10" />
+    <circle
+       cx="103.6"
+       cy="19.9"
+       r="3.2495999"
+       fill="#c93d39"
+       id="circle11" />
+    <circle
+       cx="123.61"
+       cy="36.5"
+       r="2.7296"
+       fill="#4266d5"
+       id="circle12" />
+    <circle
+       cx="127.64"
+       cy="5.1999998"
+       r="3.8994999"
+       fill="#3b972e"
+       id="circle13" />
+    <circle
+       cx="132.58"
+       cy="39.200001"
+       r="3.8994999"
+       fill="#945bb0"
+       id="circle14" />
+    <circle
+       cx="67.591003"
+       cy="37.400002"
+       r="3.3794999"
+       fill="#3b972e"
+       id="circle15" />
+    <circle
+       cx="95.016998"
+       cy="13.9"
+       r="4.2894001"
+       fill="#4266d5"
+       id="circle16" />
+    <circle
+       cx="77.728996"
+       cy="42.900002"
+       r="3.8994999"
+       fill="#945bb0"
+       id="circle17" />
+    <circle
+       cx="102.56"
+       cy="42"
+       r="5.8491998"
+       fill="#4266d5"
+       id="circle18" />
+    <circle
+       cx="113.6"
+       cy="28.799999"
+       r="6.7591"
+       fill="#3b972e"
+       id="circle19" />
+    <circle
+       cx="57.582001"
+       cy="11"
+       r="6.3691001"
+       fill="#c93d39"
+       id="circle20" />
+    <circle
+       cx="38.735001"
+       cy="14.8"
+       r="3.3794999"
+       fill="#c93d39"
+       id="circle21" />
+    <circle
+       cx="84.617996"
+       cy="7.9000001"
+       r="5.3292999"
+       fill="#3b972e"
+       id="circle22" />
   </g>
 </svg>
diff --git a/test/access.jl b/test/access.jl
deleted file mode 100644
index aa4a35e..0000000
--- a/test/access.jl
+++ /dev/null
@@ -1,96 +0,0 @@
-@testset "Access and Iterations" begin
-    dna_kmer = mer"ACTG"dna
-    rna_kmer = mer"ACUG"rna
-    aa_kmer  = mer"MVXN"aa
-
-    @testset "Access DNA Kmer" begin
-        @test dna_kmer[1] == DNA_A
-        @test dna_kmer[2] == DNA_C
-        @test dna_kmer[3] == DNA_T
-        @test dna_kmer[4] == DNA_G
-        
-        @test dna_kmer[1:3] == mer"ACT"dna
-        @test dna_kmer[2:4] == mer"CTG"dna
-
-        # Access indexes out of bounds
-        @test_throws BoundsError dna_kmer[-1]
-        @test_throws BoundsError dna_kmer[0]
-        @test_throws BoundsError dna_kmer[5]
-        @test_throws BoundsError getindex(dna_kmer,-1)
-        @test_throws BoundsError getindex(dna_kmer, 0)
-        @test_throws BoundsError getindex(dna_kmer, 5)
-        @test_throws BoundsError dna_kmer[3:7]
-    end
-
-    @testset "Iteration through DNA Kmer" begin
-        @test iterate(DNAKmer("ACTG")) == (DNA_A, 2)
-
-        @test iterate(DNAKmer("ACTG"), 1) == (DNA_A, 2)
-        @test iterate(DNAKmer("ACTG"), 4) == (DNA_G, 5)
-
-        @test iterate(DNAKmer("ACTG"), 1)  !== nothing
-        @test iterate(DNAKmer("ACTG"), 4)  !== nothing
-        @test iterate(DNAKmer("ACTG"), 5)  === nothing
-        @test isnothing(iterate(DNAKmer("ACTG"), -1))
-        @test iterate(DNAKmer("ACTG"), 0) === nothing
-
-        dna_vec = [DNA_A, DNA_C, DNA_T, DNA_G]
-        @test all([nt === dna_vec[i] for (i, nt) in enumerate(dna_kmer)])
-    end
-
-    @testset "Access RNA Kmer" begin
-        @test rna_kmer[1] == RNA_A
-        @test rna_kmer[2] == RNA_C
-        @test rna_kmer[3] == RNA_U
-        @test rna_kmer[4] == RNA_G
-        
-        @test rna_kmer[1:3] == mer"ACU"rna
-        @test rna_kmer[2:4] == mer"CUG"rna
-
-        # Access indexes out of bounds
-        @test_throws BoundsError rna_kmer[-1]
-        @test_throws BoundsError rna_kmer[0]
-        @test_throws BoundsError rna_kmer[5]
-        @test_throws BoundsError getindex(rna_kmer, -1)
-        @test_throws BoundsError getindex(rna_kmer, 0)
-        @test_throws BoundsError getindex(rna_kmer, 5)
-        @test_throws BoundsError rna_kmer[3:7]
-    end
-
-    @testset "Iteration through RNA Kmer" begin
-        @test iterate(RNAKmer("ACUG")) == (RNA_A, 2)
-        
-
-        @test iterate(RNAKmer("ACUG"), 1) == (RNA_A, 2)
-        @test iterate(RNAKmer("ACUG"), 4) == (RNA_G, 5)
-        
-
-        @test iterate(RNAKmer("ACUG"), 1) !== nothing
-        @test iterate(RNAKmer("ACUG"), 4)  !== nothing
-        @test iterate(RNAKmer("ACUG"), 5)  === nothing
-        @test iterate(RNAKmer("ACUG"), -1) === nothing
-        @test iterate(RNAKmer("ACUG"), 0) === nothing
-
-        rna_vec = [RNA_A, RNA_C, RNA_U, RNA_G]
-        @test all([nt === rna_vec[i] for (i, nt) in enumerate(rna_kmer)])
-    end
-    
-    @testset "Access AA Kmer" begin
-        @test aa_kmer[1] == AA_M
-        @test aa_kmer[2] == AA_V
-        @test aa_kmer[3] == AA_X
-        @test aa_kmer[4] == AA_N
-
-        @test aa_kmer[1:3] == mer"MVX"aa
-        @test aa_kmer[2:4] == mer"VXN"aa
-
-        # Access indexes out of bounds
-        @test_throws BoundsError aa_kmer[-1]
-        @test_throws BoundsError aa_kmer[0]
-        @test_throws BoundsError aa_kmer[5]
-        @test_throws BoundsError getindex(aa_kmer,-1)
-        @test_throws BoundsError getindex(aa_kmer, 0)
-        @test_throws BoundsError getindex(aa_kmer, 5)
-        @test_throws BoundsError aa_kmer[3:7]
-    end
-end
diff --git a/test/benchmark.jl b/test/benchmark.jl
new file mode 100644
index 0000000..b166d45
--- /dev/null
+++ b/test/benchmark.jl
@@ -0,0 +1,125 @@
+module TestBenchmarks
+
+using Kmers
+
+extract_kmer(::Type{<:Kmers.AbstractKmerIterator}, x) = x
+extract_kmer(::Type{<:FwRvIterator}, x) = first(x)
+extract_kmer(::Type{<:UnambiguousKmers}, x) = first(x)
+
+function reducer(it)
+    y = 0
+    for i in it
+        y ⊻= extract_kmer(typeof(it), i).data[1]
+    end
+    y
+end
+
+using Random: Xoshiro
+using BioSequences
+rng = Xoshiro(439824)
+
+const N = 10_000_000
+
+const seq_2bit = randseq(DNAAlphabet{2}(), N)
+const seq_4bit = randrnaseq(N)
+const seq_aa = randaaseq(N)
+const data = String(rand(codeunits("AaCcGgTt"), N))
+
+const names = [
+    ("2-bit LongSequence", seq_2bit),
+    ("4-bit LongSequence", seq_4bit),
+    ("AA LongSequence", seq_aa),
+    ("String", data),
+]
+
+println("Time to iterate over $N symbols:\n")
+println("FwKmers")
+for (name, seq) in names[1:3]
+    it = FwKmers{typeof(Alphabet(seq)), 7}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = FwDNAMers{7}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+println("FwRvIterator")
+for (name, seq) in names[1:2]
+    it = FwRvIterator{typeof(Alphabet(seq)), 7}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = FwRvIterator{DNAAlphabet{2}, 7}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+println("CanonicalKmers")
+for (name, seq) in names[1:2]
+    it = CanonicalKmers{typeof(Alphabet(seq)), 7}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = CanonicalDNAMers{7}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+println("UnambiguousKmers")
+for (name, seq) in names[1:2]
+    it = UnambiguousRNAMers{7}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = UnambiguousRNAMers{7}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+println("SpacedKmers, step 5")
+for (name, seq) in names[1:3]
+    it = SpacedKmers{typeof(Alphabet(seq)), 7, 5}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = SpacedDNAMers{7, 5}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+println("SpacedKmers, step 7")
+for (name, seq) in names[1:3]
+    it = SpacedKmers{typeof(Alphabet(seq)), 7, 7}(seq)
+    y = reducer(it)
+    @time "    " * name reducer(it)
+end
+it = SpacedDNAMers{7, 7}(data)
+y = reducer(it)
+@time "    String" reducer(it)
+
+function unsafe_extract_minimizer(seq::LongDNA{2}, i::Int, ::Val{K}, ::Val{W}) where {K, W}
+    T = derive_type(Kmer{DNAAlphabet{2}, K})
+    kmer = Kmers.unsafe_extract(Kmers.Copyable(), T, seq, i)
+    hash = fx_hash(kmer)
+    for offset in 0:(W - 2)
+        new_kmer =
+            Kmers.unsafe_shift_from(Kmers.Copyable(), kmer, seq, i + K + offset, Val(1))
+        new_hash = fx_hash(new_kmer)
+        if new_hash < hash
+            hash = new_hash
+            kmer = new_kmer
+        end
+    end
+    kmer
+end
+
+function benchmark_minimizer(seq)
+    y = 0
+    for i in 1:20:(length(seq) - 19)
+        mer = unsafe_extract_minimizer(seq, i, Val{8}(), Val{20}())
+        y ⊻= mer.data[1]
+    end
+    y
+end
+
+println("Minimizer")
+benchmark_minimizer(seq_2bit)
+@time "    2-bit LongSequence" benchmark_minimizer(seq_2bit)
+
+end # module
diff --git a/test/biosequences_interface.jl b/test/biosequences_interface.jl
deleted file mode 100644
index 5db38a3..0000000
--- a/test/biosequences_interface.jl
+++ /dev/null
@@ -1,12 +0,0 @@
-@testset "BioSequences Interface" begin
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{DNAAlphabet{2},31}), rand(ACGT, 31), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{DNAAlphabet{4},31}), rand(ACGT, 31), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{RNAAlphabet{2},31}), rand(ACGU, 31), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{RNAAlphabet{4},31}), rand(ACGU, 31), false)
-    
-    
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{DNAAlphabet{2},200}), rand(ACGT, 200), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{DNAAlphabet{4},200}), rand(ACGT, 200), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{RNAAlphabet{2},200}), rand(ACGU, 200), false)
-    @test BioSequences.has_interface(BioSequence, Kmers.kmertype(Kmer{RNAAlphabet{4},200}), rand(ACGU, 200), false)
-end
\ No newline at end of file
diff --git a/test/comparisons.jl b/test/comparisons.jl
deleted file mode 100644
index 78bf48a..0000000
--- a/test/comparisons.jl
+++ /dev/null
@@ -1,64 +0,0 @@
-@testset "Comparisons" begin
-    @testset "Equality" begin
-        function check_seq_kmer_equality(len)
-            a = DNAKmer(random_dna_kmer(len))
-            b = LongDNA{4}(a)
-            c = LongDNA{2}(a)
-            return a == b == c && c == b == a
-        end
-
-        for len in [1, 10, 32, 64, 128]
-            @test all(Bool[check_seq_kmer_equality(len) for _ in 1:reps])
-        end
-
-        # True negatives
-        @test DNAKmer("ACG") != RNAKmer("ACG")
-        @test DNAKmer("T")   != RNAKmer("U")
-        @test DNAKmer("AC")  != DNAKmer("AG")
-        @test RNAKmer("AC")  != RNAKmer("AG")
-        @test AAKmer("MV")   != AAKmer("NM")
-
-        @test DNAKmer("ACG") != rna"ACG"
-        @test DNAKmer("T")   != rna"U"
-        @test DNAKmer("AC")  != dna"AG"
-        @test RNAKmer("AC")  != rna"AG"
-        @test AAKmer("MV")   != aa"NM"
-
-        @test rna"ACG" != DNAKmer("ACG")
-        @test rna"U"   != DNAKmer("T")
-        @test dna"AG"  != DNAKmer("AC")
-        @test rna"AG"  != RNAKmer("AC")
-        @test aa"MV"   != AAKmer("NM")
-    end
-
-    @testset "Inequality" begin
-        for len in [1, 10, 32, 64]
-            if len <= 32
-                @test  isless(DNAKmer{1}((UInt64(0),)), DNAKmer{1}((UInt64(1),)))
-                @test !isless(DNAKmer{1}((UInt64(0),)), DNAKmer{1}((UInt64(0),)))
-                @test !isless(DNAKmer{1}((UInt64(1),)), DNAKmer{1}((UInt64(0),)))
-                
-                @test  isless(RNAKmer{1}((UInt64(0),)), RNAKmer{1}((UInt64(1),)))
-                @test !isless(RNAKmer{1}((UInt64(0),)), RNAKmer{1}((UInt64(0),)))
-                @test !isless(RNAKmer{1}((UInt64(1),)), RNAKmer{1}((UInt64(0),)))
-            end
-        end
-    end
-
-    @testset "Hash" begin
-        kmers = map(DNAKmer, ["AAAA", "AACT", "ACGT", "TGCA"])
-        for x in kmers, y in kmers
-            @test (x == y) == (hash(x) == hash(y))
-        end
-        
-        kmers = map(RNAKmer, ["AAAA", "AACU", "ACGU", "UGCA"])
-        for x in kmers, y in kmers
-            @test (x == y) == (hash(x) == hash(y))
-        end
-        
-        kmers = map(AAKmer, ["AMVK", "FPST", "QEGH", "ARND"])
-        for x in kmers, y in kmers
-            @test (x == y) == (hash(x) == hash(y))
-        end
-    end
-end
diff --git a/test/construction_and_conversion.jl b/test/construction_and_conversion.jl
deleted file mode 100644
index 567042c..0000000
--- a/test/construction_and_conversion.jl
+++ /dev/null
@@ -1,170 +0,0 @@
-global reps = 10
-
-@testset "Construction and Conversions" begin
-    @test Kmer(DNA_A, DNA_G, DNA_T) === Kmer("AGT")
-    @test Kmer(RNA_A, RNA_G, RNA_U) === Kmer("AGU")
-    #@test Kmer(AA_R, AA_D, AA_C, AA_B) === Kmer("RDCB")
-
-    @test DNAKmer(DNA_G, DNA_C, DNA_T) == Kmer("GCT")
-    @test RNAKmer(RNA_G, RNA_U, RNA_C, RNA_U) == Kmer("GUCU")
-    
-    # creation from iterator
-    @test Kmers.kmertype(Kmer{DNAAlphabet{2},31})((i for i in rand(ACGT, 31))) isa Kmers.kmertype(Kmer{DNAAlphabet{2},31})
-    
-    # Check that kmers in strings survive round trip conversion:
-    #   String → Kmer → String
-    function check_string_construction(::Type{T}, seq::AbstractString) where {T<:Kmer}
-        return String(T(seq)) == uppercase(seq)
-    end
-    
-    # Check that RNAKmers can be constructed from a LongRNASeq
-    #   LongSequence{A} → Kmer{A,K,N} → LongSequence{A}
-    function check_longsequence_construction(::Type{T}, seq::S) where {T<:Kmer,S<:LongSequence}
-        return S(T(seq)) == seq
-    end
-
-    # Check that kmers can be constructed from a BioSequence
-    #   BioSequence → Kmer → BioSequence
-    function check_biosequence_construction(::Type{T}, seq::LongSequence) where {T<:Kmer}
-        return LongSequence(T(seq)) == seq
-    end
-
-    # Check that kmers can be constructed from an array of nucleotides
-    #   Vector{T} → Kmer → Vector{T}
-    function check_nucarray_kmer(::Type{M}, seq::Vector{T}) where {T,M<:Kmer}
-        return String([convert(Char, c) for c in seq]) == String(M(seq))
-    end
-
-    # Check that kmers in strings survive round trip conversion:
-    #   String → BioSequence → Kmer → BioSequence → String
-    function check_roundabout_construction(::Type{T}, A2, seq::AbstractString) where {T<:Kmer}
-        return String(LongSequence{A2}(T(LongSequence{A2}(seq)))) == uppercase(seq)
-    end
-    
-    #=
-    function check_uint_conversion(::Type{T}) where {T<:Kmer}
-        U = BioSequences.encoded_data_type(T)
-        uint = rand(typemin(U):U(one(U) << 2BioSequences.ksize(T) - 1))
-        return convert(U, T(uint)) === uint
-    end
-    =#
-
-    @testset "Kmer conversion" begin
-        for len in [1, 16, 32, 64, 128]
-            # String construction
-            #   Check that kmers in strings survive round trip conversion:
-            #   String → Kmer → String
-            @test all(Bool[check_string_construction(DNAKmer{len},             random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_string_construction(Kmer{DNAAlphabet{4},len}, random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_string_construction(RNAKmer{len},             random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_string_construction(Kmer{RNAAlphabet{4},len}, random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_string_construction(AAKmer{len},              random_aa(len))       for _ in 1:reps])
-            
-            # Long(DNA|RNA)Seq Constructions
-            #   Check that DNAKmers can be constructed from a Long(DNA|RNA)Seq
-            #   Long(DNA|RNA)Seq → Kmer → Long(DNA|RNA)Seq
-            @test all(Bool[check_longsequence_construction(Kmer{DNAAlphabet{2},len}, LongDNA{2}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{DNAAlphabet{4},len}, LongDNA{4}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{DNAAlphabet{4},len}, LongDNA{2}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{DNAAlphabet{2},len}, LongDNA{4}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{2},len}, LongRNA{2}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{4},len}, LongRNA{4}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{4},len}, LongRNA{2}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{2},len}, LongRNA{4}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(AAKmer{len},              LongAA(random_aa(len)))    for _ in 1:reps])
-            
-            # Check Kmer{A1}(::BioSequence{A2}) for compatible A1 and A2
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{4}}, LongRNA{2}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{2}}, LongDNA{4}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{RNAAlphabet{4}}, LongDNA{4}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer{DNAAlphabet{2}}, LongRNA{4}(random_rna_kmer(len))) for _ in 1:reps])
-
-            # BioSequence Construction
-            #   Check that kmers can be constructed from a BioSequence
-            #   BioSequence → Kmer → BioSequence
-            @test all(Bool[check_biosequence_construction(Kmer{DNAAlphabet{2},len}, LongSequence{DNAAlphabet{2}}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{DNAAlphabet{4},len}, LongSequence{DNAAlphabet{4}}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{DNAAlphabet{2},len}, LongSequence{DNAAlphabet{4}}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{DNAAlphabet{4},len}, LongSequence{DNAAlphabet{2}}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{RNAAlphabet{2},len}, LongSequence{RNAAlphabet{2}}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{RNAAlphabet{4},len}, LongSequence{RNAAlphabet{4}}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{RNAAlphabet{2},len}, LongSequence{RNAAlphabet{4}}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(Kmer{RNAAlphabet{4},len}, LongSequence{RNAAlphabet{2}}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_biosequence_construction(AAKmer{len},              LongSequence{AminoAcidAlphabet}(random_aa(len)))    for _ in 1:reps])
-
-            # Check Kmer(::BioSequence) construction
-            @test all(Bool[check_longsequence_construction(Kmer,                     LongRNA{4}(random_rna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer,                     LongDNA{2}(random_dna_kmer(len))) for _ in 1:reps])
-            @test all(Bool[check_longsequence_construction(Kmer,                     LongAA(random_rna_kmer(len))) for _ in 1:reps])
-
-            # Construction from element arrays
-            #   Check that kmers can be constructed from an array of elements
-            #   Vector{T} → Kmer{A,K,N} → Vector{T}
-            @test all(Bool[check_nucarray_kmer(Kmer{DNAAlphabet{2},len}, random_dna_symbols(len, [0.25, 0.25, 0.25, 0.25, 0.0])) for _ in 1:reps])
-            @test all(Bool[check_nucarray_kmer(Kmer{DNAAlphabet{4},len}, random_dna_symbols(len)) for _ in 1:reps])
-            @test all(Bool[check_nucarray_kmer(Kmer{RNAAlphabet{2},len}, random_rna_symbols(len, [0.25, 0.25, 0.25, 0.25, 0.0])) for _ in 1:reps])
-            @test all(Bool[check_nucarray_kmer(Kmer{RNAAlphabet{4},len}, random_rna_symbols(len)) for _ in 1:reps])
-            @test all(Bool[check_nucarray_kmer(AAKmer{len},              random_aa_symbols(len))     for _ in 1:reps])
-            
-            # Roundabout conversions
-            @test all(Bool[check_roundabout_construction(Kmer{DNAAlphabet{2},len}, DNAAlphabet{2},    random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{DNAAlphabet{4},len}, DNAAlphabet{4},    random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{DNAAlphabet{2},len}, DNAAlphabet{4},    random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{DNAAlphabet{4},len}, DNAAlphabet{2},    random_dna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{RNAAlphabet{2},len}, RNAAlphabet{2},    random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{RNAAlphabet{4},len}, RNAAlphabet{4},    random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{RNAAlphabet{2},len}, RNAAlphabet{4},    random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(Kmer{RNAAlphabet{4},len}, RNAAlphabet{2},    random_rna_kmer(len)) for _ in 1:reps])
-            @test all(Bool[check_roundabout_construction(AAKmer{len},              AminoAcidAlphabet, random_aa(len))       for _ in 1:reps])
-        end
-    end
-
-    @test_throws MethodError Kmer() # can't construct 0-mer using `Kmer()`
-    @test_throws ArgumentError DNAKmer(dna"") # 0-mers not allowed
-    @test_throws ArgumentError AAKmer(aa"") # 0-mers not allowed
-    @test_throws ArgumentError DNAKmer{0}(UInt64(0)) # 0-mers not allowed
-    @test_throws ArgumentError RNAKmer{0}(UInt64(0)) # 0-mers not allowed
-    @test_throws ArgumentError AAKmer{0}(UInt64(0)) # 0-mers not allowed
-    @test_throws BioSequences.EncodeError Kmer(RNA_A, RNA_C, RNA_G, RNA_N, RNA_U) # no Ns in kmers
-    @test_throws BioSequences.EncodeError Kmer(DNA_A, DNA_C, DNA_G, DNA_N, DNA_T) # no Ns in kmers
-    @test_throws BioSequences.EncodeError RNAKmer(rna"ACGNU") # no Ns in 2-bit nucleic acid kmers
-    @test_throws BioSequences.EncodeError DNAKmer(dna"ACGNT") # no Ns in 2-bit nucleic acid kmers
-    @test_throws MethodError Kmer(RNA_A, DNA_A) # no mixing of RNA and DNA
-
-    @testset "From strings" begin
-        @test DNAKmer("ACTG") == DNAKmer(LongDNA{4}("ACTG"))
-        @test RNAKmer("ACUG") == RNAKmer(LongRNA{4}("ACUG"))
-
-        # N is not allowed in Kmers
-        @test_throws Exception DNAMmer("ACGTNACGT")
-        @test_throws Exception RNAKmer("ACGUNACGU")
-
-        # Test string literals
-        @test mer"ACTG"dna == DNAKmer(LongDNA{4}("ACTG"))
-        @test mer"AVBM"aa  == AAKmer(LongAA("AVBM"))
-        @test isa(mer"ACGT"dna, DNAKmer{4})
-        @test isa(mer"AVBM"aa,  AAKmer{4})
-        @test_throws LoadError eval(:(mer"ACGN"dna))
-        @test_throws LoadError eval(:(mer"ACG-"dna))
-    end
-    
-    @testset "Capacity" begin
-        @test Kmers.capacity(DNAKmer(random_dna_kmer(10))) == 32
-        @test Kmers.capacity(RNAKmer(random_rna_kmer(10))) == 32
-        @test Kmers.capacity(DNAKmer(random_dna_kmer(32))) == 32
-        @test Kmers.capacity(RNAKmer(random_rna_kmer(32))) == 32
-        @test Kmers.capacity(DNAKmer(random_dna_kmer(33))) == 64
-        @test Kmers.capacity(AAKmer(random_aa(8))) == 8
-        @test Kmers.capacity(AAKmer(random_aa(10))) == 16
-    end
-    
-    @testset "N unused" begin
-        @test Kmers.n_unused(DNAKmer(random_dna_kmer(10))) == 22
-        @test Kmers.n_unused(RNAKmer(random_rna_kmer(10))) == 22
-        @test Kmers.n_unused(DNAKmer(random_dna_kmer(32))) == 0
-        @test Kmers.n_unused(RNAKmer(random_rna_kmer(32))) == 0
-        @test Kmers.n_unused(DNAKmer(random_dna_kmer(33))) == 31
-        @test Kmers.n_unused(AAKmer(random_aa(8))) == 0
-        @test Kmers.n_unused(AAKmer(random_aa(10))) == 6
-    end
-end
diff --git a/test/debruijn_neighbors.jl b/test/debruijn_neighbors.jl
deleted file mode 100644
index c011b84..0000000
--- a/test/debruijn_neighbors.jl
+++ /dev/null
@@ -1,6 +0,0 @@
-@testset "De Bruijn Neighbors" begin
-    @test collect(fw_neighbors(DNAKmer("ACG")))  == map(DNAKmer, ["CGA",  "CGC",  "CGG",  "CGT"])
-    @test collect(fw_neighbors(DNAKmer("GGGG"))) == map(DNAKmer, ["GGGA", "GGGC", "GGGG", "GGGT"])
-    @test collect(fw_neighbors(RNAKmer("ACG")))  == map(RNAKmer, ["CGA",  "CGC",  "CGG",  "CGU"])
-    @test collect(fw_neighbors(RNAKmer("GGGG"))) == map(RNAKmer, ["GGGA", "GGGC", "GGGG", "GGGU"])
-end
diff --git a/test/find.jl b/test/find.jl
deleted file mode 100644
index c3d4aaa..0000000
--- a/test/find.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-@testset "Find" begin
-    kmer = DNAKmer("ACGAG")
-
-    @test findnext(DNA_A, kmer, 1) == 1
-    @test findnext(DNA_C, kmer, 1) == 2
-    @test findnext(DNA_G, kmer, 1) == 3
-    @test findnext(DNA_T, kmer, 1) == nothing
-    @test findnext(DNA_A, kmer, 2) == 4
-
-    @test_throws BoundsError findnext(DNA_A, kmer, 0)
-    @test findnext(DNA_A, kmer, 6) === nothing
-
-    @test findprev(DNA_A, kmer, 5) == 4
-    @test findprev(DNA_C, kmer, 5) == 2
-    @test findprev(DNA_G, kmer, 5) == 5
-    @test findprev(DNA_T, kmer, 5) == nothing
-    @test findprev(DNA_G, kmer, 4) == 3
-
-    @test findprev(DNA_A, kmer, 0) === nothing
-    @test_throws BoundsError findprev(DNA_A, kmer, 6)
-
-    @test findfirst(DNA_A, kmer) == 1
-    @test findfirst(DNA_G, kmer) == 3
-    @test findlast(DNA_A, kmer) == 4
-    @test findlast(DNA_G, kmer) == 5
-    
-    kmer = AAKmer("AMVKFPSMT")
-    
-    @test findnext(AA_A, kmer, 1) == 1
-    @test findnext(AA_M, kmer, 1) == 2
-    @test findnext(AA_V, kmer, 1) == 3
-    @test findnext(AA_K, kmer, 1) == 4
-    @test findnext(AA_F, kmer, 1) == 5
-    @test findnext(AA_P, kmer, 1) == 6
-    @test findnext(AA_S, kmer, 1) == 7
-    @test findnext(AA_M, kmer, 1) == 2
-    @test findnext(AA_T, kmer, 1) == 9
-    
-    @test findnext(AA_F, kmer, 4) == 5
-    @test findprev(AA_F, kmer, 4) == nothing
-    @test findnext(AA_A, kmer, 7) == nothing
-    @test findnext(AA_M, kmer, 5) == 8
-    
-    @test findfirst(AA_M, kmer) == 2
-    @test findlast(AA_M, kmer) == 8
-end
diff --git a/test/iteration.jl b/test/iteration.jl
deleted file mode 100644
index 801a4b0..0000000
--- a/test/iteration.jl
+++ /dev/null
@@ -1,214 +0,0 @@
-@testset "EveryKmer" begin
-    @testset "EveryKmer DNA" begin
-        s = randdnaseq(500)
-        s2 = LongDNA{2}(s)
-        # Kmer and sequence Alphabets match.
-        @test collect(EveryKmer(s, Val{31}())) == collect(EveryKmer(s2, Val{31}()))
-        @test length(EveryKmer(s, Val{31}())) == length(EveryKmer(s2, Val{31}())) == 470
-        
-        @test collect(EveryKmer(s, Val{201}())) == collect(EveryKmer(s2, Val{201}()))
-        @test length(EveryKmer(s, Val{201}())) == length(EveryKmer(s2, Val{201}())) == 300
-        
-        # Kmer and sequence Alphabets mismatch.
-        s3 = dna"AC-TGAG--TGC"
-        @test collect(EveryKmer{DNACodon}(s3)) ==
-            [(UInt64(4), Kmer(DNA_T, DNA_G, DNA_A)),
-             (UInt64(5), Kmer(DNA_G, DNA_A, DNA_G)),
-             (UInt64(10), Kmer(DNA_T, DNA_G, DNA_C))]
-    end
-    
-    @testset "EveryKmer RNA" begin
-        s = randrnaseq(500)
-        s2 = LongRNA{2}(s)
-        # Kmer and sequence Alphabets match.
-        @test collect(EveryKmer(s, Val{31}())) == collect(EveryKmer(s2, Val{31}()))
-        @test length(EveryKmer(s, Val{31}())) == length(EveryKmer(s2, Val{31}())) == 470
-        
-        @test collect(EveryKmer(s, Val{201}())) == collect(EveryKmer(s2, Val{201}()))
-        @test length(EveryKmer(s, Val{201}())) == length(EveryKmer(s2, Val{201}())) == 300
-        
-        # Kmer and sequence Alphabets mismatch.
-        s3 = rna"AC-UGAG--UGC"
-        @test collect(EveryKmer{RNACodon}(s3)) ==
-            [(UInt64(4), Kmer(RNA_U, RNA_G, RNA_A)),
-             (UInt64(5), Kmer(RNA_G, RNA_A, RNA_G)),
-             (UInt64(10), Kmer(RNA_U, RNA_G, RNA_C))]
-    end
-    
-    @testset "EveryKmer AA" begin
-        s = randaaseq(500)
-        s2 = LongAA(s)
-        @test collect(EveryKmer(s, Val{31}())) == collect(EveryKmer(s2, Val{31}()))
-        @test length(EveryKmer(s, Val{31}())) == length(EveryKmer(s2, Val{31}())) == 470
-    
-        @test collect(EveryKmer(s, Val{201}())) == collect(EveryKmer(s2, Val{201}()))
-        @test length(EveryKmer(s, Val{201}())) == length(EveryKmer(s2, Val{201}())) == 300
-    end
-end
-
-@testset "SpacedKmers" begin
-    @testset "SpacedKmers DNA" begin
-        s = randdnaseq(500)
-        s2 = LongDNA{2}(s)
-        @test collect(SpacedKmers(s, Val{31}(), 50)) == collect(SpacedKmers(s2, Val{31}(), 50))
-        @test length(SpacedKmers(s, Val{31}(), 50)) == length(SpacedKmers(s2, Val{31}(), 50)) == 10
-        
-        @test collect(SpacedKmers(s, Val{201}(), 50)) == collect(SpacedKmers(s2, Val{201}(), 50))
-        @test length(SpacedKmers(s, Val{201}(), 50)) == length(SpacedKmers(s2, Val{201}(), 50)) == 6
-        
-        s3 = dna"AC-TGAG--TGC"
-        @test collect(SpacedKmers{DNACodon}(s3, 3)) ==
-            [(UInt64(4), Kmer(DNA_T, DNA_G, DNA_A)),
-             (UInt64(10), Kmer(DNA_T, DNA_G, DNA_C))]
-    end
-    
-    @testset "SpacedKmers RNA" begin
-        s = randrnaseq(500)
-        s2 = LongRNA{2}(s)
-        @test collect(SpacedKmers(s, Val{31}(), 50)) == collect(SpacedKmers(s2, Val{31}(), 50))
-        @test length(SpacedKmers(s, Val{31}(), 50)) == length(SpacedKmers(s2, Val{31}(), 50)) == 10
-        
-        @test collect(SpacedKmers(s, Val{201}(), 50)) == collect(SpacedKmers(s2, Val{201}(), 50))
-        @test length(SpacedKmers(s, Val{201}(), 50)) == length(SpacedKmers(s2, Val{201}(), 50)) == 6
-        
-        s3 = rna"AC-UGAG--UGC"
-        @test collect(SpacedKmers{RNACodon}(s3, 3)) ==
-            [(UInt64(4), Kmer(RNA_U, RNA_G, RNA_A)),
-             (UInt64(10), Kmer(RNA_U, RNA_G, RNA_C))]
-    end
-    
-    @testset "SpacedKmers AA" begin
-        s = randaaseq(500)
-        s2 = LongAA(s)
-        @test collect(SpacedKmers(s, Val{31}(), 50)) == collect(SpacedKmers(s2, Val{31}(), 50))
-        @test length(SpacedKmers(s, Val{31}(), 50)) == length(SpacedKmers(s2, Val{31}(), 50)) == 10
-    
-        @test collect(SpacedKmers(s, Val{201}(), 50)) == collect(SpacedKmers(s2, Val{201}(), 50))
-        @test length(SpacedKmers(s, Val{201}(), 50)) == length(SpacedKmers(s2, Val{201}(), 50)) == 6
-    end
-end
-
-@testset "EveryCanonicalKmer" begin
-    @testset "EveryCanonicalKmer DNA" begin
-        s = randdnaseq(500)
-        s2 = LongDNA{2}(s)
-        
-        # Iterator generates expected results...
-        ## 2-Bit DNA
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s2, Val{31}())] ==
-            collect(EveryCanonicalKmer(s2, Val{31}()))
-        
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s2, Val{201}())] ==
-            collect(EveryCanonicalKmer(s2, Val{201}()))
-        
-        ## 4-Bit DNA
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s, Val{31}())] ==
-            collect(EveryCanonicalKmer(s, Val{31}()))
-            
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s, Val{201}())] ==
-            collect(EveryCanonicalKmer(s, Val{201}()))
-        
-        # Test equivalency between different levels of bit compression...
-        @test [x[2] for x in EveryCanonicalKmer(s, Val{31}())] ==
-            [x[2] for x in EveryCanonicalKmer(s2, Val{31}())]
-        @test all(iscanonical.([x[2] for x in EveryCanonicalKmer(s, Val{31}())])) &&
-            all(iscanonical.([x[2] for x in EveryCanonicalKmer(s2, Val{31}())]))
-        
-        @test [x[2] for x in EveryCanonicalKmer(s, Val{201}())] ==
-            [x[2] for x in EveryCanonicalKmer(s2, Val{201}())]
-        @test all(iscanonical.([x[2] for x in EveryCanonicalKmer(s, Val{201}())])) &&
-            all(iscanonical.([x[2] for x in EveryCanonicalKmer(s2, Val{201}())]))
-        
-        # Kmer and sequence Alphabets mismatch.
-        s3 = dna"AC-TGAG--TGC"
-        @test collect(EveryCanonicalKmer{DNACodon}(s3)) ==
-            [(UInt64(4), canonical(Kmer(DNA_T, DNA_G, DNA_A))),
-             (UInt64(5), canonical(Kmer(DNA_G, DNA_A, DNA_G))),
-             (UInt64(10), canonical(Kmer(DNA_T, DNA_G, DNA_C)))]
-    end
-    
-    @testset "EveryCanonicalKmer RNA" begin
-        s = randrnaseq(500)
-        s2 = LongRNA{2}(s)
-        
-        # Iterator generates expected results...
-        ## 2-Bit DNA
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s2, Val{31}())] ==
-            collect(EveryCanonicalKmer(s2, Val{31}()))
-        
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s2, Val{201}())] ==
-            collect(EveryCanonicalKmer(s2, Val{201}()))
-        
-        ## 4-Bit DNA
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s, Val{31}())] ==
-            collect(EveryCanonicalKmer(s, Val{31}()))
-            
-        @test [(x[1], canonical(x[2])) for x in EveryKmer(s, Val{201}())] ==
-            collect(EveryCanonicalKmer(s, Val{201}()))
-        
-        # Test equivalency between different levels of bit compression...
-        @test [x[2] for x in EveryCanonicalKmer(s, Val{31}())] ==
-            [x[2] for x in EveryCanonicalKmer(s2, Val{31}())]
-        @test all(iscanonical.([x[2] for x in EveryCanonicalKmer(s, Val{31}())])) &&
-            all(iscanonical.([x[2] for x in EveryCanonicalKmer(s2, Val{31}())]))
-        
-        @test [x[2] for x in EveryCanonicalKmer(s, Val{201}())] ==
-            [x[2] for x in EveryCanonicalKmer(s2, Val{201}())]
-        @test all(iscanonical.([x[2] for x in EveryCanonicalKmer(s, Val{201}())])) &&
-            all(iscanonical.([x[2] for x in EveryCanonicalKmer(s2, Val{201}())]))
-        
-        s3 = rna"AC-UGAG--UGC"
-        @test collect(EveryCanonicalKmer{RNACodon}(s3)) ==
-            [(UInt64(4), canonical(Kmer(RNA_U, RNA_G, RNA_A))),
-             (UInt64(5), canonical(Kmer(RNA_G, RNA_A, RNA_G))),
-             (UInt64(10), canonical(Kmer(RNA_U, RNA_G, RNA_C)))]
-    end
-end
-
-@testset "SpacedCanonicalKmers" begin
-    @testset "SpacedCanonicalKmers DNA" begin
-        s = randdnaseq(500)
-        s2 = LongDNA{2}(s)
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s, Val{31}(), 50))
-        @test collect(SpacedCanonicalKmers(s, Val{31}(), 50)) == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test length(SpacedCanonicalKmers(s, Val{31}(), 50)) == length(SpacedCanonicalKmers(s2, Val{31}(), 50)) == 10
-        
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s, Val{201}(), 50))
-        @test collect(SpacedCanonicalKmers(s, Val{201}(), 50)) == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test length(SpacedCanonicalKmers(s, Val{201}(), 50)) == length(SpacedCanonicalKmers(s2, Val{201}(), 50)) == 6
-        
-        s3 = dna"AC-TGAG--TGC"
-        @test collect(SpacedCanonicalKmers{DNACodon}(s3, 3)) ==
-            [(UInt64(4), canonical(Kmer(DNA_T, DNA_C, DNA_A))),
-             (UInt64(10), canonical(Kmer(DNA_T, DNA_G, DNA_C)))]
-    end
-    
-    @testset "SpacedCanonicalKmers RNA" begin
-        s = randrnaseq(500)
-        s2 = LongRNA{2}(s)
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{31}(), 50)] == collect(SpacedCanonicalKmers(s, Val{31}(), 50))
-        @test collect(SpacedCanonicalKmers(s, Val{31}(), 50)) == collect(SpacedCanonicalKmers(s2, Val{31}(), 50))
-        @test length(SpacedCanonicalKmers(s, Val{31}(), 50)) == length(SpacedCanonicalKmers(s2, Val{31}(), 50)) == 10
-        
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test [(x[1], canonical(x[2])) for x in SpacedKmers(s2, Val{201}(), 50)] == collect(SpacedCanonicalKmers(s, Val{201}(), 50))
-        @test collect(SpacedCanonicalKmers(s, Val{201}(), 50)) == collect(SpacedCanonicalKmers(s2, Val{201}(), 50))
-        @test length(SpacedCanonicalKmers(s, Val{201}(), 50)) == length(SpacedCanonicalKmers(s2, Val{201}(), 50)) == 6
-        
-        s3 = rna"AC-UGAG--UGC"
-        @test collect(SpacedCanonicalKmers{RNACodon}(s3, 3)) ==
-            [(UInt64(4), canonical(Kmer(RNA_U, RNA_C, RNA_A))),
-             (UInt64(10), canonical(Kmer(RNA_U, RNA_G, RNA_C)))]
-    end
-end
\ No newline at end of file
diff --git a/test/length.jl b/test/length.jl
deleted file mode 100644
index 598980e..0000000
--- a/test/length.jl
+++ /dev/null
@@ -1,7 +0,0 @@
-@testset "Length" begin
-    for len in [1, 16, 32, 64, 128]
-        @test length(DNAKmer(random_dna_kmer(len))) == len
-        @test length(RNAKmer(random_rna_kmer(len))) == len
-        @test length(AAKmer(random_aa(len))) == len
-    end
-end
diff --git a/test/mismatches.jl b/test/mismatches.jl
deleted file mode 100644
index b8e1bb0..0000000
--- a/test/mismatches.jl
+++ /dev/null
@@ -1,51 +0,0 @@
-@testset "Mismatches" begin
-    function test_mismatches(a, b)
-        count = 0
-        for (x, y) in zip(a, b)
-            count += x != y
-        end
-        @test mismatches(a, b) === mismatches(b, a) === count
-    end
-
-    for len in 1:64, _ in 1:10
-        a = random_dna_kmer(len)
-        b = random_dna_kmer(len)
-        test_mismatches(DNAKmer(a), DNAKmer(b))
-        test_mismatches(Kmer{DNAAlphabet{4}}(a), Kmer{DNAAlphabet{4}}(b))
-        
-        a = random_rna_kmer(len)
-        b = random_rna_kmer(len)
-        test_mismatches(RNAKmer(a), RNAKmer(b))
-        test_mismatches(Kmer{RNAAlphabet{4}}(a), Kmer{RNAAlphabet{4}}(b))
-        
-        a = AAKmer(random_aa(len))
-        b = AAKmer(random_aa(len))
-        test_mismatches(a, b)
-    end
-end
-
-@testset "Matches" begin
-    function test_matches(a, b)
-        count = 0
-        for (x, y) in zip(a, b)
-            count += x == y
-        end
-        @test matches(a, b) === matches(b, a) === count
-    end
-
-    for len in 1:64, _ in 1:10
-        a = random_dna_kmer(len)
-        b = random_dna_kmer(len)
-        test_matches(DNAKmer(a), DNAKmer(b))
-        test_matches(Kmer{DNAAlphabet{4}}(a), Kmer{DNAAlphabet{4}}(b))
-        
-        a = random_rna_kmer(len)
-        b = random_rna_kmer(len)
-        test_matches(RNAKmer(a), RNAKmer(b))
-        test_matches(Kmer{RNAAlphabet{4}}(a), Kmer{RNAAlphabet{4}}(b))
-        
-        a = AAKmer(random_aa(len))
-        b = AAKmer(random_aa(len))
-        test_matches(a, b)
-    end
-end
\ No newline at end of file
diff --git a/test/order.jl b/test/order.jl
deleted file mode 100644
index 8b08201..0000000
--- a/test/order.jl
+++ /dev/null
@@ -1,7 +0,0 @@
-@testset "Order" begin
-    @test DNAMer("AA") < DNAMer("AC") < DNAMer("AG") < DNAMer("AT") < DNAMer("CA")
-    @test RNAMer("AA") < RNAMer("AC") < RNAMer("AG") < RNAMer("AU") < RNAMer("CA")
-    
-    @test BigDNAMer("AA") < BigDNAMer("AC") < BigDNAMer("AG") < BigDNAMer("AT") < BigDNAMer("CA")
-    @test BigRNAMer("AA") < BigRNAMer("AC") < BigRNAMer("AG") < BigRNAMer("AU") < BigRNAMer("CA")
-end
diff --git a/test/print.jl b/test/print.jl
deleted file mode 100644
index 9e8f486..0000000
--- a/test/print.jl
+++ /dev/null
@@ -1,37 +0,0 @@
-@testset "Print" begin
-    buf = IOBuffer()
-
-    print(buf, DNAKmer("ACGT"))
-    @test String(take!(buf)) == "ACGT"
-
-    print(buf, RNAKmer("ACGU"))
-    @test String(take!(buf)) == "ACGU"
-    
-    print(buf, Kmer{DNAAlphabet{4}}("ACGT"))
-    @test String(take!(buf)) == "ACGT"
-
-    print(buf, Kmer{RNAAlphabet{4}}("ACGU"))
-    @test String(take!(buf)) == "ACGU"
-    
-    print(buf, AAKmer("AMVKFPSMT"))
-    @test String(take!(buf)) == "AMVKFPSMT"
-end
-
-@testset "Show" begin
-    buf = IOBuffer()
-
-    show(buf, DNAKmer("AGAGT"))
-    @test String(take!(buf)) == "AGAGT"
-    
-    show(buf, RNAKmer("AGAGU"))
-    @test String(take!(buf)) == "AGAGU"
-    
-    show(buf, Kmer{DNAAlphabet{4}}("AGAGT"))
-    @test String(take!(buf)) == "AGAGT"
-    
-    show(buf, Kmer{RNAAlphabet{4}}("AGAGU"))
-    @test String(take!(buf)) == "AGAGU"
-    
-    print(buf, AAKmer("AMVKFPSMT"))
-    @test String(take!(buf)) == "AMVKFPSMT"
-end
diff --git a/test/random.jl b/test/random.jl
deleted file mode 100644
index 48074e7..0000000
--- a/test/random.jl
+++ /dev/null
@@ -1,26 +0,0 @@
-@testset "Random" begin
-    @testset for k in 1:64
-        for _ in 1:10
-            kmer = rand(DNAKmer{k})
-            @test isa(kmer, DNAKmer{k})
-            kmer = rand(RNAKmer{k})
-            @test isa(kmer, RNAKmer{k})
-        end
-        for size in [0, 1, 2, 5, 10, 100]
-            @test length(rand(DNAKmer{k}, size)) == size
-            @test length(rand(RNAKmer{k}, size)) == size
-        end
-        kmers = rand(DNAKmer{k}, 10_000)
-        for i in 1:k
-            a = sum([kmer[i] for kmer in kmers] .== DNA_A)
-            c = sum([kmer[i] for kmer in kmers] .== DNA_C)
-            g = sum([kmer[i] for kmer in kmers] .== DNA_G)
-            t = sum([kmer[i] for kmer in kmers] .== DNA_T)
-            @test 2200 ≤ a ≤ 2800
-            @test 2200 ≤ c ≤ 2800
-            @test 2200 ≤ g ≤ 2800
-            @test 2200 ≤ t ≤ 2800
-            @test a + c + g + t == 10_000
-        end
-    end
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 4260278..8a4c0df 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,28 +1,894 @@
 module TestKmers
 
-using Kmers
-using BioSequences 
 using Test
-
-const GROUP = get(ENV, "GROUP", "All")
+using Random
+using Kmers
+using BioSequences
+using BioSymbols
+using StringViews
 
 include("utils.jl")
 
-if GROUP == "BioSequences" || GROUP == "All"
-    include("biosequences_interface.jl")
-    include("construction_and_conversion.jl")
-    include("comparisons.jl")
-    include("length.jl")
-    include("access.jl")
-    include("random.jl")
-    include("find.jl")
-    include("print.jl")
-    include("transformations.jl")
-    include("mismatches.jl")
-    include("debruijn_neighbors.jl")
-    include("iteration.jl")
-    include("translation.jl")
-    #include("shuffle.jl")
+@testset "BioSequences Interface" begin
+    for A in
+        [DNAAlphabet{2}, DNAAlphabet{4}, RNAAlphabet{2}, RNAAlphabet{4}, AminoAcidAlphabet]
+        for K in (1, 9, 116)
+            @test BioSequences.has_interface(
+                BioSequence,
+                Kmers.derive_type(Kmer{A, K}),
+                rand(collect(A()), K),
+                false,
+            )
+        end
+    end
+end
+
+struct CharSymbol <: BioSymbol
+    x::Char
+end
+BioSymbols.prefix(::CharSymbol) = "Char"
+BioSymbols.type_text(::CharSymbol) = "CharSymbol"
+BioSymbols.isterm(::CharSymbol) = false
+
+# These two are not interface, but useful for the tests below
+# (e.g. converting the sequence to a string)
+Base.convert(::Type{Char}, x::CharSymbol) = x.x
+Base.Char(x::CharSymbol) = convert(Char, x)
+Base.convert(::Type{CharSymbol}, x::Char) = CharSymbol(x)
+BioSymbols.isgap(::CharSymbol) = false
+
+# TODO: Should BioSymbols be updated to remove this?
+BioSymbols.isvalid(::CharSymbol) = true
+
+struct CharAlphabet <: Alphabet end
+Base.eltype(::Type{CharAlphabet}) = CharSymbol
+BioSequences.symbols(::CharAlphabet) = ntuple(i -> CharSymbol(Char(i - 1)), Val{128}())
+BioSequences.encode(::CharAlphabet, c::CharSymbol) = reinterpret(UInt32, c.x) % UInt
+BioSequences.decode(::CharAlphabet, c::UInt) = CharSymbol(reinterpret(Char, c % UInt32))
+BioSequences.BitsPerSymbol(::CharAlphabet) = BioSequences.BitsPerSymbol{32}()
+
+struct GenericNucAlphabet <: NucleicAcidAlphabet{8} end
+Base.eltype(::Type{GenericNucAlphabet}) = DNA
+BioSequences.symbols(::GenericNucAlphabet) = symbols(DNAAlphabet{4}())
+BioSequences.encode(::GenericNucAlphabet, c::DNA) = BioSequences.encode(DNAAlphabet{4}(), c)
+BioSequences.decode(::GenericNucAlphabet, c::UInt) =
+    BioSequences.decode(DNAAlphabet{4}(), c)
+BioSequences.BitsPerSymbol(::GenericNucAlphabet) = BioSequences.BitsPerSymbol{8}()
+
+const ALPHABETS = [
+    DNAAlphabet{2}(),
+    RNAAlphabet{2}(),
+    DNAAlphabet{4}(),
+    RNAAlphabet{4}(),
+    AminoAcidAlphabet(),
+    CharAlphabet(),
+]
+
+@testset "Construction" begin
+    # Fundamentals
+    dna = dna"TAGCTAAC"
+    mer = Kmer{DNAAlphabet{2}, length(dna)}(dna)
+    @test mer isa Kmer{DNAAlphabet{2}, length(dna)}
+    @test DNACodon == DNAKmer{3, 1}
+    @test RNACodon == RNAKmer{3, 1}
+
+    for A in ALPHABETS
+        Ta = typeof(A)
+        for L in [0, 3, 11, 41]
+            for i in 1:3
+                # Fundamentals and length
+                s = randseq(A, SamplerUniform(symbols(A)), L)
+                mer = Kmer{Ta, L}(collect(s))
+                @test mer isa Kmer{Ta, L}
+                @test length(mer) == L
+                @test string(mer) == string(s)
+
+                # From string
+                mer2 = Kmer{Ta, L}(string(s))
+                @test mer == mer2
+                @test mer === mer2
+                @test string(mer) == string(mer2)
+
+                # From LongSequence
+                mer3 = Kmer{Ta, L}(s)
+                @test mer === mer3
+            end
+        end
+    end
+
+    @testset "Bad parameters" begin
+        @test_throws Exception Kmer{DNAAlphabet, :foo, 1}("C")
+        @test_throws Exception Kmer{DNAAlphabet, -1, 0}("")
+        @test_throws Exception Kmer{DNAAlphabet, 1, 2}("A")
+    end
+
+    @testset "Construct from string" begin
+        for s in ["TAG", "ACCGAGCC", "TGATGCTATTAGG"]
+            L = length(s)
+            for ss in (s, view(s, 1:lastindex(s)))
+                @test DNAKmer{L, 1}(ss) == DNAKmer{L}(ss)
+                @test string(DNAKmer{L}(ss)) == ss
+            end
+        end
+
+        s = "UHVKALRIQURPFLSMOF"
+        @test string(AAKmer{18}(s)) == s
+
+        for s in ["αβγδϵ", "", "中国人大网"]
+            L = length(s)
+            for ss in (s, view(s, 1:lastindex(s)))
+                sq = Kmer{CharAlphabet, L}(ss)
+                @test string(sq) == s
+                @test [Char(i) for i in sq] == collect(ss)
+            end
+        end
+    end
+
+    @testset "Wrong length" begin
+        @test_throws Exception DNAKmer{4}("TAC")
+        @test_throws Exception DNAKmer{4}("TACCA")
+        @test_throws Exception Kmer{CharAlphabet, 2}(['T'])
+        @test_throws Exception AAMer{3}((AminoAcid(i) for i in "WPLK" if true))
+    end
+
+    @testset "Length must be given explicitly" begin
+        for s in ["TACA", ""]
+            @test_throws Exception DNAKmer("TACGA")
+            @test string(DNAKmer{length(s)}(s)) == s
+        end
+        @test_throws Exception AAMer(aa"WPLKM")
+        @test collect(AAKmer{5}(aa"WPLKM")) == collect(aa"WPLKM")
+    end
+
+    @testset "Kmer literal" begin
+        @test collect(mer"TGAGTCA"d) == collect(dna"TGAGTCA")
+        @test collect(mer"WQOPMKAP"a) == collect(aa"WQOPMKAP")
+        @test collect(mer"UAUCGGAUC"r) == collect(rna"UAUCGGAUC")
+
+        @test_throws Exception eval(:(mer"ATCGATAG"k)) # invalid flag
+    end
+
+    @testset "Construct from Biosequences" begin
+        @testset "Construct from LongSequence" begin
+            for seq in [
+                dna"TAGGCA",
+                rna"UUCUGUGAGUCC",
+                aa"TTCGGAA",
+                LongSequence{CharAlphabet}("HELLO"),
+            ]
+                for sq in [seq, view(seq, 2:lastindex(seq))]
+                    A = typeof(Alphabet(sq))
+                    @test Kmer{A, length(sq)}(sq) == Kmer{A, length(sq)}(string(sq))
+                    @test string(Kmer{A, length(sq)}(sq)) == string(sq)
+                    @test_throws Exception Kmer{A, length(sq) + 1}(sq)
+                end
+            end
+        end
+
+        @testset "Construct from kmer" begin
+            m = mer"TAGCGTTA"d
+            m2 = DNAKmer{8}(m)
+            @test m === m2
+            @test_throws Exception DNAKmer{7}(m)
+            m3 = RNAKmer{8}(m)
+            @test m3 === mer"UAGCGUUA"r
+            @test_throws Exception RNAKmer{9}(m)
+            @test_throws Exception AAKmer{8}(m)
+        end
+
+        # From generic biosequence - TODO
+    end
+
+    @testset "Construct from iterable" begin
+        m1 = DNAKmer{6}((i for i in dna"GCGATC"))
+        m2 = DNAKmer{6}((i for i in dna"ATCGATGCAA" if i ∈ (DNA_A, DNA_C)))
+        @test m1 === mer"GCGATC"d
+        @test m2 === mer"ACACAA"d
+        m3 = DNAKmer{4}((i for i in rna"GAUC" if true))
+        @test m3 === mer"GATC"d
+    end
+end
+
+@testset "Comparison" begin
+    @testset "Equality" begin
+        @test mer""a == mer""a
+        @test mer"KMNUPQCX"a == mer"KMNUPQCX"a
+        @test mer"PKMNEA"a != mer"PKMNE"a
+        @test mer"PKM"a != mer"PK"a
+        @test mer"IUDHLDJVIPOEJKWE"a != mer"IUDHLDJVIPOEJKW"a
+    end
+
+    @testset "Ordering" begin
+        @test mer"UGCAG"r > mer"CGCAG"r
+        @test mer"TCGGAAG"d > mer"TCGGAAC"d
+        @test mer"OEWPM"a > mer"OEWP"a
+        @test mer"UGCGA"r > mer"TGAGA"d
+
+        @test cmp(mer"TAGCTA"d, mer"TACCTA"d) == 1
+        @test cmp(mer"TAC"d, mer"TAGCA"d) == -1
+    end
+
+    @testset "Hashing, isless and isequal" begin
+        @test hash(mer"POSMDGF"a, UInt(15)) === hash(mer"POSMDGF"a, UInt(15))
+        @test isequal(mer"POSMDGF"a, mer"POSMDGF"a)
+
+        # Same, but DNA/RNA
+        m1 = mer"TAGCTA"d
+        m2 = mer"UAGCUA"r
+        @test isequal(m1, m2)
+        @test hash(m1) === hash(m2)
+        m3 = Kmer{DNAAlphabet{4}}(m1)
+        m4 = Kmer{RNAAlphabet{4}}(m2)
+        @test isequal(m3, m4)
+        @test hash(m3) === hash(m4)
+
+        # Other kmers
+        # This throws because we want kmer hashing to be maximally fast,
+        # which implies they must have a different hashing strategy from
+        # other BioSequences, which implies they can't be isequal
+        @test_throws Exception isequal(mer"UGCUGA"r, mer"UGCUGA"a)
+        @test !isequal(mer"UGCAC"r, mer"UGCGA"r)
+
+        # Other sequences
+        @test_throws Exception dna"TAG" == mer"TAG"d
+        @test_throws Exception mer"TAG"d == dna"TAG"
+    end
+end
+
+@testset "Access" begin
+    @testset "Scalar indexing" begin
+        m = mer"TGATGCTAGTAGTATTCTATAG"d
+        @test m isa Mer{22}
+
+        @test m[1] == first(m) == DNA_T
+        @test m[3] == DNA_A
+        @test last(m) == m[22] == DNA_G
+
+        @test_throws BoundsError m[0]
+        @test_throws BoundsError m[-1]
+        @test_throws BoundsError m[23]
+
+        for s in
+            [dna"TAGCAAC", dna"TWKKSVVDNA-A", rna"UGUGUCA", rna"UGUCGWS", aa"PLLKMDDSH"]
+            m = Kmer{typeof(Alphabet(s)), length(s)}(s)
+            @test first(m) == first(s)
+            @test last(m) == last(s)
+            for i in [1, 3, 5]
+                @test s[i] == m[i]
+            end
+        end
+
+        # Weirdly, this throws ArgumentError
+        @test_throws Exception first(DNAKmer{0}(""))
+        @test_throws Exception last(RNAKmer{0}(""))
+    end
+
+    @testset "Unit ranges" begin
+        m = mer"POKDGTWDIKVL"a
+        @test m isa Mer{12}
+
+        @test m[1:3] === mer"POK"a
+        @test m[2:6] === mer"OKDGT"a
+        @test m[6:(end - 1)] === mer"TWDIKV"a
+
+        @test m[eachindex(m)] === m
+        @test m[Base.OneTo(4)] === mer"POKD"a
+
+        @test_throws BoundsError m[0:4]
+        @test m[0:-1] == AAKmer{0}("")
+        @test_throws BoundsError m[2:13]
+    end
+
+    @testset "With vector of indices" begin
+        m = mer"UGCUGAUCGUAU"r
+        @test m isa Mer{12}
+
+        @test m[[1, 3, 5]] == mer"UCG"r
+        @test m[[12, 9, 7]] == mer"UGU"r
+        @test m[Int[]] == Kmer{RNAAlphabet{2}, 0}("")
+
+        @test_throws BoundsError m[[2, 8, 15]]
+        @test_throws BoundsError m[[0, 1]]
+        @test_throws BoundsError m[[13]]
+    end
+
+    @testset "Logical indexing" begin
+        m = Kmer{CharAlphabet, 4}("ØÆGD")
+        @test m[[true, false, true, false]] == Kmer{CharAlphabet, 2}("ØG")
+        @test m[trues(4)] === m
+        @test m[falses(4)] === Kmer{CharAlphabet, 0}("")
+
+        @test_throws BoundsError m[[true, false, true, true, true]]
+        @test_throws BoundsError m[[false, false, true]]
+        @test_throws BoundsError m[trues(5)]
+    end
+end
+
+@testset "Modification" begin
+    @testset "push, push_first" begin
+        m = mer"UHALSAP"a
+        @test push(m, AA_W) == mer"UHALSAPW"a
+        @test push(push(m, AA_W), AA_M) === mer"UHALSAPWM"a
+        @test push_first(m, AA_Gap) == mer"-UHALSAP"a
+        @test push_first(push(m, AA_K), AA_H) == mer"HUHALSAPK"a
+
+        @test push(m, 'K') == mer"UHALSAPK"a
+        @test push(mer"TAG"d, RNA_A) == mer"TAGA"d
+    end
+
+    @testset "shift, shiftfirst" begin
+        m = mer"PDOFPOLEF"a
+        v = collect(m)
+        for aa in aa"PLLMWFVB"
+            m = shift(m, aa)
+            @test m isa Mer{9}
+            popfirst!(push!(v, aa))
+            @test collect(m) == v
+        end
+
+        m = mer"AUGCGUA"r
+        v = collect(m)
+        for dna in dna"TAGTGTGCTA"
+            m = shift_first(m, dna)
+            @test m isa Mer{7}
+            pop!(pushfirst!(v, dna))
+            @test collect(m) == v
+        end
+    end
+
+    @testset "pop, pop_first" begin
+        m = mer"LNPQ"a
+        @test (m = pop(m)) == mer"LNP"a
+        @test (m = pop(m)) == mer"LN"a
+        @test (m = pop(m)) == mer"L"a
+        @test (m = pop(m)) == AAKmer{0}("")
+        @test_throws ArgumentError pop(m)
+
+        @test pop(mer"MDFFIJFKL"a) === mer"MDFFIJFK"a
+
+        m = mer"UAGC"r
+        @test (m = pop_first(m)) == mer"AGC"r
+        @test (m = pop_first(m)) == mer"GC"r
+        @test (m = pop_first(m)) == mer"C"r
+        @test (m = pop_first(m)) == mer""r
+        @test pop_first(mer"PKWIKMPPAVYWA"a) == mer"KWIKMPPAVYWA"a
+    end
+
+    @testset "Setindex" begin
+        mer = mer"PLQVAK"a
+        setindex = Base.setindex
+        @test setindex(mer, 3, AA_K) == mer"PLKVAK"a
+        @test setindex(mer, 1, AA_R) == mer"RLQVAK"a
+        @test setindex(mer, 6, AA_M) == mer"PLQVAM"a
+        @test_throws BoundsError setindex(mer, 0, AA_K)
+        @test_throws BoundsError setindex(mer, 7, AA_K)
+
+        mer = mer"ATGTCGTGA"d
+        @test setindex(mer, 1, DNA_T) == mer"TTGTCGTGA"d
+        @test setindex(mer, 5, DNA_C) == mer"ATGTCGTGA"d
+        @test setindex(mer, 5, DNA_A) == mer"ATGTAGTGA"d
+
+        mer = mer"PLAKCVMARYKW"a
+        @test setindex(mer, 10, AA_Q) == mer"PLAKCVMARQKW"a
+    end
+end
+
+@testset "Biological operations" begin
+    for s in [
+        dna"",
+        aa"",
+        LongDNA{2}(dna"TAGTGCA"),
+        LongRNA{2}(rna"UGCUGUAA"),
+        dna"TGASWKHVAAN--A",
+        rna"UAGUCUYMNS",
+        aa"LKHWSYYVQN",
+        LongSequence{CharAlphabet}("LKDSJ"),
+        LongSequence{CharAlphabet}("κ𝚶⊸∑Γ"),
+    ]
+        m = Kmer{typeof(Alphabet(s)), length(s)}(s)
+
+        # Reverse
+        @test collect(reverse(m)) == reverse(collect(m))
+        @test collect(reverse(m)) == collect(reverse(s))
+
+        # The rest of the operations are only for nucleotides
+        isa(Alphabet(s), NucleicAcidAlphabet) || continue
+
+        # Complement
+        @test collect(complement(s)) == collect(complement(m))
+
+        # Reverse complement
+        rv = reverse_complement(m)
+        @test collect(reverse_complement(s)) == collect(rv)
+
+        # Canonical
+        can = canonical(m)
+        @test collect(can) == collect(canonical(s))
+        @test can ≤ m
+        if can === m
+            @test m ≤ rv
+        else
+            @test can === rv
+            @test rv ≤ m
+        end
+
+        @test iscanonical(mer"AGCTAG"d)
+        @test iscanonical(mer""d)
+        @test iscanonical(mer"GCGAAC"d)
+        @test iscanonical(mer"AATT"d)
+        @test !iscanonical(mer"GGATGC"d)
+        @test !iscanonical(mer"TCGTGA"d)
+        @test !iscanonical(mer"TTGAA"d)
+    end
+end
+
+@testset "Translation" begin
+    @testset "Forward translation" begin
+        # Empty
+        @test translate(mer""r) == mer""a
+        @test translate(mer""d) == mer""a
+        @test translate(Kmer{DNAAlphabet{4}, 0}("")) == mer""a
+
+        # Not divisible by 3
+        @test_throws Exception translate(mer"U"r)
+        @test_throws Exception translate(mer"UGCA"r)
+        @test_throws Exception translate(mer"GUCGAUUGUC"r)
+
+        # Containing gaps
+        @test_throws Exception translate(Kmer{DNAAlphabet{4}, 6}("CTGA-C"))
+        @test_throws Exception translate(Kmer{RNAAlphabet{4}, 3}("UC-"))
+
+        # Invalid alphabet
+        @test_throws Exception transate(mer"CCC"a)
+        @test_throws Exception transate(Kmer{CharAlphabet, 3}("GGG"))
+
+        # Compare to LongSequence
+        for s in [
+            rna"UCGUAGUUCGAUUCUAUGCUGUAGUGGCAA",
+            rna"UCGUAGGCGUAUUGCGCAAAGCGC",
+            rna"UGCUAGUGUUCGAAA",
+            rna"UCGUUAGUAAAA",
+        ]
+            for A in [DNAAlphabet{4}, RNAAlphabet{2}, DNAAlphabet{2}, RNAAlphabet{4}]
+                ss = LongSequence{A}(s)
+                @test collect(translate(ss)) == collect(translate(Kmer{A, length(s)}(s)))
+            end
+        end
+
+        for s in [
+            rna"UGCUGAWKVUDUGWUGUDHUAGUGCNUBGKUGCMGGSWC",
+            rna"UCGUAGUCKGUCGUYCUGAGGWUGCUGANNUGCUGA",
+            rna"CAGGCCAGWGCUGSSSCUGSMGKYVUCUAS",
+        ]
+            for A in [DNAAlphabet{4}, RNAAlphabet{4}]
+                ss = LongSequence{A}(s)
+                @test collect(translate(ss)) == collect(translate(Kmer{A, length(s)}(s)))
+            end
+        end
+
+        # Skip 1, the index of gap (which cannot be translated)
+        A = alphabet(RNA)
+        for i in 2:16, j in 2:16, k in 2:16
+            mer = Kmer{RNAAlphabet{4}, 3}((A[i], A[j], A[k]))
+            @test only(translate(mer)) == only(translate(LongSequence(mer)))
+        end
+    end
+
+    @testset "CodonSet" begin
+        codons = [RNACodon((i, j, k)) for i in mer"UACG"r, j in mer"UACG"r, k in mer"UACG"r]
+        @test length(Set(codons)) == 64
+        sources = [[], codons[[1, 4, 8]], codons, codons[rand(Bool, 64)], codons[[4, 8]]]
+        csets = map(CodonSet, sources)
+        sets = map(Set, sources)
+
+        # Basic properties
+        for (cset, set) in zip(csets, sets)
+            @test cset == set
+            @test sort!(collect(cset)) == sort!(collect(set))
+            @test length(cset) == length(set)
+            @test isempty(cset) == isempty(set)
+            for i in set
+                @test i ∈ cset
+            end
+
+            s = isempty(cset) ? mer"AAA"r : first(cset)
+            @test delete(cset, s) == delete!(copy(set), s)
+            @test filter(i -> first(i) == DNA_A, cset) ==
+                  filter(i -> first(i) == DNA_A, set)
+        end
+
+        for (si, ci) in zip(sets, csets), (sj, cj) in zip(sets, csets)
+            @test issubset(si, sj) == issubset(ci, cj)
+            for f in [union, setdiff, intersect, symdiff]
+                @test Set(f(ci, cj)) == f(si, sj)
+            end
+        end
+    end
+
+    @testset "Standard reverse genetic code" begin
+        seq = LongAA(collect((i for i in alphabet(AminoAcid) if i ∉ (AA_Gap, AA_U, AA_O))))
+        codonsets = reverse_translate(seq)
+        seen_codons = Set{RNACodon}()
+        for (codonset, aa) in zip(codonsets, seq)
+            @test reverse_translate(aa) === codonset
+            if isambiguous(aa)
+                bits = zero(compatbits(aa))
+                for codon in codonset
+                    bits |= compatbits(only(translate(codon)))
+                end
+                # selenocysteine and Pyrrolysine have bits
+                # 0x00300000. However, translating normal
+                # codons cannot get these amino acids,
+                # so we ignore them by masking their bits
+                @test bits == (compatbits(aa) & 0x000fffff)
+            else
+                @test isdisjoint(seen_codons, codonset)
+                union!(seen_codons, codonset)
+                for codon in codonset
+                    @test only(translate(codon)) == aa
+                end
+            end
+        end
+        @test length(seen_codons) == 64
+
+        code = Kmers.rev_standard_genetic_code
+        @test length(code) == 27
+        @test collect(code) == map(0x00:UInt8(26)) do i
+            aa = reinterpret(AminoAcid, i)
+            aa => reverse_translate(aa)
+        end
+        @test_throws Exception code[AA_Gap]
+    end
+
+    @testset "Custom reverse genetic code" begin
+        fw_code = BioSequences.pterobrachia_mitochondrial_genetic_code
+        code = ReverseGeneticCode(fw_code)
+        for (aa, set) in code
+            for codon in set
+                if aa ∈ (AA_O, AA_U, AA_B, AA_J, AA_X, AA_Z)
+                    continue
+                end
+                @test only(translate(LongSequence(codon); code=fw_code)) === aa
+            end
+        end
+    end
+end
+
+@testset "Printing" begin
+    function test_print(s, str)
+        @test string(s) == str
+        io = IOBuffer()
+        print(io, s)
+        @test String(take!(io)) == str
+    end
+
+    for s in [
+        dna"",
+        aa"",
+        LongDNA{2}(dna"TAGTGCA"),
+        LongRNA{2}(rna"UGCUGUAA"),
+        dna"TGASWKHVAAN--A",
+        rna"UAGUCUYMNS",
+        aa"LKHWSYYVQN",
+    ]
+        test_print(s, string(s))
+    end
+end
+
+@testset "Iterators" begin
+    @testset "Forward iteration" begin
+        @testset "Aliases" begin
+            @test FwKmers{DNAAlphabet{2}, 3}(dna"TAGA") isa
+                  FwKmers{DNAAlphabet{2}, 3, LongDNA{4}}
+            @test FwDNAMers{4}(rna"UAGC") isa FwKmers{DNAAlphabet{2}, 4, LongRNA{4}}
+            @test FwRNAMers{4}(dna"TACA") isa FwKmers{RNAAlphabet{2}, 4, LongDNA{4}}
+            @test FwAAMers{4}(aa"LKCY") isa FwKmers{AminoAcidAlphabet, 4, LongAA}
+        end
+
+        @testset "Smaller than K" begin
+            @test isempty(FwDNAMers{3}(dna"TA"))
+            @test isempty(FwAAMers{9}(aa"AOPJVPES"))
+            @test isempty(FwKmers{RNAAlphabet{4}, 6}(dna"ATGGA"))
+        end
+
+        @testset "Conversible alphabets" begin
+            for (seqs, alphabets) in [
+                (
+                    [LongDNA{2}("TGATGGCGTAGTA"), LongRNA{2}("UCGUGCUA"), LongDNA{2}("")],
+                    [DNAAlphabet{2}, DNAAlphabet{4}, RNAAlphabet{2}, RNAAlphabet{4}],
+                ), # From two-bit
+                (
+                    [dna"TAGTCTGAC", rna"UAGUCGAUUAGGCC"],
+                    [DNAAlphabet{2}, DNAAlphabet{4}, RNAAlphabet{2}, RNAAlphabet{4}],
+                ), # From four-bit
+            ]
+                for seq in seqs, alphabet in alphabets
+                    v1 = collect(FwKmers{alphabet, 3}(seq))
+                    v2 = [Kmer{alphabet, 3, 1}(seq[i:(i + 2)]) for i in 1:(length(seq) - 2)]
+                    @test v1 == v2
+                end
+            end
+            for seq in [dna"TGWSNVNTGA", rna"C-GGAU-WSNUCG"]
+                @test_throws Exception first(FwDNAMers{3}(seq))
+                @test_throws Exception first(FwRNAMers{3}(seq))
+            end
+        end
+
+        @testset "Four to two bit" begin
+            for seq in [dna"TATGCTTCGTAGTCGTCGTTGCTA"]
+                for seqq in [seq, LongRNA{4}(seq)]
+                    filtered = typeof(seqq)([i for i in seqq if !isambiguous(i)])
+                    for A in [DNAAlphabet{2}, RNAAlphabet{2}]
+                        v1 = collect(FwKmers{A, 4}(seqq))
+                        v2 = [
+                            Kmer{A, 4, 1}(filtered[i:(i + 3)]) for
+                            i in 1:(length(filtered) - 3)
+                        ]
+                        @test v1 == v2
+                    end
+                end
+            end
+        end
+
+        @testset "From ASCII bytes" begin
+            str = "TaghWS-TGnADbkWWMSTV"
+            T = FwKmers{DNAAlphabet{4}, 4}
+            mers = collect(T(str))
+            for source in
+                [str, view(str, 1:lastindex(str)), codeunits(str), Vector(codeunits(str))]
+                @test collect(T(source)) == mers
+            end
+
+            # Bad byte in ASCII
+            s = "TAGTCGTAGPATGC"
+            @test_throws BioSequences.EncodeError collect(FwDNAMers{3}(s))
+        end
+
+        # Unconvertible alphabet
+        @testset "Unconvertible alphabet" begin
+            @test_throws Exception iterate(FwKmers{DNAAlphabet{4}, 2}(aa"TAGTGCA"))
+        end
+
+        # GenericRecoding
+        s = dna"TGATGTCGTAGTGAgtagtaCCA"
+        it = FwKmers{GenericNucAlphabet, 8}(s)
+        @test collect(it) ==
+              [Kmer{GenericNucAlphabet, 8}(s[i:(i + 7)]) for i in 1:(length(s) - 7)]
+    end
+
+    @testset "FwRvIterator" begin
+        function naive_fwrv(s::NucSeq, len::Integer)
+            A = typeof(Alphabet(s))
+            T = Kmers.derive_type(Kmer{A, len})
+            [
+                (T(s[i:(i + len - 1)]), T(reverse_complement(s[i:(i + len - 1)]))) for
+                i in 1:(length(s) - len + 1)
+            ]
+        end
+
+        for s in ["", "TGATGCTGTA", "TAT"]
+            Ts = [DNAAlphabet{2}, DNAAlphabet{4}, RNAAlphabet{2}, RNAAlphabet{4}]
+            srcs = Any[LongSequence{T}(LongDNA{2}(s)) for T in Ts]
+            dsts = copy(srcs)
+            for dst in dsts
+                srcs_ = push!(copy(srcs), String(typeof(dst)(LongDNA{2}(s))))
+                for src in srcs_
+                    @test collect(FwRvIterator{typeof(Alphabet(dst)), 4}(src)) ==
+                          naive_fwrv(dst, 4)
+                end
+            end
+        end
+    end
+
+    @testset "CanonicalKmers" begin
+        @testset "Aliases" begin
+            @test CanonicalKmers{DNAAlphabet{4}, 3}("TAGCTAGA") isa CanonicalKmers
+            @test CanonicalDNAMers{8}("TAGCTAGA") isa CanonicalKmers
+            @test CanonicalRNAMers{9}("TAGCTAGA") isa CanonicalKmers
+        end
+
+        @testset "Only nucleic acids" begin
+            @test_throws Exception CanonicalKmers{AminoAcidAlphabet, 3}("UAGCTGA")
+        end
+
+        @testset "Iteration" begin
+            for s in [
+                dna"TAGCTAGGACA",
+                rna"UAGUCGUGAGA",
+                "TAGCTAGAGGA",
+                collect(codeunits("ATGCGAGGA")),
+            ]
+                seq = LongDNA{2}(s)
+                cns = [canonical(seq[i:(i + 4)]) for i in 1:(length(seq) - 4)]
+                for A in [DNAAlphabet{2}, DNAAlphabet{4}]
+                    it = CanonicalKmers{A, 5}(s)
+                    @test collect(it) == [Kmer{A, 5, 1}(i) for i in cns]
+                end
+            end
+
+            s = dna"TAGTCGTGATGATAGTCTGAATGTC"
+            it = CanonicalKmers{GenericNucAlphabet, 6}(s)
+            @test collect(it) == [
+                canonical(Kmer{GenericNucAlphabet, 6}(s[i:(i + 5)])) for
+                i in 1:(length(s) - 5)
+            ]
+
+            s = "TAGTGTCGATGATC"
+            it1 = CanonicalKmers{DNAAlphabet{2}, 4, String}(s)
+            it2 = CanonicalDNAMers{4}(s)
+            @test collect(it1) == collect(it2)
+        end
+    end
+
+    @testset "UnambiguousKmers" begin
+        for s in [dna"TAGCWSAGACYWNACGCNACG--", rna"UAGUCYWUAGCNUAHAGC-GAUGAGC"]
+            res = [(s[i:(i + 2)], i) for i in 1:(length(s) - 2)]
+            filter!(i -> all(iscertain, first(i)), res)
+            for A in [DNAAlphabet{2}, RNAAlphabet{2}]
+                resA = [(Kmer{A, 3, 1}(i), j) for (i, j) in res]
+                it = UnambiguousKmers{A, 3}(s)
+                @test collect(it) == resA
+            end
+
+            A = s isa LongDNA ? DNAAlphabet{2} : RNAAlphabet{2}
+            pos = [i:(i + 3) for i in 1:(lastindex(s) - 3)]
+            filter!(pos) do rng
+                all(iscertain, s[rng])
+            end
+            resA = [(Kmer{A, 4, 1}(s[i]), first(i)) for i in pos]
+            it = UnambiguousKmers{A, 4}(string(s))
+            @test collect(it) == resA
+        end
+
+        # Copyable
+        s = LongDNA{2}("TATCGGATAGGCAAA")
+        v = collect(UnambiguousRNAMers{4}(s))
+        v2 = [(DNAKmer{4}(s[i:(i + 3)]), i) for i in 1:(length(s) - 3)]
+        @test v == v2
+
+        # GenericRecoding
+        s = dna"TAGCTKAGAGGAGAACWSGCGAGA"
+        it = UnambiguousKmers{DNAAlphabet{2}, 4}(s)
+        v = collect(it)
+        v2 = [
+            (DNAKmer{4}(s[i:(i + 3)]), i) for
+            i in 1:(length(s) - 3) if all(iscertain, s[i:(i + 3)])
+        ]
+        @test v == v2
+
+        s = LongSequence{GenericNucAlphabet}(dna"TGATCGTAGATGwATGTC")
+        it = UnambiguousKmers{DNAAlphabet{2}, 7, LongSequence{GenericNucAlphabet}}(s)
+        it2 = UnambiguousDNAMers{7}(s)
+        @test collect(it) == collect(it2)
+
+        # Bad byte in ASCII
+        s = "TAGTCGTAGPATGC"
+        @test_throws BioSequences.EncodeError collect(UnambiguousDNAMers{3}(s))
+    end
+
+    @testset "SpacedKmers" begin
+        function test_naive_spaced(A, seq, k, space)
+            T = Kmers.derive_type(Kmer{A, k})
+            v = [T(seq[i:(i + k - 1)]) for i in 1:space:(length(seq) - k + 1)]
+            @test collect(SpacedKmers{A, k, space}(seq)) == v
+        end
+
+        for (s, A) in Any[
+            ("TA-NGAKATCGAWTAGA", DNAAlphabet{4}),
+            ("AUGCUGAUGAGUCGUAG", RNAAlphabet{2}),
+            ("KLMYUPOKQMMNLVYRW", AminoAcidAlphabet),
+        ]
+            test_naive_spaced(A, s, 3, 2)
+            test_naive_spaced(A, s, 2, 4)
+            test_naive_spaced(A, codeunits(s), 3, 3)
+        end
+        test_naive_spaced(DNAAlphabet{2}, rna"UAGUCGUAGUAG", 4, 3)
+        test_naive_spaced(RNAAlphabet{4}, dna"TAGCCWKMMNAGCTV", 2, 3)
+
+        it = SpacedDNAMers{3, 4}("TAGAWWWW")
+        @test_throws BioSequences.EncodeError collect(it)
+    end
+
+    @testset "Each codon" begin
+        for s in Any[
+            "TAGCGATAT",
+            b"UAUGCUGAA",
+            dna"TAGGCTATA",
+            LongDNA{2}(dna"TAGCTAGAGGA"),
+            rna"UGAUUCGUUGA",
+            LongRNA{2}(rna"UAGUCGUGAGUA"),
+        ]
+            @test each_codon(DNA, s) === SpacedDNAMers{3, 3}(s)
+            @test each_codon(RNA, s) === SpacedRNAMers{3, 3}(s)
+            if s isa BioSequence{<:DNAAlphabet}
+                @test each_codon(s) === SpacedDNAMers{3, 3}(s)
+            elseif s isa BioSequence{<:RNAAlphabet}
+                @test each_codon(s) === SpacedRNAMers{3, 3}(s)
+            end
+        end
+    end
+end
+
+@testset "StringViews" begin
+    s = StringView(collect(codeunits("ATGCTGATGATCGTATGATGTCGAAA")))
+    it = FwRvIterator{DNAAlphabet{2}, 9}(s)
+    @test collect(it) == map(1:(length(s) - 8)) do i
+        a = DNAKmer{9}(s[i:(i + 8)])
+        (a, reverse_complement(a))
+    end
+end
+
+@testset "fx_hash" begin
+    if UInt == UInt64
+        for (kmer, h) in Any[
+            (mer"TAG"a, 0x55dbbe22bb3e4a13),
+            (mer"KPWAK"a, 0x10203d1c885b7467),
+            (mer"TAGCTAG"d, 0xa76409341339d05a),
+            (mer""a, 0x0000000000000000),
+            (mer""r, 0x0000000000000000),
+            (mer"UGAUGCA"r, 0xdd7c97ae4ca204b4),
+        ]
+            @test fx_hash(kmer) === h
+        end
+    end
+end
+
+@testset "Construction utils" begin
+    @testset "Unsafe extract" begin
+        seq = dna"TTGCTAGGGATTCGAGGATCCTCTAGAGCGCGGCACGATCTTAGCAC"
+        unsafe_extract = Kmers.unsafe_extract
+        @test unsafe_extract(Kmers.FourToTwo(), DNAKmer{6, 1}, seq, 3) ==
+              DNAKmer{6}(seq[3:8])
+        @test unsafe_extract(Kmers.FourToTwo(), DNAKmer{36, 2}, seq, 2) ==
+              DNAKmer{36}(seq[2:37])
+
+        seq = LongDNA{2}(seq)
+        @test unsafe_extract(Kmers.TwoToFour(), Kmer{DNAAlphabet{4}, 6, 1}, seq, 3) ==
+              Kmer{DNAAlphabet{4}, 6}(seq[3:8])
+        @test unsafe_extract(Kmers.TwoToFour(), Kmer{DNAAlphabet{4}, 36, 3}, seq, 2) ==
+              Kmer{DNAAlphabet{4}, 36}(seq[2:37])
+
+        @test unsafe_extract(Kmers.Copyable(), DNAKmer{6, 1}, seq, 3) ==
+              DNAKmer{6}(seq[3:8])
+        @test unsafe_extract(Kmers.Copyable(), DNAKmer{36, 2}, seq, 2) ==
+              DNAKmer{36}(seq[2:37])
+
+        seq = codeunits(String(seq))
+        @test unsafe_extract(Kmers.AsciiEncode(), DNAKmer{6, 1}, seq, 3) ==
+              DNAKmer{6}(seq[3:8])
+        @test unsafe_extract(Kmers.AsciiEncode(), DNAKmer{36, 2}, seq, 2) ==
+              DNAKmer{36}(seq[2:37])
+
+        seq = LongSequence{CharAlphabet}("中国¨Å!人大æ网")
+        @test unsafe_extract(Kmers.GenericRecoding(), Kmer{CharAlphabet, 3, 2}, seq, 4) ==
+              Kmer{CharAlphabet, 3}("Å!人")
+    end
+
+    @testset "Unsafe shift from" begin
+        ushift = Kmers.unsafe_shift_from
+
+        seq = dna"TTGCTAGGGATTCGAGGATCCTCTAGAGCGCGGCACGATCTTAGCAC"
+        mer = Kmer{DNAAlphabet{4}, 9}("TAGwKwADH")
+        @test ushift(Kmers.Copyable(), mer, seq, 4, Val(3)) ==
+              Kmer{DNAAlphabet{4}, 9}("wKwADHCTA")
+
+        mer = mer"TAGCATCG"d
+        @test ushift(Kmers.FourToTwo(), mer, seq, 4, Val(3)) == mer"CATCGCTA"d
+
+        seq = LongDNA{2}(seq)
+        mer = Kmer{DNAAlphabet{4}, 9}("TAGwKwADH")
+        @test ushift(Kmers.TwoToFour(), mer, seq, 2, Val(3)) ==
+              Kmer{DNAAlphabet{4}, 9}("wKwADHTGC")
+
+        seq = codeunits(String(seq))
+        mer = mer"KWPLCVAKVM"a
+        @test ushift(Kmers.AsciiEncode(), mer, seq, 5, Val(4)) == mer"CVAKVMTAGG"a
+
+        seq = LongSequence{CharAlphabet}("中国¨Å!人大æ网")
+        mer = Kmer{CharAlphabet, 5, 3}("中国¨Å!")
+        @test ushift(Kmers.GenericRecoding(), mer, seq, 6, Val(3)) ==
+              Kmer{CharAlphabet, 5, 3}("Å!人大æ")
+    end
 end
 
 end # module
diff --git a/test/shuffle.jl b/test/shuffle.jl
deleted file mode 100644
index 793081b..0000000
--- a/test/shuffle.jl
+++ /dev/null
@@ -1,25 +0,0 @@
-@testset "Shuffle" begin
-    for s in ["A", "C", "G", "T"]
-        kmer = DNAKmer(s)
-        @test kmer === shuffle(kmer)
-    end
-
-    function count(kmer)
-        a = c = g = t = 0
-        for x in kmer
-            a += x == DNA_A
-            c += x == DNA_C
-            g += x == DNA_G
-            t += x == DNA_T
-        end
-        return a, c, g, t
-    end
-
-    for k in 1:64, _ in 1:10
-        kmer = rand(DNAKmer{k})
-        @test count(kmer) == count(shuffle(kmer))
-        if k ≥ 30
-            @test kmer != shuffle(kmer)
-        end
-    end
-end
diff --git a/test/transformations.jl b/test/transformations.jl
deleted file mode 100644
index 1691688..0000000
--- a/test/transformations.jl
+++ /dev/null
@@ -1,60 +0,0 @@
-@testset "Transformations" begin
-    function test_reverse(T, seq)
-        revseq = reverse(T(seq))
-        @test String(revseq) == reverse(seq)
-    end
-
-    function test_dna_complement(T, seq)
-        comp = complement(T(seq))
-        @test String(comp) == dna_complement(seq)
-    end
-
-    function test_rna_complement(T, seq)
-        comp = complement(T(seq))
-        @test String(comp) == rna_complement(seq)
-    end
-
-    function test_dna_revcomp(T, seq)
-        revcomp = reverse_complement(T(seq))
-        @test String(revcomp) == reverse(dna_complement(seq))
-    end
-
-    function test_rna_revcomp(T, seq)
-        revcomp = reverse_complement(T(seq))
-        @test String(revcomp) == reverse(rna_complement(seq))
-    end
-
-    @testset "Reverse" begin
-        for len in 1:64, _ in 1:10    
-            test_reverse(DNAKmer{len}, random_dna_kmer(len))
-            test_reverse(RNAKmer{len}, random_rna_kmer(len))
-        end
-
-        seq = dna"AAAAAAAAAAAAAAAAAAAAAAAAAAAAGATAC"
-        @test reverse(seq[(length(seq)-9):length(seq)]) == dna"CATAGAAAAA"
-    end
-
-    @testset "Complement" begin
-        for len in 1:64, _ in 1:10
-            test_dna_complement(DNAKmer{len}, random_dna_kmer(len))
-            test_rna_complement(RNAKmer{len}, random_rna_kmer(len))
-        end
-    end
-
-    @testset "Reverse Complement" begin
-        for len in 1:64, _ in 1:10
-            test_dna_revcomp(DNAKmer{len}, random_dna_kmer(len))
-            test_rna_revcomp(RNAKmer{len}, random_rna_kmer(len))
-        end
-    end
-    
-    @testset "Canonical" begin
-        @test canonical(DNAKmer{4,1}("ACCG")) == DNAKmer{4,1}("ACCG")
-        @test canonical(DNAKmer{4,1}("GCAC")) == DNAKmer{4,1}("GCAC")
-        @test canonical(RNAKmer{4,1}("AAUU")) == RNAKmer{4,1}("AAUU")
-        @test canonical(RNAKmer{4,1}("UGGA")) == RNAKmer{4,1}("UCCA")
-        @test canonical(RNAKmer{4,1}("CGAU")) == RNAKmer{4,1}("AUCG")
-        @test canonical(RNAKmer{4,1}("UGGA")) == RNAKmer{4,1}("UCCA")
-        @test canonical(DNAKmer{4,1}("GCAC")) == DNAKmer{4,1}("GCAC")
-    end
-end
diff --git a/test/translation.jl b/test/translation.jl
index ccf34dc..38d8cdb 100644
--- a/test/translation.jl
+++ b/test/translation.jl
@@ -1,141 +1,138 @@
 @testset "Translation" begin
-
-sampler = BioSequences.SamplerWeighted(
-    dna"ACGTMRSVWYHKDBN",
-    vcat(fill(0.225, 4), fill(0.00909, 10))
-)
-
-for A in (RNAAlphabet, DNAAlphabet)
-    for N in (2, 4)
-        for len in [3, 15, 33, 66]
-            for alternative in (true, false)
-                seq = if N == 2
-                    randseq(A{2}(), len)
-                else
-                    randseq(A{4}(), sampler, len)
+    sampler = BioSequences.SamplerWeighted(
+        dna"ACGTMRSVWYHKDBN",
+        vcat(fill(0.225, 4), fill(0.00909, 10)),
+    )
+
+    for A in (RNAAlphabet, DNAAlphabet)
+        for N in (2, 4)
+            for len in [3, 15, 33, 66]
+                for alternative in (true, false)
+                    seq = if N == 2
+                        randseq(A{2}(), len)
+                    else
+                        randseq(A{4}(), sampler, len)
+                    end
+                    kmer = Kmer{A{N}}(seq)
+                    @test (
+                        translate(seq; alternative_start=alternative) ==
+                        translate(kmer; alternative_start=alternative)
+                    )
                 end
-                kmer = Kmer{A{N}}(seq)
-                @test (
-                    translate(seq, alternative_start=alternative) ==
-                    translate(kmer, alternative_start=alternative)
-                )          
             end
         end
     end
-end
 
-# Throws when ambiguous
-@test_throws Exception translate(
-    Kmer{RNAAlphabet{4}}("AUGCCGCMA"),
-    allow_ambiguous_codons=false
-)
+    # Throws when ambiguous
+    @test_throws Exception translate(
+        Kmer{RNAAlphabet{4}}("AUGCCGCMA"),
+        allow_ambiguous_codons=false,
+    )
+
+    # Not divisible by 3
+    @test_throws Exception translate(mer"UG"r)
+    @test_throws Exception translate(mer"TAGCTTAA"d)
+    @test_throws Exception translate(mer"CUGUAGUUGUCGC"r)
+    @test_throws Exception translate(mer"AGCGA"d)
+
+    # Cannot transla AA seq
+    @test_throws MethodError translate(mer"LLVM"aa)
+    @test_throws MethodError translate(mer"ATG"aa)
+end # translation
+
+@testset "CodonSet" begin
+    CodonSet = Kmers.CodonSet
 
-# Not divisible by 3
-@test_throws Exception translate(mer"UG"r)
-@test_throws Exception translate(mer"TAGCTTAA"d)
-@test_throws Exception translate(mer"CUGUAGUUGUCGC"r)
-@test_throws Exception translate(mer"AGCGA"d)
+    SAMPLE_SOURCES = Any[
+        [mer"UAG"r, mer"ACC"r, mer"ACC"r, mer"UGG"r],
+        RNACodon[],
+        [mer"AAA"r, mer"ACC"r, mer"AAA"r, mer"UCA"r, mer"UCC"r],
+        (i for i in (mer"AGC"r, mer"AGA"r, mer"UUU"r)),
+        (mer"AAC"r, mer"AGG"r),
+        (mer"UUG"r,),
+    ]
 
-# Cannot transla AA seq
-@test_throws MethodError translate(mer"LLVM"aa)
-@test_throws MethodError translate(mer"ATG"aa)
+    @testset "Construction and basics" begin
+        @test isempty(CodonSet())
 
-end # translation
+        # Constuct the sets and basic properties
+        for codons in SAMPLE_SOURCES
+            set = Set(codons)
+            codonset = CodonSet(codons)
+            @test issetequal(set, codonset)
+            @test length(codonset) == length(set)
+        end
 
-@testset "CodonSet" begin
-CodonSet = Kmers.CodonSet
-
-SAMPLE_SOURCES = Any[
-    [mer"UAG"r, mer"ACC"r, mer"ACC"r, mer"UGG"r],
-    RNACodon[],
-    [mer"AAA"r, mer"ACC"r, mer"AAA"r, mer"UCA"r, mer"UCC"r],
-    (i for i in (mer"AGC"r, mer"AGA"r, mer"UUU"r)),
-    (mer"AAC"r, mer"AGG"r),
-    (mer"UUG"r,),
-]
-
-@testset "Construction and basics" begin
-    @test isempty(CodonSet())
-
-    # Constuct the sets and basic properties
-    for codons in SAMPLE_SOURCES
-        set = Set(codons)
-        codonset = CodonSet(codons)
-        @test issetequal(set, codonset)
-        @test length(codonset) == length(set)
+        # Fails with non-codons
+        @test_throws MethodError CodonSet([(RNA_A, RNA_G)])
+        @test_throws MethodError CodonSet((mer"UA"r,))
+        @test_throws MethodError CodonSet([rna"AGG", rna"GGG"])
+        @test_throws MethodError CodonSet([1, 2, 3])
     end
 
-    # Fails with non-codons
-    @test_throws MethodError CodonSet([(RNA_A, RNA_G)])
-    @test_throws MethodError CodonSet((mer"UA"r,))
-    @test_throws MethodError CodonSet([rna"AGG", rna"GGG"])
-    @test_throws MethodError CodonSet([1,2,3])
-end
+    SAMPLE_CODONSETS = map(CodonSet, SAMPLE_SOURCES)
 
-SAMPLE_CODONSETS = map(CodonSet, SAMPLE_SOURCES)
+    @testset "Iteration" begin
+        for things in SAMPLE_SOURCES
+            @test sort!(collect(CodonSet(things))) == sort!(collect(Set(things)))
+        end
 
-@testset "Iteration" begin
-    for things in SAMPLE_SOURCES
-        @test sort!(collect(CodonSet(things))) == sort!(collect(Set(things)))
+        @test iterate(CodonSet()) === nothing
+        codonset = CodonSet((mer"UUU"r,))
+        codon, state = iterate(codonset)
+        @test codon == mer"UUU"r
+        @test iterate(codonset, state) === nothing
     end
 
-    @test iterate(CodonSet()) === nothing
-    codonset = CodonSet((mer"UUU"r,))
-    codon, state = iterate(codonset)
-    @test codon == mer"UUU"r
-    @test iterate(codonset, state) === nothing
-end
-
-@testset "Membership" begin
-    codonset = CodonSet([mer"ACC"r, mer"UAG"r, mer"UUU"r])
-    @test mer"ACC"r in codonset
-    @test mer"UAG"r in codonset
-    @test mer"UUU"r in codonset
-    @test !in(mer"GAA"r, codonset)
-    @test !in(mer"AAA"r, codonset)
-end
-
-@testset "Modifying" begin
-    # Push
-    s1 = CodonSet([mer"GGA"r, mer"UGU"r])
-    s2 = push(s1, mer"GGA"r)
-    @test s1 == s2
-    s3 = push(s2, mer"GAG"r)
-    @test Set(s3) == Set([mer"GGA"r, mer"UGU"r, mer"GGA"r, mer"GAG"r])
-
-    # Delete
-    s4 = delete(s3, mer"GAG"r)
-    @test s2 == s4
-    s5 = delete(s4, mer"UGU"r)
-    @test only(s5) == mer"GGA"r
-    s6 = delete(s5, mer"UUU"r)
-    @test s5 == s6
-    s7 = delete(s6, mer"GGA"r)
-    @test isempty(s7)
-end
-
-@testset "Set operations" begin
-    for c1 in SAMPLE_CODONSETS, c2 in SAMPLE_CODONSETS
-        s1, s2 = Set(c1), Set(c2)
-        for operation in [union, intersect, setdiff, symdiff]
-            @test Set(operation(c1, c2)) == operation(s1, s2)
-        end
-        @test issubset(c1, c2) == issubset(s1, s2)
+    @testset "Membership" begin
+        codonset = CodonSet([mer"ACC"r, mer"UAG"r, mer"UUU"r])
+        @test mer"ACC"r in codonset
+        @test mer"UAG"r in codonset
+        @test mer"UUU"r in codonset
+        @test !in(mer"GAA"r, codonset)
+        @test !in(mer"AAA"r, codonset)
     end
-end
-
-@testset "Filter" begin
-    predicates = [
-        (i -> i[2] == RNA_G),
-        (i -> isodd(length(i))), # always true for codons
-        (i -> i[1] == i[3]),
-        (i -> i[2] != RNA_A)
-    ]
-    for codonset in SAMPLE_CODONSETS, predicate in predicates
-        @test Set(filter(predicate, codonset)) == filter(predicate, Set(codonset))
+
+    @testset "Modifying" begin
+        # Push
+        s1 = CodonSet([mer"GGA"r, mer"UGU"r])
+        s2 = push(s1, mer"GGA"r)
+        @test s1 == s2
+        s3 = push(s2, mer"GAG"r)
+        @test Set(s3) == Set([mer"GGA"r, mer"UGU"r, mer"GGA"r, mer"GAG"r])
+
+        # Delete
+        s4 = delete(s3, mer"GAG"r)
+        @test s2 == s4
+        s5 = delete(s4, mer"UGU"r)
+        @test only(s5) == mer"GGA"r
+        s6 = delete(s5, mer"UUU"r)
+        @test s5 == s6
+        s7 = delete(s6, mer"GGA"r)
+        @test isempty(s7)
     end
-end
 
+    @testset "Set operations" begin
+        for c1 in SAMPLE_CODONSETS, c2 in SAMPLE_CODONSETS
+            s1, s2 = Set(c1), Set(c2)
+            for operation in [union, intersect, setdiff, symdiff]
+                @test Set(operation(c1, c2)) == operation(s1, s2)
+            end
+            @test issubset(c1, c2) == issubset(s1, s2)
+        end
+    end
+
+    @testset "Filter" begin
+        predicates = [
+            (i -> i[2] == RNA_G),
+            (i -> isodd(length(i))), # always true for codons
+            (i -> i[1] == i[3]),
+            (i -> i[2] != RNA_A),
+        ]
+        for codonset in SAMPLE_CODONSETS, predicate in predicates
+            @test Set(filter(predicate, codonset)) == filter(predicate, Set(codonset))
+        end
+    end
 end # CodonSet
 
 @testset "Reverse translation" begin
@@ -143,7 +140,7 @@ end # CodonSet
     code2 = ReverseGeneticCode(BioSequences.trematode_mitochondrial_genetic_code)
     for (rvcode, fwcode) in [
         (Kmers.rev_standard_genetic_code, BioSequences.standard_genetic_code),
-        (code2, BioSequences.trematode_mitochondrial_genetic_code)
+        (code2, BioSequences.trematode_mitochondrial_genetic_code),
     ]
         @test reverse_translate(aa"", rvcode) == CodonSet[]
         observed = Dict{AminoAcid, CodonSet}()
@@ -155,7 +152,7 @@ end # CodonSet
 
         # Length and iteration of ReverseGeneticCode
         @test length(rvcode) == length(symbols(AminoAcidAlphabet())) - 1 # all but AA_Gap
-        @test sort!(collect(rvcode), by=first) == sort!(collect(observed), by=first)
+        @test sort!(collect(rvcode); by=first) == sort!(collect(observed); by=first)
 
         flipped = Dict(v => k for (k, v) in observed)
         for (codonset, aa) in flipped
@@ -181,13 +178,34 @@ end # CodonSet
             (AA_J, [AA_I, AA_L]),
             (AA_Z, [AA_E, AA_Q]),
             (AA_B, [AA_D, AA_N]),
-            (AA_X, [
-                AA_A, AA_R, AA_N, AA_D, AA_C, AA_Q, AA_E, AA_G, AA_H, AA_I, AA_L, AA_K,
-                AA_M, AA_F, AA_P, AA_S, AA_T, AA_W, AA_Y, AA_V
-            ])
-        ] 
+            (
+                AA_X,
+                [
+                    AA_A,
+                    AA_R,
+                    AA_N,
+                    AA_D,
+                    AA_C,
+                    AA_Q,
+                    AA_E,
+                    AA_G,
+                    AA_H,
+                    AA_I,
+                    AA_L,
+                    AA_K,
+                    AA_M,
+                    AA_F,
+                    AA_P,
+                    AA_S,
+                    AA_T,
+                    AA_W,
+                    AA_Y,
+                    AA_V,
+                ],
+            ),
+        ]
             c1 = only(reverse_translate(LongAA([ambig]), rvcode))
-            c2 = foldl(elements, init=CodonSet()) do old, aa
+            c2 = foldl(elements; init=CodonSet()) do old, aa
                 union(old, reverse_translate(aa, rvcode))
             end
             @test c1 == c2
diff --git a/test/utils.jl b/test/utils.jl
index a2d74b3..2373053 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -1,5 +1,9 @@
+function random_seq(A::Alphabet, n::Integer)
+    randseq(A, SamplerUniform(symbols(A)), n)
+end
+
 # Return a random DNA/RNA sequence of the given length.
-function random_seq(n::Integer, nts, probs, outtype = String)
+function random_seq(n::Integer, nts, probs, outtype=String)
     cumprobs = cumsum(probs)
     x = Vector{Char}(undef, n)
     for i in 1:n
@@ -8,8 +12,7 @@ function random_seq(n::Integer, nts, probs, outtype = String)
     return outtype(x)
 end
 
-function random_seq(::Type{A}, n::Integer) where {A<:Alphabet}
-    # TODO: Resolve the use of symbols(A()).
+function random_seq(::Type{A}, n::Integer) where {A <: Alphabet}
     nts = symbols(A())
     probs = Vector{Float64}(undef, length(nts))
     fill!(probs, 1 / length(nts))
@@ -25,10 +28,33 @@ function random_rna(n, probs=[0.24, 0.24, 0.24, 0.24, 0.04])
 end
 
 function random_aa(len)
-    return random_seq(len,
-        ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
-         'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'X' ],
-        push!(fill(0.049, 20), 0.02))
+    return random_seq(
+        len,
+        [
+            'A',
+            'R',
+            'N',
+            'D',
+            'C',
+            'Q',
+            'E',
+            'G',
+            'H',
+            'I',
+            'L',
+            'K',
+            'M',
+            'F',
+            'P',
+            'S',
+            'T',
+            'W',
+            'Y',
+            'V',
+            'X',
+        ],
+        push!(fill(0.049, 20), 0.02),
+    )
 end
 
 function random_dna_symbols(n, probs=[0.24, 0.24, 0.24, 0.24, 0.04])
@@ -39,13 +65,35 @@ function random_rna_symbols(n, probs=[0.24, 0.24, 0.24, 0.24, 0.04])
     return random_seq(n, ['A', 'C', 'G', 'U', 'N'], probs, Vector{RNA})
 end
 
-function random_rna_symbols(n, probs=[0.24, 0.24, 0.24, 0.24, 0.04])
-    return random_seq(n, ['A', 'C', 'G', 'U', 'N'], probs, Vector{RNA})
-end
-
 function random_aa_symbols(n, probs=[0.24, 0.24, 0.24, 0.24, 0.04])
-    return random_seq(n, ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
-     'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'X' ], probs, Vector{AminoAcid})
+    return random_seq(
+        n,
+        [
+            'A',
+            'R',
+            'N',
+            'D',
+            'C',
+            'Q',
+            'E',
+            'G',
+            'H',
+            'I',
+            'L',
+            'K',
+            'M',
+            'F',
+            'P',
+            'S',
+            'T',
+            'W',
+            'Y',
+            'V',
+            'X',
+        ],
+        probs,
+        Vector{AminoAcid},
+    )
 end
 
 function random_dna_kmer(len)
@@ -72,4 +120,4 @@ function rna_complement(seq::AbstractString)
         seqc[i] = complementer[c]
     end
     return String(seqc)
-end
\ No newline at end of file
+end