From 217ad60b2de0c240a9cebcfad6f3dab99e17c272 Mon Sep 17 00:00:00 2001
From: Aidan Lakshman <ahl27@pitt.edu>
Date: Thu, 17 Oct 2024 09:01:36 -0400
Subject: [PATCH 1/5] Create test-coverage.yaml

---
 .github/workflows/test-coverage.yaml | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/workflows/test-coverage.yaml

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/.github/workflows/test-coverage.yaml
@@ -0,0 +1 @@
+

From 765a40baff7adb7ca870b5f2a0403b4487e1b831 Mon Sep 17 00:00:00 2001
From: Aidan Lakshman <ahl27@pitt.edu>
Date: Thu, 17 Oct 2024 09:47:50 -0400
Subject: [PATCH 2/5] feat: Unit Testing and Automation (#114)

Major changes:
- new workflow file to run tests
- numerous unit tests

Minor changes:
- warning in translate.R:16 has been elevated to an error because AA_ALPHABET is now enforced.
error messages for letter.R are standardized, some additional error checking is added.
- mask was scheduled to be deprecated in Biostrings 2.9. This function now throws a warning when it is used and will be removed in a subsequent release.
man/XStringSetList-class.Rd previously said that "DNAStringSetList and AAStringSetList are the only constructors", but the current version of Biostrings includes BStringSetList and RNAStringSetList constructors. This has been updated.
- Some tests were included inline in replaceAt.R wrapped in if(FALSE){...} brackets. These have been moved to the unit tests, and performance/timing tests have been removed.
- replaceAmbiguities now checks to ensure input is one of DNAString, RNAString, or the XStringSet/XStringSetList analogs. This previously caused some odd behavior with input of type AAString.
- an erroneous extra (useless) argument to make_AA_COLORED_LETTERS has been removed.
- a warning in XStringViews-class.R that was supposed to be suppressed in Bioc 2.12 has been suppressed.
- tests in inst/unitTests have been ported to testthat when possible. Accordingly, run_unitTests.R has been removed.
- Some files have had trailing whitespace removed.
---
 .github/workflows/test-coverage.yaml          | 226 ++++++++++++++
 .gitignore                                    |  13 +
 DESCRIPTION                                   |   2 +-
 R/XStringViews-class.R                        |  16 +-
 R/chartr.R                                    |   3 +
 R/coloring.R                                  |   2 +-
 R/dinucleotideFrequencyTest.R                 |   6 +-
 R/letter.R                                    |  17 +-
 R/maskMotif.R                                 |   2 +
 R/matchPDict.R                                |   2 +-
 R/replaceAt.R                                 | 169 -----------
 R/translate.R                                 |   3 +-
 man/XStringSetList-class.Rd                   |   9 +-
 man/chartr.Rd                                 |   3 +-
 tests/run_unitTests.R                         |   2 -
 tests/testthat.R                              |   4 +
 tests/testthat/test-MaskedXString.R           | 115 ++++++++
 tests/testthat/test-OtherMasking.R            |  58 ++++
 tests/testthat/test-PDict.R                   | 205 +++++++++++++
 tests/testthat/test-QualityScaledXStringSet.R |  94 ++++++
 tests/testthat/test-XString-class.R           | 244 ++++++++++++++++
 tests/testthat/test-XStringQuality.R          |  77 +++++
 tests/testthat/test-XStringSet-class.R        | 275 ++++++++++++++++++
 tests/testthat/test-XStringSet-io.R           | 172 +++++++++++
 tests/testthat/test-XStringSetList-class.R    | 198 +++++++++++++
 tests/testthat/test-XStringSetList.R          |  36 +++
 tests/testthat/test-XStringViews.R            |  49 ++++
 tests/testthat/test-chartr.R                  |  42 +++
 .../testthat/test-dinucleotideFrequencyTest.R |  32 ++
 tests/testthat/test-findPalindromes.R         | 175 +++++++++++
 tests/testthat/test-letter.R                  |  46 +++
 tests/testthat/test-letterFrequency.R         | 265 +++++++++++++++++
 tests/testthat/test-matchLRPatterns.R         |  47 +++
 tests/testthat/test-matchPDict.R              | 224 ++++++++++++++
 tests/testthat/test-matchPWM.R                |  24 ++
 tests/testthat/test-matchPattern.R            | 120 ++++++++
 tests/testthat/test-miscXStringMethods.R      | 111 +++++++
 tests/testthat/test-miscellaneous.R           |  46 +++
 tests/testthat/test-replaceAt.R               | 192 ++++++++++++
 tests/testthat/test-translate.R               | 125 ++++++++
 tests/testthat/test-trimLRPatterns.R          |  62 ++++
 41 files changed, 3311 insertions(+), 202 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 tests/run_unitTests.R
 create mode 100644 tests/testthat.R
 create mode 100644 tests/testthat/test-MaskedXString.R
 create mode 100644 tests/testthat/test-OtherMasking.R
 create mode 100644 tests/testthat/test-PDict.R
 create mode 100644 tests/testthat/test-QualityScaledXStringSet.R
 create mode 100644 tests/testthat/test-XString-class.R
 create mode 100644 tests/testthat/test-XStringQuality.R
 create mode 100644 tests/testthat/test-XStringSet-class.R
 create mode 100644 tests/testthat/test-XStringSet-io.R
 create mode 100644 tests/testthat/test-XStringSetList-class.R
 create mode 100644 tests/testthat/test-XStringSetList.R
 create mode 100644 tests/testthat/test-XStringViews.R
 create mode 100644 tests/testthat/test-chartr.R
 create mode 100644 tests/testthat/test-dinucleotideFrequencyTest.R
 create mode 100644 tests/testthat/test-findPalindromes.R
 create mode 100644 tests/testthat/test-letter.R
 create mode 100644 tests/testthat/test-letterFrequency.R
 create mode 100644 tests/testthat/test-matchLRPatterns.R
 create mode 100644 tests/testthat/test-matchPDict.R
 create mode 100644 tests/testthat/test-matchPWM.R
 create mode 100644 tests/testthat/test-matchPattern.R
 create mode 100644 tests/testthat/test-miscXStringMethods.R
 create mode 100644 tests/testthat/test-miscellaneous.R
 create mode 100644 tests/testthat/test-replaceAt.R
 create mode 100644 tests/testthat/test-translate.R
 create mode 100644 tests/testthat/test-trimLRPatterns.R

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 8b137891..c51f050f 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -1 +1,227 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  pull_request:
+    branches: [main, master, devel, devel-staging]
 
+name: test-coverage-nocodecov
+
+permissions: read-all
+
+jobs:
+  test-coverage:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - name: Checkout current ref
+        uses: actions/checkout@v4
+        with:
+          path: ./new-state
+
+      - name: Checkout Biostrings devel ref
+        id: devel-checkout
+        uses: actions/checkout@v4
+        with:
+          repository: Bioconductor/Biostrings
+          path: ./original-state
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - name: setup dependencies for old state
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: ./original-state
+          extra-packages: any::covr
+          needs: coverage
+
+      - name: setup dependencies for new state
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: ./new-state
+          extra-packages: any::covr
+          needs: coverage
+
+      - name: Test coverage on base branch
+        run: |
+          dirpath <- file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+          dir.create(dirpath)
+
+          ## first check unit tests
+          cat("Checking test results...\n")
+          res <- testthat::test_local("./new-state", stop_on_failure=FALSE, reporter="check")
+          res <- as.data.frame(res)
+          test_report <- c(sum(res$failed), sum(res$warning), sum(res$skipped), sum(res$passed))
+          shouldStop <- test_report[1] > 0
+          shouldPrint <- sum(test_report[1:2]) > 0
+          test_report <- paste(c("FAIL", "WARN", "SKIP", "PASS"), test_report, collapse=' | ')
+          test_report <- paste('Unit Tests: [', test_report, ']')
+
+          ## build the output message
+          out_msg <- '# Testing Report\n\n'
+          out_msg <- paste0(out_msg, "## Test Results:\n\n")
+          out_msg <- paste0(out_msg, "Please note that test coverage is **not** an ",
+                            "end-all-be-all measure of robustness. Having tests that ",
+                            "correctly cover intended use cases and potential errors ",
+                            "is significantly more important than maximizing coverage.\n\n")
+          out_msg <- paste0(out_msg, "```\n", test_report, '\n```\n\n')
+
+          ## if any tests failed or threw warnings, report them
+          if(shouldPrint){
+            p_toprint <- which(res$failed + res$warning > 0)
+            ptp <- res[p_toprint,]
+            failed_tests <- ptp[,c("file", "test", "warning", "failed")]
+            failed_tests <- apply(failed_tests, 1L, paste, collapse=" | ")
+            failed_tests <- paste("|", failed_tests, "| ")
+            failed_tests <- paste(failed_tests, collapse='\n')
+            md_tab <- paste0("| Test File :page_facing_up: | Test Name :id: | Warnings :warning: | Failures :x: | \n",
+                            "| :----- | :----- | :-----: | :-----: | \n",
+                            failed_tests, "\n\n")
+            out_msg <- paste0(out_msg, "### Warning/Failing Tests:\n\n", md_tab)
+          }
+          if(shouldStop){
+            cat(out_msg, file='./test_status.md')
+            stop("Some tests failed! Skipping coverage report.")
+          }
+
+          ## if no tests failed, check coverage of old vs. new
+          library(covr)
+          # exclude lines with no content
+          options(covr.exclude_pattern=c("^[ \t{}()]+$"))
+          # get results on old state
+          files_to_ignore <- list("R/AMINO_ACID_CODE.R", "R/GENETIC_CODE.R",
+                                  "R/zzz.R", "R/IUPAC_CODE_MAP.R",
+                                  "R/getSeq.R")
+          cov <- covr::package_coverage(
+            path = "./original-state",
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(dirpath, "old-state"),
+            function_exclusions = "^\\.",
+            line_exclusions=files_to_ignore
+          )
+          head_res <- covr::coverage_to_list(cov)
+          # get results on new state
+          cov <- covr::package_coverage(
+            path = "./new-state",
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(dirpath, "new-state"),
+            function_exclusions = "^\\.", # excludes functions starting with .
+            line_exclusions=files_to_ignore
+          )
+          new_res <- covr::coverage_to_list(cov)
+
+          ## compare difference in coverage
+          f_old <- head_res$filecoverage
+          f_new <- new_res$filecoverage
+          cat("Old Coverage:\n")
+          print(f_old)
+          cat("***************\n")
+          cat("New Coverage:\n")
+          print(f_new)
+          cat("***************\n")
+          all_files <- union(names(f_old), names(f_new))
+          file_changes <- rep(0, length(all_files))
+          names(file_changes) <- all_files
+          file_changes[names(f_new)] <- file_changes[names(f_new)] + f_new
+          final_cov <- file_changes
+          file_changes[names(f_old)] <- file_changes[names(f_old)] - f_old
+          total_change <- new_res$totalcoverage - head_res$totalcoverage
+
+          out_msg <- paste0(out_msg, "## Negatively Impacted Files\n\n")
+          ## build warning message
+          n <- names(file_changes)
+          pos_neg <- which(file_changes < 0)
+          if(length(pos_neg) > 0){
+            pos_neg <- pos_neg[order(file_changes[pos_neg], decreasing=FALSE)]
+            warn_changes <- sprintf("%+.01f%%", file_changes)
+            header <- "| File name | Coverage | Change |\n | :----- | :-----: | :-----: |\n"
+            warn_tab <- paste0('| ', n[pos_neg], ' | ', sprintf("%0.02f%%", final_cov[pos_neg]), ' | ',
+                                unname(warn_changes[pos_neg]), ' |', collapse='\n')
+            warn_tab <- paste0(header, warn_tab)
+            out_msg <- paste0(out_msg, "The following files have lost coverage:\n", warn_tab, '\n')
+          } else {
+            out_msg <- paste0(out_msg, "No negatively impacted files. Nice job!\n\n")
+          }
+
+          ## build extended diff table
+          p_Rfiles <- grepl("^R/", n)
+          n <- vapply(strsplit(n, '/'), .subset, character(1L), 2L)
+          all_diffs <- data.frame(filename=n,
+            coverage=sprintf("%.02f%%", final_cov),
+            change=sprintf("%+.01f%%", file_changes))
+          max_nchar <- max(nchar(all_diffs$filename))
+          all_diffs$filename <- sprintf(paste0("%", max_nchar, "s"), all_diffs$filename)
+          all_diffs$coverage <- sprintf("%7s", all_diffs$coverage)
+          all_diffs$change <- sprintf("%7s", all_diffs$change)
+          all_diffs$mark_char <- 1L
+          all_diffs$mark_char[file_changes > 0] <- 2L
+          all_diffs$mark_char[file_changes < 0] <- 3L
+          all_diffs$mark_char <- c(" ", "+", "-")[all_diffs$mark_char]
+
+          all_rows <- apply(all_diffs[c(4,1:3)], 1L, paste, collapse=' ')
+          w <- nchar(all_rows[1L])
+
+          title0 <- "Total Coverage"
+          n_padding <- (w - nchar(title0) - 4) / 2
+          title0 <- paste0("@@", paste(rep(' ', floor(n_padding)), collapse=''),
+            title0, paste(rep(' ', ceiling(n_padding)), collapse=''), "@@")
+          row0 <- paste(ifelse(total_change < 0, "-", ifelse(total_change>0, "+", " ")),
+                          sprintf(paste0("%", max_nchar, "s"), "Total Coverage"),
+                          sprintf("%6.02f%%", new_res$totalcoverage),
+                          sprintf("%+6.01f%%", total_change), collapse=' ')
+
+          title1 <- "R/... Files"
+          n_padding <- (w - nchar(title1) - 4) / 2
+          title1 <- paste0("@@", paste(rep(' ', floor(n_padding)), collapse=''),
+            title1, paste(rep(' ', ceiling(n_padding)), collapse=''), "@@")
+
+          title2 <- "src/... Files"
+          n_padding <- (w - nchar(title2) - 4) / 2
+          title2 <- paste0("@@", paste(rep(' ', floor(n_padding)), collapse=''),
+            title2, paste(rep(' ', ceiling(n_padding)), collapse=''), "@@")
+
+          spacer <- paste(rep('=', w), collapse='')
+          entries1 <- paste(all_rows[p_Rfiles], collapse='\n')
+          entries2 <- paste(all_rows[!p_Rfiles], collapse='\n')
+          diff_table <- paste(title0, spacer, '\n', row0, '\n', spacer,
+                              title1, spacer, entries1, spacer,
+                              title2, spacer, entries2, spacer,
+                              collapse='\n', sep='\n')
+          diff_table <- paste0("<details>\n<summary>Additional Details and Impacted Files:</summary>\n\n",
+                                "```diff\n", diff_table, '\n\n```\n\n</details>')
+          out_msg <- paste0(out_msg, diff_table, '\n')
+          cat(out_msg, file='./test_status.md')
+        shell: Rscript {0}
+
+      ## This is a better option than step summary, but requires
+      ## the "pull-request: write" permission, which I'd rather
+      ## not allow on a public repository
+      # - name: Print comment to PR
+      #   uses: thollander/actions-comment-pull-request@v2
+      #   with:
+      #     GITHUB_TOKEN: ${{ env.GITHUB_PAT }}
+      #     filePath: ./test_status.md
+      #     comment_tag: unit-test-results
+
+      - name: Print results to summary
+        if: always()
+        run: cat ./test_status.md >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload package on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package
+
+      - name: Upload status on success
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-results
+          path: ./test_status.md
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..f59cef4f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+*.o
+src/*.o
+src/*.so
+*.icloud
+.Rproj*
+.DS_Store
+*/.DS_Store
+.RData
+.Rbuildignore
+.Rhistory
+*/*.gcov
+*/*.gcda
+*/*.gcno
diff --git a/DESCRIPTION b/DESCRIPTION
index 240379e0..6146a526 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -44,7 +44,7 @@ Suggests: graphics, pwalign, BSgenome (>= 1.13.14),
 	drosophila2probe, hgu95av2probe, hgu133aprobe,
 	GenomicFeatures (>= 1.3.14),
 	hgu95av2cdf, affy (>= 1.41.3), affydata (>= 1.11.5),
-	RUnit, BiocStyle, knitr
+	RUnit, BiocStyle, knitr, testthat (>= 3.0.0), covr
 VignetteBuilder: knitr
 Collate: utils.R
 	IUPAC_CODE_MAP.R
diff --git a/R/XStringViews-class.R b/R/XStringViews-class.R
index 052b5c6a..b6a2cca3 100644
--- a/R/XStringViews-class.R
+++ b/R/XStringViews-class.R
@@ -478,14 +478,14 @@ setMethod("as.matrix", "XStringViews",
     function(x, use.names=TRUE)
     {
         ## TODO: Supress this warning in BioC 2.12.
-        msg <- c("as.matrix() on an XStringViews object 'x' has changed ",
-                 "behavior: now the\n  views in 'x' must be of equal width ",
-                 "and each view is converted into a row of\n",
-                 "  single characters. To achieve the old behavior, ",
-                 "do 'as.matrix(ranges(x))'.\n  To supress this warning, do ",
-                 "'suppressWarnings(as.matrix(x))'.\n  This warning will be ",
-                 "removed in BioC 2.12.")
-        warning(msg)
+        # msg <- c("as.matrix() on an XStringViews object 'x' has changed ",
+        #          "behavior: now the\n  views in 'x' must be of equal width ",
+        #          "and each view is converted into a row of\n",
+        #          "  single characters. To achieve the old behavior, ",
+        #          "do 'as.matrix(ranges(x))'.\n  To supress this warning, do ",
+        #          "'suppressWarnings(as.matrix(x))'.\n  This warning will be ",
+        #          "removed in BioC 2.12.")
+        # warning(msg)
         y <- fromXStringViewsToStringSet(x, out.of.limits="error",
                                          use.names=use.names)
         as.matrix(y)
diff --git a/R/chartr.R b/R/chartr.R
index 253bf779..a98c6943 100644
--- a/R/chartr.R
+++ b/R/chartr.R
@@ -49,6 +49,9 @@ setMethod("chartr", c(old="ANY", new="ANY", x="MaskedXString"),
 ### A simple wrapper to chartr().
 replaceAmbiguities <- function(x, new="N")
 {
+    if(!(inherits(x, c("XString", "XStringSet", "XStringViews"))) ||
+       !(seqtype(x) %in% c("DNA", "RNA")))
+      stop("replaceAmbiguities is only supported for DNA and RNA")
     if (!(isSingleString(new) && nchar(new) == 1L))
         stop("'new' must be a single letter")
     old <- paste(setdiff(names(IUPAC_CODE_MAP), DNA_BASES), collapse="")
diff --git a/R/coloring.R b/R/coloring.R
index c33eaac7..05406fed 100644
--- a/R/coloring.R
+++ b/R/coloring.R
@@ -63,7 +63,7 @@ make_DNA_AND_RNA_COLORED_LETTERS <- function()
 ### Colors groupins by
 ###   https://www.jalview.org/help/html/colourSchemes/zappo.html
 ### Called in .onLoad() to initialize AA_COLORED_LETTERS.
-make_AA_COLORED_LETTERS <- function(x){
+make_AA_COLORED_LETTERS <- function(){
     whiter <- make_style(rgb(1, 1, 1))
     dark_grey_bg <- make_style(rgb(0.5,0.5,0.5), bg=TRUE)
 
diff --git a/R/dinucleotideFrequencyTest.R b/R/dinucleotideFrequencyTest.R
index 483e1895..1a4ed9d6 100644
--- a/R/dinucleotideFrequencyTest.R
+++ b/R/dinucleotideFrequencyTest.R
@@ -113,12 +113,12 @@ g.test <- function(x, y = NULL, correct="none",
   else {
     # x is not a matrix, so we do Goodness of Fit
     METHOD <- "Log likelihood ratio (G-test) goodness of fit test"
-    if (length(x) == 1) 
+    if (length(x) == 1)
       stop("x must at least have 2 elements")
-    if (length(x) != length(p)) 
+    if (length(x) != length(p))
       stop("x and p must have the same number of elements")
     E <- n * p
-    
+
     if (correct=="yates"){ # Do Yates' correction
       if(length(x)!=2)
         stop("Yates' correction requires 2 data values")
diff --git a/R/letter.R b/R/letter.R
index 339bfce7..5dd1b387 100644
--- a/R/letter.R
+++ b/R/letter.R
@@ -12,8 +12,8 @@ setGeneric("letter", signature="x",
 setMethod("letter", "character",
     function(x, i)
     {
-        if (!is.numeric(i) || any(is.na(i)))
-            stop("'i' must be an NA-free numeric vector")
+        if (!is.numeric(i) || anyNA(i))
+            stop("subscript 'i' must be an NA-free numeric vector")
         if (length(x) == 0)
             return(character(0))
         if (length(i) == 0)
@@ -42,10 +42,13 @@ setMethod("letter", "character",
 setMethod("letter", "XString",
     function(x, i)
     {
-        if (!is.numeric(i))
-            stop("subscript 'i' must be an integer vector")
+        if (!is.numeric(i) || anyNA(i))
+            stop("subscript 'i' must be an NA-free numeric vector")
         if (!is.integer(i))
             i <- as.integer(i)
+        imax <- length(x)
+        if (!all(i >= 1) || !all(i <= imax))
+            stop("subscript out of bounds")
         extract_character_from_XString_by_positions(x, i, collapse=TRUE)
     }
 )
@@ -54,12 +57,10 @@ setMethod("letter", "XString",
 setMethod("letter", "XStringViews",
     function(x, i)
     {
-        if (!is.numeric(i))
-            stop("subscript 'i' must be an integer vector")
+        if (!is.numeric(i) || anyNA(i))
+            stop("subscript 'i' must be an NA-free numeric vector")
         if (!is.integer(i))
             i <- as.integer(i)
-        if (anyNA(i))
-            stop("subscript 'i' cannot contain NAs")
         if (length(x) == 0)
             return(character(0))
         imax <- min(width(x))
diff --git a/R/maskMotif.R b/R/maskMotif.R
index 0fda68a0..5f6692b8 100644
--- a/R/maskMotif.R
+++ b/R/maskMotif.R
@@ -53,6 +53,8 @@ setMethod("maskMotif", signature(x="XString", motif="ANY"),
 ### Deprecate in Biostrings 2.9!
 mask <- function(x, start=NA, end=NA, pattern)
 {
+    warning("`mask()` is deprecated and will be removed in a future release.\n",
+              "Please use `Mask()` or `maskMotif()` instead.")
     if (!is(x, "XString"))
         x <- XString(NULL, x)
     if (missing(pattern)) {
diff --git a/R/matchPDict.R b/R/matchPDict.R
index aa4a90ab..c7324abe 100644
--- a/R/matchPDict.R
+++ b/R/matchPDict.R
@@ -647,7 +647,7 @@ setMethod("matchPDict", "XStringViews",
 
 ### Dispatch on 'subject' (see signature of generic).
 setMethod("matchPDict", "MaskedXString",
-    function(pdict, subject, 
+    function(pdict, subject,
              max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=TRUE,
              algorithm="auto", verbose=FALSE)
         matchPDict(pdict, toXStringViewsOrXString(subject),
diff --git a/R/replaceAt.R b/R/replaceAt.R
index b1cca293..3462e50e 100644
--- a/R/replaceAt.R
+++ b/R/replaceAt.R
@@ -321,75 +321,6 @@ setMethod("extractAt", "XStringSet",
     }
 )
 
-
-if (FALSE) {  #     <<<--- begin testing extractAt() --->>>
-
-library(Biostrings)
-
-### From an XStringSet object
-### =========================
-
-### Only compare the classes, the shapes (i.e. lengths + elementNROWS +
-### names), the inner names, and the sequence contents. Doesn't look at
-### the metadata or the metadata columns (outer or inner).
-identical_XStringSetList <- function(target, current)
-{
-    ok1 <- identical(class(target), class(current))
-    ok2 <- identical(elementNROWS(target), elementNROWS(current))
-    unlisted_target <- unlist(target, use.names=FALSE)
-    unlisted_current <- unlist(current, use.names=FALSE)
-    ok3 <- identical(names(unlisted_target), names(unlisted_current))
-    ok4 <- all(unlisted_target == unlisted_current)
-    ok1 && ok2 && ok3 && ok4
-}
-
-x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="XYZ"))
-at <- IRangesList(IRanges(c(2, 1), c(3, 0)),
-                  IRanges(c(7, 2, 12, 7), c(6, 5, 11, 8)),
-                  IRanges(2, 2))
-### Set inner names on 'at'.
-unlisted_at <- unlist(at)
-names(unlisted_at) <- paste0("rg", sprintf("%02d", seq_along(unlisted_at)))
-at <- relist(unlisted_at, at)
-
-res1a <- extractAt(x, at)
-res1b <- BStringSetList(mapply(extractAt, x, at))
-stopifnot(identical_XStringSetList(res1a, res1b))
-
-res2a <- extractAt(x, at[3])
-res2b <- BStringSetList(mapply(extractAt, x, at[3]))
-stopifnot(identical_XStringSetList(res2a, res2b))
-res2c <- extractAt(x, at[[3]])
-stopifnot(identical_XStringSetList(res2a, res2c))
-
-### Testing performance with 2 millions small extractions at random
-### locations in Fly upstream sequences:
-dm3_upstream_filepath <- system.file("extdata",
-                                     "dm3_upstream2000.fa.gz",
-                                     package="Biostrings")
-dm3_upstream <- readDNAStringSet(dm3_upstream_filepath)
-dm3_upstream <- dm3_upstream[width(dm3_upstream) == 2000L]
-
-set.seed(33)
-NE <- 2000000L  # total number of extractions
-sample_size <- NE * 1.1
-some_ranges <- IRanges(sample(2001L, sample_size, replace=TRUE),
-                       width=sample(0:75, sample_size, replace=TRUE))
-some_ranges <- head(some_ranges[end(some_ranges) <= 2000L], n=NE)
-split_factor <- factor(sample(length(dm3_upstream), NE, replace=TRUE),
-                       levels=seq_along(dm3_upstream))
-at <- unname(split(some_ranges, split_factor))
-
-### Takes < 1s on my machine.
-system.time(res3a <- extractAt(dm3_upstream, at)) 
-
-### Equivalent but about 250x slower than the above on my machine.
-system.time(res3b <- DNAStringSetList(mapply(extractAt, dm3_upstream, at)))
-stopifnot(identical_XStringSetList(res3a, res3b))
-
-}  #                <<<--- end testing extractAt() --->>>
-
-
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### replaceAt()
 ###
@@ -423,103 +354,3 @@ setMethod("replaceAt", "XStringSet",
         ans
     }
 )
-
-
-if (FALSE) {  #     <<<--- begin testing replaceAt() --->>>
-
-library(Biostrings)
-
-### On an XString object
-### ====================
-
-### Testing performance with half-million small substitutions at random
-### locations in Celegans chrI:
-library(BSgenome.Celegans.UCSC.ce2)
-genome <- BSgenome.Celegans.UCSC.ce2
-chrI <- genome$chrI
-
-### A very amateurish random disjoint ranges generator. Not for serious use!
-randomDisjointRanges <- function(min_start, max_end, min_width, max_width, n)
-{
-    set.seed(33)
-    offset <- min_start - 1L
-    n1 <- n * (1.1 + max_width/(max_end-offset+1L))  # very approximative
-
-    some_starts <- sample(max_end-offset+1L, n1, replace=TRUE) + offset
-    some_widths <- sample(min_width:max_width, n1, replace=TRUE)
-    some_ranges <- IRanges(some_starts, width=some_widths)
-    some_ranges <- some_ranges[end(some_ranges) <= max_end]
-    ans <- disjoin(some_ranges)
-    if (min_width == 0L) {
-        is_empty <- width(some_ranges) == 0L
-        some_empty_ranges <- some_ranges[is_empty]
-        ans <- sample(c(ans, some_empty_ranges))
-    }
-    if (length(ans) < n)
-        stop("internal error, sorry")
-    head(ans, n=n)
-}
-
-at4 <- randomDisjointRanges(1L, nchar(chrI), 0L, 20L, 500000L)
-### Takes < 1s on my machine (64-bit Ubuntu).
-system.time(current <- replaceAt(chrI, at4, Views(chrI, at4)))
-stopifnot(current == chrI)
-
-### Testing performance with half-million single-letter insertions at random
-### locations in Celegans chrI:
-at5 <- randomDisjointRanges(1L, nchar(chrI), 0L, 0L, 500000L)
-### Takes < 1s on my machine (64-bit Ubuntu).
-system.time(current <- replaceAt(chrI, at5, value="-"))
-m <- matchPattern("-", current)
-stopifnot(identical(sort(start(at5)), start(m) - seq_along(at5) + 1L))
-
-system.time(current2 <- replaceAt(chrI, start(at5), value="-"))
-stopifnot(identical(current, current2))
-
-matchPattern("---", current)
-
-### On an XStringSet object
-### =======================
-
-### Only compare the classes, lengths, names, and sequence contents.
-### Doesn't look at the metadata or the metadata columns.
-identical_XStringSet <- function(target, current)
-{
-    ok1 <- identical(class(target), class(current))
-    ok2 <- identical(names(target), names(current))
-    ok3 <- all(target == current)
-    ok1 && ok2 && ok3
-}
-
-x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="XYZ"))
-at <- IRangesList(IRanges(c(2, 1), c(3, 0)),
-                  IRanges(c(7, 2, 12, 7), c(6, 5, 11, 8)),
-                  IRanges(2, 2))
-### Set inner names on 'at'.
-unlisted_at <- unlist(at)
-names(unlisted_at) <- paste0("rg", sprintf("%02d", seq_along(unlisted_at)))
-at <- relist(unlisted_at, at)
-
-current <- replaceAt(x, at, value=extractAt(x, at))  # no-op
-stopifnot(identical_XStringSet(x, current))
-
-res1a <- replaceAt(x, at)  # deletions
-res1b <- mendoapply(replaceAt, x, at)
-stopifnot(identical_XStringSet(res1a, res1b))
-
-### Testing performance with half-million single-letter insertions at random
-### locations in Fly upstream sequences:
-set.seed(33)
-split_factor <- factor(sample(length(dm3_upstream), 500000L, replace=TRUE),
-                       levels=seq_along(dm3_upstream))
-at5 <- unname(split(sample(2001L, 500000L, replace=TRUE),
-                    split_factor))
-### Takes < 1s on my machine.
-system.time(res5a <- replaceAt(dm3_upstream, at5, value="-"))
-### Equivalent but about 1400x slower than the above on my machine.
-system.time(res5b <- mendoapply(replaceAt,
-                                dm3_upstream, as(at5, "List"), as("-", "List")))
-stopifnot(identical_XStringSet(res5a, res5b))
-
-}  #                <<<--- end testing replaceAt() --->>>
-
diff --git a/R/translate.R b/R/translate.R
index 1a16f981..e061fb6e 100644
--- a/R/translate.R
+++ b/R/translate.R
@@ -13,8 +13,9 @@
     if (!all(nchar(genetic.code) == 1L))
         stop("'genetic.code' must contain 1-letter strings")
     ## Just a warning for now. Might become an error in the future.
+    ## 07/26/2024: updated to error since AAString() enforces alphabet
     if (!all(genetic.code %in% AA_ALPHABET))
-        warning("some codons in 'genetic.code' are mapped to letters ",
+        stop("some codons in 'genetic.code' are mapped to letters ",
                 "not in the Amino Acid\n  alphabet (AA_ALPHABET)")
     alt_init_codons <- attr(genetic.code, "alt_init_codons", exact=TRUE)
     if (is.null(alt_init_codons)
diff --git a/man/XStringSetList-class.Rd b/man/XStringSetList-class.Rd
index f8cc185f..b2ae458b 100644
--- a/man/XStringSetList-class.Rd
+++ b/man/XStringSetList-class.Rd
@@ -60,10 +60,7 @@
   for storing a list of \link{BStringSet}, \link{DNAStringSet},
   \link{RNAStringSet} and \link{AAStringSet} objects, respectively.
   These four containers are direct subclasses of XStringSetList
-  with no additional slots.
-
-  Currently \code{DNAStringSetList()} and \code{AAStringSetList()} are
-  the only XStringSetList constructors. The XStringSetList class itself 
+  with no additional slots. The XStringSetList class itself
   is virtual and has no constructor.
 }
 
@@ -77,7 +74,7 @@ AAStringSetList(..., use.names=TRUE)
 
 \arguments{
   \item{\dots}{
-    Character vector(s) (with no NAs), or \link{XStringSet} object(s), or 
+    Character vector(s) (with no NAs), or \link{XStringSet} object(s), or
     \link{XStringViews} object(s) to be concatenated into a
     \link{XStringSetList}.
   }
@@ -110,8 +107,6 @@ AAStringSetList(..., use.names=TRUE)
 ## ------------------------------------------------------------------------
 ## A. THE XStringSetList CONSTRUCTORS
 ## ------------------------------------------------------------------------
-## Currently DNAStringSetList() and AAStringSetList() are the only 
-## constructors. Others will be developed when the use case arises.
 
 dna1 <- c("AAA", "AC", "", "T", "GGATA")
 dna2 <- c("G", "TT", "C")
diff --git a/man/chartr.Rd b/man/chartr.Rd
index 35a5c91f..a83171a6 100644
--- a/man/chartr.Rd
+++ b/man/chartr.Rd
@@ -46,7 +46,8 @@ replaceAmbiguities(x, new="N")
   specifications.
 
   \code{replaceAmbiguities()} is a simple wrapper around \code{chartr()}
-  that replaces all IUPAC ambiguities with N.
+  that replaces all IUPAC ambiguities with N for objects containing DNA
+  or RNA sequence data.
 }
 
 \value{
diff --git a/tests/run_unitTests.R b/tests/run_unitTests.R
deleted file mode 100644
index f1b20090..00000000
--- a/tests/run_unitTests.R
+++ /dev/null
@@ -1,2 +0,0 @@
-require("Biostrings") || stop("unable to load Biostrings package")
-Biostrings:::.test()
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 00000000..b22abee7
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(Biostrings)
+
+test_check("Biostrings")
diff --git a/tests/testthat/test-MaskedXString.R b/tests/testthat/test-MaskedXString.R
new file mode 100644
index 00000000..5a76d303
--- /dev/null
+++ b/tests/testthat/test-MaskedXString.R
@@ -0,0 +1,115 @@
+## Testing for MaskedXString-class.R, maskMotif.R, injectHardMask.R
+##
+## MaskedXString-class.R exports:
+## - MaskedXString class (B, AA, DNA, RNA)
+## - unmasked
+## - masks, masks<-
+## - length
+## - maskedratio
+## - maskedwidth
+## - nchar
+## - seqtype
+## - *Views
+## - as.character
+## - subseq
+## - toString
+## - coercion between masked types
+## - coercion between unmasked types (B <-> MaskedB)
+## - coercion to MaskCollection, NormalIRanges, XStringViews
+
+dna_c <- sample(DNA_BASES, 25L, replace=TRUE)
+rna_c <- sample(RNA_BASES, 25L, replace=TRUE)
+aaa_c <- sample(AA_STANDARD, 25L, replace=TRUE)
+bbb_c <- sample(LETTERS, 25L, replace=TRUE)
+
+dna <- DNAString(paste(dna_c, collapse=''))
+rna <- RNAString(paste(rna_c, collapse=''))
+aaa <- AAString(paste(aaa_c, collapse=''))
+bbb <- BString(paste(bbb_c, collapse=''))
+
+mask_starts <- c(1,5,10)
+mask_ends <- c(3,7,12)
+m1 <- Mask(25L, start=mask_starts, end=mask_ends)
+m2 <- Mask(25L, start=c(15,20), end=c(18,22))
+total_mask_width <- sum(mask_ends - mask_starts + 1L)
+
+.check_masked_ascharacter <- function(maskedv, origv, maskstarts, maskends){
+	c1 <- strsplit(as.character(maskedv), '')[[1]]
+	c2 <- strsplit(as.character(origv), '')[[1]]
+	if(sum(c1 == "#") != total_mask_width) return(FALSE)
+	if(!all(c1[c1!='#'] == c2[c1!='#'])) return(FALSE)
+	TRUE
+}
+
+test_that("masking coerces between XString and MaskedXString", {
+	l <- list(dna, rna, aaa, bbb)
+	for(i in seq_along(l)){
+		tmp <- l[[i]]
+		masks(tmp) <- m1
+		## should convert to MaskedXString
+		mseqclass <- paste0("Masked", seqtype(l[[i]]), "String")
+		expect_s4_class(tmp, mseqclass)
+		expect_s4_class(as(l[[i]], mseqclass), mseqclass)
+
+		## seqtype should stay the same
+		expect_equal(seqtype(tmp), seqtype(l[[i]]))
+
+		## length should stay the same
+		expect_equal(length(tmp), length(l[[i]]))
+
+		## maskedwidth should report 9
+		expect_equal(maskedwidth(tmp), total_mask_width)
+		expect_equal(maskedratio(tmp), total_mask_width/nchar(l[[i]]))
+		expect_equal(nchar(tmp), nchar(l[[i]]) - total_mask_width)
+
+		## make sure that as.character preserves masking
+		expect_true(.check_masked_ascharacter(tmp, l[[i]]))
+		expect_equal(as.character(subseq(tmp, 4, 10)), substr(as.character(tmp), 4, 10))
+		expect_equal(toString(tmp), as.character(tmp))
+
+		## should convert back to XString
+		expect_s4_class(as(tmp, paste0(seqtype(tmp), "String")), class(l[[i]]))
+
+		## equality should work here because they're from the same pool (?)
+		masks(tmp) <- m2
+		expect_equal(masks(tmp), m2)
+		expect_equal(unmasked(tmp), l[[i]])
+	}
+})
+
+test_that("coercion between masked types works correctly", {
+	rnam <- rna
+	dnam <- dna
+	aaam <- aaa
+	masks(dnam) <- masks(rnam) <- masks(aaam) <- m2
+
+	# test translation
+	udna <- strsplit(as.character(dnam), '')[[1]]
+	udna <- DNAString(paste(udna[udna!='#'], collapse=''))
+	expect_equal(as.character(translate(dnam)), as.character(translate(udna)))
+
+	# DNA <-> RNA conversion
+	expect_equal(as.character(unmasked(as(dnam, "MaskedRNAString"))), as.character(as(dna, "RNAString")))
+	expect_equal(as.character(unmasked(as(rnam, "MaskedRNAString"))), as.character(as(rna, "RNAString")))
+
+	expect_error(as(dnam, "MaskedAAString"), "incompatible sequence types")
+	expect_error(as(rnam, "MaskedAAString"), "incompatible sequence types")
+
+	# roundtrip X -> B -> X conversion
+	expect_equal(as(as(dnam, "MaskedBString"), "MaskedDNAString"), dnam)
+	expect_equal(as(as(rnam, "MaskedBString"), "MaskedRNAString"), rnam)
+	expect_equal(as(as(aaam, "MaskedBString"), "MaskedAAString"), aaam)
+
+	masks(dnam) <- m1
+	# MaskCollection conversion
+	expect_true(all(as(dnam, "MaskCollection") == m1))
+
+	# NormalIRanges conversion converts to the masked ranges
+	expect_equal(start(as(dnam, "NormalIRanges")), mask_starts)
+	expect_equal(end(as(dnam, "NormalIRanges")), mask_ends)
+
+	# Views conversion converts to unmasked ranges
+	exp_end <- c(mask_starts-1L, length(dnam))
+	exp_end <- unique(exp_end[exp_end>=1])
+	expect_equal(end(as(dnam, "Views")), exp_end)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-OtherMasking.R b/tests/testthat/test-OtherMasking.R
new file mode 100644
index 00000000..a68ae2aa
--- /dev/null
+++ b/tests/testthat/test-OtherMasking.R
@@ -0,0 +1,58 @@
+## Additional tests for the following files:
+## - maskMotif.R (contains `maskMotif`, `mask`)
+## - injectHardMask.R (contains `injectHardMask`)
+
+test_that("maskMotif works for XStrings", {
+	s1 <- BString("aabbaacc")
+	s2 <- maskMotif(s1, "bb")
+	expect_equal(as.character(s2), "aa##aacc")
+	s3 <- maskMotif(s2, "baac")
+	expect_equal(as.character(s3), "aa##aacc")
+
+	## order of operations matters, masking should skip masked positions
+	s4 <- maskMotif(s1, "baac")
+	expect_equal(as.character(s4), "aab####c")
+	expect_equal(as.character(gaps(s4)), "###baac#")
+	expect_equal(as.character(as(s4, "Views")), c("aab", "c"))
+
+	## mask DNAString by multiple motifs
+	## example from ?maskMotif.R, just using DNAString objects
+	x <- DNAString("ACACAACTAGATAGNACTNNGAGAGACGC")
+	x1 <- maskMotif(x, DNAString("N"))
+	x2 <- maskMotif(x1, DNAString("AC"))
+	x3 <- maskMotif(x2, DNAString("GA"), min.block.width=5)
+	expect_equal(width(as(x3, "Views")), c(1,7,1,5,2))
+	exp_opt <- c(A=5L, C=5L, N=3L, `#`=16L)
+	expect_true(all(table(strsplit(as.character(gaps(x3)), '')[[1]]) == as.table(exp_opt[order(names(exp_opt))])))
+})
+
+test_that("injectHardMask works for all supported inputs", {
+	set.seed(50L)
+	ds <- sample(DNA_BASES, 50L, replace=TRUE)
+
+	d1 <- DNAString(paste(ds, collapse=''))
+	d2 <- d1
+
+	mask_starts <- c(4,10,28)
+	mask_ends <- c(7,17,38)
+	m1 <- Mask(length(ds), start=mask_starts, end=mask_ends)
+	masks(d2) <- m1
+
+	all_masked_indices <- unlist(lapply(seq_along(mask_starts), \(i) seq(mask_starts[i], mask_ends[i])))
+	exp_opt <- ds
+	exp_opt[all_masked_indices] <- "+"
+	expect_equal(as.character(injectHardMask(d2)), paste(exp_opt, collapse=''))
+
+	exp_opt[all_masked_indices] <- "M"
+	expect_equal(as.character(injectHardMask(d2, "M")), paste(exp_opt, collapse=''))
+
+	v <- Views(d1, start=mask_starts, end=mask_ends)
+	## injecting on a Views does the opposite
+	all_vmasked <- seq_len(length(ds))[-all_masked_indices]
+	exp_vopt <- ds
+	exp_vopt[all_vmasked] <- "+"
+	expect_equal(as.character(injectHardMask(v)), paste(exp_vopt, collapse=''))
+
+	exp_vopt[all_vmasked] <- "M"
+	expect_equal(as.character(injectHardMask(v, "M")), paste(exp_vopt, collapse=''))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-PDict.R b/tests/testthat/test-PDict.R
new file mode 100644
index 00000000..a1db9fdb
--- /dev/null
+++ b/tests/testthat/test-PDict.R
@@ -0,0 +1,205 @@
+## PDict objects have the following accessors:
+## - length
+## - width
+## - names
+## - head
+## - tb
+## - tb.width
+## - tail
+## - (duplicated)
+## - (patternFrequency)
+## The `[[` operator is also defined
+
+test_that("PDicts can be initialized happy path", {
+	codons <- mkAllStrings(DNA_BASES, 3)
+	namedcodons <- codons
+	names(namedcodons) <- codons
+
+	for(algo in c("ACtree2", "Twobit")){
+		expect_s4_class(PDict(codons, algorithm=algo), "TB_PDict")
+
+		p <- PDict(codons, algorithm=algo)
+		expect_output(show(p), "TB_PDict object of length 64 and width 3")
+
+		## checking threeparts internals
+		expect_s4_class(p@threeparts@pptb, algo)
+		expect_equal(lengths(p@threeparts@tail), rep(0L, 64L))
+		expect_equal(lengths(p@threeparts@head), rep(0L, 64L))
+
+		## accessors
+		expect_equal(length(p), 64L)
+		expect_equal(width(p), rep(3L, 64L))
+
+		expect_null(names(p))
+		expect_equal(names(PDict(namedcodons, algorithm=algo)), codons)
+
+		expect_equal(duplicated(p), rep(FALSE, 64L))
+		expect_equal(patternFrequency(p), rep(1L, 64L))
+
+		expect_equal(duplicated(PDict(c("A","A"), algorithm=algo)), c(FALSE, TRUE))
+		expect_equal(patternFrequency(PDict(c("A","A"), algorithm=algo)), c(2L, 2L))
+
+
+		## tb
+		expect_identical(tb(p), p@threeparts@pptb@tb)
+		expect_s4_class(tb(p), "DNAStringSet")
+		expect_equal(as.character(tb(p)), codons)
+		expect_equal(tb.width(p), 3L)
+
+		expect_s4_class(p[[1L]], "DNAString")
+		expect_equal(as.character(p[[1L]]), "AAA")
+
+		## initializing with ambiguity codes
+		strs <- paste0(DNA_ALPHABET, "AT", DNA_ALPHABET, "A")
+		l <- length(DNA_ALPHABET)
+		expect_s4_class(PDict(strs, tb.start=2L, tb.width=2L, algorithm=algo), "TB_PDict")
+		p <- PDict(strs, tb.start=2L, tb.width=2L, algorithm=algo)
+		expect_equal(as.character(tb(p)), rep("AT", l))
+		expect_equal(tb.width(p), 2L)
+		expect_equal(lengths(head(p)), rep(1L, l))
+		expect_identical(as.character(head(p)), DNA_ALPHABET)
+		expect_equal(lengths(tail(p)), rep(2L, l))
+		expect_identical(as.character(tail(p)), paste0(DNA_ALPHABET, "A"))
+		expect_equal(width(p@threeparts), rep(5L, l))
+
+		## variable width tail
+		strs[seq(1L,length(strs),2L)] <- paste0(strs[seq(1L,length(strs),2L)], "G")
+		p <- PDict(strs, tb.start=2L, tb.width=2L, algorithm=algo)
+		expect_equal(lengths(head(p@threeparts)), rep(1L, l))
+		expect_identical(as.character(head(p@threeparts)), DNA_ALPHABET)
+		expect_equal(lengths(tail(p@threeparts)), rep(c(3L,2L), length.out=l))
+
+		## allowing small number of mismatches
+		pats <- apply(expand.grid(mkAllStrings(c("A","T"), 3L), mkAllStrings(c("G","C"), 3L)), 1, paste0, collapse='')
+		expect_s4_class(PDict(pats, max.mismatch=1L, algorithm=algo), "MTB_PDict")
+		p <- PDict(pats, max.mismatch=1L, algorithm=algo)
+		expect_output(show(p), "MTB_PDict object of length 64 and width 6")
+		expect_error(p@threeparts, 'no slot of name "threeparts"')
+		expect_equal(length(p@threeparts_list), 2L)
+		p1 <- p@threeparts_list[[1]]
+		p2 <- p@threeparts_list[[2]]
+		expect_equal(lengths(p1@head), rep(0L, 64L))
+		expect_equal(lengths(p1@tail), rep(3L, 64L))
+		expect_equal(as.character(p1@tail), substr(pats,4,6))
+		expect_equal(lengths(p2@head), rep(3L, 64L))
+		expect_equal(lengths(p2@tail), rep(0L, 64L))
+		expect_equal(as.character(p2@head), substr(pats,1,3))
+
+		expect_equal(length(as.list(p)), 2L)
+	}
+})
+
+test_that("PDict sad path errors correctly", {
+	expect_error(PDict(DNA_ALPHABET), "non base DNA letter found in Trusted Band")
+	expect_error(PDict(AA_ALPHABET), "not in lookup table")
+	expect_error(PDict(1:3), "unable to find an inherited method")
+	expect_error(PDict(character(0L)), "must contain at least one pattern")
+	p <- PDict(DNA_BASES)
+	expect_error({names(p) <- DNA_BASES}, "attempt to modify the names of a TB_PDict instance")
+	expect_error(PDict(c("AA", "AAA")), "Trusted Band has a different length")
+
+	codons <- mkAllStrings(DNA_BASES, 3)
+	expect_error(PDict(codons, max.mismatch=1), "supported only if the width of dictionary 'x' is >= 6")
+
+	longstrings <- mkAllStrings(c("A","T"), 6)
+	expect_error(PDict(longstrings, max.mismatch=7), "must be <= 1 given the width of dictionary 'x'")
+	expect_error(PDict(paste0(rep("A", 100), collapse=''), max.mismatch=100), "must be <= 32 given the width of dictionary 'x'")
+
+	expect_error(PDict(codons, max.mismatch=NULL), "must be a single integer or 'NA'")
+	expect_error(PDict(codons, max.mismatch='a'), "must be a single integer or 'NA'")
+	expect_error(PDict(codons, max.mismatch=c(1,2)), "must be a single integer or 'NA'")
+
+	expect_error(PDict("A", algorithm='erroralgo'), "is not a defined class")
+
+	names(longstrings) <- rep('', length(longstrings))
+	expect_error(PDict(longstrings), "'x' has invalid names")
+	names(longstrings) <- rep('test', length(longstrings))
+	expect_error(PDict(longstrings), "'x' has duplicated names")
+
+	expect_error(PDict("AAAA", tb.start='a'), "must be a single integer or 'NA'")
+	expect_error(PDict("AAAA", tb.end='a'), "must be a single integer or 'NA'")
+	expect_error(PDict("AAAA", tb.width='a'), "must be a single integer or 'NA'")
+
+	expect_warning(PDict("AAAA", algorithm='ACtree'), "support for ACtree preprocessing algo has been dropped")
+})
+
+### Old Tests
+
+## Helper functions ported from old tests
+randomDNASequences <- function(n, w)
+{
+  alphabet <- DNA_BASES
+  w <- rep(w, length=n)
+  sequences <- sapply(seq(1, n, length=n),
+                      function(x) {
+                        s <- sample(alphabet, w[x], replace=TRUE)
+                        s <- paste(s, collapse="")
+                        return(s)
+                      })
+  return(Biostrings::DNAStringSet(sequences))
+}
+
+msubseq <- function(x, ir)
+{
+  ## differs from subseq in the sense that several subsequences
+  ## from the same sequence are extracted
+  ## x:  XString
+  ## ir: IRanges
+  res <- vector("character", length = length(ir))
+  for (i in seq(along=res)) {
+    res[i] <- as.character(subseq(x, start=ir@start[i], width=width(ir)[i]))
+    ## forced cast: chain of tools for DNAString seems interupted for
+    ##              some use cases (or I missed something)
+  }
+  res <- DNAStringSet(res)
+  return(res)
+}
+
+test_that("PDict works with constant width initialization", {
+  set.seed(1)
+  l <- 150
+  target <- randomDNASequences(1, l)[[1]]
+  W <- 20
+  L <- 6
+  ir <- successiveIRanges(rep(W, L), gapwidth = 1)
+  short_sequences <- msubseq(target, ir)
+  # shuffle the sequences (they are not in consecutive order)
+  o <- sample(seq(along=short_sequences))
+
+  dna_short <- DNAStringSet(short_sequences[o])
+  pdict <- PDict(dna_short)
+  expect_equal(L, length(pdict))
+  expect_equal(rep(W, L), width(pdict))
+  expect_equal(NULL, head(pdict))
+  expect_equal(W, tb.width(pdict))
+  expect_equal(NULL, tail(pdict))
+})
+
+test_that("PDict works for variable width lookup", {
+  set.seed(1)
+  l <- 150
+  target <- randomDNASequences(1, l)[[1]]
+  W <- 20
+  L <- 6
+  n_cut <- sample(0:5, L, replace=TRUE)
+  ir <- successiveIRanges(rep(W, L) - n_cut, gapwidth = 1 + n_cut[-length(n_cut)])
+  short_sequences <- msubseq(target, ir)
+  # shuffle the sequences (they are not in consecutive order)
+  o <- sample(seq(along=short_sequences))
+
+  dna_var_short <- DNAStringSet(short_sequences[o])
+
+  ## Previous comment: shouldn't 1:min(width) be the default?
+  pdict <- PDict(dna_var_short,
+                 tb.start=1,
+                 tb.width=min(width(short_sequences))
+                 )
+  expect_equal(L, length(pdict))
+  expect_equal((rep(W, L) - n_cut)[o], width(pdict))
+  expect_equal(NULL, head(pdict))
+  shortest_seq_width <- min(width(dna_var_short))
+  expect_equal(shortest_seq_width,
+              tb.width(pdict))           # mostly a sanity check
+  expect_equal(substring(short_sequences, shortest_seq_width+1)[o],
+              as.character(tail(pdict)))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-QualityScaledXStringSet.R b/tests/testthat/test-QualityScaledXStringSet.R
new file mode 100644
index 00000000..2631061a
--- /dev/null
+++ b/tests/testthat/test-QualityScaledXStringSet.R
@@ -0,0 +1,94 @@
+## QualityScaledXStringSet.R exports the following:
+## - QualityScaledXStringSet class
+## - readQualityScaledXStringSet
+## - writeQualityScaledXStringSet
+## - quality
+## - all accessor methods defined for XStringSet objects
+##
+## here, X is a seqtype in c("DNA", "RNA", "AA", "B")
+## the only accessors with different definitions are:
+## - windows
+## - narrow
+## - reverse
+## - reverseComplement
+## - show
+
+
+test_that("QualityScaledXStringSet I/O works properly", {
+	set.seed(4L)
+	n_samp <- 40L
+	n_seq <- 3L
+	qs <- list(phred=PhredQuality,
+							solexa=SolexaQuality,
+							illumina=IlluminaQuality)
+
+	alphs <- list(DNA=DNA_BASES,
+								RNA=RNA_BASES,
+								AA=AA_STANDARD,
+								B=LETTERS)
+
+	for(quality_type in c('phred', 'solexa', 'illumina')){
+		q <- do.call(c, lapply(seq_len(n_seq), \(x) qs[[quality_type]](sample(40L, n_samp, replace=TRUE))))
+		for(stype in c("DNA", "RNA", "AA", "B")){
+			seqs <- vapply(seq_len(n_seq), \(i) {
+				paste(sample(alphs[[stype]], n_samp, replace=TRUE), collapse='')
+			}, character(1L))
+			seqs <- as(seqs, paste0(stype, "StringSet"))
+			names(seqs) <- letters[seq_len(n_seq)]
+
+			qss <- get(paste0("QualityScaled", stype, "StringSet"))(seqs, q)
+
+			expect_s4_class(qss, paste0("QualityScaled", stype, "StringSet"))
+			expect_s4_class(quality(qss), class(qs[[quality_type]](0L)))
+
+			tf <- tempfile()
+			if(file.exists(tf)) file.remove(tf)
+			writeQualityScaledXStringSet(qss, tf)
+			expect_equal(fastq.geometry(tf), c(n_seq, n_samp))
+
+			if(stype == "DNA"){
+				## only readQualityScaledDNAStringSet is currently defined
+				qss2 <- readQualityScaledDNAStringSet(tf, quality.scoring=quality_type)
+
+				expect_equal(as.character(qss), as.character(qss2))
+				expect_equal(as.character(quality(qss)), as.character(quality(qss2)))
+			}
+			if(file.exists(tf)) file.remove(tf)
+		}
+	}
+})
+
+test_that("QualityScaledXStringSet properly implements reverse, reverseComplement", {
+	.revString <- function(s) paste(strsplit(s, '')[[1]][seq(nchar(s), 1L)], collapse='')
+	set.seed(10L)
+	n_samp <- 40L
+	n_seq <- 3L
+	qs <- list(phred=PhredQuality,
+							solexa=SolexaQuality,
+							illumina=IlluminaQuality)
+
+	alphs <- list(DNA=DNA_BASES,
+								RNA=RNA_BASES,
+								AA=AA_STANDARD,
+								B=LETTERS)
+
+	qualities <- lapply(qs, \(q){
+			do.call(c, lapply(seq_len(n_seq), \(i){
+					q(sample(40L, n_samp, replace=TRUE))
+				}))
+		})
+
+	for(stype in c("DNA", "RNA", "AA", "B")){
+		seqs <- vapply(seq_len(n_seq), \(x) paste(sample(alphs[[stype]], n_samp, replace=TRUE), collapse=''), character(1L))
+		seqs <- get(paste0(stype, "StringSet"))(seqs)
+		for(q in names(qs)){
+			qual <- qualities[[q]]
+			qss <- get(paste0("QualityScaled", stype, "StringSet"))(seqs, qual)
+			exp_output <- vapply(as.character(qual), .revString, character(1L), USE.NAMES=FALSE)
+			expect_equal(as.character(quality(reverse(qss))), exp_output)
+			if(stype %in% c("DNA", "RNA")){
+				expect_equal(as.character(quality(reverseComplement(qss))), exp_output)
+			}
+		}
+	}
+})
\ No newline at end of file
diff --git a/tests/testthat/test-XString-class.R b/tests/testthat/test-XString-class.R
new file mode 100644
index 00000000..83f3dabf
--- /dev/null
+++ b/tests/testthat/test-XString-class.R
@@ -0,0 +1,244 @@
+dnastr <- paste(DNA_ALPHABET, collapse='')
+rnastr <- paste(RNA_ALPHABET, collapse='')
+aastr <- paste(AA_ALPHABET, collapse='')
+bstr <- rawToChar(as.raw(32:126))
+
+test_allstrings_for_error <- function(input, exp_error, ignore=''){
+	if(!"DNA"%in%ignore) expect_error(DNAString(input), exp_error)
+	if(!"RNA"%in%ignore) expect_error(RNAString(input), exp_error)
+	if(!"AA"%in%ignore)  expect_error(AAString(input), exp_error)
+	if(!"B"%in%ignore)   expect_error(BString(input), exp_error)
+}
+
+test_that("seqtype correctly infers types", {
+	expect_equal(seqtype(BString("ABC")), "B")
+	expect_equal(seqtype(RNAString("AUG")), "RNA")
+	expect_equal(seqtype(DNAString("ATG")), "DNA")
+	expect_equal(seqtype(AAString("ARN")), "AA")
+})
+
+
+test_that("character, vector conversion works properly", {
+	expect_equal(as.character(DNAString(dnastr)), dnastr)
+	expect_equal(as.character(RNAString(rnastr)), rnastr)
+	expect_equal(as.character(AAString(aastr)), aastr)
+	expect_equal(as.character(BString(bstr)), bstr)
+})
+
+test_that("encode/decode tables work correctly", {
+	expect_equal(DNAString(tolower(dnastr)), DNAString(dnastr))
+	expect_equal(RNAString(tolower(rnastr)), RNAString(rnastr))
+	expect_equal(AAString(tolower(aastr)), AAString(aastr))
+
+	expect_equal(DNAString(as.factor(dnastr)), DNAString(dnastr))
+	expect_equal(RNAString(as.factor(rnastr)), RNAString(rnastr))
+	expect_equal(AAString(as.factor(aastr)), AAString(aastr))
+	expect_equal(BString(as.factor(bstr)), BString(bstr))
+
+	test_allstrings_for_error(bstr, "not in lookup table", ignore="B")
+})
+
+test_that("'letter' correctly extracts elements", {
+	expect_equal(letter(DNAString("ATGCATGC"), c(1,3,6)), "AGT")
+	expect_equal(letter(RNAString("AUGCAUGC"), c(1,3,6)), "AGU")
+	expect_equal( letter(AAString("ARNDARND"), c(1,3,6)), "ANR")
+	expect_equal(  letter(BString("ABCDEFGH"), c(1,3,6)), "ACF")
+	expect_equal(letter(bstr, seq(nchar(bstr), 1)), rawToChar(rev(charToRaw(bstr))))
+
+	## TODO: Better error messages
+	expect_error(letter(DNAString(""), 10), "out of bounds")
+	expect_error(letter(RNAString(""), 10), "out of bounds")
+	expect_error(letter(AAString(""), 10), "out of bounds")
+	expect_error(letter(BString(""), 10), "out of bounds")
+	expect_error(letter("", 10), "out of bounds")
+})
+
+test_that("constructors handle invalid and non-char input correctly", {
+	# note that this only displays for character input
+	# meaning DNAString(1) returns a non-informative error
+	test_allstrings_for_error(NA_character_, "input must be a single non-NA string")
+	test_allstrings_for_error(as.factor(1:4), "input must be a single non-NA string")
+	test_allstrings_for_error(c("A", "A"), "input must be a single non-NA string")
+})
+
+test_that("conversion between XStrings works properly", {
+	# DNA <-> RNA
+	expect_equal(RNAString(DNAString(dnastr)), RNAString(rnastr))
+	expect_equal(DNAString(RNAString(rnastr)), DNAString(dnastr))
+
+	# X -> B
+	expect_equal(BString(DNAString(dnastr)), BString(dnastr))
+	expect_equal(BString(RNAString(rnastr)), BString(rnastr))
+	expect_equal(BString(AAString(aastr)), BString(aastr))
+
+	# valid B -> X
+	expect_equal(DNAString(BString(dnastr)), DNAString(dnastr))
+	expect_equal(RNAString(BString(rnastr)), RNAString(rnastr))
+	expect_equal(AAString(BString(aastr)), AAString(aastr))
+
+	# invalid DNA,RNA <-> AA
+	expect_error(AAString(DNAString("ATGC")), "incompatible sequence types")
+	expect_error(AAString(RNAString("AUGC")), "incompatible sequence types")
+	expect_error(DNAString(AAString("ARND")), "incompatible sequence types")
+	expect_error(RNAString(AAString("ARND")), "incompatible sequence types")
+
+	# invalid B -> X
+	bbad <- BString(";")
+	test_allstrings_for_error(bbad, 'not in lookup table', ignore='B')
+})
+
+test_that("as.vector methods work correctly", {
+	## TODO: sometimes returns factor, sometimes character
+	## 				note the BString case as an example
+	dnafac <- factor(seq_len(nchar(dnastr)))
+	attr(dnafac, 'levels') <- strsplit(dnastr, '')[[1]]
+
+	rnafac <- factor(seq_len(nchar(rnastr)))
+	attr(rnafac, 'levels') <- strsplit(rnastr, '')[[1]]
+
+	aafac <- factor(seq_len(nchar(aastr)))
+	attr(aafac, 'levels') <- strsplit(aastr, '')[[1]]
+
+	bfac <- factor(seq_len(nchar(bstr)))
+	attr(bfac, 'levels') <- strsplit(bstr, '')[[1]]
+
+	expect_equal(as.vector(DNAString(dnastr)), dnafac)
+	expect_equal(as.vector(RNAString(rnastr)), rnafac)
+	expect_equal(as.vector(AAString(aastr)), aafac)
+	#expect_equal(as.vector(BString(bstr)), bfac)
+})
+
+test_that("equality methods work as advertised", {
+	expect_true(DNAString(dnastr) == DNAString(dnastr))
+	expect_true(RNAString(rnastr) == RNAString(rnastr))
+	expect_true(AAString(aastr) == AAString(aastr))
+	expect_true(BString(bstr) == BString(bstr))
+
+	expect_false(DNAString(dnastr) == DNAString(substr(dnastr, 1, 7)))
+	expect_false(RNAString(rnastr) == RNAString(substr(rnastr, 1, 7)))
+	expect_false(AAString(aastr) == AAString(substr(aastr, 1, 7)))
+	expect_false(BString(bstr) == BString(substr(bstr, 1, 7)))
+
+	expect_true(DNAString(dnastr) != DNAString(substr(dnastr, 1, 7)))
+	expect_true(RNAString(rnastr) != RNAString(substr(rnastr, 1, 7)))
+	expect_true(AAString(aastr) != AAString(substr(aastr, 1, 7)))
+	expect_true(BString(bstr) != BString(substr(bstr, 1, 7)))
+
+
+	## DNA <-> RNA comparison
+	expect_true(DNAString(dnastr) == RNAString(rnastr))
+
+	## other B comparisons
+	expect_true(AAString(aastr) == BString(aastr))
+	expect_true(AAString(aastr) != BString(bstr))
+	expect_true(bstr == BString(bstr))
+
+	## invalid comparisons
+	expect_error(DNAString(dnastr) == AAString(aastr), 'comparison between a "DNAString" instance and a "AAString" instance is not supported')
+	expect_error(DNAString(dnastr) == BString(bstr), 'comparison between a "DNAString" instance and a "BString" instance is not supported')
+	expect_error(RNAString(rnastr) == AAString(aastr), 'comparison between a "RNAString" instance and a "AAString" instance is not supported')
+	expect_error(RNAString(rnastr) == BString(bstr), 'comparison between a "RNAString" instance and a "BString" instance is not supported')
+})
+
+test_that("output works correctly", {
+	s <- paste(rep("A", 100L), collapse='')
+	expect_output(show(DNAString(s)), "100-letter DNAString object\\nseq: .+\\.\\.\\..+$", width=80)
+	expect_output(show(RNAString(s)), "100-letter RNAString object\\nseq: .+\\.\\.\\..+$", width=80)
+	expect_output(show(AAString(s)), "100-letter AAString object\\nseq: .+\\.\\.\\..+$", width=80)
+	expect_output(show(BString(s)), "100-letter BString object\\nseq: A{36}\\.\\.\\.A{36}$", width=80)
+
+	# width of sequence is 5 less than that of 'width'
+	expect_output(show(DNAString(s)), "100-letter DNAString object\\nseq: .+\\.\\.\\..+$", width=10)
+	expect_output(show(RNAString(s)), "100-letter RNAString object\\nseq: .+\\.\\.\\..+$", width=10)
+	expect_output(show(AAString(s)), "100-letter AAString object\\nseq: .+\\.\\.\\..+$", width=10)
+	expect_output(show(BString(s)), "100-letter BString object\\nseq: AA\\.\\.\\.AA$", width=10)
+})
+
+test_that("substr, substring methods work correctly", {
+	d <- DNAString(dnastr)
+	r <- RNAString(rnastr)
+	a <- AAString(aastr)
+	b <- BString(bstr)
+
+	expect_equal(as.character(substr(d, 1, 10)), substr(dnastr, 1, 10))
+	expect_equal(as.character(substr(r, 1, 10)), substr(rnastr, 1, 10))
+	expect_equal(as.character(substr(a, 1, 10)), substr(aastr, 1, 10))
+	expect_equal(as.character(substr(b, 1, 10)), substr(bstr, 1, 10))
+
+	expect_equal(as.character(substring(d, 5, 10)), substring(dnastr, 5, 10))
+	expect_equal(as.character(substring(r, 5, 10)), substring(rnastr, 5, 10))
+	expect_equal(as.character(substring(a, 5, 10)), substring(aastr, 5, 10))
+	expect_equal(as.character(substring(b, 5, 10)), substring(bstr, 5, 10))
+
+	expect_error(substring(d, 10, 5), "Invalid sequence coordinates")
+	expect_error(substring(r, 10, 5), "Invalid sequence coordinates")
+	expect_error(substring(a, 10, 5), "Invalid sequence coordinates")
+	expect_error(substring(b, 10, 5), "Invalid sequence coordinates")
+
+	# `[` dispatch
+	expect_equal(as.character(d[1:10]), substr(dnastr, 1, 10))
+	expect_equal(as.character(d[-1]), substr(dnastr, 2, nchar(dnastr)))
+})
+
+test_that("reverse, complement, reverseComplement work correctly", {
+	## reverse tests
+	.revString <- function(s) paste(strsplit(s, '')[[1]][seq(nchar(s), 1L)], collapse='')
+	dna <- DNAString(dnastr)
+	rna <- RNAString(rnastr)
+	aaa <- AAString(aastr)
+	bbb <- BString(bstr)
+	d_comp <- "TGCAKYWSRMBDHVN-+."
+	r_comp <- "UGCAKYWSRMBDHVN-+."
+
+	## example Views on strings
+	d_v <- Views(dna, start=rep(1L,3L), end=rep(nchar(dnastr),3L))
+	mdna <- dna
+	mrna <- rna
+	m1 <- Mask(nchar(dnastr), start=c(2,7), end=c(3,10))
+	masks(mdna) <- masks(mrna) <- m1
+	md_comp <- strsplit(d_comp, '')[[1]]
+	mr_comp <- strsplit(r_comp, '')[[1]]
+	md_comp[c(2:3,7:10)] <- mr_comp[c(2:3,7:10)] <- "#"
+	md_comp <- paste(md_comp, collapse='')
+	mr_comp <- paste(mr_comp, collapse='')
+
+
+	## reverse method
+	expect_equal(reverse(dnastr), .revString(dnastr))
+	expect_equal(as.character(reverse(dna)), .revString(dnastr))
+	expect_equal(as.character(reverse(rna)), .revString(rnastr))
+	expect_equal(as.character(reverse(aaa)), .revString(aastr))
+	expect_equal(as.character(reverse(bbb)), .revString(bstr))
+	expect_equal(as.character(reverse(mdna)), .revString(as.character(mdna)))
+	expect_equal(as.character(reverse(mrna)), .revString(as.character(mrna)))
+
+	## complement method
+	expect_equal(as.character(complement(dna)), d_comp)
+	expect_equal(as.character(complement(rna)), r_comp)
+	expect_error(complement(AAString()), "unable to find an inherited method")
+	expect_error(complement(BString()), "unable to find an inherited method")
+	expect_equal(as.character(complement(d_v)), rep(d_comp, 3L))
+	expect_equal(as.character(complement(mdna)), md_comp)
+	expect_equal(as.character(complement(mrna)), mr_comp)
+
+	## reverseComplement method
+	expect_equal(as.character(reverseComplement(dna)), .revString(d_comp))
+	expect_equal(as.character(reverseComplement(rna)), .revString(r_comp))
+	expect_error(reverseComplement(AAString()), "unable to find an inherited method")
+	expect_error(reverseComplement(BString()), "unable to find an inherited method")
+	expect_equal(as.character(reverseComplement(d_v)), rep(.revString(d_comp), 3L))
+	expect_equal(as.character(reverseComplement(mdna)), .revString(md_comp))
+	expect_equal(as.character(reverseComplement(mrna)), .revString(mr_comp))
+})
+
+## Porting RUnit tests
+test_that("alphabet finds the correct values", {
+	expect_equal(alphabet(DNAString(dnastr)), strsplit(dnastr, '')[[1]])
+	expect_equal(alphabet(RNAString(rnastr)), strsplit(rnastr, '')[[1]])
+	expect_equal(alphabet(AAString(aastr)), strsplit(aastr, '')[[1]])
+	expect_equal(alphabet(BString(bstr)), NULL)
+
+	expect_equal(alphabet(DNAString(), baseOnly=TRUE), DNA_BASES)
+	expect_equal(alphabet(RNAString(), baseOnly=TRUE), RNA_BASES)
+	expect_error(alphabet(DNAString(), baseOnly=1), "'baseOnly' must be TRUE or FALSE")
+})
diff --git a/tests/testthat/test-XStringQuality.R b/tests/testthat/test-XStringQuality.R
new file mode 100644
index 00000000..bb13af1c
--- /dev/null
+++ b/tests/testthat/test-XStringQuality.R
@@ -0,0 +1,77 @@
+## XStringQuality-class.R exports the following:
+## - PhredQuality
+## - SolexaQuality
+## - IlluminaQuality
+## - offset
+## - alphabet
+## - encoding
+## - coercion methods
+
+## Phred: 0-99 quality as (ASCII)-33 (! = 0, note that this can't support >=95)
+## Solexa: -5-99 quality as (ASCII)-64 (; = 0, no valid characters above 63)
+## Illumina: 0-99 quality as (ASCII)-64 (@ = 0, no valid characters above 63)
+
+test_that("Quality constructors work correctly", {
+	expect_s4_class(PhredQuality(seq(0,99)), "PhredQuality")
+	expect_s4_class(SolexaQuality(seq(-5,99)), "SolexaQuality")
+	expect_s4_class(IlluminaQuality(0:99), "IlluminaQuality")
+	p <- PhredQuality(seq(0,99))
+	s <- SolexaQuality(seq(-5,99))
+	i <- IlluminaQuality(0:99)
+
+	## note that PhredQuality cannot represent scores greater than 93
+	## (and it auto-rounds all these values down)
+	## all other qualities can support the values in their range
+	complete_ascii <- strsplit(rawToChar(as.raw(33:126)), '')[[1]]
+
+	## check alphabets
+	expect_equal(alphabet(p), complete_ascii[seq_len(94)])
+	expect_equal(alphabet(s)[seq_len(68)], complete_ascii[seq(27,94)])
+	expect_equal(alphabet(i)[seq_len(63)], complete_ascii[seq(32,94)])
+
+	## check encodings
+	mapping <- seq(0,131)
+	names(mapping) <- complete_ascii
+	expect_equal(encoding(p), mapping[seq_len(94)])
+	expect_equal(encoding(s), mapping[seq(27,131)] - 31L)
+	expect_equal(encoding(i), mapping[seq(32,131)] - 31L)
+
+	## check probability conversion
+	prob_conversions <- c("numeric", "NumericList")
+
+	## probability definitions taken from the man pages, should match
+	phred_scores <- c(0:93, rep(93,6L))
+	solexa_scores <- seq(-5L, 99L)
+	illumina_scores <- seq(0L, 99L)
+
+	phred_probs <- 10^(-0.1 * phred_scores)
+	solexa_probs <- 10^(-0.1*solexa_scores) / (1 + 10^(-0.1*solexa_scores))
+	illumina_probs <- 10^(-0.1 * illumina_scores)
+	expect_equal(as.numeric(p), phred_probs)
+	expect_equal(as.numeric(s), solexa_probs)
+	expect_equal(as.numeric(i), illumina_probs)
+	expect_equal(as.numeric(as(p, "NumericList")[[1]]), phred_probs)
+	expect_equal(as.numeric(as(s, "NumericList")[[1]]), solexa_probs)
+	expect_equal(as.numeric(as(i, "NumericList")[[1]]), illumina_probs)
+
+	## score conversions
+	score_conversions <- c("integer", "matrix", "IntegerList")
+	expect_equal(as.integer(p), phred_scores)
+	expect_equal(as.integer(s), solexa_scores)
+	expect_equal(as.integer(i), illumina_scores)
+	expect_equal(as(p,'matrix')[1,], phred_scores)
+	expect_equal(as(s,'matrix')[1,], solexa_scores)
+	expect_equal(as(i,'matrix')[1,], illumina_scores)
+	expect_equal(as.integer(as(p,'IntegerList')[[1]]), phred_scores)
+	expect_equal(as.integer(as(s,'IntegerList')[[1]]), solexa_scores)
+	expect_equal(as.integer(as(i,'IntegerList')[[1]]), illumina_scores)
+
+	## BString, character conversions
+	b <- BString(paste(LETTERS, collapse=''))
+	expect_equal(as.character(PhredQuality(b)), as.character(b))
+	expect_equal(as.character(SolexaQuality(b)), as.character(b))
+	expect_equal(as.character(IlluminaQuality(b)), as.character(b))
+	expect_equal(as.character(PhredQuality(as.character(b))), as.character(b))
+	expect_equal(as.character(SolexaQuality(as.character(b))), as.character(b))
+	expect_equal(as.character(IlluminaQuality(as.character(b))), as.character(b))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-XStringSet-class.R b/tests/testthat/test-XStringSet-class.R
new file mode 100644
index 00000000..276012f8
--- /dev/null
+++ b/tests/testthat/test-XStringSet-class.R
@@ -0,0 +1,275 @@
+dnastr <- paste(DNA_ALPHABET, collapse='')
+rnastr <- paste(RNA_ALPHABET, collapse='')
+aastr <- paste(AA_ALPHABET, collapse='')
+bstr <- rawToChar(as.raw(32:126))
+
+d <- DNAStringSet(dnastr)
+r <- RNAStringSet(rnastr)
+a <- AAStringSet(aastr)
+b <- BStringSet(bstr)
+
+## testing functions from old testing files
+### '.eltAddresses(x)' collects the addresses of the elements in 'x' (in
+### practice 'x' will be a list of external pointers or environments).
+.eltAddresses <- function(x) sapply(x, XVector:::address)
+
+### 'x' and 'y' must be XVectorList vectors.
+.haveIdenticalPools <- function(x, y)
+    identical(.eltAddresses(x@pool@xp_list), .eltAddresses(y@pool@xp_list))
+
+### 'x' must be an XVectorList vector.
+.poolEltLengths <- function(x)
+{
+    pool_len <- length(x@pool)
+    if (pool_len == 0L)
+        return(integer(0))
+    sapply(seq_len(pool_len), function(i) length(x@pool[[i]]))
+}
+
+test_that("seqtype correctly infers types", {
+  expect_equal(seqtype(d), "DNA")
+  expect_equal(seqtype(r), "RNA")
+  expect_equal(seqtype(a), "AA")
+  expect_equal(seqtype(b), "B")
+
+  expect_equal(seqtype(d[[1]]), "DNA")
+  expect_equal(seqtype(r[[1]]), "RNA")
+  expect_equal(seqtype(a[[1]]), "AA")
+  expect_equal(seqtype(b[[1]]), "B")
+})
+
+test_that("seqtype conversion works as expected", {
+  ## conversion between RNA and DNA
+  expect_equal({x <- d; seqtype(x) <- "RNA"; x}, r)
+  expect_equal({x <- r; seqtype(x) <- "DNA"; x}, d)
+
+  ## conversion to BStringSets
+  expect_equal({x <- d; seqtype(x) <- "B"; x}, BStringSet(dnastr))
+  expect_equal({x <- r; seqtype(x) <- "B"; x}, BStringSet(rnastr))
+  expect_equal({x <- a; seqtype(x) <- "B"; x}, BStringSet(aastr))
+
+  expect_equal({x <- BStringSet(dnastr); seqtype(x) <- "DNA"; x}, d)
+  expect_equal({x <- BStringSet(rnastr); seqtype(x) <- "RNA"; x}, r)
+  expect_equal({x <- BStringSet(aastr); seqtype(x) <- "AA"; x}, a)
+
+  ## invalid conversions
+  expect_error(seqtype(d) <- "AA", "incompatible sequence types")
+  expect_error(seqtype(r) <- "AA", "incompatible sequence types")
+  expect_error(seqtype(a) <- "DNA", "incompatible sequence types")
+  expect_error(seqtype(a) <- "RNA", "incompatible sequence types")
+  expect_error(seqtype(b) <- "AA", "not in lookup table")
+  expect_error(seqtype(b) <- "DNA", "not in lookup table")
+  expect_error(seqtype(b) <- "RNA", "not in lookup table")
+})
+
+test_that("unlisting works", {
+  expect_equal(unlist(d), d[[1]])
+  expect_equal(unlist(r), r[[1]])
+  expect_equal(unlist(a), a[[1]])
+  expect_equal(unlist(b), b[[1]])
+})
+
+test_that("width and nchar are correct", {
+  expect_equal(width(d), nchar(dnastr))
+  expect_equal(width(r), nchar(rnastr))
+  expect_equal(width(a), nchar(aastr))
+  expect_equal(width(b), nchar(bstr))
+
+  expect_equal(nchar(d), nchar(dnastr))
+  expect_equal(nchar(r), nchar(rnastr))
+  expect_equal(nchar(a), nchar(aastr))
+  expect_equal(nchar(b), nchar(bstr))
+
+  expect_error(width(NA_character_), "NAs in 'x' are not supported")
+})
+
+test_that("concatenation and character/vector conversion are correct", {
+  expect_s4_class(c(d,d), "DNAStringSet")
+  expect_s4_class(c(r,r), "RNAStringSet")
+  expect_s4_class(c(a,a), "AAStringSet")
+  expect_s4_class(c(b,b), "BStringSet")
+
+  dd <- c(d,d)
+  rr <- c(r,r)
+  aa <- c(a,a)
+  bb <- c(b,b)
+
+  expect_equal(dd == DNAStringSet(c(dnastr, dnastr)), c(TRUE, TRUE))
+  expect_equal(rr == RNAStringSet(c(rnastr, rnastr)), c(TRUE, TRUE))
+  expect_equal(aa == AAStringSet(c(aastr, aastr)), c(TRUE, TRUE))
+  expect_equal(bb == BStringSet(c(bstr, bstr)), c(TRUE, TRUE))
+
+  expect_equal(as.character(dd), c(dnastr, dnastr))
+  expect_equal(as.character(rr), c(rnastr, rnastr))
+  expect_equal(as.character(aa), c(aastr, aastr))
+  expect_equal(as.character(bb), c(bstr, bstr))
+
+  expect_equal(as.vector(dd), c(dnastr, dnastr))
+  expect_equal(as.vector(rr), c(rnastr, rnastr))
+  expect_equal(as.vector(aa), c(aastr, aastr))
+  expect_equal(as.vector(bb), c(bstr, bstr))
+})
+
+test_that("matrix conversion is correct", {
+  dd <- c(d,d)
+  rr <- c(r,r)
+  aa <- c(a,a)
+  bb <- c(b,b)
+
+  md <- do.call(rbind, rep(strsplit(dnastr, ''), 2))
+  mr <- do.call(rbind, rep(strsplit(rnastr, ''), 2))
+  ma <- do.call(rbind, rep(strsplit(aastr, ''), 2))
+  mb <- do.call(rbind, rep(strsplit(bstr, ''), 2))
+
+
+  expect_equal(as.matrix(dd), md)
+  expect_equal(as.matrix(rr), mr)
+  expect_equal(as.matrix(aa), ma)
+  expect_equal(as.matrix(bb), mb)
+
+  md <- as.matrix(DNAStringSet(''))
+  mr <- as.matrix(RNAStringSet(''))
+  ma <- as.matrix(AAStringSet(''))
+  mb <- as.matrix(BStringSet(''))
+
+  m_base <- matrix('', nrow=1, ncol=0)
+  expect_equal(md, m_base)
+  expect_equal(mr, m_base)
+  expect_equal(ma, m_base)
+  expect_equal(mb, m_base)
+})
+
+test_that("factor conversion is correct", {
+  expect_equal(as.factor(d), as.factor(dnastr))
+  expect_equal(as.factor(r), as.factor(rnastr))
+  expect_equal(as.factor(a), as.factor(aastr))
+  expect_equal(as.factor(b), as.factor(bstr))
+})
+
+test_that("data.frame conversion is correct", {
+  expect_equal(as.data.frame(c(d,d)), data.frame(x=c(dnastr, dnastr)))
+  expect_equal(as.data.frame(c(r,r)), data.frame(x=c(rnastr, rnastr)))
+  expect_equal(as.data.frame(c(a,a)), data.frame(x=c(aastr, aastr)))
+  expect_equal(as.data.frame(c(b,b)), data.frame(x=c(bstr, bstr)))
+})
+
+test_that("toString conversion is correct", {
+  expect_equal(toString(c(d,d)), paste(dnastr, dnastr, sep=', '))
+  expect_equal(toString(c(r,r)), paste(rnastr, rnastr, sep=', '))
+  expect_equal(toString(c(a,a)), paste(aastr, aastr, sep=', '))
+  expect_equal(toString(c(b,b)), paste(bstr, bstr, sep=', '))
+})
+
+test_that("show methods output correctly", {
+  expect_output(show(c(d,d)), "^DNAStringSet object of length 2:\\n")
+  expect_output(show(c(r,r)), "^RNAStringSet object of length 2:\\n")
+  expect_output(show(c(a,a)), "^AAStringSet object of length 2:\\n")
+  expect_output(show(c(b,b)), "^BStringSet object of length 2:\\n")
+})
+
+test_that("showAsCell works correctly", {
+  d <- DNAStringSet(paste(rep(dnastr, 10), collapse=''))
+  r <- RNAStringSet(paste(rep(rnastr, 10), collapse=''))
+  a <- AAStringSet(paste(rep(aastr, 10), collapse=''))
+
+  expect_equal(nchar(showAsCell(d)), 23L)
+  expect_equal(nchar(showAsCell(r)), 23L)
+  expect_equal(nchar(showAsCell(a)), 23L)
+  expect_equal(nchar(showAsCell(b)), 23L)
+})
+
+test_that("comparison operators are correct", {
+  dna <- DNAStringSet(DNA_ALPHABET)
+  rna <- RNAStringSet(RNA_ALPHABET)
+  aaa <- AAStringSet(AA_ALPHABET)
+  bbb <- BStringSet(LETTERS)
+
+  expect_true(!any(is.na(dna)))
+  expect_true(!anyNA(dna))
+  expect_equal(match(dna, dna), seq_along(dna))
+  expect_equal(aaa[seq_len(26)] < bbb, AA_ALPHABET[seq_len(26L)] < LETTERS)
+
+  expect_equal(match(sort(aaa), bbb, nomatch=0), c(rep(0L, 4L), seq_len(26L)))
+  expect_true(all(dna == as.character(dna)))
+
+  expect_error(aaa == dna, "is not supported")
+  expect_error(aaa == rna, "is not supported")
+  expect_true(all(dna == BStringSet(DNA_ALPHABET)))
+  expect_true(all(rna == BStringSet(RNA_ALPHABET)))
+  expect_true(all(aaa == BStringSet(AA_ALPHABET)))
+  expect_equal(dna == NULL, logical(0L))
+})
+
+## Porting RUnit tests
+test_that("short RUnit tests continue to pass", {
+  ## test_width_character
+  x <- safeExplode(rawToChar(as.raw(1:255)))
+  expect_equal(width(x), rep.int(1L, 255))
+
+  ## DNAStringSet internal elements ##
+  dna <- DNAStringSet(DNA_ALPHABET)
+
+  ## DNAStringSet_constructor
+  expect_equal(.poolEltLengths(dna), length(DNA_ALPHABET))
+
+  ## DNAStringSet_width
+  expect_equal(width(dna), width(DNA_ALPHABET))
+
+  ## DNAStringSet_unlist
+  expect_equal(as.character(unlist(dna)), dnastr)
+
+  ## DNAStringSet_showAsCell
+  expect_equal(showAsCell(DNAStringSet()), character(0L))
+  expect_equal(showAsCell(dna), DNA_ALPHABET)
+})
+
+test_that("RUnit test_DNAStringSet_subsetting", {
+  dna <- DNAStringSet(DNA_ALPHABET)
+  elementMetadata(dna) <- DataFrame(C1=dna)
+
+  dna0 <- dna[FALSE]
+  expect_equal(length(dna0), 0L)
+  ## Checking internal representation.
+  expect_equal(.poolEltLengths(dna0), integer(0))
+  expect_true(.haveIdenticalPools(elementMetadata(dna0)$C1, dna0))
+  expect_equal(elementMetadata(dna0)$C1@ranges, dna0@ranges)
+
+  idx <- rep.int((8:6)*2L, 100L)
+  dna300 <- dna[idx]
+  expect_equal(length(dna300), length(idx))
+  ## Checking internal representation.
+  expect_true(.haveIdenticalPools(dna300, dna))
+  expect_true(.haveIdenticalPools(elementMetadata(dna300)$C1, dna300))
+  expect_equal(elementMetadata(dna300)$C1@ranges, dna300@ranges)
+})
+
+test_that("RUnit test_DNAStringSet_combining", {
+  dna <- DNAStringSet(DNA_ALPHABET)
+  elementMetadata(dna) <- DataFrame(C1=dna)
+
+  dna2a <- c(dna, dna)
+  dna2b <- rep(dna, 2L)
+  expect_equal(dna2a, dna2b)
+
+  ## Checking internal representation.
+  expect_true(.haveIdenticalPools(dna2a, dna))
+  expect_true(.haveIdenticalPools(dna2a, dna2b))
+  expect_true(.haveIdenticalPools(elementMetadata(dna2a)$C1, dna2a))
+  expect_equal(elementMetadata(dna2a)$C1@ranges, dna2a@ranges)
+})
+
+test_that("RUnit test_DNAStringSet_compaction", {
+  dna <- DNAStringSet(DNA_ALPHABET)
+  elementMetadata(dna) <- DataFrame(C1=dna)
+
+  idx <- rep.int((8:6)*2L, 100L)
+  dna300 <- dna[idx]
+  compact_dna300 <- compact(dna300)
+  expect_equal(as.character(compact_dna300), as.character(dna300))
+  ## Checking internal representation.
+  expect_equal(.poolEltLengths(compact_dna300), 3L)
+  expect_equal(.poolEltLengths(elementMetadata(compact_dna300)$C1),
+                .poolEltLengths(compact_dna300))
+  expect_equal(elementMetadata(compact_dna300)$C1@ranges,
+                compact_dna300@ranges)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-XStringSet-io.R b/tests/testthat/test-XStringSet-io.R
new file mode 100644
index 00000000..ae4fcf63
--- /dev/null
+++ b/tests/testthat/test-XStringSet-io.R
@@ -0,0 +1,172 @@
+## TODO: Add in I/O tests for FASTQ files
+# (will revisit after doing QualityScaledXStringSet testing)
+
+check_rw_strings_fasta <- function(XStringSetRFUN, XStringSetMAKEFUN, X_ALPH){
+	# setup
+	set.seed(123L)
+	NUM_STRINGS <- 10L
+	seq_lens <- sample(100L, NUM_STRINGS, replace=TRUE)
+	f <- tempfile()
+	if(file.exists(f)) unlink(f)
+
+	# make some test strings
+	strings <- character(NUM_STRINGS)
+	for(i in seq_len(NUM_STRINGS))
+		strings[i] <- paste(sample(X_ALPH, seq_lens[i], replace=TRUE), collapse='')
+
+	# make an XStringSet and name it
+	ss <- XStringSetMAKEFUN(strings)
+	names(ss) <- as.character(seq_len(NUM_STRINGS))
+
+	# write, then ensure it wrote
+	writeXStringSet(ss, f, format="fasta")
+	expect_true(file.exists(f))
+
+	# check that seq lengths in the fasta file are correct
+	ssn <- fasta.seqlengths(f, seqtype=seqtype(ss), use.names=TRUE)
+	expected_lengths <- lengths(ss)
+	names(expected_lengths) <- names(ss)
+	expect_equal(ssn, expected_lengths)
+
+	# check that optional parameters work
+	expect_equal(fasta.seqlengths(f, seqtype=seqtype(ss), nrec=2, use.names=FALSE), seq_lens[seq_len(2)])
+	expect_equal(fasta.seqlengths(f, seqtype=seqtype(ss), skip=2, nrec=3, use.names=FALSE), seq_lens[3:5])
+
+	# check indexing
+	indices <- fasta.index(f, seqtype=seqtype(ss))
+	indices_sub <- fasta.index(f, skip=2L, nrec=3L, seqtype=seqtype(ss))
+
+	expect_s3_class(indices, "data.frame")
+	expect_equal(colnames(indices), c("recno", "fileno", "offset", "desc", "seqlength", "filepath"))
+	expect_true(all(indices$filepath == f))
+	expect_true(all(indices$recno == names(ss)))
+	expect_equal(indices$seqlength, seq_lens)
+	expect_equal(nrow(indices_sub), 3L)
+
+	# rownames will screw this up
+	rownames(indices_sub) <- 3:5
+	expect_equal(indices[3:5,], indices_sub)
+
+	# check that the actual values are correct
+	ss_read <- XStringSetRFUN(f, format="fasta")
+	ss_read_sub <- XStringSetRFUN(f, nrec=3, skip=2)
+
+	# double checking with equals.XStringSet and standard character
+	expect_equal(class(ss_read), class(ss))
+	expect_true(all(ss_read == ss))
+	expect_equal(as.character(ss_read), as.character(ss))
+	expect_true(all(ss_read_sub == ss[3:5]))
+	expect_equal(as.character(ss_read_sub), as.character(ss[3:5]))
+
+	expect_equal(names(ss_read), names(ss))
+	expect_equal(names(ss_read_sub), names(ss)[3:5])
+
+	# check that we can append to file
+	writeXStringSet(ss, f, format="fasta", append=TRUE)
+	ss_read <- XStringSetRFUN(f, format="fasta")
+	expect_true(all(ss_read == c(ss, ss)))
+	expect_equal(as.character(ss_read), as.character(c(ss, ss)))
+
+	unlink(f)
+}
+
+check_rw_strings_serial <- function(XStringSetMAKEFUN, X_ALPH){
+	# setup
+	set.seed(345L)
+	NUM_STRINGS <- 10L
+	seq_lens <- sample(100L, NUM_STRINGS, replace=TRUE)
+	d <- tempdir()
+	f <- "example_xs"
+	outfile <- file.path(d, paste0(f, '.rda'))
+	if(file.exists(outfile)) unlink(outfile)
+
+	# make some test strings
+	strings <- character(NUM_STRINGS)
+	for(i in seq_len(NUM_STRINGS))
+		strings[i] <- paste(sample(X_ALPH, seq_lens[i], replace=TRUE), collapse='')
+
+	ss <- XStringSetMAKEFUN(strings)
+
+	expect_output(saveXStringSet(ss, f, dirpath=d, save.dups=FALSE), "Saving .+/example_xs.rda")
+	expect_true(file.exists(outfile))
+
+	if("example_xs" %in% ls()) rm("example_xs")
+	load(outfile)
+	expect_equal(as.character(example_xs), as.character(ss))
+	expect_true(all(example_xs == ss))
+	unlink(outfile)
+
+	# unsure how this is supposed to work
+	# expect_output(saveXStringSet(XStringSetMAKEFUN(rep(strings, 2)), f, dirpath=d, save.dups=TRUE), "Saving .+/example_xs.rda")
+
+	## testing standard save()
+	old_ss <- ss
+	save(ss, file=outfile)
+	expect_true(file.exists(outfile))
+	rm(ss)
+	load(outfile)
+	expect_equal(as.character(ss), as.character(old_ss))
+	expect_true(all(ss == old_ss))
+	unlink(outfile)
+}
+
+test_that("All XStringSets can be read/written to a FASTA", {
+	B_ALPHABET <- strsplit(rawToChar(as.raw(32:126)), '')[[1]]
+	check_rw_strings_fasta(readDNAStringSet, DNAStringSet, DNA_ALPHABET)
+	check_rw_strings_fasta(readRNAStringSet, RNAStringSet, RNA_ALPHABET)
+	check_rw_strings_fasta(readAAStringSet, AAStringSet, AA_ALPHABET)
+	check_rw_strings_fasta(readBStringSet, BStringSet, B_ALPHABET)
+})
+
+test_that("All XStringSets can be serialized and subseqeuntly read", {
+	B_ALPHABET <- strsplit(rawToChar(as.raw(32:126)), '')[[1]]
+	check_rw_strings_serial(DNAStringSet, DNA_ALPHABET)
+	check_rw_strings_serial(RNAStringSet, RNA_ALPHABET)
+	check_rw_strings_serial(AAStringSet, AA_ALPHABET)
+	check_rw_strings_serial(BStringSet, B_ALPHABET)
+})
+
+## Previous RUnit testing
+test_that("XStringSet read/write is correct", {
+	## setup initial values
+	tmpdir <- tempdir()
+  tmpfilefa <- file.path(tmpdir,"whee.fa")
+  tmpfilefq <- file.path(tmpdir,"whee.fq")
+  if(file.exists(tmpfilefa)) unlink(tmpfilefa)
+  if(file.exists(tmpfilefq)) unlink(tmpfilefq)
+
+
+  x1 <- DNAStringSet(c("TTGA", "CTCN"))
+  q1 <- PhredQuality(c("*+,-", "6789"))
+  qdna1 <- QualityScaledDNAStringSet(x1, q1)
+  names_dna <- c("A", "B")
+  names(qdna1) <- names_dna
+
+  names(x1) <- names_dna
+  writeXStringSet(x1, format="fastq", file=tmpfilefq)
+  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
+  	"metadata columns on input DNAStringSet object were dropped")
+
+  expect_true(!all(quality(qdna2) == quality(qdna1)))
+  expect_true(all(all(as(quality(qdna2),"IntegerList") == 26L)))
+
+  writeXStringSet(x1, format="fastq", file=tmpfilefq, qualities = q1)
+  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
+  	"metadata columns on input DNAStringSet object were dropped")
+  expect_true(all(quality(qdna2) == quality(qdna1)))
+
+  writeXStringSet(qdna1, format="fastq", file=tmpfilefq)
+  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
+  	"metadata columns on input DNAStringSet object were dropped")
+  expect_true(all(names(qdna2) == names_dna))
+  expect_true(all(quality(qdna2) == quality(qdna1)))
+
+  qdna2 <- readDNAStringSet(tmpfilefq, format="fastq")
+  expect_true(is.null(mcols(qdna2)))
+
+  qdna2 <- readDNAStringSet(tmpfilefq, format="fastq", with.qualities = TRUE)
+  expect_true(all(mcols(qdna2)$qualities == quality(qdna1)))
+
+  unlink(tmpfilefa)
+  unlink(tmpfilefq)
+})
diff --git a/tests/testthat/test-XStringSetList-class.R b/tests/testthat/test-XStringSetList-class.R
new file mode 100644
index 00000000..cc9cbdd8
--- /dev/null
+++ b/tests/testthat/test-XStringSetList-class.R
@@ -0,0 +1,198 @@
+## WARNING: Do *NOT* use checkIdentical() on XString, XStringSet, or
+## XStringSetList objects. It is *NOT* reliable. Use checkTrue(all.equal())
+## instead.
+
+## TODO: Maybe "all.equal" should be made an S4 generic with S4/S3 method
+## combos for XVector and XVectorList object?
+
+B_ALPHABET <- strsplit(rawToChar(as.raw(32:126)), '')[[1]]
+
+## Unclass the XStringSet object by setting its "pool" and "ranges" slots
+## to NULL first.
+.unclass_XStringSet <- function(x)
+{
+    slot(x, "pool", check=FALSE) <- NULL
+    slot(x, "ranges", check=FALSE) <- NULL
+    unclass(x)
+}
+
+all.equal.XStringSet <- function(target, current, ...)
+{
+    ## Compare the sequences (and their names if they have).
+    target_seqs <- as.character(target)
+    current_seqs <- as.character(current)
+    ok1 <- all.equal(target_seqs, current_seqs)
+    ## Compare the rest.
+
+    ## this was previously commented out, but works with testthat
+    target <- .unclass_XStringSet(target)
+    current <- .unclass_XStringSet(current)
+    ok2 <- all.equal(target, current)
+    ##
+
+    ok2 <- identical(class(target), class(current))
+    if (!ok2)
+        ok2 <- "class mismatch"
+    ok3 <- all.equal(target@metadata, current@metadata)
+    ok4 <- all.equal(target@elementMetadata, current@elementMetadata)
+    ans <- character(0)
+    if (!isTRUE(ok1))
+        ans <- c(ans, ok1)
+    if (!isTRUE(ok2))
+        ans <- c(ans, ok2)
+    if (!isTRUE(ok3))
+        ans <- c(ans, ok3)
+    if (!isTRUE(ok4))
+        ans <- c(ans, ok4)
+    if (length(ans) == 0L)
+        return(TRUE)
+    ans
+}
+
+all.equal.XStringSetList <- function(target, current, ...)
+{
+    ok1 <- identical(class(target), class(current))
+    if (!ok1)
+        ok1 <- "class mismatch"
+    target2 <- as(target, "CharacterList")
+    current2 <- as(current, "CharacterList")
+    ## Temporary workaround until coercion from XStringSet to CharacterList
+    ## is fixed to propagate metadata and metadata columns.
+    metadata(target2) <- metadata(target)
+    mcols(target2) <- mcols(target)
+    metadata(current2) <- metadata(current)
+    mcols(current2) <- mcols(current)
+    ok2 <- all.equal(target2, current2)
+    ans <- character(0)
+    if (!isTRUE(ok1))
+        ans <- c(ans, ok1)
+    if (!isTRUE(ok2))
+        ans <- c(ans, ok2)
+    if (length(ans) == 0L)
+        return(TRUE)
+    ans
+}
+
+.XStringSetList_constructor <- function(XStringSetFUN, XStringSetListFUN, XS_ALPHABET){
+    xs1 <- XStringSetFUN(XS_ALPHABET[1:8])
+    xs2 <- XStringSetFUN(XS_ALPHABET[9:17])
+
+    lst1 <- XStringSetListFUN(xs1, xs2)
+    lst2 <- XStringSetListFUN(as.character(XS_ALPHABET[1:8]),
+                              as.character(XS_ALPHABET[9:17]))
+
+    expect_true(all.equal.XStringSetList(lst1, lst2))
+    expect_equal(length(XStringSetListFUN()), 0L)
+}
+
+.XStringSetList_unlist <- function(XStringSetFUN, XStringSetListFUN, XS_ALPHABET){
+    lst <- XStringSetListFUN(XS_ALPHABET, XS_ALPHABET)
+    expected <- XStringSetFUN(c(XS_ALPHABET, XS_ALPHABET))
+    expect_true(all.equal.XStringSet(unlist(lst), expected))
+}
+
+.XStringSetList_append <- function(XStringSetFUN, XStringSetListFUN, XS_ALPHABET){
+    xs <- XStringSetFUN(XS_ALPHABET)
+    lst <- XStringSetListFUN(XS_ALPHABET, XS_ALPHABET)
+    elementMetadata(lst) <- DataFrame(C1=c("list1", "list2"))
+
+    xs2a <- c(lst, lst)
+    xs2b <- rep(lst, 2L)
+    xs2c <- append(lst, lst)
+    expect_true(all.equal.XStringSetList(xs2a, xs2b))
+    expect_true(all.equal.XStringSetList(xs2a, xs2c))
+}
+
+.XStringSetList_subset <- function(XStringSetFUN, XStringSetListFUN, XS_ALPHABET){
+    lst1 <- XStringSetListFUN(XS_ALPHABET[-1], XS_ALPHABET[-2], XS_ALPHABET[-3], XS_ALPHABET[-4])
+    lst2 <- XStringSetListFUN(XS_ALPHABET[-2], XS_ALPHABET[-1], XS_ALPHABET[-4])
+
+    expect_true(all.equal.XStringSetList(lst1[1:2], lst2[2:1]))
+    expect_true(all.equal.XStringSetList(lst1[-c(2:3)], lst2[-1]))
+}
+
+.XStringSetList_from_list <- function(XStringSetFUN, XStringSetListFUN, XS_ALPHABET){
+    lst1 <- list(XStringSetFUN(XS_ALPHABET), XStringSetFUN(XS_ALPHABET))
+    lst2 <- XStringSetListFUN(XS_ALPHABET, XS_ALPHABET)
+
+    expect_true(all.equal.XStringSetList(XStringSetListFUN(lst1), lst2))
+}
+
+## DNAStringSet
+test_that("XStringSetList constructor works correctly", {
+    .XStringSetList_constructor(DNAStringSet, DNAStringSetList, DNA_ALPHABET)
+    .XStringSetList_constructor(RNAStringSet, RNAStringSetList, RNA_ALPHABET)
+    .XStringSetList_constructor(AAStringSet, AAStringSetList, AA_ALPHABET)
+    .XStringSetList_constructor(BStringSet, BStringSetList, B_ALPHABET)
+})
+
+test_that("XStringSetList unlist works correctly", {
+    .XStringSetList_unlist(DNAStringSet, DNAStringSetList, DNA_ALPHABET)
+    .XStringSetList_unlist(RNAStringSet, RNAStringSetList, RNA_ALPHABET)
+    .XStringSetList_unlist(AAStringSet, AAStringSetList, AA_ALPHABET)
+    .XStringSetList_unlist(BStringSet, BStringSetList, B_ALPHABET)
+})
+
+test_that("XStringSetList append works correctly", {
+    .XStringSetList_append(DNAStringSet, DNAStringSetList, DNA_ALPHABET)
+    .XStringSetList_append(RNAStringSet, RNAStringSetList, RNA_ALPHABET)
+    .XStringSetList_append(AAStringSet, AAStringSetList, AA_ALPHABET)
+    .XStringSetList_append(BStringSet, BStringSetList, B_ALPHABET)
+})
+
+test_that("XStringSetList subset works correctly", {
+    .XStringSetList_subset(DNAStringSet, DNAStringSetList, DNA_ALPHABET)
+    .XStringSetList_subset(RNAStringSet, RNAStringSetList, RNA_ALPHABET)
+    .XStringSetList_subset(AAStringSet, AAStringSetList, AA_ALPHABET)
+    .XStringSetList_subset(BStringSet, BStringSetList, B_ALPHABET)
+})
+
+test_that("XStringSetList showAsCell works correctly", {
+    expect_equal(showAsCell(DNAStringSetList()), character(0L))
+    expect_equal(showAsCell(DNAStringSetList(DNA_ALPHABET)), "A,C,G,...")
+})
+
+test_that("XStringSetList seqtype is correct", {
+    expect_equal(seqtype(DNAStringSetList()), "DNA")
+    expect_equal(seqtype(RNAStringSetList()), "RNA")
+    expect_equal(seqtype(AAStringSetList()), "AA")
+    expect_equal(seqtype(BStringSetList()), "B")
+
+    # downgrade sequences
+    x <- BStringSetList(DNA_ALPHABET)
+    seqtype(x) <- "DNA"
+    all.equal.XStringSetList(x, DNAStringSetList(DNA_ALPHABET))
+    seqtype(x) <- "RNA"
+    all.equal.XStringSetList(x, RNAStringSetList(RNA_ALPHABET))
+
+    x <- BStringSetList(AA_ALPHABET)
+    seqtype(x) <- "AA"
+    all.equal.XStringSetList(x, AAStringSetList(AA_ALPHABET))
+
+    # invalid conversions
+    x <- DNAStringSetList(DNA_ALPHABET)
+    expect_error({seqtype(x) <- "AA"}, 'incompatible sequence types "DNA" and "AA"')
+    seqtype(x) <- "RNA"
+    expect_error({seqtype(x) <- "AA"}, 'incompatible sequence types "RNA" and "AA"')
+})
+
+test_that("XStringSetLists show correctly", {
+    ## TODO: add some values in case printing only breaks on output
+    expect_output(show(DNAStringSetList(DNA_ALPHABET)), "DNAStringSetList of length 1\\n\\[\\[1\\]\\]")
+    expect_output(show(RNAStringSetList(RNA_ALPHABET)), "RNAStringSetList of length 1\\n\\[\\[1\\]\\]")
+    expect_output(show(AAStringSetList(AA_ALPHABET)), "AAStringSetList of length 1\\n\\[\\[1\\]\\]")
+    expect_output(show(BStringSetList(B_ALPHABET)), "BStringSetList of length 1\\n\\[\\[1\\]\\]")
+})
+
+test_that("XStringSetLists coerce from list correctly", {
+    .XStringSetList_from_list(DNAStringSet, DNAStringSetList, DNA_ALPHABET)
+    .XStringSetList_from_list(RNAStringSet, RNAStringSetList, RNA_ALPHABET)
+    .XStringSetList_from_list(AAStringSet, AAStringSetList, AA_ALPHABET)
+    .XStringSetList_from_list(BStringSet, BStringSetList, B_ALPHABET)
+})
+
+test_that("XStringSetList uses nchar correctly", {
+    dna <- DNAStringSetList(DNA_ALPHABET)
+    expect_s4_class(nchar(dna), "IntegerList")
+    expect_equal(unlist(nchar(dna)), rep(1L, length(DNA_ALPHABET)))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-XStringSetList.R b/tests/testthat/test-XStringSetList.R
new file mode 100644
index 00000000..dd1ae2ac
--- /dev/null
+++ b/tests/testthat/test-XStringSetList.R
@@ -0,0 +1,36 @@
+## XStringSetList-class.R exports the following:
+## - DNAStringSetList
+## - AAStringSetList
+## note that RNAStringSetList and BStringSetList are not supported
+
+
+test_that("XStringSetList constuctors work properly", {
+	seq_lengths <- c(2,3,4)
+	seqs <- list(DNA=paste(DNA_ALPHABET, collapse=''),
+							RNA=paste(RNA_ALPHABET, collapse=''),
+							AA=paste(AA_ALPHABET, collapse=''),
+							B=rawToChar(as.raw(32:126)))
+
+	for(s in c("DNA", "RNA", "AA", "B")){
+		seq <- seqs[[s]]
+		all_seqs <- lapply(seq_along(seq_lengths), \(i){
+			get(paste0(s, "StringSet"))(rep(seq, seq_lengths[i]))
+		})
+		cl_name <- paste0(s, "StringSetList")
+		CONSTRUCTOR <- get(cl_name)
+		expect_s4_class(CONSTRUCTOR(all_seqs), cl_name)
+
+		seqlist <- CONSTRUCTOR(all_seqs)
+		expect_equal(length(seqlist), length(seq_lengths))
+		expect_equal(lengths(seqlist), seq_lengths)
+		expect_equal(as.list(seqlist), all_seqs)
+		expect_equal(seqtype(seqlist), s)
+
+		# empty constructor
+		expect_equal(length(CONSTRUCTOR()), 0L)
+
+		names(all_seqs) <- LETTERS[seq_along(seq_lengths)]
+		expect_equal(names(CONSTRUCTOR(all_seqs)), names(all_seqs))
+		expect_null(names(CONSTRUCTOR(all_seqs, use.names=FALSE)))
+	}
+})
\ No newline at end of file
diff --git a/tests/testthat/test-XStringViews.R b/tests/testthat/test-XStringViews.R
new file mode 100644
index 00000000..a0970d7d
--- /dev/null
+++ b/tests/testthat/test-XStringViews.R
@@ -0,0 +1,49 @@
+## XStringViews-class.R exports the following:
+## - `==` for XStringViews against XStringViews, XString, character
+## - `!=` for the same as above
+## - as.character
+## - as.matrix
+## - toString
+
+
+slen <- 25L
+dnas <- sample(DNA_BASES, slen, replace=TRUE)
+d <- DNAString(paste(dnas, collapse=''))
+
+test_that('XStringViews equality works correctly', {
+	n_ranges <- 5L
+	starts <- sample(slen-5L, n_ranges)
+	ends <- starts + sample(5L, n_ranges)
+	strs <- vapply(seq_len(n_ranges), \(i) paste(dnas[seq(starts[i], ends[i])], collapse=''), character(1L))
+	v <- Views(d, start=starts, end=ends)
+	expect_equal(seqtype(v), seqtype(d))
+
+	## only the diagonal should be true
+	exp_opt <- matrix(FALSE, nrow=n_ranges, ncol=n_ranges)
+	diag(exp_opt) <- TRUE
+	test1 <- matrix(unlist(v == DNAStringSet(strs)), nrow=n_ranges, byrow=TRUE)
+	expect_identical(test1, exp_opt)
+
+	## only the diagonal should be false
+	exp_opt <- !exp_opt
+	test2 <- matrix(unlist(v != DNAStringSet(strs)), nrow=n_ranges, byrow=TRUE)
+	expect_identical(test2, exp_opt)
+
+	## comparison against Views objects should be supported
+	expect_identical(v==v, rep(TRUE, n_ranges))
+
+	## comparison with non-BString is not supported
+	expect_error(v == strs, "is not supported")
+	expect_error(strs == v, "is not supported")
+
+	## comparing BString against character
+	v2 <- v
+	expect_identical((seqtype(v2) <- "B"), "B")
+	expect_identical(v2 == strs, rep(TRUE, n_ranges))
+	expect_identical(strs == v2, rep(TRUE, n_ranges))
+	expect_identical(v2 != strs, rep(FALSE, n_ranges))
+	expect_identical(strs != v2, rep(FALSE, n_ranges))
+
+	expect_identical(toString(v2), paste(strs, collapse=', '))
+	expect_identical(nchar(v2), nchar(strs))
+})
diff --git a/tests/testthat/test-chartr.R b/tests/testthat/test-chartr.R
new file mode 100644
index 00000000..e665cceb
--- /dev/null
+++ b/tests/testthat/test-chartr.R
@@ -0,0 +1,42 @@
+## chartr.R exports the following:
+## - chartr
+## - replaceAmbiguities
+##
+## chartr() is defined for XString, XStringSet,
+## XStringViews, MaskedXString
+
+
+test_that("chartr has correct behavior on Biostrings objects", {
+	s1 <- BString("apple AppLE potato")
+	s2 <- BString("peels AeeLE eotpto")
+	expect_true(chartr("apple", "peels", s1) == s2)
+	expect_true(all(chartr("apple", "peels", BStringSet(list(s1,s1))) == BStringSet(list(s2,s2))))
+
+	v1 <- Views(s1, start=c(1,7,13), width=c(5,5,6))
+	v2 <- Views(s2, start=c(1,7,13), width=c(5,5,6))
+	expect_equal(as.character(chartr("apple", "peels", v1)), as.character(v2))
+
+	## will break when we implement this to remind me to write tests
+	ms1 <- s1
+	ms2 <- s2
+	m1 <- Mask(length(s1), start=7, width=5)
+	masks(ms1) <- m1
+	expect_error(chartr("apple", "peels", ms1), "Please complain!")
+})
+
+test_that('replaceAmbiguities works as expected', {
+	dna1 <- DNAString(paste(DNA_ALPHABET, collapse=''))
+	dna2 <- DNAString(paste(c(DNA_BASES, rep("N", length(dna1)-7L), "-+."), collapse=''))
+
+	expect_equal(replaceAmbiguities(dna1), dna2)
+	expect_equal(replaceAmbiguities(as(dna1, "RNAString")), as(dna2, "RNAString"))
+
+	aa <- AAString(paste(AA_ALPHABET, collapse=''))
+	bb <- BString(paste(LETTERS, collapse=''))
+	expect_error(replaceAmbiguities(aa), "only supported for DNA and RNA")
+	expect_error(replaceAmbiguities(bb), "only supported for DNA and RNA")
+	expect_error(replaceAmbiguities("test"), "only supported for DNA and RNA")
+
+	expect_true(all(replaceAmbiguities(DNAStringSet(list(dna1, dna1))) == DNAStringSet(list(dna2, dna2))))
+	expect_true(all(replaceAmbiguities(Views(dna1, start=c(1,4,7), width=3)) == Views(dna2, start=c(1,4,7), width=3)))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-dinucleotideFrequencyTest.R b/tests/testthat/test-dinucleotideFrequencyTest.R
new file mode 100644
index 00000000..9f6724d0
--- /dev/null
+++ b/tests/testthat/test-dinucleotideFrequencyTest.R
@@ -0,0 +1,32 @@
+## dinucleotideFrequencyTest exports dinucleotideFrequencyTest()
+## for the following classes:
+##
+## - DNAStringSet
+## - RNAStringSet
+
+test_that("examples in dinucleotideFrequencyTest.Rd maintain functionality", {
+	## TODO: better tests.
+	## This is adapted from current functionality, do we know that's correct?
+	data(HNF4alpha)
+	expect_s3_class(dinucleotideFrequencyTest(HNF4alpha, 1, 2), "htest")
+
+	## why doesn't p.value have a name?
+	test1 <- dinucleotideFrequencyTest(HNF4alpha, 1, 2)
+	expect_equal(c(round(test1$statistic,4L), test1$parameter, round(test1$p.value, 4L)),
+							 c("X-squared"=19.0727, "df"=9, 0.0246))
+
+  test2 <- dinucleotideFrequencyTest(HNF4alpha, 1, 2, test = "G")
+  expect_equal(c(round(test2$statistic,4L), test2$parameter, round(test2$p.value,4L)),
+  						c("Log likelihood ratio statistic (G)"=17.2609,
+  							"X-squared df"=9,
+  							"p.value"=0.0448))
+
+  expect_false(grepl("Williams' correction", test2$method))
+
+  test3 <- dinucleotideFrequencyTest(HNF4alpha, 1, 2, test = "adjG")
+  expect_equal(c(round(test3$statistic,4L), test3$parameter, round(test3$p.value,4L)),
+  						c("Log likelihood ratio statistic (G)"=10.8064,
+  							"X-squared df"=9,
+  							"p.value"=0.2892))
+  expect_true(grepl("Williams' correction", test3$method))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-findPalindromes.R b/tests/testthat/test-findPalindromes.R
new file mode 100644
index 00000000..a6a5e601
--- /dev/null
+++ b/tests/testthat/test-findPalindromes.R
@@ -0,0 +1,175 @@
+## findPalindromes.R exports the following:
+## - findPalindromes()
+## - palindromeArmLength()
+## - palindromeLeftArm()
+## - palindromeRightArm()
+##
+## TODOs: missing MaskedXString, XStringViews tests
+
+
+## new tests
+test_that("palindromeLeftArm and *RightArm identify the right arms", {
+  text4 <- BString("i45hgfe7d321c3b4a56789uvwWVU98765A4B3C123D7EFGH54I")
+  current <- findPalindromes(text4, min.armlength = 5, max.looplength = length(text4), max.mismatch = 3)
+  leftarm <- palindromeLeftArm(current)
+  expect_equal(start(leftarm), start(current))
+  expect_equal(width(leftarm), c(5,1,1,3,1,0))
+
+  rightarm <- palindromeRightArm(current)
+  expect_equal(end(rightarm), end(current))
+  expect_equal(width(rightarm), width(leftarm))
+})
+
+## For now I'm just going to port the old tests
+test_that("findPalindromes works on BString objects", {
+    text1 <- BString("ABCDCBA")
+    current <- findPalindromes(text1, min.armlength=2)
+    expect_identical(IRanges(1, 7), ranges(current))
+    current <- findPalindromes(text1, min.armlength=2, max.looplength=0)
+    expect_identical(IRanges(), ranges(current))
+
+    text2 <- BString("xesopeReste etseReposeygohangasalamiImalasagnahogz")
+    current <- findPalindromes(text2, max.looplength=2)
+    expect_identical(IRanges(c(2, 24), c(22, 49)), ranges(current))
+    expect_identical(c(10L, 12L), palindromeArmLength(current))
+
+    text3 <- BString("AATAAACTNTCAAATYCCY")
+    current <- findPalindromes(text3, min.armlength=2)
+    expect_identical(IRanges(c(1, 3, 16), c(5, 15, 19)), ranges(current))
+
+    text4 <- BString("i45hgfe7d321c3b4a56789uvwWVU98765A4B3C123D7EFGH54I")
+    current <- findPalindromes(text4, min.armlength = 5, max.looplength = length(text4), max.mismatch = 3)
+    expect_identical(IRanges(c(18, 16, 14, 10, 8, 7), c(33, 35, 37, 41, 43, 44)), ranges(current))
+})
+
+test_that("findPalindromes works on nucleotide sequences", {
+  x1 <- DNAString("AATAAACTNTCAAATYCCY")  # same sequence as 'text3'
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x1, class)
+      current <- findPalindromes(x, min.armlength=2)
+      expect_identical(IRanges(), ranges(current))
+      current <- findPalindromes(x, min.armlength=2, max.looplength=10)
+      expect_identical(IRanges(2, 15), ranges(current))
+  }
+
+  x2 <- DNAString("AATTT")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x2, class)
+      current <- findPalindromes(x, min.armlength=2)
+      target <- IRanges(1, 4:5)
+      expect_identical(target, ranges(current))
+      current <- findPalindromes(x, min.armlength=2, max.looplength=0)
+      expect_identical(target[1], ranges(current))
+  }
+
+  x3 <- DNAString("TTTAA")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x3, class)
+      current <- findPalindromes(x, min.armlength=2)
+      target <- IRanges(1:2, 5)
+      expect_identical(target, ranges(current))
+      current <- findPalindromes(x, min.armlength=2, max.looplength=0)
+      expect_identical(target[2], ranges(current))
+  }
+
+  x4 <- DNAString("AAATTT")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x4, class)
+      current <- findPalindromes(x, min.armlength=2)
+      target <- IRanges(c(1, 1, 2), c(5, 6, 6))
+      expect_identical(target, ranges(current))
+      current <- findPalindromes(x, min.armlength=3)
+      expect_identical(target[2], ranges(current))
+      current <- findPalindromes(x, min.armlength=4)
+      expect_identical(target[0], ranges(current))
+  }
+
+  ## With nested palindromes.
+  x5 <- DNAString("CTTGAAATTTGAAG")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x5, class)
+      target0 <- IRanges(c(2, 2, 5,  5,  1,  6,  8,  9),
+                         c(6, 7, 9, 10, 14, 10, 13, 13))
+      current <- findPalindromes(x, min.armlength=2, max.looplength=0)
+      expect_identical(target0[4], ranges(current))
+      current <- findPalindromes(x, min.armlength=2, max.looplength=1)
+      expect_identical(target0[-c(2, 5, 7)], ranges(current))
+      for (i in 2:7) {
+          current <- findPalindromes(x, min.armlength=2, max.looplength=i)
+          expect_identical(target0[-5], ranges(current))
+      }
+      current <- findPalindromes(x, min.armlength=2, max.looplength=8)
+      expect_identical(target0, ranges(current))
+  }
+
+  x6 <- DNAString("AAGAATTGTT")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x6, class)
+      target0 <- IRanges(c(1, 4, 1, 4), c(7, 7, 10, 10))
+      for (i in 0:2) {
+          current <- findPalindromes(x, min.armlength=2, max.looplength=i)
+          expect_identical(target0[2], ranges(current))
+      }
+      for (i in 3:5) {
+          current <- findPalindromes(x, min.armlength=2, max.looplength=i)
+          expect_identical(target0[-3], ranges(current))
+      }
+      current <- findPalindromes(x, min.armlength=2, max.looplength=6)
+      expect_identical(target0, ranges(current))
+  }
+
+  # With IUPAC ambiguity codes. Ambiguity codes are not allowed in the arms
+  # of a palindrome!
+  x7 <- DNAString("NNNNNN")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x7, class)
+      current <- findPalindromes(x, min.armlength=2)
+      expect_identical(IRanges(), ranges(current))
+  }
+
+  x8 <- DNAString("ACCGNAAATTTNCGGT")
+  for (class in c("DNAString", "RNAString")) {
+      x <- as(x8, class)
+      current <- findPalindromes(x, min.armlength=2)
+      expect_identical(IRanges(c(6, 6, 7), c(10, 11, 11)), ranges(current))
+      current <- findPalindromes(x, min.armlength=4)
+      expect_identical(IRanges(), ranges(current))
+      current <- findPalindromes(x, min.armlength=3, max.looplength=8)
+      expect_identical(IRanges(c(6, 1), c(11, 16)), ranges(current))
+  }
+})
+
+test_that("palindromeArmLength functions correctly for nucleotide sequences", {
+    x2 <- DNAString("AATTT")
+    for (class in c("DNAString", "RNAString")) {
+        x <- as(x2, class)
+        expect_identical(2L, palindromeArmLength(x))
+        pals <- findPalindromes(x, min.armlength=2)
+        expect_identical(c(2L, 2L), palindromeArmLength(pals))
+    }
+
+    x3 <- DNAString("TTTAA")
+    for (class in c("DNAString", "RNAString")) {
+        x <- as(x3, class)
+        expect_identical(2L, palindromeArmLength(x))
+        pals <- findPalindromes(x, min.armlength=2)
+        expect_identical(c(2L, 2L), palindromeArmLength(pals))
+    }
+
+    x4 <- DNAString("AAATTT")
+    for (class in c("DNAString", "RNAString")) {
+        x <- as(x4, class)
+        expect_identical(3L, palindromeArmLength(x))
+        pals <- findPalindromes(x, min.armlength=2)
+        expect_identical(c(2L, 3L, 2L), palindromeArmLength(pals))
+    }
+
+    x5 <- DNAString("CTTGAAATTTGAAG")
+    for (class in c("DNAString", "RNAString")) {
+        x <- as(x5, class)
+        expect_identical(3L, palindromeArmLength(x))
+        pals <- findPalindromes(x, min.armlength=2, max.looplength=8)
+        expect_identical(c(2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L),
+                       palindromeArmLength(pals))
+    }
+})
diff --git a/tests/testthat/test-letter.R b/tests/testthat/test-letter.R
new file mode 100644
index 00000000..bca65936
--- /dev/null
+++ b/tests/testthat/test-letter.R
@@ -0,0 +1,46 @@
+## letter.R exports `letter` for the following classes:
+## - character
+## - XString
+## - XStringViews
+## - MaskedXString
+
+
+test_that("letter generic works properly for all classes", {
+	s <- paste(letters, collapse='')
+	sdna <- sample(DNA_BASES, 20L, replace=TRUE)
+
+	dna <- DNAString(paste(sdna, collapse=''))
+	viewdna <- Views(dna, start=rep(1L,3L), width=10)
+
+	m <- Mask(mask.width=20L, start=1, end=5L)
+	maskdna <- dna
+	masks(maskdna) <- m
+
+	## happy path testing
+	test_cases <- list(1.0, 1:3, 1:3*2L, integer(0L))
+	for(i in seq_along(test_cases)){
+		## character
+		expect_equal(letter(s, test_cases[[i]]), paste(letters[test_cases[[i]]], collapse=''))
+
+		## XString
+		expect_equal(letter(dna, test_cases[[i]]), paste(sdna[test_cases[[i]]], collapse=''))
+
+		## XStringViews
+		expect_equal(letter(viewdna, test_cases[[i]]), rep(paste(sdna[test_cases[[i]]], collapse=''), 3L))
+
+		## MaskedXString -- note that this ignores the mask
+		expect_equal(letter(maskdna, test_cases[[i]]), paste(sdna[test_cases[[i]]], collapse=''))
+	}
+
+	## sad path testing
+	sad_list <- list('a', NA_real_, 10000)
+	error_msgs <- c("must be an NA-free numeric vector",
+									"must be an NA-free numeric vector",
+									"out of bounds")
+	for(i in seq_along(sad_list)){
+		expect_error(letter(s, sad_list[[i]]), error_msgs[i])
+		expect_error(letter(dna, sad_list[[i]]), error_msgs[i])
+		expect_error(letter(viewdna, sad_list[[i]]), error_msgs[i])
+		expect_error(letter(maskdna, sad_list[[i]]), error_msgs[i])
+	}
+})
\ No newline at end of file
diff --git a/tests/testthat/test-letterFrequency.R b/tests/testthat/test-letterFrequency.R
new file mode 100644
index 00000000..2b8dedd5
--- /dev/null
+++ b/tests/testthat/test-letterFrequency.R
@@ -0,0 +1,265 @@
+## letterFrequency.R exports the following:
+##	- alphabetFrequency
+##	- hasOnlyBaseLetters
+##	- uniqueLetters
+##	- letterFrequency
+##	- *letterFrequencyInSlidingView
+##	- consensusMatrix
+##	- consensusString (matrix, DNAStringSet, RNAStringSet)
+##  - mkAllStrings
+##  - oligonucleotideFrequency
+##	- dinucleotideFrequency
+##	- trinucleotideFrequency
+## 	- *oligonucleotideTransitions
+##  - *twoWayAlphabetFrequency
+##
+## starred functions are still lacking tests
+
+B_ALPHABET <- strsplit(rawToChar(as.raw(32:126)), '')[[1]]
+
+check_uniqueLetters <- function(XStringFUN, X_ALPH){
+	set.seed(123L)
+	s <- sample(X_ALPH, 100L, replace=TRUE)
+	u_v <- unique(s)
+
+	s <- paste(s, collapse='')
+	expect_equal(sort(uniqueLetters(XStringFUN(s))), sort(u_v))
+}
+
+check_alphfreq <- function(XStringFUN, X_ALPH, isMatrix=FALSE, checkBaseOnly=NULL){
+	set.seed(456L)
+	tab <- integer(length(X_ALPH))
+	names(tab) <- X_ALPH
+
+	if(isMatrix){
+		exp_res <- matrix(0L, nrow=5, ncol=length(tab))
+		colnames(exp_res) <- names(tab)
+		strs <- character(5L)
+		for(i in seq_len(5)){
+			tab[] <- 0L
+			s <- sample(X_ALPH, 100L, replace=TRUE)
+			u_v <- table(s)
+			tab[names(u_v)] <- u_v
+			exp_res[i,] <- tab
+			strs[i] <- paste(s, collapse='')
+		}
+		norm_res <- exp_res / rowSums(exp_res)
+		ss <- XStringFUN(strs)
+	} else {
+		strs <- sample(X_ALPH, 100L, replace=TRUE)
+		u_v <- table(strs)
+		tab[names(u_v)] <- u_v
+		exp_res <- tab
+		ss <- XStringFUN(paste(strs, collapse=''))
+		norm_res <- exp_res / sum(exp_res)
+	}
+
+	# Check alphabetFrequency
+	v <- alphabetFrequency(ss)
+	expect_equal(v, exp_res)
+	expect_equal(sum(v), sum(nchar(strs)))
+	expect_equal(alphabetFrequency(ss, as.prob=TRUE), norm_res)
+
+	# Check letterFrequency
+	expect_equal(letterFrequency(ss, X_ALPH), exp_res)
+	expect_equal(letterFrequency(ss, X_ALPH, as.prob=TRUE), norm_res)
+
+	if(!is.null(checkBaseOnly)){
+		# provide the alphabet
+		b <- checkBaseOnly
+		if(isMatrix){
+			p_in <- match(colnames(exp_res), b, nomatch=0)
+			exp_res <- cbind(exp_res[,p_in!=0], other=rowSums(exp_res[,p_in==0]))
+			norm_res <- exp_res / rowSums(exp_res)
+		} else {
+			p_in <- match(names(exp_res), b, nomatch=0)
+			exp_res <- c(exp_res[p_in!=0], sum(exp_res[p_in==0]))
+			names(exp_res) <- c(b, "other")
+			norm_res <- exp_res / sum(exp_res)
+		}
+
+		v <- alphabetFrequency(ss, baseOnly=TRUE)
+		expect_equal(v, exp_res)
+		expect_equal(sum(v), sum(nchar(strs)))
+		expect_error(alphabetFrequency(ss, baseOnly=NA), "'baseOnly' must be TRUE or FALSE")
+		expect_error(alphabetFrequency(ss, baseOnly=1), "'baseOnly' must be TRUE or FALSE")
+		expect_equal(alphabetFrequency(ss, baseOnly=TRUE, as.prob=TRUE), norm_res)
+		if(isMatrix){
+			expect_equal(letterFrequency(ss, b), exp_res[,seq_along(b)])
+			expect_equal(letterFrequency(ss, b, as.prob=TRUE), norm_res[,seq_along(b)])
+		} else {
+			expect_equal(letterFrequency(ss, b), exp_res[seq_along(b)])
+			expect_equal(letterFrequency(ss, b, as.prob=TRUE), norm_res[seq_along(b)])
+		}
+	}
+}
+
+test_that("uniqueLetters works properly", {
+	check_uniqueLetters(DNAString, DNA_ALPHABET)
+	check_uniqueLetters(RNAString, RNA_ALPHABET)
+	check_uniqueLetters(AAString, AA_ALPHABET)
+	check_uniqueLetters(BString, B_ALPHABET)
+
+	check_uniqueLetters(DNAStringSet, DNA_ALPHABET)
+	check_uniqueLetters(RNAStringSet, RNA_ALPHABET)
+	check_uniqueLetters(AAStringSet, AA_ALPHABET)
+	check_uniqueLetters(BStringSet, B_ALPHABET)
+})
+
+test_that("letterFrequency, alphabetFrequency functions for all input types", {
+	check_alphfreq(DNAString, DNA_ALPHABET, checkBaseOnly=DNA_BASES)
+	check_alphfreq(RNAString, RNA_ALPHABET, checkBaseOnly=RNA_BASES)
+	check_alphfreq(AAString, AA_ALPHABET, checkBaseOnly=AA_STANDARD)
+
+	check_alphfreq(DNAStringSet, DNA_ALPHABET, isMatrix=TRUE, checkBaseOnly=DNA_BASES)
+	check_alphfreq(RNAStringSet, RNA_ALPHABET, isMatrix=TRUE, checkBaseOnly=RNA_BASES)
+	check_alphfreq(AAStringSet, AA_ALPHABET, isMatrix=TRUE, checkBaseOnly=AA_STANDARD)
+
+	## checking for ambiguity tabulation
+	d <- DNAString("ATGCATGC.+Y")
+	exp_output <- c(2L, 4L, 2L)
+	names(exp_output) <- c("A", "G|C", "T")
+	expect_equal(letterFrequency(d, letters=c("A", "GC", "T")), exp_output)
+	names(exp_output)[2] <- "G*C"
+	expect_equal(letterFrequency(d, letters=c("A", "GC", "T"), OR="*"), exp_output)
+
+	r <- RNAString("AUGCAUGC.+Y")
+	exp_output <- 8L
+	names(exp_output) <- c("A|U|G|C")
+	expect_equal(letterFrequency(r, letters="AUGC"), exp_output)
+	exp_output <- rep(2L, 4)
+	names(exp_output) <- c("A","U","G","C") # intentionally different from RNA_BASES
+	expect_equal(letterFrequency(r, letters="AUGC", OR=0), exp_output)
+
+	## BStrings have their own check
+	set.seed(999L)
+	bstr <- sample(B_ALPHABET, 100, replace=TRUE)
+	bstr_counts <- table(match(bstr, B_ALPHABET))
+	r <- integer(256L)
+	r[as.integer(names(bstr_counts)) + 32L] <- bstr_counts
+	bstr <- paste(bstr, collapse='')
+	expect_equal(alphabetFrequency(BString(bstr)), r)
+	expect_equal(alphabetFrequency(BStringSet(bstr)), matrix(r, nrow=1L, ncol=256L))
+
+	expect_error(alphabetFrequency(BString(), baseOnly=TRUE), "unused argument (baseOnly = TRUE)", fixed=TRUE)
+
+	## checking for Views objects and maskedString objects
+	ds <- sample(DNA_ALPHABET, 100L, replace=TRUE)
+	exp_out <- matrix(0L, nrow=2, ncol=length(DNA_ALPHABET))
+	colnames(exp_out) <- DNA_ALPHABET
+	exp_out[1,names(table(ds[seq(1,50)]))] <- table(ds[seq(1,50)])
+	exp_out[2,names(table(ds[seq(51,100)]))] <- table(ds[seq(51,100)])
+	v <- Views(DNAString(paste(ds, collapse='')), start=c(1,51), end=c(50,100))
+	expect_equal(alphabetFrequency(v), exp_out)
+	expect_equal(letterFrequency(v, "A"), exp_out[,"A",drop=FALSE])
+
+	mask_d <- DNAString(paste(ds, collapse=''))
+	masks(mask_d) <- Mask(length(ds), start=c(5,50), end=c(10,60))
+	exp_out <- rep(0L, length(DNA_ALPHABET))
+	names(exp_out) <- DNA_ALPHABET
+	subdstr <- ds[-c(5:10,50:60)]
+	exp_out[names(table(subdstr))] <- table(subdstr)
+	expect_equal(alphabetFrequency(mask_d), exp_out)
+	expect_equal(letterFrequency(mask_d, "A"), exp_out["A"])
+})
+
+test_that("hasOnlyBaseLetters works as expected", {
+	## XStringSet
+	expect_true(hasOnlyBaseLetters(DNAStringSet(DNA_BASES)))
+	expect_true(hasOnlyBaseLetters(RNAStringSet(RNA_BASES)))
+	expect_true(hasOnlyBaseLetters(AAStringSet(AA_STANDARD)))
+
+	expect_false(hasOnlyBaseLetters(DNAStringSet(DNA_ALPHABET)))
+	expect_false(hasOnlyBaseLetters(RNAStringSet(RNA_ALPHABET)))
+	expect_false(hasOnlyBaseLetters(AAStringSet(AA_ALPHABET)))
+	expect_error(hasOnlyBaseLetters(BStringSet()), 'unable to find an inherited method')
+
+	## Views
+	d1 <- DNAString(paste(DNA_BASES, collapse=''))
+	d2 <- DNAString(paste(DNA_ALPHABET, collapse=''))
+	expect_true(hasOnlyBaseLetters(Views(d1, start=1, end=length(DNA_BASES))))
+	expect_false(hasOnlyBaseLetters(Views(d2, start=1, end=length(DNA_ALPHABET))))
+
+	## MaskedXString
+	md1 <- d1
+	md2 <- d2
+	masks(md1) <- Mask(length(d1), start=2, end=3)
+	masks(md2) <- Mask(length(d2), start=2, end=5)
+	expect_true(hasOnlyBaseLetters(md1))
+	expect_false(hasOnlyBaseLetters(md2))
+
+})
+
+test_that("miscellaneous letterFrequency methods work", {
+	## mkAllStrings
+	alph <- c("A", "B", "C")
+	exp_output <- expand.grid(alph, alph, alph)
+	exp_output <- apply(exp_output, 1L, paste, collapse='')
+	expect_equal(sort(mkAllStrings(alph, 3L)), sort(exp_output))
+
+	## uniqueLetters
+	d <- paste(rep(DNA_ALPHABET, each=3), collapse='')
+	expect_equal(uniqueLetters(DNAString(d)), DNA_ALPHABET)
+	expect_equal(uniqueLetters(DNAStringSet(c(d,d))), DNA_ALPHABET)
+	expect_equal(uniqueLetters(Views(DNAString(d), start=c(1,6), end=c(5,nchar(d)))), DNA_ALPHABET)
+	md <- DNAString(d)
+	masks(md) <- Mask(nchar(d), start=seq(1,nchar(d),3), end=seq(2,nchar(d),3))
+	expect_equal(uniqueLetters(md), DNA_ALPHABET)
+})
+
+test_that("nucleotideFrequency methods work properly", {
+	set.seed(999L)
+	l <- 102L
+	ds <- sample(DNA_BASES, l, replace=TRUE)
+	d <- DNAString(paste(ds, collapse=''))
+	dss <- DNAStringSet(c(ds,ds))
+
+	## oligonucleotideFrequency(x, 1L) should be the same as alphabetFrequency(...,baseOnly=TRUE)
+	## (ignoring the 'other' arg)
+	expect_equal(oligonucleotideFrequency(d, 1L), alphabetFrequency(d, baseOnly=TRUE)[seq_len(4L)])
+	expect_equal(oligonucleotideFrequency(dss, 1L), alphabetFrequency(dss, baseOnly=TRUE)[,seq_len(4L)])
+
+	## dinucleotide correctness
+	all_doubles <- table(paste0(ds[seq_len(l-1L)], ds[seq_len(l-1L)+1L]))
+	exp_res <- rep(0L, length(DNA_BASES)**2)
+	names(exp_res) <- mkAllStrings(DNA_BASES, 2L)
+	exp_res[names(all_doubles)] <- all_doubles
+	expect_equal(oligonucleotideFrequency(d, 2L), exp_res)
+	expect_equal(dinucleotideFrequency(d), exp_res)
+
+	## trinucleotide correctness
+	all_triples <- table(paste0(ds[seq_len(l-2L)], ds[seq_len(l-2L)+1L], ds[seq_len(l-2L)+2L]))
+	exp_res <- rep(0L, length(DNA_BASES)**3)
+	names(exp_res) <- mkAllStrings(DNA_BASES, 3L)
+	exp_res[names(all_triples)] <- all_triples
+	expect_equal(oligonucleotideFrequency(d, 3L), exp_res)
+	expect_equal(trinucleotideFrequency(d), exp_res)
+
+	## using step
+	all_codons <- codons(d)
+	exp_res[] <- 0L
+	codon_table <- table(as.character(all_codons))
+	exp_res[names(codon_table)] <- codon_table
+	expect_equal(oligonucleotideFrequency(d, 3L, step=3L), exp_res)
+
+	## frequency on a Views object
+	exp_res <- matrix(0L, nrow=l/3, ncol=length(DNA_BASES)**3)
+	colnames(exp_res) <- mkAllStrings(DNA_BASES, 3L)
+	exp_res[cbind(seq_len(nrow(exp_res)), match(as.character(all_codons), colnames(exp_res)))] <- 1L
+	expect_equal(oligonucleotideFrequency(codons(d), 3L), exp_res)
+
+	## frequency on a MaskedXString object
+	mask_d <- d
+	masks(mask_d) <- Mask(l, start=c(10,50), end=c(19,57))
+	mask_str <- strsplit(as.character(mask_d), '')[[1]]
+	all_doubles <- table(paste0(mask_str[seq_len(l-1L)], mask_str[seq_len(l-1L)+1L]))
+	all_doubles <- all_doubles[!grepl("#", names(all_doubles))]
+	exp_res <- rep(0L, length(DNA_BASES)**2)
+	names(exp_res) <- mkAllStrings(DNA_BASES, 2L)
+	exp_res[names(all_doubles)] <- all_doubles
+	expect_equal(oligonucleotideFrequency(mask_d, 2L), exp_res)
+
+	## disallowed types
+	expect_error(oligonucleotideFrequency(AAString(), "must contain sequences of type DNA or RNA"))
+	expect_error(oligonucleotideFrequency(BStringSet(), "must contain sequences of type DNA or RNA"))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-matchLRPatterns.R b/tests/testthat/test-matchLRPatterns.R
new file mode 100644
index 00000000..a146d276
--- /dev/null
+++ b/tests/testthat/test-matchLRPatterns.R
@@ -0,0 +1,47 @@
+## matchLRPatterns.R exports matchLRPatterns for the following:
+## - XString
+## - XStringViews
+## - MaskedXString
+##
+## note that Lfixed, Rfixed are only defined for DNAString, RNAString
+
+test_that("matchLRPatterns works for all inputs", {
+	d <- DNAString("AAATTAACCCTT")
+
+	## different values of max.mismatch
+	expect_equal(matchLRPatterns("AA", "TT", 0, d), Views(d, start=2, end=5))
+	expect_equal(matchLRPatterns("AA", "TT", 1, d), Views(d, start=c(1,2), end=c(5,5)))
+	expect_equal(matchLRPatterns("AA", "TT", 3, d), Views(d, start=c(1,2,6), end=c(5,5,12)))
+	expect_equal(matchLRPatterns("AA", "TT", 7, d), Views(d, start=c(1,2,2,6), end=c(5,5,12,12)))
+
+	## L,Rmismatch
+	expect_equal(matchLRPatterns("AA", "TT", 0, subject=d, max.Lmismatch=2), Views(d, start=c(2,9), end=c(5,12)))
+	expect_equal(matchLRPatterns("AA", "TT", 0, subject=d, max.Rmismatch=2), Views(d, start=c(1,2,6), end=c(4,5,9)))
+
+	## L,Rindels
+	d2 <- DNAString("ATATTA")
+	expect_equal(matchLRPatterns("AA", "TT", 0, d2, max.Lmismatch=1, with.Lindels=FALSE), Views(d2, start=2, end=5))
+	expect_equal(matchLRPatterns("AA", "TT", 0, d2, max.Lmismatch=1, with.Lindels=TRUE), Views(d2, start=3, end=5))
+	d3 <- DNAString("AATAT")
+	expect_equal(matchLRPatterns("AA", "TT", 0, subject=d3, max.Rmismatch=1, with.Rindels=FALSE), Views(d3,start=1,end=4))
+	expect_equal(matchLRPatterns("AA", "TT", 0, subject=d3, max.Rmismatch=1, with.Rindels=TRUE), Views(d3,start=1,end=3))
+
+	## Lfixed, Rfixed
+	expect_equal(matchLRPatterns("NN", "TT", 0, subject=d), Views(d))
+	expect_equal(matchLRPatterns("MM", "TT", 0, subject=d, Lfixed="subject"), Views(d, start=c(2,9), end=c(5,12)))
+	expect_equal(matchLRPatterns("AA", "WY", 0, subject=d, Rfixed="subject"), Views(d, start=c(1,2), end=c(4,5)))
+
+	## with a mask
+	dm <- d
+	masks(dm) <- Mask(length(dm), start=8, end=10)
+	expect_equal(matchLRPatterns("AA", "TT", 0, dm), Views(unmasked(dm), start=2, end=5))
+	expect_equal(matchLRPatterns("AA", "TT", 1, dm), Views(unmasked(dm), start=c(1,2), end=c(5,5)))
+	expect_equal(matchLRPatterns("AA", "TT", 3, dm), Views(unmasked(dm), start=c(1,2), end=c(5,5)))
+
+	## on a Views object
+	v <- Views(d, start=c(1,6), end=c(5,12))
+	expect_equal(matchLRPatterns("AA", "TT", 0, v), Views(d, start=2, end=5))
+	expect_equal(matchLRPatterns("AA", "TT", 1, v), Views(d, start=c(1,2), end=c(5,5)))
+	expect_equal(matchLRPatterns("AA", "TT", 3, v), Views(d, start=c(1,2,6), end=c(5,5,12)))
+	expect_equal(matchLRPatterns("AA", "TT", 7, v), Views(d, start=c(1,2,6), end=c(5,5,12)))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-matchPDict.R b/tests/testthat/test-matchPDict.R
new file mode 100644
index 00000000..a92e3206
--- /dev/null
+++ b/tests/testthat/test-matchPDict.R
@@ -0,0 +1,224 @@
+## matchPDict.R exports the following:
+##		- matchPDict
+##		- countPDict
+##		- whichPDict
+##		- vmatchPDict
+##		- vcountPDict
+##		- vwhichPDict
+## These are all S4s defined for "subject", XString,
+##	XStringSet, XStringViews, MaskedXString
+
+test_that("general match/count/whichPDict behavior works as expected", {
+  d1 <- DNAString("ATGGATACACAA")
+  all_codons <- mkAllStrings(DNA_BASES, 3L)
+
+  ## non-pdict with matchPDict
+  p1 <- matchPDict(DNAStringSet("ATA"), d1)
+  expect_equal(as.integer(c(start(p1), end(p1))), c(5,7))
+
+  pd <- PDict(all_codons)
+
+  ## pd against a DNAStringSet
+  p2 <- lengths(matchPDict(pd, d1))
+  pos_good <- which(all_codons %in% as.character(Views(d1, start=seq_len(length(d1)-2L), width=3)))
+  expect_true(all(p2[pos_good] > 0) && all(p2[-pos_good] == 0))
+  expect_equal(whichPDict(pd, d1), pos_good)
+  expect_equal(countPDict(pd, d1), p2)
+
+  ## pd against an XStringViews object
+  p3 <- lengths(matchPDict(pd, codons(d1)))
+  pos_good <- which(all_codons %in% as.character(codons(d1)))
+  expect_true(all(p3[pos_good] > 0) && all(p3[-pos_good] == 0))
+  expect_equal(whichPDict(pd, codons(d1)), pos_good)
+  expect_equal(countPDict(pd, codons(d1)), p3)
+
+  ## pd against a MaskedXString object
+  m1 <- Mask(length(d1), start=4, end=8)
+  d2 <- d1
+  masks(d2) <- m1
+  p4 <- lengths(matchPDict(pd, d2))
+  pos_good <- which(all_codons %in% as.character(Views(unmasked(d2), start=c(1,9,10), width=3)))
+  expect_true(all(p4[pos_good] > 0) && all(p4[-pos_good] == 0))
+  expect_equal(whichPDict(pd, d2), pos_good)
+  expect_equal(countPDict(pd, d2), p4)
+
+
+  ## matchPDict() with AA, RNA, B
+  aa1 <- AAStringSet(c("DARC", "EGH"))
+  aa2 <- AAString("KMFPRNDEGHSTTWTEE")
+  paa <- matchPDict(aa1, aa2)
+  expect_equal(c(unlist(startIndex(paa)), unlist(endIndex(paa))), c(8,10))
+  expect_equal(countPDict(aa1, aa2), c(0,1))
+  expect_equal(whichPDict(aa1, aa2), 2)
+
+  b1 <- BStringSet(c("DARC", "EGH"))
+  b2 <- BString("KMFPRNDEGHSTTWTEE")
+  pb <- matchPDict(b1, b2)
+  expect_equal(c(unlist(startIndex(pb)), unlist(endIndex(pb))), c(8,10))
+  expect_equal(countPDict(b1, b2), c(0,1))
+  expect_equal(whichPDict(b1, b2), 2)
+})
+
+test_that("inexact PDict matching works", {
+	pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2)
+	d <- DNAString("acggaccg")
+  expect_equal(lengths(endIndex(matchPDict(pdict, d, max.mismatch=0))), c(0L,0L,0L,2L))
+  expect_equal(lengths(endIndex(matchPDict(pdict, d, max.mismatch=1))), c(1L,0L,2L,2L))
+  expect_equal(lengths(endIndex(matchPDict(pdict, d, max.mismatch=2))), c(2L,0L,2L,2L))
+
+  expect_equal(countPDict(pdict, d, max.mismatch=0), c(0,0,0,2))
+  expect_equal(countPDict(pdict, d, max.mismatch=1), c(1,0,2,2))
+  expect_equal(countPDict(pdict, d, max.mismatch=2), c(2,0,2,2))
+})
+
+test_that("vectorized PDict lookup works", {
+  ## canary tests for future changes
+  pdict <- PDict(c("acgt", "gt", "cgt", "ac"), tb.end=2)
+  d <- DNAString("acggaccg")
+  dss <- DNAStringSet(list(d,d))
+  dvv <- Views(d, start=rep(1,2), width=rep(length(d), 2))
+
+  # Disallowed cases
+  expect_error(matchPDict(pdict, dss), "please use vmatchPDict")
+  expect_error(vmatchPDict(pdict, d), "please use matchPDict")
+  expect_error(vmatchPDict(pdict, as(d, "MaskedDNAString")), "please use matchPDict")
+  expect_error(vwhichPDict(pdict, d), "please use whichPDict")
+  expect_error(vwhichPDict(pdict, as(d, "MaskedDNAString")), "please use whichPDict")
+  expect_error(vcountPDict(pdict, d), "please use countPDict")
+  expect_error(vcountPDict(pdict, as(d, "MaskedDNAString")), "please use countPDict")
+
+  exp_out <- matrix(rep(c(0,2), times=c(6,2)), nrow=4L, byrow=TRUE)
+  expect_equal(vcountPDict(pdict, dss), exp_out)
+  expect_equal(vwhichPDict(pdict, dss), list(4,4))
+  expect_equal(vcountPDict(pdict, dvv), exp_out)
+  expect_equal(vwhichPDict(pdict, dvv), list(4,4))
+
+  exp_out[1,] <- 1
+  exp_out[3,] <- 2
+  expect_equal(vcountPDict(pdict, dss, max.mismatch=1), exp_out)
+  expect_equal(vwhichPDict(pdict, dss, max.mismatch=1), list(c(1,3,4),c(1,3,4)))
+  expect_equal(vcountPDict(pdict, dvv, max.mismatch=1), exp_out)
+  expect_equal(vwhichPDict(pdict, dvv, max.mismatch=1), list(c(1,3,4),c(1,3,4)))
+
+  exp_out[1,] <- 2
+  expect_equal(vcountPDict(pdict, dss, max.mismatch=2), exp_out)
+  expect_equal(vwhichPDict(pdict, dss, max.mismatch=2), list(c(1,3,4),c(1,3,4)))
+  expect_equal(vcountPDict(pdict, dvv, max.mismatch=2), exp_out)
+  expect_equal(vwhichPDict(pdict, dvv, max.mismatch=2), list(c(1,3,4),c(1,3,4)))
+
+  ## this will fail when we implement it to remind me to add tests for it
+  expect_error(vmatchPDict(pdict, dss), "vmatchPDict() is not ready yet", fixed=TRUE)
+})
+
+test_that("MIndex functionality works", {
+  ## set up an MIndex object
+  s <- mkAllStrings(c("A", "T"), 2L)
+  names(s) <- s
+  pd <- PDict(s)
+  d <- DNAString("ATTATTAATTAA")
+  m <- matchPDict(pd, d)
+  exp_match_starts <- list(c(7,11), c(1,4,8), c(3,6,10), c(2,5,9))
+  expect_equal(length(m), 2^2)
+  expect_equal(names(m), names(s))
+  expect_equal(startIndex(m), exp_match_starts)
+  expect_equal(endIndex(m), lapply(exp_match_starts, \(x) x+1L))
+  expect_equal(elementNROWS(m), c(2,3,3,3))
+  expect_equal(m[[1L]], IRanges(start=c(7,11), end=c(8,12)))
+  expect_s4_class(as(m, "CompressedIRangesList"), "CompressedIRangesList")
+  expect_equal(start(unlist(m)), unlist(exp_match_starts))
+  ux <- extractAllMatches(d, m)
+  exp_ux <- rep(s, times=c(2,3,3,3))
+  expect_equal(start(ux), unlist(exp_match_starts))
+  expect_equal(as.character(ux), exp_ux)
+})
+
+### All old tests
+## Helper functions ported from old tests
+randomDNASequences <- function(n, w)
+{
+  alphabet <- DNA_BASES
+  w <- rep(w, length=n)
+  sequences <- sapply(seq(1, n, length=n),
+                      function(x) {
+                        s <- sample(alphabet, w[x], replace=TRUE)
+                        s <- paste(s, collapse="")
+                        return(s)
+                      })
+  return(Biostrings::DNAStringSet(sequences))
+}
+
+msubseq <- function(x, ir)
+{
+  ## differs from subseq in the sense that several subsequences
+  ## from the same sequence are extracted
+  ## x:  XString
+  ## ir: IRanges
+  res <- vector("character", length = length(ir))
+  for (i in seq(along=res)) {
+    res[i] <- as.character(subseq(x, start=ir@start[i], width=width(ir)[i]))
+    ## forced cast: chain of tools for DNAString seems interupted for
+    ##              some use cases (or I missed something)
+  }
+  res <- DNAStringSet(res)
+  return(res)
+}
+
+test_that("matchPDict works for constant width lookup", {
+  set.seed(1)
+  l <- 150
+  dna_target <- randomDNASequences(1, l)[[1]]
+  W <- 20
+  L <- 6
+  ir <- successiveIRanges(rep(W, L), gapwidth = 1)
+  short_sequences <- msubseq(dna_target, ir)
+  # shuffle the sequences (so they are not in consecutive order)
+  o <- sample(seq(along=short_sequences))
+
+  dna_short <- DNAStringSet(short_sequences[o])
+  pdict <- PDict(dna_short)
+
+  res <- matchPDict(pdict, dna_target)
+
+  # mostly a sanity check
+  expect_equal(L, length(res))
+
+  for (i in seq(along=res)) {
+    m_start <- ir[o][i]@start
+    expect_equal(m_start, start(res[[i]]))
+    expect_equal(W, width(res[[i]]))
+    expect_equal(m_start + W - 1, end(res[[i]]))  # mostly a sanity check
+  }
+})
+
+test_that("matchPDict works for variable width lookup", {
+  set.seed(1)
+  l <- 150
+  dna_target <- randomDNASequences(1, l)[[1]]
+  W <- 20
+  L <- 6
+  n_cut <- sample(0:5, L, replace=TRUE)
+  ir <- successiveIRanges(rep(W, L) - n_cut, gapwidth = 1 + n_cut[-length(n_cut)])
+  short_sequences <- msubseq(dna_target, ir)
+  # shuffle the sequences (they are not in consecutive order)
+  o <- sample(seq(along=short_sequences))
+
+  dna_var_short <- DNAStringSet(short_sequences[o])
+
+  pdict <- PDict(dna_var_short,
+                 tb.start=1,                        # can't this be
+                 tb.width=min(width(dna_var_short)) # the default for
+                                                    # variable width ?
+                 )
+
+  res <- matchPDict(pdict, dna_target)
+
+  # mostly a sanity check
+  expect_equal(L, length(res))
+
+  iro <- ir[o]
+  for (i in seq(along=res)) {
+    expect_equal(start(iro)[i], start(res[[i]]))
+    expect_equal(width(iro)[i], width(res[[i]]))
+    expect_equal(end(iro)[i], end(res[[i]]))  # mostly a sanity check
+  }
+})
diff --git a/tests/testthat/test-matchPWM.R b/tests/testthat/test-matchPWM.R
new file mode 100644
index 00000000..2434f905
--- /dev/null
+++ b/tests/testthat/test-matchPWM.R
@@ -0,0 +1,24 @@
+## matchPWM.R exports the following:
+## - PWM
+## - matchPWM
+## - countPWM
+## - PWMscoreStartingAt
+## - maxWeights
+## - minWeights
+## - minScore
+## - unitScale
+## - reverseComplement
+
+## TODO: more tests, this is really just a first pass
+test_that("PWMs can be initialized", {
+	d <- DNAStringSet(c("AAAATTC",
+											"ATTAAAG"))
+
+	p <- PWM(d, type='prob')
+	expect_equal(dim(p), c(4L, width(d)[1]))
+	## have to use all.equal because of floating point error
+	expect_true(all.equal(colSums(p), rep(sum(p[,1L]), ncol(p))))
+	expect_true(all(p[c("C", "G"),-7] == 0))
+	expect_true(maxScore(p) == 1)
+	expect_true(minScore(p) == 0)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-matchPattern.R b/tests/testthat/test-matchPattern.R
new file mode 100644
index 00000000..ce6036b2
--- /dev/null
+++ b/tests/testthat/test-matchPattern.R
@@ -0,0 +1,120 @@
+## matchPattern.R exports the following:
+## - gregexpr2
+## - matchPattern generic
+## - countPattern generic
+## - vmatchPattern generic
+## - vcountPattern generic
+##
+## classes for generics: character, XString,
+## 	XStringSet, XStringViews, MaskedXString
+
+sss <- "ATGCATGCATGC"
+dna <- DNAString(sss)
+dss <- DNAStringSet(list(dna,dna))
+maskpos <- c(2L,9L)
+m1 <- Mask(length(dna), start=maskpos, end=maskpos+2L)
+mdna <- dna
+masks(mdna) <- m1
+vdna <- Views(dna, start=maskpos, end=maskpos+2L)
+# note that gregexpr and gregexpr2 are supported but not documented
+all_algos <- c("naive-exact", "naive-inexact", "boyer-moore", "shift-or", "indels")
+
+.test_match_output <- function(subject, pattern, algo, exp_match, ...){
+	m <- matchPattern(subject=subject, pattern=pattern, algorithm=algo, ...)
+	mc <- countPattern(subject=subject, pattern=pattern, algorithm=algo, ...)
+	res <- list(start=start(m), end=end(m), width=width(m), stype=seqtype(m))
+	expect_s4_class(m, "XStringViews")
+	expect_equal(res, exp_match)
+	expect_equal(mc, length(exp_match$start))
+}
+
+.test_vmatch_output <- function(subject, pattern, algo, exp_match, ...){
+	m <- vmatchPattern(subject=subject, pattern=pattern, algorithm=algo, ...)
+	mc <- vcountPattern(subject=subject, pattern=pattern, algorithm=algo, ...)
+	res <- lapply(m, \(x) list(start=start(x), end=end(x), width=width(x)))
+	expect_s4_class(m, "MIndex")
+	expect_equal(res, exp_match)
+	expect_equal(mc, vapply(m, \(x) length(start(x)), integer(1L)))
+}
+
+test_that("gregexpr2 correctly returns overlapping matches", {
+	## should find overlapping matches
+	expect_equal(unlist(gregexpr2("aa", c("XaaaYaa", "a"))), c(2L, 3L, 6L, -1L))
+
+	## only supports character vectors of length 1 for 'pattern'
+	expect_error(gregexpr2(c("aa", "aaa"), "a"), "invalid pattern")
+	expect_error(gregexpr2(NA_character_, "a"), "invalid pattern")
+	expect_error(gregexpr2(1, "a"), "invalid pattern")
+	expect_error(gregexpr2(character(0L), "a"), "invalid pattern")
+})
+
+test_that("matchPattern, countPattern work as expected", {
+	## exact matching
+	for(algo in all_algos[c(1:4)]){
+		.test_match_output(subject=sss, pattern="ATG", algo=algo,
+												exp_match=list(start=c(1,5,9), end=c(3,7,11), width=rep(3,3), stype="B"))
+		.test_match_output(subject=dna, pattern="ATG", algo=algo,
+												exp_match=list(start=c(1,5,9), end=c(3,7,11), width=rep(3,3), stype="DNA"))
+		.test_match_output(subject=mdna, pattern="ATG", algo=algo,
+												exp_match=list(start=5, end=7, width=3, stype="DNA"))
+		.test_match_output(subject=vdna, pattern="ATG", algo=algo,
+												exp_match=list(start=9, end=11, width=3, stype="DNA"))
+	}
+
+	## inexact matching
+	.test_match_output(subject=dna, pattern="ATC", algo="auto",
+		exp_match=list(start=c(1,2,5,6,9,10), end=c(3,4,7,8,11,12), width=rep(3,6), stype="DNA"),
+		max.mismatch=2)
+	.test_match_output(subject=dna, pattern="ATC", algo="auto",
+		exp_match=list(start=c(2,6,10), end=c(4,8,12), width=rep(3,3), stype="DNA"),
+		max.mismatch=2, min.mismatch=2)
+	.test_match_output(subject=dna, pattern="ATC", algo="auto",
+		exp_match=list(start=c(1,5,9), end=c(2,6,10), width=rep(2,3), stype="DNA"),
+		max.mismatch=1, with.indels=TRUE)
+
+	## fixed args
+	dna2 <- DNAString("ACGTNMRWSYKVHDB")
+	.test_match_output(subject=dna2, pattern="GTN", algo="auto",
+		exp_match=list(start=c(3), end=c(5), width=rep(3,1), stype="DNA"), fixed=TRUE)
+	.test_match_output(subject=dna2, pattern="GTN", algo="auto",
+		exp_match=list(start=c(3,7,9,12), end=c(5,9,11,14), width=rep(3,4), stype="DNA"), fixed=FALSE)
+
+	## sad paths
+	expect_error(matchPattern("ATG", dna, algorithm="indels"), "valid algos for your string matching problem")
+	expect_error(matchPattern("ATG", dss), "please use vmatchPattern")
+	expect_error(countPattern("ATG", dna, algorithm="indels"), "valid algos for your string matching problem")
+	expect_error(countPattern("ATG", dss), "please use vcountPattern")
+	expect_error(matchPattern("", dna), "empty patterns are not supported")
+	expect_error(matchPattern(1, dna), "'pattern' must be a single string or an XString object")
+	expect_error(matchPattern("test", '', algorithm="gregexpr"), "'subject' must be a single (non-empty) string", fixed=TRUE)
+	expect_error(matchPattern("test", '', algorithm="gregexpr2"), "'subject' must be a single (non-empty) string", fixed=TRUE)
+
+	## edge cases from matchPattern.R
+	## unsure if these are intended functionality, but at least we'll track changes to it
+	.test_match_output(subject=DNAString("ACGTGCA"), pattern="---", algo="auto",
+		exp_match=list(start=seq(-1,7), end=seq(1,9), width=rep(3,9), stype='DNA'), max.mismatch=3)
+	.test_match_output(subject=DNAString("A"), pattern="---", algo="auto",
+		exp_match=list(start=integer(0L), end=integer(0L), width=integer(0L), stype='DNA'))
+})
+
+test_that("vmatchPattern, vcountPattern work correctly", {
+		for(algo in all_algos[c(1:4)]){
+			.test_vmatch_output(subject=dss, pattern="ATG", algo=algo,
+													exp_match=lapply(seq_len(2), \(x) list(start=c(1,5,9), end=c(3,7,11), width=rep(3,3))))
+			.test_vmatch_output(subject=rep(sss,2L), pattern="ATG", algo=algo,
+													exp_match=lapply(seq_len(2), \(x) list(start=c(1,5,9), end=c(3,7,11), width=rep(3,3))))
+			# XStringViews objects aren't supported yet
+			# .test_vmatch_output(subject=vdna, pattern="ATG", algo=algo,
+			# 										exp_match=lapply(seq_len(2), \(x) list(start=c(1,5,9), end=c(3,7,11), width=rep(3,3))))
+		}
+
+		## sad path
+		expect_error(vmatchPattern("ATG", dna), "please use matchPattern")
+		expect_error(vmatchPattern("ATG", mdna), "please use matchPattern")
+		expect_error(vcountPattern("ATG", dna), "please use countPattern")
+		expect_error(vcountPattern("ATG", mdna), "please use countPattern")
+
+		## TODO: more sad path options
+		expect_equal(vcountPattern("TG", vdna), c(1L,1L))
+})
+
diff --git a/tests/testthat/test-miscXStringMethods.R b/tests/testthat/test-miscXStringMethods.R
new file mode 100644
index 00000000..9fc4cdf3
--- /dev/null
+++ b/tests/testthat/test-miscXStringMethods.R
@@ -0,0 +1,111 @@
+## Miscellaneous XString Methods
+## Currently includes:
+## - toComplex
+## - xscat
+## - strsplit, unstrsplit (for XStringSet, XStringSetList)
+## - N50
+
+test_that("toComplex conversion works correctly", {
+	seq <- DNAString("agtcagtcagtcagtc")
+  baseValues1 <- c(A=1+0i, G=0+1i, T=-1+0i, C=0-1i)
+  expect_equal(toComplex(seq, baseValues1), unname(rep(baseValues1, 4)))
+
+  ## GC content:
+  baseValues2 <- c(A=0, C=1, G=1, T=0)
+  expect_equal(sum(as.integer(toComplex(seq, baseValues2))), 8L)
+
+  expect_error(toComplex(seq, 1:4), "'baseValues' must have names")
+  expect_error(toComplex(seq, c(W=1,X=1,Y=1,Z=1)), "'baseValues' names must be valid DNA letters")
+})
+
+test_that("xscat concatenates all types correctly", {
+  s1 <- "ATGATG"
+  s2 <- "CCACCA"
+
+  ## character
+  expect_true(xscat(s1, s2) == BString(paste0(s1, s2)))
+  expect_error(xscat(s1, NA), "arguments must be character vectors (with no NAs)", fixed=TRUE)
+
+  ## XString
+  d1 <- DNAString(s1)
+  d2 <- DNAString(s2)
+  d3 <- DNAString(paste0(s1, s2))
+  d4 <- DNAString(paste0(s2, s1))
+  expect_true(xscat(d1, d2) == d3)
+
+  ## XStringSet
+  dss1 <- DNAStringSet(list(d1, d2))
+  dss2 <- DNAStringSet(list(d2, d1))
+  dss3 <- DNAStringSet(list(d3, d4))
+  expect_true(all(xscat(dss1, dss2) == dss3))
+
+  v1 <- Views(d3, start=c(1,7), width=3)
+  v2 <- Views(d3, start=c(4,10), width=3)
+  expect_true(all(xscat(v1, v2) == DNAStringSet(c(s1, s2))))
+})
+
+test_that("padAndClip, stackStrings have correct functionality", {
+  ## just replicating the stuff in the examples
+  ## can add more tests later if necessary, I don't see these used super often
+  x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="", seq4="XYZ"))
+
+  p1 <- padAndClip(x, IRanges(3, 8:5), Lpadding.letter=">", Rpadding.letter="<")
+  expect_equal(unname(as.character(p1)), c("CD<<<<", "cdefg", "<<<<", "Z<<"))
+
+  p2 <- padAndClip(x, IRanges(1:-2, 7), Lpadding.letter=">", Rpadding.letter="<")
+  expect_equal(unname(as.character(p2)), c("ABCD<<<", ">abcdefg", ">><<<<<<<", ">>>XYZ<<<<"))
+
+  expect_equal(width(stackStrings(x, 2, 8)), rep(7L, 4L))
+
+  exp_out <- c("###ABCD....", "ijk........", "#########..", "##########X")
+  s1 <- stackStrings(x, -2, 8, shift=c(0, -11, 6, 7), Lpadding.letter="#", Rpadding.letter=".")
+  expect_equal(width(s1), rep(11, 4L))
+  expect_equal(unname(as.character(s1)), exp_out)
+
+  s2 <- stackStrings(x, -2, 8, shift=c(0, -14, 6, 7),
+               Lpadding.letter="#", Rpadding.letter=".",
+               remove.out.of.view.strings=TRUE)
+  expect_equal(names(s2), paste0("seq", c(1,3,4)))
+  expect_equal(unname(as.character(s2)), exp_out[c(1,3,4)])
+})
+
+test_that("Biostrings strsplit, unstrsplit methods work correctly", {
+  ds <- "ATGCATGC"
+  d1 <- DNAStringSet(ds)
+  d2 <- DNAStringSet("ATCATC")
+  d_spl <- DNAStringSet(c("AT", "CAT", "C"))
+
+  expect_true(all(strsplit(d1, "G") == d_spl))
+
+  expect_true(unstrsplit(strsplit(d1, "G")) == d2)
+
+  ## round trip identity
+  expect_true(unstrsplit(strsplit(d1, "G"), "G") == d1)
+
+  ## unstrsplit on a stringset should be a noop
+  expect_identical(unstrsplit(d1), d1)
+})
+
+test_that("N50 has correct behavior", {
+  set.seed(6L)
+  n_seq <- 20L
+  possible_len <- seq(100,10000)
+  lens <- sample(possible_len, n_seq)
+  many_seqs <- DNAStringSet(
+    vapply(seq_len(n_seq), \(i){
+      paste(sample(DNA_BASES, lens[i], replace=TRUE), collapse='')
+    }, character(1L))
+  )
+
+  ## N50 is calculated by:
+  ##  1. add sizes of contigs until half the total size is reached
+  all_widths <- sort(width(many_seqs), decreasing=TRUE)
+  total_width <- sum(all_widths)
+  cs <- cumsum(all_widths)
+  pos_first <- which.max(cs > total_width/2)
+
+  ##  2. take the size of the contig that was added last
+  N50_exp <- all_widths[pos_first]
+
+  expect_equal(N50(width(many_seqs)), N50_exp)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-miscellaneous.R b/tests/testthat/test-miscellaneous.R
new file mode 100644
index 00000000..f8b9dff5
--- /dev/null
+++ b/tests/testthat/test-miscellaneous.R
@@ -0,0 +1,46 @@
+## Miscellaneous tests
+## these are all relatively low priority and/or for files with only a few things
+## some tests are just for internal functions
+
+test_that("coloring works for DNA, RNA, and AA", {
+	## not a super important test
+	dna_rna_expected <- c(DNA_BASES, "U", DNA_ALPHABET[-c(1:4,16:18)])
+	expect_true(!any(duplicated(Biostrings:::make_DNA_AND_RNA_COLORED_LETTERS())))
+	expect_equal(sort(names(Biostrings:::make_DNA_AND_RNA_COLORED_LETTERS())), sort(dna_rna_expected))
+
+	aa_expected <- AA_ALPHABET[-c(27:30)]
+	expect_true(!any(duplicated(Biostrings:::make_AA_COLORED_LETTERS())))
+	expect_equal(sort(names(Biostrings:::make_AA_COLORED_LETTERS())), sort(aa_expected))
+})
+
+test_that("utils functions work as they should", {
+	expect_true(Biostrings:::isNumericOrNAs(NA_character_))
+	expect_true(Biostrings:::isNumericOrNAs(NA_real_))
+	expect_true(Biostrings:::isNumericOrNAs(1))
+	expect_true(Biostrings:::isNumericOrNAs(1L))
+	expect_true(Biostrings:::isNumericOrNAs(1:2))
+	expect_false(Biostrings:::isNumericOrNAs(NULL))
+	expect_false(Biostrings:::isNumericOrNAs('a'))
+
+	expect_error(Biostrings:::pow.int('a', 1), "must be a numeric vector")
+	expect_identical(Biostrings:::pow.int(3,5), as.integer(3**5))
+
+	expect_error(Biostrings:::normargUseNames(NA), "must be TRUE or FALSE")
+	expect_true(Biostrings:::normargUseNames(NULL))
+	expect_true(Biostrings:::normargUseNames(TRUE))
+	expect_false(Biostrings:::normargUseNames(FALSE))
+})
+
+test_that("longestConsecutive still functions", {
+	## adapted from the examples in the man page
+	v <- c("AAACTGTGFG", "GGGAATT", "CCAAAAAAAAAATT")
+	expect_equal(longestConsecutive(v, "A"), c(3L, 2L, 10L))
+	expect_equal(longestConsecutive(v, "C"), c(1L, 0L, 2L))
+	expect_equal(longestConsecutive(v, "C"), c(1L, 0L, 2L))
+	expect_error(longestConsecutive(v, NA), "'letter' must be a character variable")
+	expect_error(longestConsecutive(NA, "A"), "'x' must be a string")
+})
+
+test_that("matchprobes is deprecated", {
+	expect_warning(matchprobes("A","A"), "matchprobes() is deprecated.", fixed=TRUE)
+})
diff --git a/tests/testthat/test-replaceAt.R b/tests/testthat/test-replaceAt.R
new file mode 100644
index 00000000..39634e43
--- /dev/null
+++ b/tests/testthat/test-replaceAt.R
@@ -0,0 +1,192 @@
+## testing for both replaceAt.R and replaceLetterAt.R
+##
+## these export the following:
+## - extractAt
+## - replaceAt
+## - replaceLetterAt
+## - (.inplaceReplaceLetterAt)
+## These functions work like subseq(<-) but without copying data
+## replaceLetterAt will make a copy of the sequence.
+## .inplaceReplaceLetterAt will not, but its use is discouraged.
+## note that replaceLetterAt is only defined for character, DNAString, DNAStringSet
+
+test_that("replaceLetterAt has expected behavior", {
+	str <- "AGAGAGAGAGAG"
+	d <- DNAString(str)
+	dss <- DNAStringSet(list(d,d))
+	pos_replace <- c(1,3,5,7,8)
+	mat_replace <- matrix(rep(FALSE, length(d)*2), nrow=2L, byrow=TRUE)
+	mat_replace[,pos_replace] <- TRUE
+	r_str <- "CGCGCGCCAGAG"
+	replace_dss <- DNAStringSet(rep(paste(rep("C", length(pos_replace)), collapse=''), 2))
+	#expect_equal(replaceLetterAt(str, pos_replace, "C"), replaced_str)
+	expect_equal(replaceLetterAt(d, pos_replace, rep("C", length(pos_replace))), DNAString(r_str))
+	expect_equal(replaceLetterAt(d, mat_replace[1,], rep("C", length(pos_replace))), DNAString(r_str))
+	expect_equal(replaceLetterAt(dss, mat_replace, replace_dss), DNAStringSet(c(r_str,r_str)))
+
+	d2 <- DNAString("RMWSYK")
+	expect_equal(replaceLetterAt(d2, 1:6, rep(c("A","M"), each=3)), DNAString("AAAMMM"))
+	expect_equal(replaceLetterAt(d2, 1:6, rep(c("A","M"), each=3), if.not.extending="merge"), DNAString("RMWVHN"))
+	expect_equal(replaceLetterAt(d2, 1:6, rep(c("A","M"), each=3), if.not.extending="skip"), d2)
+	expect_error(replaceLetterAt(d2, 1:6, rep(c("A","M"), each=3), if.not.extending="error"), "does not extend old letter")
+})
+
+## old tests
+
+### From an XStringSet object
+### =========================
+
+### Only compare the classes, the shapes (i.e. lengths + elementNROWS +
+### names), the inner names, and the sequence contents. Doesn't look at
+### the metadata or the metadata columns (outer or inner).
+identical_XStringSetList <- function(target, current){
+    ok1 <- identical(class(target), class(current))
+    ok2 <- identical(elementNROWS(target), elementNROWS(current))
+    unlisted_target <- unlist(target, use.names=FALSE)
+    unlisted_current <- unlist(current, use.names=FALSE)
+    ok3 <- identical(names(unlisted_target), names(unlisted_current))
+    ok4 <- all(unlisted_target == unlisted_current)
+    ok1 && ok2 && ok3 && ok4
+}
+
+### Only compare the classes, lengths, names, and sequence contents.
+### Doesn't look at the metadata or the metadata columns.
+identical_XStringSet <- function(target, current){
+    ok1 <- identical(class(target), class(current))
+    ok2 <- identical(names(target), names(current))
+    ok3 <- all(target == current)
+    ok1 && ok2 && ok3
+}
+
+# randomDisjointRanges <- function(min_start, max_end, min_width, max_width, n){
+#     offset <- min_start - 1L
+#     n1 <- n * (1.1 + max_width/(max_end-offset+1L))  # very approximative
+
+#     some_starts <- sample(max_end-offset+1L, n1, replace=TRUE) + offset
+#     some_widths <- sample(min_width:max_width, n1, replace=TRUE)
+#     some_ranges <- IRanges(some_starts, width=some_widths)
+#     some_ranges <- some_ranges[end(some_ranges) <= max_end]
+#     ans <- disjoin(some_ranges)
+#     if (min_width == 0L) {
+#         is_empty <- width(some_ranges) == 0L
+#         some_empty_ranges <- some_ranges[is_empty]
+#         ans <- sample(c(ans, some_empty_ranges))
+#     }
+#     if (length(ans) < n)
+#         stop("internal error, sorry")
+#     head(ans, n=n)
+# }
+
+
+test_that("old extractAt tests still work", {
+	x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="XYZ"))
+	at <- IRangesList(IRanges(c(2, 1), c(3, 0)),
+	                  IRanges(c(7, 2, 12, 7), c(6, 5, 11, 8)),
+	                  IRanges(2, 2))
+	### Set inner names on 'at'.
+	unlisted_at <- unlist(at)
+	names(unlisted_at) <- paste0("rg", sprintf("%02d", seq_along(unlisted_at)))
+	at <- relist(unlisted_at, at)
+
+	res1a <- extractAt(x, at)
+	res1b <- BStringSetList(mapply(extractAt, x, at))
+	expect_true(identical_XStringSetList(res1a, res1b))
+
+	res2a <- extractAt(x, at[3])
+	res2b <- BStringSetList(mapply(extractAt, x, at[3]))
+	expect_true(identical_XStringSetList(res2a, res2b))
+	res2c <- extractAt(x, at[[3]])
+	expect_true(identical_XStringSetList(res2a, res2c))
+
+	## performance testing is cut, this is too time consuming
+	### Testing performance with 2 millions small extractions at random
+	### locations in Fly upstream sequences:
+	# dm3_upstream_filepath <- system.file("extdata",
+	#                                      "dm3_upstream2000.fa.gz",
+	#                                      package="Biostrings")
+	# dm3_upstream <- readDNAStringSet(dm3_upstream_filepath)
+	# dm3_upstream <- dm3_upstream[width(dm3_upstream) == 2000L]
+
+	# set.seed(33)
+	# ## cut this down significantly from 2,000,000
+	# ## performance is not important for unit testing
+	# NE <- 2000000L  # total number of extractions
+
+	# sample_size <- NE * 1.1
+	# some_ranges <- IRanges(sample(2001L, sample_size, replace=TRUE),
+	#                        width=sample(0:75, sample_size, replace=TRUE))
+	# some_ranges <- head(some_ranges[end(some_ranges) <= 2000L], n=NE)
+	# split_factor <- factor(sample(length(dm3_upstream), NE, replace=TRUE),
+	#                        levels=seq_along(dm3_upstream))
+	# at <- unname(split(some_ranges, split_factor))
+
+	# ### Takes < 1s on my machine.
+	# res3a <- extractAt(dm3_upstream, at)
+
+	### Equivalent but about 250x slower than the above on my machine.
+	#system.time(res3b <- DNAStringSetList(mapply(extractAt, dm3_upstream, at)))
+	#expect_true(identical_XStringSetList(res3a, res3b))
+})
+
+test_that("old replaceAt tests still work", {
+	### On an XString object
+	### ====================
+
+	### Testing performance with half-million small substitutions at random
+	### locations in Celegans chrI:
+	# library(BSgenome.Celegans.UCSC.ce2)
+	# genome <- BSgenome.Celegans.UCSC.ce2
+	# chrI <- genome$chrI
+	# at4 <- randomDisjointRanges(1L, nchar(chrI), 0L, 20L, 500000L)
+	# ### Takes < 1s on my machine (64-bit Ubuntu).
+	# system.time(current <- replaceAt(chrI, at4, Views(chrI, at4)))
+	# stopifnot(current == chrI)
+
+	# ### Testing performance with half-million single-letter insertions at random
+	# ### locations in Celegans chrI:
+	# at5 <- randomDisjointRanges(1L, nchar(chrI), 0L, 0L, 500000L)
+	# ### Takes < 1s on my machine (64-bit Ubuntu).
+	# system.time(current <- replaceAt(chrI, at5, value="-"))
+	# m <- matchPattern("-", current)
+	# stopifnot(identical(sort(start(at5)), start(m) - seq_along(at5) + 1L))
+
+	# system.time(current2 <- replaceAt(chrI, start(at5), value="-"))
+	# stopifnot(identical(current, current2))
+
+	# matchPattern("---", current)
+
+	### On an XStringSet object
+	### =======================
+
+	x <- BStringSet(c(seq1="ABCD", seq2="abcdefghijk", seq3="XYZ"))
+	at <- IRangesList(IRanges(c(2, 1), c(3, 0)),
+	                  IRanges(c(7, 2, 12, 7), c(6, 5, 11, 8)),
+	                  IRanges(2, 2))
+	### Set inner names on 'at'.
+	unlisted_at <- unlist(at)
+	names(unlisted_at) <- paste0("rg", sprintf("%02d", seq_along(unlisted_at)))
+	at <- relist(unlisted_at, at)
+
+	current <- replaceAt(x, at, value=extractAt(x, at))  # no-op
+	expect_true(identical_XStringSet(x, current))
+
+	res1a <- replaceAt(x, at)  # deletions
+	res1b <- mendoapply(replaceAt, x, at)
+	expect_true(identical_XStringSet(res1a, res1b))
+
+	## cutting this as well
+	### Testing performance with half-million single-letter insertions at random
+	### locations in Fly upstream sequences:
+	##
+	# set.seed(33)
+	# split_factor <- factor(sample(length(dm3_upstream), 500000L, replace=TRUE),
+	#                        levels=seq_along(dm3_upstream))
+	# at5 <- unname(split(sample(2001L, 500000L, replace=TRUE),
+	#                     split_factor))
+	# ### Takes < 1s on my machine.
+	# system.time(res5a <- replaceAt(dm3_upstream, at5, value="-"))
+	# ### Equivalent but about 1400x slower than the above on my machine.
+	# system.time(res5b <- mendoapply(replaceAt,
+	#                                 dm3_upstream, as(at5, "List"), as("-", "List")))
+	# stopifnot(identical_XStringSet(res5a, res5b))
+})
\ No newline at end of file
diff --git a/tests/testthat/test-translate.R b/tests/testthat/test-translate.R
new file mode 100644
index 00000000..293d2462
--- /dev/null
+++ b/tests/testthat/test-translate.R
@@ -0,0 +1,125 @@
+## translate.R exports the following:
+## - translate (DNAString, RNAString, DNAStringSet, RNAStringSet,
+##								MaskedDNAString, MaskedRNAString, Default)
+## - codons (DNAString, RNAString, MaskedDNAString, MaskedRNAString, Default)
+
+d <- DNAString(paste(names(GENETIC_CODE), collapse=''))
+r <- as(d, "RNAString")
+a <- AAString(paste(GENETIC_CODE, collapse=''))
+
+test_that("translate works properly for all dna,rna", {
+	## basic conversion
+	expect_identical(translate(d), a)
+	expect_identical(translate(r), a)
+
+	dd <- DNAStringSet(list(d,d))
+	rr <- RNAStringSet(list(r,r))
+	aa <- AAStringSet(list(a,a))
+	expect_identical(translate(dd) == aa, c(TRUE, TRUE))
+	expect_identical(translate(rr) == aa, c(TRUE, TRUE))
+
+	## errors
+	expect_error(translate(BString()), "unable to find an inherited method")
+	expect_error(translate(AAString()), "unable to find an inherited method")
+	expect_error(translate("ATG"), "unable to find an inherited method")
+
+	alt_gen_code <- GENETIC_CODE
+	alt_gen_code[] <- rep(c("A", "B"), each=32L)
+	alt_a <- AAString(paste(rep(c("A", "B"), each=32L), collapse=''))
+	alt_aa <- AAStringSet(list(alt_a, alt_a))
+	expect_identical(translate(d, genetic.code=alt_gen_code), alt_a)
+	expect_identical(translate(r, genetic.code=alt_gen_code), alt_a)
+	expect_identical(translate(dd, genetic.code=alt_gen_code) == alt_aa, c(TRUE, TRUE))
+	expect_identical(translate(rr, genetic.code=alt_gen_code) == alt_aa, c(TRUE, TRUE))
+
+	## fault paths
+	expect_error(translate(d, genetic.code=character(0L)),
+		"'genetic.code' must have the same names as predefined constant GENETIC_CODE", fixed=TRUE)
+	expect_error(translate(d, genetic.code=GENETIC_CODE[seq_len(10L)]),
+		"'genetic.code' must have the same names as predefined constant GENETIC_CODE", fixed=TRUE)
+	alt_gen_code[] <- ''
+	expect_error(translate(d, genetic.code=alt_gen_code),
+		"'genetic.code' must contain 1-letter strings", fixed=TRUE)
+
+	## this is promoted to an error with this test case
+	alt_gen_code[] <- '/'
+	expect_error(translate(d, genetic.code=alt_gen_code),
+		"some codons in 'genetic.code' are mapped to letters not in the Amino Acid\n  alphabet", fixed=TRUE)
+
+	## alt_init_codon args
+	## error message could be more informative here
+	error_msg <- "'genetic.code' must have an \"alt_init_codons\" attribute"
+	alt_gen_code <- GENETIC_CODE
+	attr(alt_gen_code, "alt_init_codons")[] <- c("BLAH", "NOTANAMINOACID")
+	expect_error(translate(d, genetic.code=alt_gen_code), error_msg, fixed=TRUE)
+	attr(alt_gen_code, "alt_init_codons")[] <- c("AAA", "AAA")
+	expect_error(translate(d, genetic.code=alt_gen_code), error_msg, fixed=TRUE)
+	attr(alt_gen_code, "alt_init_codons")[] <- c(NA_character_, "AAA")
+	expect_error(translate(d, genetic.code=alt_gen_code), error_msg, fixed=TRUE)
+	attr(alt_gen_code, "alt_init_codons")[] <- c(1,2)
+	expect_error(translate(d, genetic.code=alt_gen_code), error_msg, fixed=TRUE)
+	attr(alt_gen_code, "alt_init_codons") <- NULL
+	expect_error(translate(d, genetic.code=alt_gen_code), error_msg, fixed=TRUE)
+
+	## alternate init codons
+	dna1 <- DNAString("TTGATATGGCCCTTATAA")
+	expect_identical(translate(dna1), AAString("MIWPL*"))
+	expect_identical(translate(dna1, no.init.codon=TRUE), AAString("LIWPL*"))
+	rna1 <- as(dna1, "RNAString")
+	expect_identical(translate(rna1), AAString("MIWPL*"))
+	expect_identical(translate(rna1, no.init.codon=TRUE), AAString("LIWPL*"))
+
+	## fuzzy codons
+	dna1 <- DNAString("TTYTTTTTC")
+	expect_error(translate(dna1), "not a base at pos 3")
+	expect_identical(translate(dna1, if.fuzzy.codon='solve'), AAString("FFF"))
+	expect_identical(translate(dna1, if.fuzzy.codon='X'), AAString("XFF"))
+	expect_identical(translate(dna1, if.fuzzy.codon='error.if.X'), AAString("XFF"))
+
+	dna2 <- DNAString("TTNTTYTTC")
+	expect_identical(translate(dna2, if.fuzzy.codon='solve'), AAString("XFF"))
+	expect_identical(translate(dna2, if.fuzzy.codon='X'), AAString("XXF"))
+	expect_error(translate(dna2, if.fuzzy.codon='error.if.X'), "ambiguous fuzzy codon starting at pos 1")
+
+	expect_error(translate(dna2, if.fuzzy.codon='blahblahblaherror'), '\'arg\' should be one of ', fixed=TRUE)
+})
+
+test_that("translate, codons work properly for masked strings and string sets", {
+	mask0 <- Mask(mask.width=nchar(d), start=c(4,10), width=3)
+	mask1 <- Mask(mask.width=nchar(d), start=c(4,10), width=4)
+	mask2 <- Mask(mask.width=nchar(d), start=1, width=5)
+
+	dm <- d
+	rm <- r
+	masks(dm) <- masks(rm) <- mask0
+
+	m_a <- AAString(paste(GENETIC_CODE[-c(2,4)], collapse=''))
+	expect_identical(translate(dm), m_a)
+	expect_identical(translate(rm), m_a)
+
+	codon_views_d <- codons(dm)
+	codon_views_r <- codons(rm)
+	svec <- c(1L, 7L, seq(13L,nchar(d),3L))
+	expect_identical(start(codon_views_d), svec)
+	expect_identical(start(codon_views_r), svec)
+	expect_identical(end(codon_views_d), svec+2L)
+	expect_identical(end(codon_views_r), svec+2L)
+
+	masks(d) <- mask1
+	expect_warning(translate(d), "last base was ignored")
+
+})
+
+test_that("codon function works properly", {
+	expect_identical(codons(d), Views(d, start=seq(1,nchar(d),3), width=3))
+	expect_identical(codons(r), Views(r, start=seq(1,nchar(d),3), width=3))
+
+	expect_warning(codons(DNAString("AT")), "the number of nucleotides in 'x' is not a multiple of 3")
+	expect_warning(codons(DNAString("ATATA")), "the number of nucleotides in 'x' is not a multiple of 3")
+	expect_warning(codons(RNAString("AU")), "the number of nucleotides in 'x' is not a multiple of 3")
+	expect_warning(codons(RNAString("AUAUA")), "the number of nucleotides in 'x' is not a multiple of 3")
+
+	expect_error(codons(AAString()), "unable to find an inherited method for function")
+	expect_error(codons(BString()), "unable to find an inherited method for function")
+	expect_error(codons("ATG"), "unable to find an inherited method for function")
+})
\ No newline at end of file
diff --git a/tests/testthat/test-trimLRPatterns.R b/tests/testthat/test-trimLRPatterns.R
new file mode 100644
index 00000000..e6374291
--- /dev/null
+++ b/tests/testthat/test-trimLRPatterns.R
@@ -0,0 +1,62 @@
+## trimLRPatterns.R exports trimLRPatterns for the following classes:
+## - character
+## - XString
+## - XStringSet
+
+test_that("trimLRPatterns works correctly on supported input types", {
+	## tests based on the examples in the docs
+	L <- "TTCTGCTTG"
+  R <- "GATCGGAAG"
+  DChr <- "TTCTGCTTGACGTGATCGGA"
+  DStr <- DNAString(DChr)
+  DSet <- DNAStringSet(c(DChr, DChr))
+
+  exp1 <- "ACGTGATCGGA"
+  exp2 <- "TTCTGCTTGACGT"
+  exp3 <- "ACGT"
+
+  DSet2 <- DNAStringSet(c("TGCTTGACGGCAGATCGG", "TTCTGCTTGGATCGGAAG"))
+
+  ## Perfect matches on the flanks
+  ### left match
+  expect_true(trimLRPatterns(Lpattern = L, subject = DChr) == exp1)
+  expect_true(trimLRPatterns(Lpattern = L, subject = DStr) == DNAString(exp1))
+  expect_true(all(trimLRPatterns(Lpattern = L, subject = DSet) == DNAStringSet(rep(exp1, 2L))))
+
+  ### right match
+  expect_true(trimLRPatterns(Rpattern = R, subject = DChr) == exp2)
+  expect_true(trimLRPatterns(Rpattern = R, subject = DStr) == DNAString(exp2))
+  expect_true(all(trimLRPatterns(Rpattern = R, subject = DSet) == DNAStringSet(rep(exp2, 2L))))
+
+  ### both match, perfect matches on flanking overlaps
+  expect_true(trimLRPatterns(Lpattern=L, Rpattern = R, subject = DChr) == exp3)
+  expect_true(trimLRPatterns(Lpattern=L, Rpattern = R, subject = DStr) == DNAString(exp3))
+  expect_true(all(trimLRPatterns(Lpattern=L, Rpattern = R, subject = DSet) == DNAStringSet(rep(exp3, 2L))))
+
+  ## Mismatches on the flanks
+
+  ## Allow for mismatches on the flanks
+  t1 <- trimLRPatterns(Lpattern = L, Rpattern = R, subject = DSet2,
+  								max.Lmismatch = 0.2, max.Rmismatch = 0.2)
+  expect_true(all(t1 == DNAStringSet(c("ACGGCA", ""))))
+
+  maxMismatches <- as.integer(0.2 * 1:9)
+  maxMismatches
+  t2 <- trimLRPatterns(Lpattern = L, Rpattern = R, subject = DSet2,
+        					max.Lmismatch = maxMismatches, max.Rmismatch = maxMismatches)
+  ## This should have a better example
+  expect_true(all(t1 == t2))
+
+  t3 <- trimLRPatterns(Lpattern = L, Rpattern = R, subject = DSet2[1],
+                 max.Lmismatch = maxMismatches, max.Rmismatch = maxMismatches,
+                 with.Rindels=TRUE)
+  expect_true(t3 == DNAStringSet("ACGGC"))
+  t4 <- trimLRPatterns(Lpattern = L, Rpattern = R, subject = DSet2[1],
+                 max.Lmismatch = maxMismatches, max.Rmismatch = maxMismatches,
+                 with.Rindels=TRUE, with.Lindels=TRUE)
+  expect_true(t4 == DNAStringSet("CGGC"))
+
+
+  ## Produce ranges that can be an input into other functions
+  expect_s4_class(trimLRPatterns(Lpattern=L, Rpattern=R, subject=DSet2, ranges=TRUE), "IRanges")
+})
\ No newline at end of file

From f292546c438dbac1caff67fc2b4be287630afeb6 Mon Sep 17 00:00:00 2001
From: Aidan Lakshman <ahl27@pitt.edu>
Date: Thu, 17 Oct 2024 10:53:31 -0400
Subject: [PATCH 3/5] fix: Issue #101 (#111)

* fix: fixes bug with 0-length input to consensusMatrix
* adds some unit tests
---
 R/letterFrequency.R                   | 12 +++++++---
 tests/testthat/test-letterFrequency.R | 34 +++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/R/letterFrequency.R b/R/letterFrequency.R
index 39d754e8..5aa6dd07 100644
--- a/R/letterFrequency.R
+++ b/R/letterFrequency.R
@@ -794,9 +794,15 @@ setMethod("consensusMatrix", "XStringSet",
         } else {
             removeUnused <- FALSE
         }
-        ans <- .Call2("XStringSet_consensus_matrix",
-                     x, shift, width, baseOnly, codes,
-                     PACKAGE="Biostrings")
+        if(length(x) == 0){
+            ans <- matrix(ifelse(as.prob, 0, 0L), nrow=0,
+                          ncol=ifelse(removeUnused, 0, length(codes)))
+            if(!removeUnused) colnames(ans) <- names(codes)
+        } else {
+            ans <- .Call2("XStringSet_consensus_matrix",
+                         x, shift, width, baseOnly, codes,
+                         PACKAGE="Biostrings")
+        }
         if (removeUnused) {
             ans <- ans[rowSums(ans) > 0, , drop=FALSE]
         }
diff --git a/tests/testthat/test-letterFrequency.R b/tests/testthat/test-letterFrequency.R
index 2b8dedd5..ce2ce655 100644
--- a/tests/testthat/test-letterFrequency.R
+++ b/tests/testthat/test-letterFrequency.R
@@ -262,4 +262,38 @@ test_that("nucleotideFrequency methods work properly", {
 	## disallowed types
 	expect_error(oligonucleotideFrequency(AAString(), "must contain sequences of type DNA or RNA"))
 	expect_error(oligonucleotideFrequency(BStringSet(), "must contain sequences of type DNA or RNA"))
+})
+
+test_that("zero-length input doesn't cause errors in consensusMatrix, consensusString", {
+	## verify previous functionality
+	exp_output <- matrix(0L, nrow=5, ncol=4)
+	diag(exp_output) <- 1L
+	rownames(exp_output) <- c(DNA_BASES, "other")
+	expect_identical(consensusMatrix(DNAStringSet("ACGT"), baseOnly=TRUE), exp_output)
+	expect_identical(consensusString(DNAStringSet(c("ATGC", "ATTC"))), "ATKC")
+
+	## DNA_ALPHABET is a subset of AA_ALPHABET
+	all_letters <- do.call(union, list(AA_ALPHABET, RNA_ALPHABET))
+	exp_zero_output <- matrix(nrow=0, ncol=length(all_letters))
+	colnames(exp_zero_output) <- all_letters
+
+	## having issues testing for matrix equality manually
+	expect_equal_matrix <- function(m1, m2){
+		expect_identical(nrow(m1), nrow(m2))
+		expect_identical(colnames(m1), colnames(m2))
+	}
+	expect_equal_matrix(consensusMatrix(DNAStringSet()), exp_zero_output[,DNA_ALPHABET])
+	expect_equal_matrix(consensusMatrix(RNAStringSet()), exp_zero_output[,RNA_ALPHABET])
+	expect_equal_matrix(consensusMatrix( AAStringSet()), exp_zero_output[, AA_ALPHABET])
+	expect_equal_matrix(consensusMatrix(  BStringSet()), exp_zero_output[,NULL])
+
+	## zero length input for consensusString
+	expect_identical(consensusString(DNAStringSet()), character(0L))
+	expect_identical(consensusString(RNAStringSet()), character(0L))
+	expect_identical(consensusString(AAStringSet()), character(0L))
+	expect_identical(consensusString(BStringSet()), character(0L))
+	expect_identical(consensusString(character(0L)), character(0L))
+
+	expect_true(is.integer(consensusMatrix(DNAStringSet(), as.prob=FALSE)))
+	expect_false(is.integer(consensusMatrix(DNAStringSet(), as.prob=TRUE)))
 })
\ No newline at end of file

From 13ae854011ce41cad0f5e499ebbde55631cc39b5 Mon Sep 17 00:00:00 2001
From: Aidan Lakshman <ahl27@pitt.edu>
Date: Thu, 17 Oct 2024 17:27:39 -0400
Subject: [PATCH 4/5] fix: Issue #106 (#110)

* fix: clear qualities from mcol to remove needless warning
---
 R/QualityScaledXStringSet.R | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/QualityScaledXStringSet.R b/R/QualityScaledXStringSet.R
index 45b3724d..2ef886ea 100644
--- a/R/QualityScaledXStringSet.R
+++ b/R/QualityScaledXStringSet.R
@@ -186,6 +186,13 @@ readQualityScaledDNAStringSet <- function(filepath,
                           nrec, skip, seek.first.rec,
                           use.names, with.qualities=TRUE)
     qualities <- mcols(x)[ , "qualities"]
+    ## Clear out the qualities parameter from the DNAStringSet,
+    ## since it gets passed into the QualityScaledDNAStringSet
+    ## constructor via quals anyway
+    ## (otherwise we get a warning that doesn't make sense)
+    mcols(x)[,"qualities"] <- NULL
+    if(ncol(mcols(x)) == 0)
+        mcols(x) <- NULL
     quals <- switch(quality.scoring,
                     phred=PhredQuality(qualities),
                     solexa=SolexaQuality(qualities),
@@ -201,4 +208,3 @@ writeQualityScaledXStringSet <- function(x, filepath,
     writeXStringSet(x, filepath, append, compress, compression_level,
                        format="fastq", qualities=quality(x))
 }
-

From 80bb49c9bdc15175c6721be77ce929d12f3cc7ba Mon Sep 17 00:00:00 2001
From: Aidan Lakshman <ahl27@pitt.edu>
Date: Fri, 18 Oct 2024 11:03:36 -0400
Subject: [PATCH 5/5] fix: remove expected warning in test

---
 tests/testthat/test-XStringSet-io.R | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tests/testthat/test-XStringSet-io.R b/tests/testthat/test-XStringSet-io.R
index ae4fcf63..484e3c21 100644
--- a/tests/testthat/test-XStringSet-io.R
+++ b/tests/testthat/test-XStringSet-io.R
@@ -144,20 +144,16 @@ test_that("XStringSet read/write is correct", {
 
   names(x1) <- names_dna
   writeXStringSet(x1, format="fastq", file=tmpfilefq)
-  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
-  	"metadata columns on input DNAStringSet object were dropped")
-
+  qdna2 <- readQualityScaledDNAStringSet(tmpfilefq)
   expect_true(!all(quality(qdna2) == quality(qdna1)))
   expect_true(all(all(as(quality(qdna2),"IntegerList") == 26L)))
 
   writeXStringSet(x1, format="fastq", file=tmpfilefq, qualities = q1)
-  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
-  	"metadata columns on input DNAStringSet object were dropped")
+  qdna2 <- readQualityScaledDNAStringSet(tmpfilefq)
   expect_true(all(quality(qdna2) == quality(qdna1)))
 
   writeXStringSet(qdna1, format="fastq", file=tmpfilefq)
-  expect_warning(qdna2 <- readQualityScaledDNAStringSet(tmpfilefq),
-  	"metadata columns on input DNAStringSet object were dropped")
+  qdna2 <- readQualityScaledDNAStringSet(tmpfilefq)
   expect_true(all(names(qdna2) == names_dna))
   expect_true(all(quality(qdna2) == quality(qdna1)))