diff --git a/.gitignore b/.gitignore
index 9e6ac64..0756e95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,5 @@ inst/doc
 Gac
 jackal_*.tgz
 *.pdf
-methods
 _crash_test.R
 !inst/art_profiles/*.txt.gz
diff --git a/DESCRIPTION b/DESCRIPTION
index 5c73ee8..85197fb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -9,8 +9,7 @@ Description: `jackal` efficiently (i) reads and simulates reference genomes;
     (ii) generates variants using summary statistics, phylogenies, Variant
     Call Format (VCF) files, and coalescent simulations—the latter of which can include
     selection, recombination, and demographic fluctuations;
-    (iii) simulates sequencing error, mapping qualities, restriction-enzyme digestion, 
-    and variance in coverage among sites; and
+    (iii) simulates sequencing error, mapping qualities, and optical/PCR duplicates; and
     (iv) writes outputs to standard file formats.
     `jackal` can simulate single, paired-end, or mate-pair Illumina reads, as well as
     reads from Pacific BioSciences.
diff --git a/NAMESPACE b/NAMESPACE
index b4abcd4..6f67973 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,7 +2,6 @@
 
 export(create_genome)
 export(create_variants)
-export(digest)
 export(illumina)
 export(make_mevo)
 export(mevo)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index bae1749..6844a56 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -94,81 +94,6 @@ rando_seqs <- function(n_seqs, len_mean, len_sd = 0, pi_tcag = numeric(0), n_cor
     .Call(`_jackal_rando_seqs`, n_seqs, len_mean, len_sd, pi_tcag, n_cores)
 }
 
-#' Calculate how many bases come before a cleavage site.
-#'
-#'
-#' @noRd
-#'
-get_precleavage_lens <- function(seqs) {
-    .Call(`_jackal_get_precleavage_lens`, seqs)
-}
-
-#' Expand sequences for reverse complements and for non-specific nucleobases.
-#'
-#'
-#' @noRd
-#'
-expand_seqs <- function(seqs) {
-    .Call(`_jackal_expand_seqs`, seqs)
-}
-
-#' Internal C++ function to digest all sequences for all variants in a variant set.
-#'
-#'
-#'
-#' @param var_set_ptr An external pointer to a C++ \code{VarSet} object
-#'     representing variants from the reference genome.
-#' @inheritParams bind_sites digest_ref
-#' @inheritParams len5s digest_ref
-#' @param chunk_size The size of chunks to divide sequences into when digesting.
-#' @param n_cores The number of cores to use for processing. Defaults to \code{1}.
-#'
-#' @return A list of lists, each sub-list containing multiple vectors representing
-#'     the locations of cut sites for a given variant on a given sequence.
-#'     Indexing the output list would be done as such:
-#'     \code{output_list[[variant_index]][[sequence_index]][position_index]}.
-#'
-#' @noRd
-#'
-digest_var_set <- function(var_set_ptr, bind_sites, len5s, chunk_size, n_cores = 1L) {
-    .Call(`_jackal_digest_var_set`, var_set_ptr, bind_sites, len5s, chunk_size, n_cores)
-}
-
-#' Internal C++ function to digest all sequences in a reference genome.
-#'
-#'
-#'
-#' @param ref_genome_ptr An external pointer to a C++ \code{RefGenome} object
-#'     representing the reference genome.
-#' @param bind_sites Vector of enzyme full recognition site(s).
-#' @param len5s A vector of the numbers of characters of the prime5 sites for each
-#'     recognition site.
-#' @param chunk_size Size of chunks to break sequences into for processing.
-#'     This value is ignored if it's set to zero.
-#'     Ideally this is set to a value that results in a number of chunks divisible by
-#'     the number of cores you're using, and is most useful when `n_cores` is greater
-#'     than the number of scaffolds.
-#'     Breaking into increasingly small chunks results in increasing overhead, so
-#'     beware of making this argument very small.
-#'     Reference genome sequences are not copied during this function, so using
-#'     this argument for a reference genome does NOT decrease memory usage
-#'     appreciably.
-#'     Defaults to \code{0}.
-#' @param n_cores The number of cores to use for processing. This value is ignored
-#'     if the input reference genome is merged and \code{chunk_size == 0}.
-#'     Defaults to \code{1}.
-#'
-#' @return A list of vectors, each vector representing the locations of cut sites
-#'     on a given sequence.
-#'     Indexing the output list would be done as such:
-#'     \code{output_list[[sequence_index]][position_index]}.
-#'
-#' @noRd
-#'
-digest_ref <- function(ref_genome_ptr, bind_sites, len5s, chunk_size = 0L, n_cores = 1L) {
-    .Call(`_jackal_digest_ref`, ref_genome_ptr, bind_sites, len5s, chunk_size, n_cores)
-}
-
 #' Illumina sequence for reference object.
 #'
 #'
diff --git a/R/aaa-classes.R b/R/aaa-classes.R
index 0087adc..9e83a3e 100644
--- a/R/aaa-classes.R
+++ b/R/aaa-classes.R
@@ -11,8 +11,6 @@
 #'
 #' @field genome An \code{externalptr} to a C++ object storing the sequences
 #'     representing the genome.
-#' @field digests An \code{externalptr} to a C++ object storing the digestion of the
-#'     genome, if a digestion has been carried out. It's \code{NULL} otherwise.
 #'
 #' @section Methods:
 #' \describe{
@@ -55,7 +53,6 @@ ref_genome <- R6::R6Class(
     public = list(
 
         genome = NULL,
-        digests = NULL,
 
         initialize = function(genome_ptr) {
             if (!inherits(genome_ptr, "externalptr")) {
@@ -384,8 +381,6 @@ mevo$lock()
 #'
 #' @field genome An \code{externalptr} to a C++ object storing the sequences
 #'     representing the genome.
-#' @field digests An \code{externalptr} to a C++ object storing the digestion of the
-#'     genome, if a digestion has been carried out. It's \code{NULL} otherwise.
 #' @field reference An \code{externalptr} to a C++ object storing the sequences
 #'     representing the genome.
 #'     There are a few extra notes for this field:
@@ -437,7 +432,6 @@ variants <- R6::R6Class(
     public = list(
 
         genomes = NULL,
-        digests = NULL,
 
         initialize = function(genomes_ptr, reference_ptr) {
             if (!inherits(genomes_ptr, "externalptr")) {
diff --git a/R/data.R b/R/data.R
index 8fb12ae..a2255bb 100644
--- a/R/data.R
+++ b/R/data.R
@@ -1,34 +1,3 @@
-#' Binding sites for selected restriction enzymes.
-#'
-#' @format A 695 x 2 data frame with the following columns:
-#' \describe{
-#'   \item{enzyme}{Restriction enzyme name.}
-#'   \item{sites}{Enzyme binding site sequences.
-#'       See \code{\link{nucleobase_legend}} for what bases other than `T`, `C`, `A`,
-#'       and `G` mean.
-#'       Each `/` indicates a cleavage point.
-#'       According to NEB (see link below under Source):
-#'       "Numbers in parentheses indicate point of cleaveage for non-palindromic
-#'       enzymes."
-#'       These types of enzymes are not implemented.}
-#' }
-#' @source \url{https://www.neb.com/tools-and-resources/selection-charts/alphabetized-list-of-recognition-specificities}
-"binding_sites"
-
-
-
-#' Legend for the single-letter code of nucleobases indicating restriction sequences.
-#'
-#' @format A data frame of 28 rows and two columns:
-#' \describe{
-#'   \item{code}{The letter indicating more than one possible nucleotides.}
-#'   \item{nucleotides}{One of the multiple nucleotides the code can refer to.}
-#' }
-#' @source \url{https://en.wikipedia.org/wiki/List_of_restriction_enzyme_cutting_sites:_A#Whole_list_navigation}
-"nucleobase_legend"
-
-
-
 
 #' Table of evolutionary rates.
 #'
diff --git a/R/digest.R b/R/digest.R
deleted file mode 100644
index 2fcb29b..0000000
--- a/R/digest.R
+++ /dev/null
@@ -1,156 +0,0 @@
-#
-# Digest genome(s)
-#
-
-
-
-#' Check validity, adjust for degeneracy, and remove duplicates for input enzymes.
-#'
-#' @inheritParams digest
-#'
-#' @noRd
-#'
-process_enzymes <- function(enzyme_names, custom_enzymes) {
-
-    if (missing(enzyme_names) & missing(custom_enzymes)) {
-        stop("\nWhen digesting a reference genome, you must provide either an ",
-             "enzyme name, a custom enzyme, or both.",
-             call. = FALSE)
-    }
-    enzyme_seqs <- character(0)
-    if (!missing(enzyme_names)) {
-        enzyme_seqs <- character(length(enzyme_names))
-        for (i in 1:length(enzyme_names)) {
-            x <- enzyme_names[i]
-            z <- jackal::binding_sites$sequence[jackal::binding_sites == x]
-            if (length(z) == 0) {
-                stop(paste0("\nEnzyme name \"", x, "\" not found."),
-                     call. = TRUE)
-            }
-            if (grepl("\\(|\\)", z)) {
-                stop(paste0("\n", x, " is a non-palindromic enzyme. ",
-                            "See the link in ?binding_sites for ",
-                            "more info."),
-                     call. = TRUE)
-            }
-            if (!grepl("/", z)) {
-                stop(paste("\n", x, "doesn't have a cleavage site",
-                           "according to NEB.",
-                           "Choose another enzyme,",
-                           "or provide a custom enzyme if you're",
-                           "sure I've erred in this error."),
-                     call. = TRUE)
-            }
-            enzyme_seqs[i] <- z
-        }
-
-    }
-    if (!missing(custom_enzymes)) {
-        stopifnot(inherits(custom_enzymes, "character"))
-
-        allowed_chars <- sort(unique(c(jackal::nucleobase_legend, "/",
-                                       recursive = TRUE)))
-        allowed_chars <- paste(allowed_chars, collapse = "")
-        weird_chars <- grepl(sprintf("[^%s]", allowed_chars), custom_enzymes)
-        if (any(weird_chars)) {
-            stop(paste("\nThe only allowed characters for restriction enzymes are",
-                       allowed_chars),
-                 call. = FALSE)
-        }
-
-        no_cleavage <- ! grepl("/", custom_enzymes)
-
-        if (any(no_cleavage)) {
-            stop("\nYou must include a cleavage site for each custom restriction ",
-                 "site using \"/\" (e.g., \"TTA/TAA\" for the enzyme AanI).",
-                 call. = TRUE)
-        }
-
-        enzyme_seqs <- c(enzyme_seqs, custom_enzymes)
-    }
-
-    # Get number of nucleotides before the cleavage site:
-    len5s <- get_precleavage_lens(enzyme_seqs)
-
-    # Now we no longer want/need the cleavage indicators:
-    bind_sites <- gsub("/", "", enzyme_seqs)
-
-    bind_sites <- expand_seqs(bind_sites)
-
-    return(list(len5s = len5s, bind_sites = bind_sites))
-}
-
-
-
-
-
-
-
-#' Digest reference genome or variants from a genome.
-#'
-#' \emph{Note:} This will override any digestions currently in place in the
-#' object. If you want to add a new digestion, re-run this function with the names
-#' of all enzymes you're interested in included in the \code{enzyme_names} argument.
-#'
-#'
-#' @inheritParams create_genome
-#' @param object Either a \code{ref_genome} or \code{variants} object.
-#' @param enzyme_names Name of enzymes that should be present inside the package data
-#'     object `binding_sites` (in the `enzyme` column).
-#' @param custom_enzymes Custom enzymes for those not present in internal data.
-#' @param chunk_size The size of chunks to break up sqeuences into when digesting
-#'     a \code{ref_genome} or \code{variants} object.
-#'     Changing this might affect performance, for better or worse.
-#'     The default worked best on my computer. Defaults to \code{1000}.
-#'
-#' @return All changes occur in place, but the input object is returned invisibly
-#'     so that piping works.
-#'
-#'
-#' @export
-#'
-#'
-digest <- function(object,
-                   enzyme_names,
-                   custom_enzymes,
-                   chunk_size = 1000,
-                   n_cores = 1) {
-
-    if (missing(enzyme_names) & missing(custom_enzymes)) {
-        stop("\nWhen digesting a reference genome or variants, you must provide ",
-             "either an enzyme name, a custom enzyme, or both.",
-             call. = FALSE)
-    }
-
-    enz_info <- process_enzymes(enzyme_names, custom_enzymes)
-
-    if (inherits(object, "ref_genome")) {
-        if (!inherits(object$genome, "externalptr")) {
-            stop("\nYou're attempting a digestion on a ref_genome object with ",
-                 "a genome field that is not an externalptr. ",
-                 "Restart by reading a FASTA file or by simulating a genome. ",
-                 "And do NOT change the genome field manually.",
-                 call. = FALSE)
-        }
-        object$digests <- digest_ref(object$genome,
-                                     enz_info$bind_sites, enz_info$len5s,
-                                     chunk_size, n_cores)
-    } else if (inherits(object, "variants")) {
-        if (!inherits(object$genomes, "externalptr")) {
-            stop("\nYou're attempting a digestion on a variants object with ",
-                 "a genomes field that is not an externalptr. ",
-                 "Restart by reading a FASTA file or by simulating a genome, ",
-                 "then generating variants. ",
-                 "And do NOT change the genomes field manually.",
-                 call. = FALSE)
-        }
-        object$digests <- digest_var_set(object$genomes,
-                                         enz_info$bind_sites, enz_info$len5s,
-                                         chunk_size, n_cores)
-    } else {
-        stop("\nA digestion can only be applied to a ref_genome or variants object.",
-             call. = FALSE)
-    }
-    invisible(object)
-}
-
diff --git a/R/jackal.R b/R/jackal.R
index b789e6a..76e3a61 100644
--- a/R/jackal.R
+++ b/R/jackal.R
@@ -4,13 +4,10 @@
 #' (ii) generates variants using summary statistics, phylogenies, Variant
 #' Call Format (VCF) files, and coalescent simulations—the latter of which can include
 #' selection, recombination, and demographic fluctuations;
-#' (iii) simulates sequencing error, mapping qualities, restriction-enzyme digestion,
-#' and variance in coverage among sites; and
+#' (iii) simulates sequencing error, mapping qualities, and optical/PCR duplicates; and
 #' (iv) writes outputs to standard file formats.
-#' `jackal` can simulate single or paired-ended reads for WGS on the Illumina platform,
-#' and can be extended to simulate different methods (e.g., original RADseq,
-#' double-digest RADseq, and genotyping-by-sequencing), and sequencing technologies
-#' (e.g., Pacific BioSciences, Oxford Nanopore Technologies).
+#' `jackal` can simulate single, paired-end, or mate-pair Illumina reads, as well as
+#' reads from Pacific BioSciences.
 #'
 #'
 #' @importFrom Rcpp evalCpp
diff --git a/R/mevo_phylo.R b/R/mevo_phylo.R
index 2e6ab30..edf6898 100644
--- a/R/mevo_phylo.R
+++ b/R/mevo_phylo.R
@@ -137,7 +137,7 @@ read_phy_obj <- function(phy, n_seqs, chunked, err_msg = "") {
 #'
 read_coal_obj <- function(coal_obj, seq_sizes, chunked, err_msg) {
 
-    # Check for coal_obj begin a list and either having a `trees` field or all its
+    # Check for coal_obj being a list and either having a `trees` field or all its
     # items within having `trees` fields
     err <- FALSE
     nested <- FALSE
diff --git a/README.Rmd b/README.Rmd
index 232bb66..3c8f419 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -33,8 +33,7 @@ __An efficient, versatile molecular evolution and sequencing simulator__
 (ii) generates variants using summary statistics, phylogenies, Variant
 Call Format (VCF) files, and coalescent simulations—the latter of which can include
 selection, recombination, and demographic fluctuations;
-(iii) simulates sequencing error, mapping qualities, restriction-enzyme digestion, 
-and variance in coverage among sites; and
+(iii) simulates sequencing error, mapping qualities, and optical/PCR duplicates; and
 (iv) writes outputs to standard file formats.
 `jackal` can simulate single, paired-end, or mate-pair Illumina reads, as well as
 reads from Pacific BioSciences.
diff --git a/README.md b/README.md
index 5426c7e..2f370d2 100644
--- a/README.md
+++ b/README.md
@@ -18,10 +18,10 @@ Status](https://travis-ci.com/lucasnell/jackal.svg?branch=master)](https://travi
 generates variants using summary statistics, phylogenies, Variant Call
 Format (VCF) files, and coalescent simulations—the latter of which can
 include selection, recombination, and demographic fluctuations; (iii)
-simulates sequencing error, mapping qualities, restriction-enzyme
-digestion, and variance in coverage among sites; and (iv) writes outputs
-to standard file formats. `jackal` can simulate single, paired-end, or
-mate-pair Illumina reads, as well as reads from Pacific BioSciences.
+simulates sequencing error, mapping qualities, and optical/PCR
+duplicates; and (iv) writes outputs to standard file formats. `jackal`
+can simulate single, paired-end, or mate-pair Illumina reads, as well as
+reads from Pacific BioSciences.
 
 ## Installation
 
@@ -42,20 +42,20 @@ ref_variants <- create_variants(reference, "phylo", phy, mevo_info)
 ref_variants
 #>                            << Variants object >>
 #> # Variants: 5
-#> # Mutations: 21,301
+#> # Mutations: 5,130
 #> 
 #>                         << Reference genome info: >>
 #> < Set of 10 sequences >
 #> # Total size: 10,000 bp
 #>   name                          sequence                             length
-#> seq0       CGTACCCCGTCGTGATTCTAGACTC...GTGTGAAAATGGCGATGTTACATGTG      1000
-#> seq1       TCTCCTCCTGATACCTGAACTCGCC...ACGATCGCGGGTGTTGCTCCGACCGG      1000
-#> seq2       ATCTTAAGGCTCACCACTGAGCCAG...AAATAGCATTAGTCCCGTTCGTAAAA      1000
-#> seq3       TCCAACAAGTGGGCCAGCTGCTTTA...ACGGTGTCGCCCAGCGTATTATGATA      1000
-#> seq4       ACGATATGCTTTCGCGCCTAGGATC...GAGAGTGTTCTCTTCACATCTGTCCG      1000
-#> seq5       GCGTTCTAGTTACGTGTTACCCAGG...GTCATAACATTAGATAATCCATCCGA      1000
-#> seq6       CGGCTCTAAACTTACCGGAAGACCA...CGGGACGGCGTGCTTCTAGTAACCGC      1000
-#> seq7       CCGCGGGCCAGGACAATCTTAACAT...ACTGAGAGTAATGCGTAGCACTGGCA      1000
-#> seq8       ATAACGGAGCTCCACACCGTCCTTG...CGATTGGCTTAAGGGTCCGGAATTAA      1000
-#> seq9       ATCCAATTGGACTCGAGGGTGATAG...CACCGGGCACACTAGCGTCCTCGTGA      1000
+#> seq0       GACTACGGCGCAGAACCTACAGAAC...TACCGGCGACAATAGTTTACCACTAC      1000
+#> seq1       AAGCATCCACATATTAGGGTTTTAC...GGCCCTAACGTATCGTGTGGCCTGAT      1000
+#> seq2       CGCTTATAACAACGCGTTTCGCGAT...ACAACAAGCGGTCCCACCTTAGAATT      1000
+#> seq3       CCTTGTCATCGGCTTAGTTGAGCAT...AGGCTAGAGAATAGTAACATGCATAC      1000
+#> seq4       AGGCACTATGCACTGTTCCAACGCA...CGTACGTACACTCCAATGTGTAGGAC      1000
+#> seq5       CGTCAGGACACGTTGGAGTTGAAAA...GTGTAGGTTATAGTCCATTGATGCGT      1000
+#> seq6       CTTGACGGGGGCCTTAACCGGCGAG...GTAGTTCTCCCTCTCCGACTGCTGCA      1000
+#> seq7       GGACTAAGGCAGTCCAGCTACTCAT...ACGCAGGGTTATCATTCATACACGTC      1000
+#> seq8       CATACACGACGTCCCATTAGGATGA...ATAAAGATTCCTGAGATCCGCAATGA      1000
+#> seq9       CAACGGTATGCACGATTGCATGGGA...GGGGCTTAACCTCTCGTTTCATTATC      1000
 ```
diff --git a/data-raw/binding_sites.R b/data-raw/binding_sites.R
deleted file mode 100644
index 8da473f..0000000
--- a/data-raw/binding_sites.R
+++ /dev/null
@@ -1,139 +0,0 @@
-
-library(dplyr)
-library(readr)
-library(tidyr)
-library(purrr)
-library(rvest)
-
-
-
-# Altered version of rvest::html_table that doesn't remove first row:
-html_table2 <- function (x, header = NA, trim = TRUE, fill = FALSE, dec = ".") {
-    stopifnot(html_name(x) == "table")
-    rows <- html_nodes(x, "tr")
-    n <- length(rows)
-    cells <- lapply(rows, "html_nodes", xpath = ".//td|.//th")
-    ncols <- lapply(cells, html_attr, "colspan", default = "1")
-    ncols <- lapply(ncols, as.integer)
-    nrows <- lapply(cells, html_attr, "rowspan", default = "1")
-    nrows <- lapply(nrows, as.integer)
-    p <- unique(vapply(ncols, sum, integer(1)))
-    maxp <- max(p)
-    if (length(p) > 1 & maxp * n != sum(unlist(nrows)) & maxp *
-        n != sum(unlist(ncols))) {
-        if (!fill) {
-            stop("Table has inconsistent number of columns. ",
-                 "Do you want fill = TRUE?", call. = FALSE)
-        }
-    }
-    values <- lapply(cells, html_text, trim = trim)
-    out <- matrix(NA_character_, nrow = n, ncol = maxp)
-    for (i in seq_len(n)) {
-        row <- values[[i]]
-        ncol <- ncols[[i]]
-        col <- 1
-        for (j in seq_len(length(ncol))) {
-            out[i, col:(col + ncol[j] - 1)] <- row[[j]]
-            col <- col + ncol[j]
-        }
-    }
-    # Not sure why, but this part removes the first row:
-    # for (i in seq_len(maxp)) {
-    #     for (j in seq_len(n)) {
-    #         rowspan <- nrows[[j]][i]
-    #         colspan <- ncols[[j]][i]
-    #         if (!is.na(rowspan) & (rowspan > 1)) {
-    #             if (!is.na(colspan) & (colspan > 1)) {
-    #                 nrows[[j]] <- c(utils::head(nrows[[j]], i),
-    #                                 rep(rowspan, colspan - 1), utils::tail(nrows[[j]],
-    #                                                       length(rowspan) - (i + 1)))
-    #                 rowspan <- nrows[[j]][i]
-    #             }
-    #             for (k in seq_len(rowspan - 1)) {
-    #                 l <- utils::head(out[j + k, ], i - 1)
-    #                 r <- utils::tail(out[j + k, ], maxp - i + 1)
-    #                 out[j + k, ] <- utils::head(c(l, out[j, i],
-    #                                               r), maxp)
-    #             }
-    #         }
-    #     }
-    # }
-    if (is.na(header)) {
-        header <- all(html_name(cells[[1]]) == "th")
-    }
-    if (header) {
-        col_names <- out[1, , drop = FALSE]
-        out <- out[-1, , drop = FALSE]
-    }
-    else {
-        col_names <- paste0("X", seq_len(ncol(out)))
-    }
-    df <- lapply(seq_len(maxp), function(i) {
-        utils::type.convert(out[, i], as.is = TRUE, dec = dec)
-    })
-    names(df) <- col_names
-    class(df) <- "data.frame"
-    attr(df, "row.names") <- .set_row_names(length(df[[1]]))
-    if (length(unique(col_names)) < length(col_names)) {
-        warning("At least two columns have the same name")
-    }
-    df
-}
-
-
-
-binding_sites <- paste0("https://www.neb.com/tools-and-resources/selection-charts/",
-                        "alphabetized-list-of-recognition-specificities") %>%
-    read_html() %>%
-    html_node("table") %>%
-    html_table2() %>%
-    setNames(c("sequence", "enzyme")) %>%
-    tbl_df() %>%
-    mutate(enzyme = strsplit(gsub("®", "", enzyme), " ")) %>%
-    unnest() %>%
-    mutate(sequence = ifelse(grepl("BstZ17I", x = enzyme),
-                             sprintf("%.3s/%s", sequence, substr(sequence,4,6)),
-                             sequence))
-
-
-
-
-process_isos <- function(isos) {
-    strsplit(isos, ", ") %>%
-        map(function(x) {
-            xx <- keep(x, ~ !grepl("\\^$", x = .x))
-            return(xx)
-        })
-}
-
-
-other_isos <- "https://www.neb.com/tools-and-resources/selection-charts/isoschizomers" %>%
-    read_html() %>%
-    html_node("table") %>%
-    html_table2() %>%
-    tbl_df() %>%
-    select(sequence = `Sequence`, enzyme = `Enzyme`, isos = `Other Isoschizomers`) %>%
-    filter(enzyme != "", sequence != "", isos != "") %>%
-    mutate(isos = process_isos(isos),
-           enzyme = gsub(" x$", "", enzyme)) %>%
-    filter(map_lgl(isos, ~ length(.x) > 0))
-
-
-
-binding_sites <- full_join(binding_sites, other_isos,
-                           by = c("enzyme", "sequence")) %>%
-    mutate(isos = map(isos, ~ {if (length(.x) == 0) character(0) else .x})) %>%
-    group_by(enzyme) %>%
-    summarize(sequence = sequence[nchar(sequence) == max(nchar(sequence))][1],
-              isos = isos[sapply(isos, length) == max(sapply(isos, length))]) %>%
-    ungroup() %>%
-    group_by(sequence) %>%
-    summarize(enzyme = list(unique(c(enzyme, isos, recursive = TRUE)))) %>%
-    unnest() %>%
-    select(enzyme, sequence) %>%
-    as.data.frame()
-
-
-write_csv(binding_sites, "data-raw/binding_sites.csv")
-devtools::use_data(binding_sites, overwrite = TRUE)
-
diff --git a/data-raw/binding_sites.csv b/data-raw/binding_sites.csv
deleted file mode 100644
index 8f1ddb6..0000000
--- a/data-raw/binding_sites.csv
+++ /dev/null
@@ -1,696 +0,0 @@
-enzyme,sequence
-Nt.CviPII,(0/-1)CCD
-BcgI,(10/12)CGANNNNNNTGC(12/10)
-BaeI,(10/15)ACNNNNGTAYC(12/7)
-CspCI,(11/13)CAANNNNNGTGG(12/10)
-BsaXI,(9/12)ACNNNNNCTCC(10/7)
-MluCI,/AATT
-Sse9I,/AATT
-TasI,/AATT
-TspEI,/AATT
-FatI,/CATG
-BstSCI,/CCNGG
-StyD4I,/CCNGG
-AjnI,/CCWGG
-EcoRII,/CCWGG
-Psp6I,/CCWGG
-PspGI,/CCWGG
-Bsp143I,/GATC
-BssMI,/GATC
-BstMBI,/GATC
-DpnII,/GATC
-Kzo9I,/GATC
-MboI,/GATC
-NdeII,/GATC
-Sau3AI,/GATC
-NmuCI,/GTSAC
-TseFI,/GTSAC
-Tsp45I,/GTSAC
-HindIII,A/AGCTT
-HindIII-HF,A/AGCTT
-BspLU11I,A/CATGT
-PciI,A/CATGT
-PscI,A/CATGT
-AgeI,A/CCGGT
-AgeI-HF,A/CCGGT
-AsiGI,A/CCGGT
-BshTI,A/CCGGT
-CspAI,A/CCGGT
-PinAI,A/CCGGT
-CsiI,A/CCWGGT
-MabI,A/CCWGGT
-SexAI,A/CCWGGT
-MluI,A/CGCGT
-MluI-HF,A/CGCGT
-HpyCH4IV,A/CGT
-HpySE526I,A/CGT
-MaeII,A/CGT
-AflIII,A/CRYGT
-AhlI,A/CTAGT
-BcuI,A/CTAGT
-SpeI,A/CTAGT
-SpeI-HF,A/CTAGT
-BglII,A/GATCT
-AclI,AA/CGTT
-Psp1406I,AA/CGTT
-SspI,AAT/ATT
-SspI-HF,AAT/ATT
-Acc36I,ACCTGC(4/8)
-BfuAI,ACCTGC(4/8)
-BspMI,ACCTGC(4/8)
-BveI,ACCTGC(4/8)
-BceAI,ACGGC(12/14)
-Bst4CI,ACN/GT
-HpyCH4III,ACN/GT
-TaaI,ACN/GT
-Tsp4CI,ACN/GT
-Bse1I,ACTGG(1/-1)
-BseNI,ACTGG(1/-1)
-BsrI,ACTGG(1/-1)
-BfiI,ACTGGG(5/4)
-BmrI,ACTGGG(5/4)
-BmuI,ACTGGG(5/4)
-AluBI,AG/CT
-AluI,AG/CT
-AfeI,AGC/GCT
-Aor51HI,AGC/GCT
-Eco47III,AGC/GCT
-Eco147I,AGG/CCT
-PceI,AGG/CCT
-SseBI,AGG/CCT
-StuI,AGG/CCT
-BmcAI,AGT/ACT
-ScaI-HF,AGT/ACT
-ZrmI,AGT/ACT
-Bsa29I,AT/CGAT
-BseCI,AT/CGAT
-BshVI,AT/CGAT
-BspDI,AT/CGAT
-Bsu15I,AT/CGAT
-BsuTUI,AT/CGAT
-ClaI,AT/CGAT
-AseI,AT/TAAT
-PshBI,AT/TAAT
-VspI,AT/TAAT
-PI-SceI,ATCTATGTCGGGTGCGGAGAAAGAGGTAAT(-15/-19)
-EcoT22I,ATGCA/T
-Mph1103I,ATGCA/T
-NsiI,ATGCA/T
-NsiI-HF,ATGCA/T
-Zsp2I,ATGCA/T
-AvaIII,ATGCAT
-EcoT22I,ATGCAT
-Mph1103I,ATGCAT
-NsiI,ATGCAT
-NsiI-HF,ATGCAT
-Zsp2I,ATGCAT
-SmiI,ATTT/AAAT
-SwaI,ATTT/AAAT
-MfeI,C/AATTG
-MfeI-HF,C/AATTG
-MunI,C/AATTG
-CviAII,C/ATG
-Bsp19I,C/CATGG
-NcoI,C/CATGG
-NcoI-HF,C/CATGG
-Cfr9I,C/CCGGG
-TspMI,C/CCGGG
-XmaI,C/CCGGG
-BsiSI,C/CGG
-HapII,C/CGG
-HpaII,C/CGG
-MspI,C/CGG
-BsaJI,C/CNNGG
-BseDI,C/CNNGG
-BssECI,C/CNNGG
-SecI,C/CNNGG
-BstDSI,C/CRYGG
-BtgI,C/CRYGG
-DsaI,C/CRYGG
-AspA2I,C/CTAGG
-AvrII,C/CTAGG
-BlnI,C/CTAGG
-XmaJI,C/CTAGG
-BssT1I,C/CWWGG
-Eco130I,C/CWWGG
-EcoT14I,C/CWWGG
-ErhI,C/CWWGG
-StyI,C/CWWGG
-StyI-HF,C/CWWGG
-BseX3I,C/GGCCG
-BstZI,C/GGCCG
-EagI,C/GGCCG
-EagI-HF,C/GGCCG
-EclXI,C/GGCCG
-Eco52I,C/GGCCG
-XmaIII,C/GGCCG
-BsiWI,C/GTACG
-BsiWI-HF,C/GTACG
-Pfl23II,C/GTACG
-PspLI,C/GTACG
-SplI,C/GTACG
-BfaI,C/TAG
-FspBI,C/TAG
-MaeI,C/TAG
-SspMI,C/TAG
-XspI,C/TAG
-PaeR7I,C/TCGAG
-Sfr274I,C/TCGAG
-SlaI,C/TCGAG
-XhoI,C/TCGAG
-BstDEI,C/TNAG
-DdeI,C/TNAG
-HpyF3I,C/TNAG
-BfmI,C/TRYAG
-BstSFI,C/TRYAG
-SfcI,C/TRYAG
-SfeI,C/TRYAG
-AflII,C/TTAAG
-BfrI,C/TTAAG
-BspTI,C/TTAAG
-BstAFI,C/TTAAG
-MspCI,C/TTAAG
-Vha464I,C/TTAAG
-SmlI,C/TYRAG
-SmoI,C/TYRAG
-Ama87I,C/YCGRG
-AvaI,C/YCGRG
-BmeT110I,C/YCGRG
-BsiHKCI,C/YCGRG
-BsoBI,C/YCGRG
-Eco88I,C/YCGRG
-FauNDI,CA/TATG
-NdeI,CA/TATG
-AcvI,CAC/GTG
-BbrPI,CAC/GTG
-Eco72I,CAC/GTG
-PmaCI,CAC/GTG
-PmlI,CAC/GTG
-PspCI,CAC/GTG
-Nb.BssSI,CACGAG
-BauI,CACGAG(-5/-1)
-BsiI,CACGAG(-5/-1)
-BssSI,CACGAG(-5/-1)
-BssSαI,CACGAG(-5/-1)
-Bst2BI,CACGAG(-5/-1)
-AjiI,CACGTC(-3/-3)
-BmgBI,CACGTC(-3/-3)
-BtrI,CACGTC(-3/-3)
-AleI,CACNN/NNGTG
-OliI,CACNN/NNGTG
-AdeI,CACNNN/GTG
-DraIII-HF,CACNNN/GTG
-PvuII,CAG/CTG
-PvuII-HF,CAG/CTG
-EcoP15I,CAGCAG(25/27)
-AlwNI,CAGNNN/CTG
-CaiI,CAGNNN/CTG
-PstNI,CAGNNN/CTG
-BtsIMutI,CAGTG(2/0)
-TscAI,CASTGNN/
-TspRI,CASTGNN/
-FaeI,CATG/
-Hin1II,CATG/
-Hsp92II,CATG/
-NlaIII,CATG/
-MslI,CAYNN/NNRTG
-RseI,CAYNN/NNRTG
-SmiMI,CAYNN/NNRTG
-FspEI,CC(12/16)
-Bme1390I,CC/NGG
-BmrFI,CC/NGG
-MspR9I,CC/NGG
-ScrFI,CC/NGG
-AsuC2I,CC/SGG
-BcnI,CC/SGG
-BpuMI,CC/SGG
-CauII,CC/SGG
-NciI,CC/SGG
-AxyI,CC/TNAGG
-Bse21I,CC/TNAGG
-Bsu36I,CC/TNAGG
-Eco81I,CC/TNAGG
-SauI,CC/TNAGG
-BciT130I,CC/WGG
-BseBI,CC/WGG
-Bst2UI,CC/WGG
-BstNI,CC/WGG
-MvaI,CC/WGG
-AccB7I,CCANNNN/NTGG
-PflMI,CCANNNN/NTGG
-Van91I,CCANNNN/NTGG
-XcmI,CCANNNNN/NNNNTGG
-BstXI,CCANNNNN/NTGG
-BccI,CCATC(4/5)
-SmaI,CCC/GGG
-BseYI,CCCAGC(-5/-1)
-PspFI,CCCAGC(-5/-1)
-FauI,CCCGC(4/6)
-LpnPI,CCDG(10/14)
-AciI,CCGC(-3/-1)
-BspACI,CCGC(-3/-1)
-SsiI,CCGC(-3/-1)
-Cfr42I,CCGC/GG
-KspI,CCGC/GG
-SacII,CCGC/GG
-Sfr303I,CCGC/GG
-SgrBI,CCGC/GG
-AccBSI,CCGCTC(-3/-3)
-BsrBI,CCGCTC(-3/-3)
-MbiI,CCGCTC(-3/-3)
-AfiI,CCNNNNN/NNGG
-Bsc4I,CCNNNNN/NNGG
-BseLI,CCNNNNN/NNGG
-BsiYI,CCNNNNN/NNGG
-BslI,CCNNNNN/NNGG
-MnlI,CCTC(7/6)
-Nb.BbvCI,CCTCAGC
-BbvCI,CCTCAGC(-5/-2)
-Nt.BbvCI,CCTCAGC(-5/-7)
-SbfI,CCTGCA/GG
-SbfI-HF,CCTGCA/GG
-SdaI,CCTGCA/GG
-Sse8387I,CCTGCA/GG
-Bpu10I,CCTNAGC(-5/-2)
-BstENI,CCTNN/NNNAGG
-EcoNI,CCTNN/NNNAGG
-XagI,CCTNN/NNNAGG
-Hin4II,CCTTC(6/5)
-HpyAV,CCTTC(6/5)
-Sse232I,CG/CCGGCG
-MreI,CG/CCGGCG
-AccII,CG/CG
-Bsh1236I,CG/CG
-BspFNI,CG/CG
-BstFNI,CG/CG
-BstUI,CG/CG
-FnuDII,CG/CG
-MvnI,CG/CG
-CpoI,CG/GWCCG
-CspI,CG/GWCCG
-Rsr2I,CG/GWCCG
-RsrII,CG/GWCCG
-Ple19I,CGAT/CG
-PvuI,CGAT/CG
-PvuI-HF,CGAT/CG
-Bsh1285I,CGRY/CG
-BsiEI,CGRY/CG
-BstMCI,CGRY/CG
-McrI,CGRY/CG
-BsmBI,CGTCTC(1/5)
-Esp3I,CGTCTC(1/5)
-Hpy99I,CGWCG/
-MspA1I,CMG/CKG
-NspBII,CMG/CKG
-MspJI,CNNR(9/13)
-SgrAI,CR/CCGGYG
-BspCNI,CTCAG(9/7)
-Bst6I,CTCTTC(1/4)
-Eam1104I,CTCTTC(1/4)
-EarI,CTCTTC(1/4)
-Ksp632I,CTCTTC(1/4)
-AcuI,CTGAAG(16/14)
-Eco57I,CTGAAG(16/14)
-BspMAI,CTGCA/G
-PstI,CTGCA/G
-PstI-HF,CTGCA/G
-BpmI,CTGGAG(16/14)
-GsuI,CTGGAG(16/14)
-Bce83I,CTTGAG(16/14)
-BpuEI,CTTGAG(16/14)
-EcoRI,G/AATTC
-EcoRI-HF,G/AATTC
-HinfI,G/ANTC
-PfeI,G/AWTC
-TfiI,G/AWTC
-MroNI,G/CCGGC
-NgoMIV,G/CCGGC
-Hin6I,G/CGC
-HinP1I,G/CGC
-HspAI,G/CGC
-BsePI,G/CGCGC
-BssHII,G/CGCGC
-PauI,G/CGCGC
-PteI,G/CGCGC
-AsuNHI,G/CTAGC
-NheI,G/CTAGC
-NheI-HF,G/CTAGC
-ApeKI,G/CWGC
-TseI,G/CWGC
-BamHI,G/GATCC
-BamHI-HF,G/GATCC
-KasI,G/GCGCC
-SspDI,G/GCGCC
-Bsp120I,G/GGCCC
-PspOMI,G/GGCCC
-AspS9I,G/GNCC
-AsuI,G/GNCC
-BmgT120I,G/GNCC
-Cfr13I,G/GNCC
-PspPI,G/GNCC
-Sau96I,G/GNCC
-Acc65I,G/GTACC
-Asp718I,G/GTACC
-BstEII,G/GTNACC
-BstEII-HF,G/GTNACC
-BstPI,G/GTNACC
-Eco91I,G/GTNACC
-EcoO65I,G/GTNACC
-PspEI,G/GTNACC
-AvaII,G/GWCC
-Bme18I,G/GWCC
-Eco47I,G/GWCC
-SinI,G/GWCC
-VpaK11BI,G/GWCC
-AccB1I,G/GYRCC
-BanI,G/GYRCC
-BshNI,G/GYRCC
-BspT107I,G/GYRCC
-HgiCI,G/GYRCC
-Csp6I,G/TAC
-CviQI,G/TAC
-RsaNI,G/TAC
-SalI,G/TCGAC
-SalI-HF,G/TCGAC
-Alw44I,G/TGCAC
-ApaLI,G/TGCAC
-VneI,G/TGCAC
-DpnI,GA/TC
-MalI,GA/TC
-MboII,GAAGA(8/7)
-BbsI,GAAGAC(2/6)
-BbsI-HF,GAAGAC(2/6)
-BbvII,GAAGAC(2/6)
-BpiI,GAAGAC(2/6)
-BstV2I,GAAGAC(2/6)
-Asp700I,GAANN/NNTTC
-MroXI,GAANN/NNTTC
-PdmI,GAANN/NNTTC
-XmnI,GAANN/NNTTC
-Nb.BsmI,GAATGC
-BsmI,GAATGC(1/-1)
-Mva1269I,GAATGC(1/-1)
-PctI,GAATGC(1/-1)
-ZraI,GAC/GTC
-CseI,GACGC(5/10)
-HgaI,GACGC(5/10)
-AatII,GACGT/C
-PflFI,GACN/NNGTC
-PsyI,GACN/NNGTC
-Tth111I,GACN/NNGTC
-BoxI,GACNN/NNGTC
-BstPAI,GACNN/NNGTC
-PshAI,GACNN/NNGTC
-AhdI,GACNNN/NNGTC
-BmeRI,GACNNN/NNGTC
-DriI,GACNNN/NNGTC
-Eam1105I,GACNNN/NNGTC
-AasI,GACNNNN/NNGTC
-DrdI,GACNNNN/NNGTC
-DseDI,GACNNNN/NNGTC
-Ecl136II,GAG/CTC
-Eco53kI,GAG/CTC
-EcoICRI,GAG/CTC
-Psp124BI,GAGCT/C
-SacI,GAGCT/C
-SacI-HF,GAGCT/C
-SstI,GAGCT/C
-BseRI,GAGGAG(10/8)
-Nt.BstNBI,GAGTC(4/-5)
-PleI,GAGTC(4/5)
-PpsI,GAGTC(4/5)
-MlyI,GAGTC(5/5)
-SchI,GAGTC(5/5)
-Eco32I,GAT/ATC
-EcoRV,GAT/ATC
-EcoRV-HF,GAT/ATC
-BsaBI,GATNN/NNATC
-Bse8I,GATNN/NNATC
-BseJI,GATNN/NNATC
-CciNI,GC/GGCCGC
-NotI,GC/GGCCGC
-NotI-HF,GC/GGCCGC
-BisI,GC/NGC
-Fnu4HI,GC/NGC
-Fsp4HI,GC/NGC
-GluI,GC/NGC
-SatI,GC/NGC
-BlpI,GC/TNAGC
-Bpu1102I,GC/TNAGC
-Bsp1720I,GC/TNAGC
-EspI,GC/TNAGC
-Nb.BsrDI,GCAATG
-Bse3DI,GCAATG(2/0)
-BseMI,GCAATG(2/0)
-BsrDI,GCAATG(2/0)
-BbvI,GCAGC(8/12)
-BseXI,GCAGC(8/12)
-BstV1I,GCAGC(8/12)
-Lsp1109I,GCAGC(8/12)
-Nb.BtsI,GCAGTG
-BtsαI,GCAGTG(2/0)
-BstAPI,GCANNNN/NTGC
-BmsI,GCATC(5/9)
-LweI,GCATC(5/9)
-SfaNI,GCATC(5/9)
-PaeI,GCATG/C
-SphI,GCATG/C
-SphI-HF,GCATG/C
-NaeI,GCC/GGC
-PdiI,GCC/GGC
-SrfI,GCCC/GGGC
-NmeAIII,GCCGAG(21/19)
-BglI,GCCNNNN/NGGC
-AspLEI,GCG/C
-BstHHI,GCG/C
-CfoI,GCG/C
-HhaI,GCG/C
-AsiSI,GCGAT/CGC
-RgaI,GCGAT/CGC
-SfaAI,GCGAT/CGC
-SgfI,GCGAT/CGC
-BtgZI,GCGATG(10/14)
-BlsI,GCN/GC
-PkrI,GCN/GC
-BstC8I,GCN/NGC
-Cac8I,GCN/NGC
-BstMWI,GCNNNNN/NNGC
-HpyF10VI,GCNNNNN/NNGC
-MwoI,GCNNNNN/NNGC
-BmtI,GCTAG/C
-BmtI-HF,GCTAG/C
-BspOI,GCTAG/C
-AsuNHI,GCTAG/C
-NheI,GCTAG/C
-Nt.BspQI,GCTCTTC(1/-7)
-BspQI,GCTCTTC(1/4)
-LguI,GCTCTTC(1/4)
-PciSI,GCTCTTC(1/4)
-SapI,GCTCTTC(1/4)
-Bsp1286I,GDGCH/C
-MhlI,GDGCH/C
-SduI,GDGCH/C
-BshFI,GG/CC
-BsnI,GG/CC
-BspANI,GG/CC
-BsuRI,GG/CC
-HaeIII,GG/CC
-Mly113I,GG/CGCC
-NarI,GG/CGCC
-AscI,GG/CGCGCC
-PalAI,GG/CGCGCC
-SgsI,GG/CGCGCC
-SanDI,GG/GWCCC
-KflI,GG/GWCCC
-Nt.AlwI,GGATC(4/-5)
-AclWI,GGATC(4/5)
-AlwI,GGATC(4/5)
-BinI,GGATC(4/5)
-BspPI,GGATC(4/5)
-BseGI,GGATG(2/0)
-BstF5I,GGATG(2/0)
-BtsCI,GGATG(2/0)
-FokI,GGATG(9/13)
-DinI,GGC/GCC
-EgeI,GGC/GCC
-EheI,GGC/GCC
-SfoI,GGC/GCC
-FseI,GGCCGG/CC
-RigI,GGCCGG/CC
-SfiI,GGCCNNNN/NGGCC
-PluTI,GGCGC/C
-EciI,GGCGGA(11/9)
-FinI,GGGAC
-BslFI,GGGAC
-BsmFI,GGGAC
-FaqI,GGGAC
-BslFI,GGGAC(10/14)
-BsmFI,GGGAC(10/14)
-FaqI,GGGAC(10/14)
-ApaI,GGGCC/C
-BmiI,GGN/NCC
-BspLI,GGN/NCC
-NlaIV,GGN/NCC
-PspN4I,GGN/NCC
-KpnI,GGTAC/C
-KpnI-HF,GGTAC/C
-BsaI,GGTCTC(1/5)
-BsaI-HF,GGTCTC(1/5)
-BsaI-HFv2,GGTCTC(1/5)
-Bso31I,GGTCTC(1/5)
-BspTNI,GGTCTC(1/5)
-Eco31I,GGTCTC(1/5)
-AsuHPI,GGTGA(8/7)
-HphI,GGTGA(8/7)
-BaeGI,GKGCM/C
-BseSI,GKGCM/C
-BstSLI,GKGCM/C
-AcyI,GR/CGYC
-BsaHI,GR/CGYC
-BssNI,GR/CGYC
-BstACI,GR/CGYC
-Hin1I,GR/CGYC
-Hsp92I,GR/CGYC
-BanII,GRGCY/C
-Eco24I,GRGCY/C
-EcoT38I,GRGCY/C
-FriOI,GRGCY/C
-HgiJII,GRGCY/C
-AfaI,GT/AC
-RsaI,GT/AC
-AccI,GT/MKAC
-FblI,GT/MKAC
-XmiI,GT/MKAC
-BssNAI,GTA/TAC
-Bst1107I,GTA/TAC
-BstZ17I-HF,GTA/TAC
-SnaI,GTATAC
-BssNAI,GTATAC
-Bst1107I,GTATAC
-BstZ17I-HF,GTATAC
-BciVI,GTATCC(6/5)
-BfuI,GTATCC(6/5)
-BsuI,GTATCC(6/5)
-Nt.BsmAI,GTCTC(1/-5)
-Alw26I,GTCTC(1/5)
-BcoDI,GTCTC(1/5)
-BsmAI,GTCTC(1/5)
-BstMAI,GTCTC(1/5)
-BsgI,GTGCAG(16/14)
-Hpy166II,GTN/NAC
-Hpy8I,GTN/NAC
-MjaIV,GTNNAC
-Hpy8I,GTNNAC
-Hpy166II,GTNNAC
-HpaI,GTT/AAC
-KspAI,GTT/AAC
-MssI,GTTT/AAAC
-PmeI,GTTT/AAAC
-HincII,GTY/RAC
-HindII,GTY/RAC
-Alw21I,GWGCW/C
-Bbv12I,GWGCW/C
-BsiHKAI,GWGCW/C
-HgiAI,GWGCW/C
-TspRI,NNCASTGNN/
-TscAI,NNCASTGNN/
-AcsI,R/AATTY
-ApoI,R/AATTY
-ApoI-HF,R/AATTY
-XapI,R/AATTY
-Bse118I,R/CCGGY
-BsrFI,R/CCGGY
-BsrFαI,R/CCGGY
-BssAI,R/CCGGY
-Cfr10I,R/CCGGY
-BstX2I,R/GATCY
-BstYI,R/GATCY
-MflI,R/GATCY
-PsuI,R/GATCY
-XhoII,R/GATCY
-BstNSI,RCATG/Y
-NspI,RCATG/Y
-XceI,RCATG/Y
-CviJI,RG/CY
-CviKI-1,RG/CY
-DraII,RG/GNCCY
-EcoO109I,RG/GNCCY
-PpuMI,RG/GWCCY
-Psp5II,RG/GWCCY
-PspPPI,RG/GWCCY
-BfoI,RGCGC/Y
-BstH2I,RGCGC/Y
-HaeII,RGCGC/Y
-BspHI,T/CATGA
-CciI,T/CATGA
-PagI,T/CATGA
-AccIII,T/CCGGA
-Aor13HI,T/CCGGA
-BseAI,T/CCGGA
-Bsp13I,T/CCGGA
-BspEI,T/CCGGA
-BspMII,T/CCGGA
-Kpn2I,T/CCGGA
-MroI,T/CCGGA
-TaqαI,T/CGA
-XbaI,T/CTAGA
-BclI,T/GATCA
-BclI-HF,T/GATCA
-FbaI,T/GATCA
-Ksp22I,T/GATCA
-Bsp1407I,T/GTACA
-BsrGI,T/GTACA
-BsrGI-HF,T/GTACA
-BstAUI,T/GTACA
-MseI,T/TAA
-SaqAI,T/TAA
-Tru1I,T/TAA
-Tru9I,T/TAA
-I-CeuI,TAACTATAACGGTCCTAAGGTAGCGAA(-9/-13)
-BstSNI,TAC/GTA
-Eco105I,TAC/GTA
-SnaBI,TAC/GTA
-I-SceI,TAGGGATAACAGGGTAAT(-9/-13)
-Hpy178III,TC/NNGA
-Hpy188III,TC/NNGA
-MmeI,TCCRAC(20/18)
-Bsp68I,TCG/CGA
-BtuMI,TCG/CGA
-NruI,TCG/CGA
-NruI-HF,TCG/CGA
-RruI,TCG/CGA
-Hpy188I,TCN/GA
-CviRI,TG/CA
-HpyCH4V,TG/CA
-Acc16I,TGC/GCA
-FspI,TGC/GCA
-MstI,TGC/GCA
-NsbI,TGC/GCA
-BalI,TGG/CCA
-MlsI,TGG/CCA
-MluNI,TGG/CCA
-Mox20I,TGG/CCA
-MscI,TGG/CCA
-Msp20I,TGG/CCA
-PI-PspI,TGGCAAACAGCTATTATGGGTATTATGGGT(-13/-17)
-AsuII,TT/CGAA
-Bpu14I,TT/CGAA
-Bsp119I,TT/CGAA
-BspT104I,TT/CGAA
-BstBI,TT/CGAA
-NspV,TT/CGAA
-SfuI,TT/CGAA
-AanI,TTA/TAA
-PsiI,TTA/TAA
-PacI,TTAAT/TAA
-AhaIII,TTT/AAA
-DraI,TTT/AAA
-PspXI,VC/TCGAGB
-BetI,W/CCGGW
-BsaWI,W/CCGGW
-AcoI,Y/GGCCR
-CfrI,Y/GGCCR
-EaeI,Y/GGCCR
-BsaAI,YAC/GTR
-BstBAI,YAC/GTR
-Ppu21I,YAC/GTR
diff --git a/data-raw/nucleobase_legend.R b/data-raw/nucleobase_legend.R
deleted file mode 100644
index ac3ac81..0000000
--- a/data-raw/nucleobase_legend.R
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
-library(dplyr)
-library(readr)
-library(tidyr)
-library(rvest)
-
-
-page <- read_html(paste0("https://en.wikipedia.org/wiki/List_of_restriction_enzyme",
-                         "_cutting_sites:_A#Whole_list_navigation"))
-
-
-nucleobase_legend <- page %>%
-    html_nodes("table") %>%
-    .[[1]] %>%
-    html_table() %>%
-    setNames(c("code", "nucleotides")) %>%
-    slice(-1:-5) %>%
-    mutate(nucleotides = strsplit(nucleotides, " or |, ")) %>%
-    unnest()
-
-write_csv(nucleobase_legend, "data-raw/nucleobase_legend.csv")
-devtools::use_data(nucleobase_legend, overwrite = TRUE)
-
diff --git a/data-raw/nucleobase_legend.csv b/data-raw/nucleobase_legend.csv
deleted file mode 100644
index d7e3b78..0000000
--- a/data-raw/nucleobase_legend.csv
+++ /dev/null
@@ -1,29 +0,0 @@
-code,nucleotides
-N,A
-N,C
-N,G
-N,T
-M,A
-M,C
-R,A
-R,G
-W,A
-W,T
-Y,C
-Y,T
-S,C
-S,G
-K,G
-K,T
-H,A
-H,C
-H,T
-B,C
-B,G
-B,T
-V,A
-V,C
-V,G
-D,A
-D,G
-D,T
diff --git a/man/binding_sites.Rd b/man/binding_sites.Rd
deleted file mode 100644
index c30b7ec..0000000
--- a/man/binding_sites.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{binding_sites}
-\alias{binding_sites}
-\title{Binding sites for selected restriction enzymes.}
-\format{A 695 x 2 data frame with the following columns:
-\describe{
-\item{enzyme}{Restriction enzyme name.}
-\item{sites}{Enzyme binding site sequences.
-See \code{\link{nucleobase_legend}} for what bases other than \code{T}, \code{C}, \code{A},
-and \code{G} mean.
-Each \code{/} indicates a cleavage point.
-According to NEB (see link below under Source):
-"Numbers in parentheses indicate point of cleaveage for non-palindromic
-enzymes."
-These types of enzymes are not implemented.}
-}}
-\source{
-\url{https://www.neb.com/tools-and-resources/selection-charts/alphabetized-list-of-recognition-specificities}
-}
-\usage{
-binding_sites
-}
-\description{
-Binding sites for selected restriction enzymes.
-}
-\keyword{datasets}
diff --git a/man/digest.Rd b/man/digest.Rd
deleted file mode 100644
index 6e6ae83..0000000
--- a/man/digest.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/digest.R
-\name{digest}
-\alias{digest}
-\title{Digest reference genome or variants from a genome.}
-\usage{
-digest(object, enzyme_names, custom_enzymes, chunk_size = 1000,
-  n_cores = 1)
-}
-\arguments{
-\item{object}{Either a \code{ref_genome} or \code{variants} object.}
-
-\item{enzyme_names}{Name of enzymes that should be present inside the package data
-object \code{binding_sites} (in the \code{enzyme} column).}
-
-\item{custom_enzymes}{Custom enzymes for those not present in internal data.}
-
-\item{chunk_size}{The size of chunks to break up sqeuences into when digesting
-a \code{ref_genome} or \code{variants} object.
-Changing this might affect performance, for better or worse.
-The default worked best on my computer. Defaults to \code{1000}.}
-
-\item{n_cores}{Number of cores to use for parallel processing. This argument is
-ignored if OpenMP is not enabled. Defaults to \code{1}.}
-}
-\value{
-All changes occur in place, but the input object is returned invisibly
-so that piping works.
-}
-\description{
-\emph{Note:} This will override any digestions currently in place in the
-object. If you want to add a new digestion, re-run this function with the names
-of all enzymes you're interested in included in the \code{enzyme_names} argument.
-}
diff --git a/man/jackal.Rd b/man/jackal.Rd
index d47fa53..9576413 100644
--- a/man/jackal.Rd
+++ b/man/jackal.Rd
@@ -10,11 +10,8 @@
 (ii) generates variants using summary statistics, phylogenies, Variant
 Call Format (VCF) files, and coalescent simulations—the latter of which can include
 selection, recombination, and demographic fluctuations;
-(iii) simulates sequencing error, mapping qualities, restriction-enzyme digestion,
-and variance in coverage among sites; and
+(iii) simulates sequencing error, mapping qualities, and optical/PCR duplicates; and
 (iv) writes outputs to standard file formats.
-\code{jackal} can simulate single or paired-ended reads for WGS on the Illumina platform,
-and can be extended to simulate different methods (e.g., original RADseq,
-double-digest RADseq, and genotyping-by-sequencing), and sequencing technologies
-(e.g., Pacific BioSciences, Oxford Nanopore Technologies).
+\code{jackal} can simulate single, paired-end, or mate-pair Illumina reads, as well as
+reads from Pacific BioSciences.
 }
diff --git a/man/nucleobase_legend.Rd b/man/nucleobase_legend.Rd
deleted file mode 100644
index dba7df0..0000000
--- a/man/nucleobase_legend.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{nucleobase_legend}
-\alias{nucleobase_legend}
-\title{Legend for the single-letter code of nucleobases indicating restriction sequences.}
-\format{A data frame of 28 rows and two columns:
-\describe{
-\item{code}{The letter indicating more than one possible nucleotides.}
-\item{nucleotides}{One of the multiple nucleotides the code can refer to.}
-}}
-\source{
-\url{https://en.wikipedia.org/wiki/List_of_restriction_enzyme_cutting_sites:_A#Whole_list_navigation}
-}
-\usage{
-nucleobase_legend
-}
-\description{
-Legend for the single-letter code of nucleobases indicating restriction sequences.
-}
-\keyword{datasets}
diff --git a/man/ref_genome.Rd b/man/ref_genome.Rd
index b1b873a..8346889 100644
--- a/man/ref_genome.Rd
+++ b/man/ref_genome.Rd
@@ -23,9 +23,6 @@ For safe ways of manipulating the reference genome, see the "Methods" section.
 \describe{
 \item{\code{genome}}{An \code{externalptr} to a C++ object storing the sequences
 representing the genome.}
-
-\item{\code{digests}}{An \code{externalptr} to a C++ object storing the digestion of the
-genome, if a digestion has been carried out. It's \code{NULL} otherwise.}
 }}
 
 \section{Methods}{
diff --git a/man/variants.Rd b/man/variants.Rd
index 76172fd..4759d71 100644
--- a/man/variants.Rd
+++ b/man/variants.Rd
@@ -24,9 +24,6 @@ For safe ways of manipulating the variants' information, see the "Methods" secti
 \item{\code{genome}}{An \code{externalptr} to a C++ object storing the sequences
 representing the genome.}
 
-\item{\code{digests}}{An \code{externalptr} to a C++ object storing the digestion of the
-genome, if a digestion has been carried out. It's \code{NULL} otherwise.}
-
 \item{\code{reference}}{An \code{externalptr} to a C++ object storing the sequences
 representing the genome.
 There are a few extra notes for this field:
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 0ee426d..9410107 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -72,58 +72,6 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// get_precleavage_lens
-std::vector<uint32> get_precleavage_lens(const std::vector<std::string>& seqs);
-RcppExport SEXP _jackal_get_precleavage_lens(SEXP seqsSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const std::vector<std::string>& >::type seqs(seqsSEXP);
-    rcpp_result_gen = Rcpp::wrap(get_precleavage_lens(seqs));
-    return rcpp_result_gen;
-END_RCPP
-}
-// expand_seqs
-std::vector<std::string> expand_seqs(const std::vector<std::string>& seqs);
-RcppExport SEXP _jackal_expand_seqs(SEXP seqsSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const std::vector<std::string>& >::type seqs(seqsSEXP);
-    rcpp_result_gen = Rcpp::wrap(expand_seqs(seqs));
-    return rcpp_result_gen;
-END_RCPP
-}
-// digest_var_set
-SEXP digest_var_set(SEXP var_set_ptr, const std::vector<std::string>& bind_sites, const std::vector<uint32>& len5s, const uint32& chunk_size, const uint32& n_cores);
-RcppExport SEXP _jackal_digest_var_set(SEXP var_set_ptrSEXP, SEXP bind_sitesSEXP, SEXP len5sSEXP, SEXP chunk_sizeSEXP, SEXP n_coresSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< SEXP >::type var_set_ptr(var_set_ptrSEXP);
-    Rcpp::traits::input_parameter< const std::vector<std::string>& >::type bind_sites(bind_sitesSEXP);
-    Rcpp::traits::input_parameter< const std::vector<uint32>& >::type len5s(len5sSEXP);
-    Rcpp::traits::input_parameter< const uint32& >::type chunk_size(chunk_sizeSEXP);
-    Rcpp::traits::input_parameter< const uint32& >::type n_cores(n_coresSEXP);
-    rcpp_result_gen = Rcpp::wrap(digest_var_set(var_set_ptr, bind_sites, len5s, chunk_size, n_cores));
-    return rcpp_result_gen;
-END_RCPP
-}
-// digest_ref
-SEXP digest_ref(SEXP ref_genome_ptr, const std::vector<std::string>& bind_sites, const std::vector<uint32>& len5s, const uint32& chunk_size, const uint32& n_cores);
-RcppExport SEXP _jackal_digest_ref(SEXP ref_genome_ptrSEXP, SEXP bind_sitesSEXP, SEXP len5sSEXP, SEXP chunk_sizeSEXP, SEXP n_coresSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< SEXP >::type ref_genome_ptr(ref_genome_ptrSEXP);
-    Rcpp::traits::input_parameter< const std::vector<std::string>& >::type bind_sites(bind_sitesSEXP);
-    Rcpp::traits::input_parameter< const std::vector<uint32>& >::type len5s(len5sSEXP);
-    Rcpp::traits::input_parameter< const uint32& >::type chunk_size(chunk_sizeSEXP);
-    Rcpp::traits::input_parameter< const uint32& >::type n_cores(n_coresSEXP);
-    rcpp_result_gen = Rcpp::wrap(digest_ref(ref_genome_ptr, bind_sites, len5s, chunk_size, n_cores));
-    return rcpp_result_gen;
-END_RCPP
-}
 // illumina_ref_cpp
 void illumina_ref_cpp(SEXP ref_genome_ptr, const bool& paired, const bool& matepair, const std::string& out_prefix, const bool& compress, const uint32& n_reads, const double& prob_dup, const uint32& n_cores, const bool& show_progress, const uint32& read_chunk_size, const double& frag_len_shape, const double& frag_len_scale, const uint32& frag_len_min, const uint32& frag_len_max, const std::vector<std::vector<std::vector<double>>>& qual_probs1, const std::vector<std::vector<std::vector<uint8>>>& quals1, const double& ins_prob1, const double& del_prob1, const std::vector<std::vector<std::vector<double>>>& qual_probs2, const std::vector<std::vector<std::vector<uint8>>>& quals2, const double& ins_prob2, const double& del_prob2, const std::vector<std::string>& barcodes);
 RcppExport SEXP _jackal_illumina_ref_cpp(SEXP ref_genome_ptrSEXP, SEXP pairedSEXP, SEXP matepairSEXP, SEXP out_prefixSEXP, SEXP compressSEXP, SEXP n_readsSEXP, SEXP prob_dupSEXP, SEXP n_coresSEXP, SEXP show_progressSEXP, SEXP read_chunk_sizeSEXP, SEXP frag_len_shapeSEXP, SEXP frag_len_scaleSEXP, SEXP frag_len_minSEXP, SEXP frag_len_maxSEXP, SEXP qual_probs1SEXP, SEXP quals1SEXP, SEXP ins_prob1SEXP, SEXP del_prob1SEXP, SEXP qual_probs2SEXP, SEXP quals2SEXP, SEXP ins_prob2SEXP, SEXP del_prob2SEXP, SEXP barcodesSEXP) {
@@ -863,10 +811,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"_jackal_replace_Ns_cpp", (DL_FUNC) &_jackal_replace_Ns_cpp, 4},
     {"_jackal_create_genome_", (DL_FUNC) &_jackal_create_genome_, 5},
     {"_jackal_rando_seqs", (DL_FUNC) &_jackal_rando_seqs, 5},
-    {"_jackal_get_precleavage_lens", (DL_FUNC) &_jackal_get_precleavage_lens, 1},
-    {"_jackal_expand_seqs", (DL_FUNC) &_jackal_expand_seqs, 1},
-    {"_jackal_digest_var_set", (DL_FUNC) &_jackal_digest_var_set, 5},
-    {"_jackal_digest_ref", (DL_FUNC) &_jackal_digest_ref, 5},
     {"_jackal_illumina_ref_cpp", (DL_FUNC) &_jackal_illumina_ref_cpp, 23},
     {"_jackal_illumina_var_cpp", (DL_FUNC) &_jackal_illumina_var_cpp, 24},
     {"_jackal_make_mutation_sampler_base", (DL_FUNC) &_jackal_make_mutation_sampler_base, 4},
diff --git a/src/digest.cpp b/src/digest.cpp
deleted file mode 100644
index 520760e..0000000
--- a/src/digest.cpp
+++ /dev/null
@@ -1,565 +0,0 @@
-//
-// This file digests sequences from variant set and sequence set objects
-//
-
-
-#include <RcppArmadillo.h>
-#include <string> // find, string::npos, string::size_type, ...
-#include <vector> // vector class
-#include <deque> // deque class
-#ifdef _OPENMP
-#include <omp.h>  // omp
-#endif
-
-
-#include "jackal_types.h" // integer types
-#include "seq_classes_ref.h"  // Ref* classes
-#include "seq_classes_var.h"  // Var* classes
-#include "digest.h" // DigestInfo, MultiOut,
-#include "str_manip.h" // rev_comp
-
-using namespace Rcpp;
-
-
-
-/*
- Remove duplicates from a vector of strings in place.
- */
-void unique_(std::vector<std::string>& x) {
-    std::sort(x.begin(), x.end());
-    std::vector<std::string>::iterator iter = std::unique(x.begin(), x.end());
-    x.resize(iter - x.begin());
-    return;
-}
-
-
-
-
-//' Calculate how many bases come before a cleavage site.
-//'
-//'
-//' @noRd
-//'
-//[[Rcpp::export]]
-std::vector<uint32> get_precleavage_lens(const std::vector<std::string>& seqs) {
-
-    std::vector<uint32> out(seqs.size());
-
-    for (uint32 i = 0; i < seqs.size(); i++) {
-        uint32 cleav = seqs[i].find('/');
-        out[i] = cleav;
-    }
-
-    return out;
-}
-
-
-/*
- Expand sequences with non-specific nucleobases.
- */
-void expand_sites(const std::vector<std::string>& sites,
-                  std::vector<std::string>& seqs_out,
-                  const bool& add_rev_comp = true) {
-
-    uint32 n_combs = 1;
-    for (uint32 i = 0; i < sites.size(); i++) n_combs *= sites[i].size();
-    seqs_out.reserve(n_combs);
-
-    for (uint32 i = 0; i < sites[0].size(); i++) {
-        seqs_out.push_back(std::string(1, sites[0][i]));
-    }
-
-    for (uint32 i = 1; i < sites.size(); i++) {
-        const std::string& site_i(sites[i]);
-        uint32 n = seqs_out.size();
-        for (uint32 j = 1; j < site_i.size(); j++) {
-            for (uint32 k = 0; k < n; k++) {
-                seqs_out.push_back(seqs_out[k] + site_i[j]);
-            }
-        }
-        for (uint32 k = 0; k < n; k++) {
-            seqs_out[k] += site_i[0];
-        }
-    }
-    // Add all reverse complements, whether or not they already exist in the output vector
-    if (add_rev_comp) {
-        seqs_out.reserve(n_combs * 2);
-        for (uint32 i = 0; i < n_combs; i++) {
-            std::string rc = seqs_out[i];
-            rev_comp(rc);
-            seqs_out.push_back(rc);
-        }
-    }
-
-    return;
-}
-
-
-
-
-//' Expand sequences for reverse complements and for non-specific nucleobases.
-//'
-//'
-//' @noRd
-//'
-//[[Rcpp::export]]
-std::vector<std::string> expand_seqs(const std::vector<std::string>& seqs) {
-
-    std::vector<std::string> out;
-    std::string bases = "TCAG";
-
-    for (uint32 i = 0; i < seqs.size(); i++) {
-        const std::string& seq(seqs[i]);
-        // See if the sequence is degenerate (i.e., ambiguous) or not:
-        bool degenerate = false;
-        for (const char& c : seq) {
-            if (bases.find(c) == std::string::npos) degenerate = true;
-        }
-        if (!degenerate) {
-            out.push_back(seq);
-            std::string rc = seq;
-            rev_comp(rc);
-            if (rc != seq) out.push_back(rc);
-        } else {
-            std::vector<std::string> sites(seq.size());
-            for (uint32 j = 0; j < seq.size(); j++) {
-                sites[j] = digest::deg_bases[seq[j]];
-            }
-            // Create all combos of these sites and add to `out`
-            expand_sites(sites, out);
-        }
-    }
-    // Remove duplicates:
-    unique_(out);
-
-    return out;
-}
-
-
-
-
-
-
-/*
- Search for multiple substrings simulateously, change the `search_obj` object so that it
- stores...
- (1) the first position in `s` that matches one of the strings in `bind_sites` and
- (2) the index in `bind_sites` for the site that matches to a position in `s`
- */
-void multi_search(
-        MultiOut& search_obj,
-        const DigestInfo& dinfo,
-        const std::string& s,
-        const uint32& last_pos) {
-
-    search_obj.reset();
-
-    std::string::size_type new_pos;
-
-    for (uint32 j = 0; j < dinfo.bind_sites.size(); j++) {
-        new_pos = s.find(dinfo.bind_sites[j], last_pos);
-        if (new_pos < search_obj.new_pos) {
-            search_obj.new_pos = new_pos;
-            search_obj.j = j;
-        }
-    }
-
-    return;
-}
-
-
-// ------------------
-// Digest one string
-// ------------------
-
-/*
- (`pos_mod` is to modify positions because `s` is a chunk not from the start of the
-  sequence.)
- */
-void digest_seq(std::deque<uint32>& pos_vec,
-                uint32& start_pos,
-                const std::string& s,
-                const DigestInfo& dinfo,
-                const uint32& pos_mod = 0,
-                const uint32& end = 0) {
-
-    uint32 cut_site_pos;
-    MultiOut search_obj;
-
-    multi_search(search_obj, dinfo, s, start_pos);
-
-    if (end == 0) {
-        while (search_obj.new_pos != std::string::npos) {
-            cut_site_pos = search_obj.new_pos + dinfo.len5s[search_obj.j] + pos_mod;
-            pos_vec.push_back(cut_site_pos);
-            start_pos = search_obj.new_pos + dinfo.to_skip(search_obj.j);
-            multi_search(search_obj, dinfo, s, start_pos);
-        }
-    } else {
-        while (search_obj.new_pos != std::string::npos && search_obj.new_pos <= end) {
-            cut_site_pos = search_obj.new_pos + dinfo.len5s[search_obj.j] + pos_mod;
-            pos_vec.push_back(cut_site_pos);
-            start_pos = search_obj.new_pos + dinfo.to_skip(search_obj.j);
-            multi_search(search_obj, dinfo, s, start_pos);
-        }
-    }
-
-    return;
-}
-
-
-
-
-/*
- =============================================================================
- =============================================================================
- =============================================================================
- =============================================================================
-
- Variant digestion
-
- =============================================================================
- =============================================================================
- =============================================================================
- =============================================================================
- */
-
-
-
-
-// Digest one variant genome's sequence
-
-void VarSeqDigest::digest(const DigestInfo& dinfo) {
-
-    // Reset these if it's been digested before
-    start = 0;
-    if (!cut_sites.empty()) cut_sites.clear();
-
-    if (chunk_size >= var_info.seq_size) {
-        std::string seq = var_info.get_seq_full();
-        digest_seq(cut_sites, start, seq, dinfo);
-        return;
-    }
-
-    // index to the nearest mutation for a given chunk's starting position:
-    uint32 mut_i = 0;
-
-    // Initialize chunk sequence string
-    std::string chunk_str(chunk_size, 'x');
-
-    /*
-     I'm iterating by `chunk_size - (max_size - 1)` to make sure I catch any
-    matches that overlap the "seam" between chunks.
-    */
-    uint32 it = chunk_size - (dinfo.max_size - 1);
-
-    /*
-     The last chunk start position in this sequence (without allowing only overlap
-     at the end)
-     */
-    uint32 last_chunk = var_info.seq_size - dinfo.max_size;
-
-    // Digesting each chunk:
-    for (uint32 chunk_start = 0; chunk_start <= last_chunk; chunk_start += it) {
-        // Filling new chunk string:
-        var_info.set_seq_chunk(chunk_str, chunk_start, chunk_size, mut_i);
-        // Compensating for matches at the end of the previous chunk:
-        if (start > it) {
-            start -= it;
-        } else start = 0;
-        // Digest chunk:
-        digest_seq(cut_sites, start, chunk_str, dinfo, chunk_start);
-    }
-
-    return;
-}
-
-
-
-
-// ------------------
-// Digest all sequences, all variants
-// ------------------
-
-
-
-//' Internal C++ function to digest all sequences for all variants in a variant set.
-//'
-//'
-//'
-//' @param var_set_ptr An external pointer to a C++ \code{VarSet} object
-//'     representing variants from the reference genome.
-//' @inheritParams bind_sites digest_ref
-//' @inheritParams len5s digest_ref
-//' @param chunk_size The size of chunks to divide sequences into when digesting.
-//' @param n_cores The number of cores to use for processing. Defaults to \code{1}.
-//'
-//' @return A list of lists, each sub-list containing multiple vectors representing
-//'     the locations of cut sites for a given variant on a given sequence.
-//'     Indexing the output list would be done as such:
-//'     \code{output_list[[variant_index]][[sequence_index]][position_index]}.
-//'
-//' @noRd
-//'
-// [[Rcpp::export]]
-SEXP digest_var_set(
-        SEXP var_set_ptr,
-        const std::vector<std::string>& bind_sites,
-        const std::vector<uint32>& len5s,
-        const uint32& chunk_size,
-        const uint32& n_cores = 1) {
-
-    XPtr<VarSet> var_set(var_set_ptr);
-
-    const uint32 n_vars = var_set->size();
-    const uint32 n_seqs = var_set->reference->size();
-
-    XPtr<GenomeSetDigest> out_digests(new GenomeSetDigest(n_vars, n_seqs));
-
-    const DigestInfo dinfo(bind_sites, len5s);
-
-    if (chunk_size < (2 * dinfo.max_size - 1)) {
-        stop("chunk size needs to be at least 2 times the maximum binding "
-                 "site length minus 1 (i.e., `chunk_size >= 2 * "
-                 "max(<binding site sizes>) - 1`)");
-    }
-
-
-    #ifdef _OPENMP
-    #pragma omp parallel for default(shared) num_threads(n_cores) schedule(dynamic) collapse(2)
-    #endif
-    for (uint32 v = 0; v < n_vars; v++) {
-        for (uint32 s = 0; s < n_seqs; s++) {
-            VarSeqDigest vsd(*var_set, v, s, chunk_size, (*out_digests)[v][s]);
-            vsd.digest(dinfo);
-        }
-    }
-
-    return out_digests;
-}
-
-
-
-
-
-
-
-
-
-
-
-/*
- =============================================================================
- =============================================================================
- =============================================================================
- =============================================================================
-
- Reference genome digestion
-
- =============================================================================
- =============================================================================
- =============================================================================
- =============================================================================
- */
-
-
-
-
-// Digest chunk of a sequence
-
-void RefSeqChunk::digest(const DigestInfo& dinfo) {
-
-    // Reset this if it's been digested already
-    if (!cut_sites.empty()) cut_sites.clear();
-
-    /*
-     `next_start` is in the RefSeqChunk class and keeps track of earliest possible
-     start for the next chunk in this sequence (`start` is const, `next_start` isn't):
-     */
-    next_start = start;
-
-    digest_seq(cut_sites, next_start, ref, dinfo, 0, end);
-
-    return;
-}
-
-
-/*
- Make sure no positions overlap in the "seam" between two chunks.
- `prev` should come before the focal `RefSeqChunk`
- */
-void RefSeqChunk::merge_seam(const RefSeqChunk& prev, const DigestInfo& dinfo) {
-
-    // If they're not on the same sequence, don't do anything
-    if (prev.seq != seq) return;
-    // If there were no cut sites on this chunk or the previous one, don't do anything
-    if (cut_sites.empty() || prev.cut_sites.empty()) return;
-
-    std::deque<uint32>& pos_vec2(cut_sites);
-
-    // Earliest possible starting position in `pos_vec2`
-    uint32 start_pos = prev.next_start;
-
-    if (pos_vec2.front() >= start_pos) return;
-
-    // First remove position(ref) from `pos_vec2` that are < `start_pos`
-    while (pos_vec2.front() < start_pos) {
-        pos_vec2.pop_front();
-        if (pos_vec2.empty()) break;
-    }
-
-    // Now search through sequence again until positions are "merged"
-    MultiOut search_obj;
-    multi_search(search_obj, dinfo, ref, start_pos);
-    while (search_obj.new_pos != std::string::npos) {
-        uint32 cut_site_pos = search_obj.new_pos + dinfo.len5s[search_obj.j];
-        while (pos_vec2.front() < cut_site_pos) pos_vec2.pop_front();
-        if (pos_vec2.front() == cut_site_pos) {  // <---------- how I'm defining "merged"
-            break;
-        } else {
-            pos_vec2.push_front(cut_site_pos);
-        }
-        start_pos = search_obj.new_pos + dinfo.to_skip(search_obj.j);
-        multi_search(search_obj, dinfo, ref, start_pos);
-    }
-
-    /*
-     If we've changed the whole deque of cut sites, then adjust the `next_start` field
-     (this should virtually never happen with adequately large chunk sizes)
-     */
-    if (start_pos > next_start) next_start = start_pos;
-
-    return;
-}
-
-
-
-
-
-
-//' Internal C++ function to digest all sequences in a reference genome.
-//'
-//'
-//'
-//' @param ref_genome_ptr An external pointer to a C++ \code{RefGenome} object
-//'     representing the reference genome.
-//' @param bind_sites Vector of enzyme full recognition site(s).
-//' @param len5s A vector of the numbers of characters of the prime5 sites for each
-//'     recognition site.
-//' @param chunk_size Size of chunks to break sequences into for processing.
-//'     This value is ignored if it's set to zero.
-//'     Ideally this is set to a value that results in a number of chunks divisible by
-//'     the number of cores you're using, and is most useful when `n_cores` is greater
-//'     than the number of scaffolds.
-//'     Breaking into increasingly small chunks results in increasing overhead, so
-//'     beware of making this argument very small.
-//'     Reference genome sequences are not copied during this function, so using
-//'     this argument for a reference genome does NOT decrease memory usage
-//'     appreciably.
-//'     Defaults to \code{0}.
-//' @param n_cores The number of cores to use for processing. This value is ignored
-//'     if the input reference genome is merged and \code{chunk_size == 0}.
-//'     Defaults to \code{1}.
-//'
-//' @return A list of vectors, each vector representing the locations of cut sites
-//'     on a given sequence.
-//'     Indexing the output list would be done as such:
-//'     \code{output_list[[sequence_index]][position_index]}.
-//'
-//' @noRd
-//'
-//[[Rcpp::export]]
-SEXP digest_ref(
-        SEXP ref_genome_ptr,
-        const std::vector<std::string>& bind_sites,
-        const std::vector<uint32>& len5s,
-        const uint32& chunk_size = 0,
-        const uint32& n_cores = 1) {
-
-    const XPtr<RefGenome> ref_genome(ref_genome_ptr);
-
-    XPtr<GenomeDigest> out_digest_xptr(new GenomeDigest(ref_genome->size()));
-    GenomeDigest& out_digest(*out_digest_xptr);
-
-    DigestInfo dinfo(bind_sites, len5s);
-
-    if (chunk_size > 0) {
-        if (chunk_size < (2 * dinfo.max_size - 1)) {
-            stop("chunk size needs to be at least 2 times the maximum binding "
-                     "site length minus 1 (i.e., `chunk_size >= 2 * "
-                     "max(<binding site sizes>) - 1`)");
-        }
-        /*
-         I'm iterating by `chunk_size - (max_size - 1)` to make sure I catch any
-        matches that overlap the "seam" between chunks.
-        */
-        uint32 iter_by = chunk_size - (dinfo.max_size - 1);
-        // Set up temporary deque containing a deque of RefSeqChunk objects:
-        std::deque<RefSeqChunk> chunk_dq;
-        for (uint32 i = 0; i < ref_genome->size(); i++) {
-            const std::string& seq((*ref_genome)[i].nucleos);
-
-            if (chunk_size >= seq.size()) {
-                RefSeqChunk tmp_sc(seq, i, 0, chunk_size);
-                chunk_dq.push_back(tmp_sc);
-            } else {
-                /*
-                 The last chunk start position (without allowing only overlap
-                 at the end):
-                 */
-                uint32 last_chunk = seq.size() - dinfo.max_size;
-                for (uint32 s = 0; s < last_chunk; s += iter_by) {
-                    RefSeqChunk tmp_sc(seq, i, s, chunk_size);
-                    chunk_dq.push_back(tmp_sc);
-                }
-            }
-        }
-
-        #ifdef _OPENMP
-        #pragma omp parallel for default(shared) num_threads(n_cores) schedule(static)
-        #endif
-        for (uint32 i = 0; i < chunk_dq.size(); i++) {
-            chunk_dq[i].digest(dinfo);
-        }
-
-        /*
-         Now check for points over the seams and make sure they aren't too close to
-         each other.
-         */
-        #ifdef _OPENMP
-        #pragma omp parallel for default(shared) num_threads(n_cores) schedule(static)
-        #endif
-        for (uint32 i = 1; i < chunk_dq.size(); i++) {
-            chunk_dq[i].merge_seam(chunk_dq[(i-1)], dinfo);
-        }
-
-        /*
-         Lastly combine all RefSeqChunk objects for a given sequence together into
-         one deque.
-         */
-        for (uint32 i = 0; i < ref_genome->size(); i++) {
-            std::deque<uint32>& out_dq(out_digest[i]);
-            while (chunk_dq.front().seq == i) {
-                chunk_dq.front().combine(out_dq);
-                chunk_dq.pop_front();
-                if (chunk_dq.empty()) break;
-            }
-        }
-    } else if (ref_genome->merged) {
-        uint32 i = 0, j = 0; // j is only used for the call to digest_seq
-        const std::string& seq((*ref_genome)[i].nucleos);
-        digest_seq(out_digest[i], j, seq, dinfo);
-    } else {
-        #ifdef _OPENMP
-        #pragma omp parallel for default(shared) num_threads(n_cores) schedule(dynamic)
-        #endif
-        for (uint32 i = 0; i < ref_genome->size(); i++) {
-            const std::string& seq_s((*ref_genome)[i].nucleos);
-            uint32 j = 0; // j is only used for the call to digest_seq
-            digest_seq(out_digest[i], j, seq_s, dinfo);
-        }
-    }
-
-    return out_digest_xptr;
-}
-
diff --git a/src/digest.h b/src/digest.h
deleted file mode 100644
index 29cfcfc..0000000
--- a/src/digest.h
+++ /dev/null
@@ -1,209 +0,0 @@
-#ifndef __JACKAL_DIGEST_H
-#define __JACKAL_DIGEST_H
-
-#include <RcppArmadillo.h>
-#include <string> // find, string::npos, string::size_type, ...
-#include <vector> // vector class
-#include <deque> // deque class
-#include <unordered_map> // unordered_map
-
-#include "seq_classes_ref.h"  // Ref* classes
-#include "seq_classes_var.h"  // Var* classes
-
-
-namespace digest {
-
-// Degenerate bases and what nucleotides they represent:
-std::unordered_map<char, std::string> deg_bases = {
-    {'N', "ACGT"},
-    {'M', "AC"},
-    {'R', "AG"},
-    {'W', "AT"},
-    {'Y', "CT"},
-    {'S', "CG"},
-    {'K', "GT"},
-    {'H', "ACT"},
-    {'B', "CGT"},
-    {'V', "ACG"},
-    {'D', "AGT"}
-};
-
-}
-
-
-
-
-/*
- Organizes info needed for digestion
- */
-struct DigestInfo {
-    std::vector<std::string> bind_sites;
-    std::vector<uint32> len5s;
-    uint32 max_size;
-
-    // Constructors
-    DigestInfo() : bind_sites(), len5s(), max_size() {};
-
-    DigestInfo(const std::vector<std::string>& bind_sites_,
-               const std::vector<uint32>& len5s_)
-        : bind_sites(bind_sites_), len5s(len5s_), max_size(bind_sites_[0].size()) {
-
-        for (uint32 i = 1; i < bind_sites_.size(); i++) {
-            if (bind_sites_[i].size() > max_size) max_size = bind_sites_[i].size();
-        }
-
-    };
-
-    // Get the # bp to skip before looking for another match
-    uint32 to_skip(const uint32& j) const noexcept {
-        return bind_sites[j].size();
-    }
-};
-
-
-
-
-
-/*
- When searching for multiple strings simultaneously, this is a way to return the
- first position where one of them match and the string match's index
- */
-struct MultiOut {
-    // fields
-    std::string::size_type new_pos;
-    uint32 j;
-    // Constructor
-    MultiOut() {
-        new_pos = std::string::npos;
-        j = 1000000;
-    }
-    // reset to default values
-    void reset() {
-        new_pos = std::string::npos;
-        j = 1000000;
-    }
-};
-
-
-
-// Info on the digestion of a sequence's chunk
-
-class RefSeqChunk {
-
-public:
-
-    const uint32 seq;    // index of the sequence this refers to
-    const uint32 start;  // index of the starting position in the sequence of this chunk
-    const uint32 end;    // index of the ending position in the sequence of this chunk
-    std::deque<uint32> cut_sites;  // cut sites found
-    /*
-     Earliest possible start for the next chunk in this sequence. This is used
-     when merging the "seam" between this chunk and the one after it.
-     */
-    uint32 next_start;
-
-    RefSeqChunk(const std::string& ref_,
-                const uint32& seq_,
-                const uint32& start_,
-                const uint32& chunk_size)
-        : seq(seq_), start(start_),
-          end(std::min(static_cast<uint32>(start_ + chunk_size - 1),
-                       static_cast<uint32>(ref_.size() - 1))),
-          cut_sites() ,
-          ref(ref_) {};
-
-    // To return the number of cut sites
-    uint32 size() const noexcept {
-        return cut_sites.size();
-    }
-    // For combining with an output deque
-    void combine(std::deque<uint32>& out_dq) {
-        for (uint32 j = 0; j < cut_sites.size(); j++) {
-            out_dq.push_back(cut_sites[j]);
-        }
-    }
-
-    // Digest this chunk
-    void digest(const DigestInfo& dinfo);
-    // Make sure no positions overlap in the "seam" between two chunks.
-    void merge_seam(const RefSeqChunk& prev, const DigestInfo& dinfo);
-
-private:
-    const std::string& ref;  // reference genome string this refers to
-};
-
-
-
-
-// Info on the digestion of a variant's sequence
-class VarSeqDigest {
-
-public:
-
-    const VarSequence& var_info;  // info on this variant's sequence
-    const uint32 var;    // index of the variant this refers to
-    const uint32 seq;    // index of the sequence this refers to
-    const uint32 chunk_size;    // size of chunks to digest sequence by
-    std::deque<uint32>& cut_sites;  // cut sites found
-
-    VarSeqDigest(VarSet& var_info_,
-                 const uint32& var_,
-                 const uint32& seq_,
-                 const uint32& chunk_size_,
-                 std::deque<uint32>& cut_sites_)
-        : var_info(var_info_[var_][seq_]), var(var_), seq(seq_),
-          chunk_size(chunk_size_), cut_sites(cut_sites_), start(0) {};
-
-    // Digest this sequence
-    void digest(const DigestInfo& dinfo);
-
-private:
-    uint32 start;  // starting positions of chunks used as it's digested
-};
-
-
-
-// Used to store a single genome's digestion info:
-
-class GenomeDigest {
-
-public:
-
-    std::vector< std::deque<uint32> > cut_sites;
-
-    GenomeDigest() {}
-    GenomeDigest(const uint32& n_seqs) : cut_sites(n_seqs) {}
-
-    std::deque<uint32>& operator[](const uint& idx) {
-        return cut_sites[idx];
-    }
-    const std::deque<uint32>& operator[](const uint& idx) const {
-        return cut_sites[idx];
-    }
-};
-
-
-// Used to store multiple digestions, like from a `VarSet` object
-class GenomeSetDigest {
-
-public:
-
-    std::vector< GenomeDigest > digestions;
-
-    GenomeSetDigest() {}
-    GenomeSetDigest(const uint32& n_vars) : digestions(n_vars) {}
-    GenomeSetDigest(const uint32& n_vars, const uint32& n_seqs)
-        : digestions(n_vars, GenomeDigest(n_seqs)) {}
-
-    GenomeDigest& operator[](const uint& idx) {
-        return digestions[idx];
-    }
-    const GenomeDigest& operator[](const uint& idx) const {
-        return digestions[idx];
-    }
-};
-
-
-
-
-#endif
diff --git a/tests/testthat/test-R_classes.R b/tests/testthat/test-R_classes.R
index 0bc990a..bac7bbe 100644
--- a/tests/testthat/test-R_classes.R
+++ b/tests/testthat/test-R_classes.R
@@ -39,7 +39,6 @@ test_that(paste("Random sequences using `create_genome` aren't significantly dif
 ref <- ref_genome$new(jackal:::make_ref_genome(seqs))
 test_that("ref_genome class starts with the correct fields", {
     expect_is(ref$genome, "externalptr")
-    expect_null(ref$digests)
 })
 
 test_that("ref_genome class methods", {
@@ -89,7 +88,6 @@ phy <- ape::rcoal(n_vars)
 vars <- create_variants(ref, "phy", phy, mevo_obj = mev)
 test_that("variants class starts with the correct fields", {
     expect_is(vars$genomes, "externalptr")
-    expect_null(vars$digests)
 })