From 7fd9737e57df45c204df650ccb08ade46b6ed71a Mon Sep 17 00:00:00 2001
From: kgalinsky <kgalinsky@a16988d2-67c9-4db9-a897-0603be59747b>
Date: Wed, 28 Oct 2009 21:38:00 +0000
Subject: [PATCH] Updating trunk to 0.4.0

git-svn-id: https://jcvi-tools.svn.sourceforge.net/svnroot/jcvi-tools/JCVI-Translator/trunk@3 a16988d2-67c9-4db9-a897-0603be59747b
---
 Changes                      |    4 +-
 MANIFEST                     |    1 +
 README                       |   17 +-
 lib/JCVI/Translator.pm       | 1976 +++++++++++++++++++---------------
 lib/JCVI/Translator/Utils.pm |  791 +++++++++-----
 t/00-load.t                  |    5 +-
 t/01-translate.t             |   42 +
 t/02-translate_exons.t       |  120 +++
 t/03-translate6.t            |   10 +
 t/04-custom.t                |   27 +
 t/21-nonstop.t               |   47 +
 t/22-getORF.t                |   10 +
 t/23-getCDS.t                |   10 +
 13 files changed, 1848 insertions(+), 1212 deletions(-)
 create mode 100644 t/01-translate.t
 create mode 100644 t/02-translate_exons.t
 create mode 100644 t/03-translate6.t
 create mode 100644 t/04-custom.t
 create mode 100644 t/21-nonstop.t
 create mode 100644 t/22-getORF.t
 create mode 100644 t/23-getCDS.t

diff --git a/Changes b/Changes
index ce4c342..b11633a 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,3 @@
 Revision history for JCVI-Translator
 
-0.01    Date/time
-        First version, released on an unsuspecting world.
-
+0.4.0   Major refactor.
\ No newline at end of file
diff --git a/MANIFEST b/MANIFEST
index 8a1749d..1922701 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4,6 +4,7 @@ META.yml # Will be created by "make dist"
 Makefile.PL
 README
 lib/JCVI/Translator.pm
+lib/JCVI/Translator/Utils.pm
 t/00-load.t
 t/boilerplate.t
 t/pod-coverage.t
diff --git a/README b/README
index bf462f9..816bbaa 100644
--- a/README
+++ b/README
@@ -1,15 +1,8 @@
-JCVI-Translator
-
-The README is used to introduce the module and provide instructions on
-how to install the module, any machine dependencies it may have (for
-example C compilers and installed libraries) and any other information
-that should be provided before the module is installed.
-
-A README file is required for CPAN modules since CPAN extracts the README
-file from a module distribution so that people browsing the archive
-can use it get an idea of the modules uses. It is usually a good idea
-to provide version information here so that people can decide whether
-fixes for the module are worth downloading.
+JCVI::Translator tries to be a robust translator object featuring translation
+tables based off the the ones provided by NCBI
+(http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi).
+Key features include the ability to handle degenerate nucleotides and to
+translate to ambiguous amino acids.
 
 INSTALLATION
 
diff --git a/lib/JCVI/Translator.pm b/lib/JCVI/Translator.pm
index ac018d4..474950a 100644
--- a/lib/JCVI/Translator.pm
+++ b/lib/JCVI/Translator.pm
@@ -1,4 +1,4 @@
-# Translator
+# JCVI::Translator
 #
 # $Author$
 # $Date$
@@ -7,62 +7,59 @@
 
 =head1 NAME
 
-JCVI::Translator - JCVI Translator object
+JCVI::Translator - Translate DNA sequences
 
-=head1 SYNOPSES
+=head1 SYNOPSIS
 
- use JCVI::Translator;
+use JCVI::Translator;
 
- my $translator = new Translator(
-			   id => $id,
-                           name => $name,
-                           tableRef => $tableRef
-			   );
+    my $translator = new JCVI::Translator();
+    my $translator = new JCVI::Translator(11);
+    my $translator = new JCVI::Translator( 12, 'id' );
+    my $translator = new JCVI::Translator( 'Yeast Mitochondrial', 'name' );
+    my $translator = new JCVI::Translator( 'mito', 'name' );
+
+    my $translator = custom JCVI::Translator( \$custom_table );
+    my $translator = custom JCVI::Translator( \$custom_table, 1 );
+
+    $translator->translate( \$seq );
+    $translator->translate( \$seq, { strand => 1 } );
+    $translator->translate( \$seq, { strand => -1 } );
 
 =head1 DESCRIPTION
 
-JCVI::Translator tries to be a robust translator object
-featuring translation tables based off the the ones provided
-by NCBI
+JCVI::Translator tries to be a robust translator object featuring translation
+tables based off the the ones provided by NCBI
 (http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi).
-Key features include the ability to handle degenerate
-nucleotides and to translate to ambiguous amino acids.
+Key features include the ability to handle degenerate nucleotides and to
+translate to ambiguous amino acids.
+
+The way to work with JCVI::Translator is you create a new translator using an
+internal translation table or a provided one, this module will translate DNA
+sequences for you.
 
-The way to work with JCVI::Translator is you create a new
-translator using an internal translation table or a provided
-one, and from there you can perform translations and other
-functions requiring knowledge of the translation table.
+Translator uses interbase coordinates. See below for the difference between
+interbase coordinates and traditional numbering methods:
 
-Translator uses interbase numbering. See below for the
-difference between interbase numbering and traditional
-numbering methods:
+    Traditional   1 2 3 4
+                  A C G T ...
+    Interbase    0 1 2 3 4
 
- Traditional   1 2 3 4
-               A C G T ...
- Interbase    0 1 2 3 4
+Conversion methods between the two methods can depend upon what you are trying
+to do, but the simple way to do this is:
 
-Conversion methods between the two methods can depend upon
-what you are trying to do, but in general, just add 1 to the
-start base for interbase numbering to get the boundaries
-for traditional numbering (i.e. 0-4 in interbase numbering
-corresponds to bases 1-4).
+    strand = 3' end <=> 5' end
+    lower  = min( 5' end, 3' end ) - 1
+    lower  = max( 5' end, 3' end )
 
 For logging, it uses Log::Log4Perl. This needs to be
-initialized to work. See http://log4perl.sourceforge.net/.
+initialized to work.
 
 For parameter validation, uses Params::Validate. This
 introduces a bit of overhead, however, for scripts that are
 fully tested, validation can be disabled. See the
 Params::Validate documentation.
 
-=head1 AUTHOR
-
-Kevin Galinsky, <kgalinsk@jcvi.org>
-
-=head1 FUNCTIONS
-
-=over
-
 =cut
 
 package JCVI::Translator;
@@ -70,157 +67,117 @@ package JCVI::Translator;
 use strict;
 use warnings;
 
-our $VERSION = '0.3.1';
-
-use Log::Log4perl qw(:easy);
-use Params::Validate qw(:all);
+use version;
+our $VERSION = qv('0.4.0');
 
-use JCVI::DNATools qw(%degenerateMap
-    $degenMatch
-    $nucs
-    $nucMatch
-    cleanDNA
-    reverseComplement);
+use base qw(Class::Accessor::Fast);
+__PACKAGE__->mk_accessors(qw(id names _table _starts _reverse));
 
-use JCVI::AATools qw(%ambiguousForward);
-
-my $DEFAULT_ID = 1;
-
-=item new()
-
-=item $translator = new Translator(%params);
-
-Creates a new translator using the given parameters. If no
-parameters are given, the standard translation table will be
-used. The parameters are:
-
- id - id of an internal translation table
- name - name or part of the name of an internal table
- tableRef - a reference to a table string
- complete - does the table string contain all degenerate nucs?
-
-table takes precedence, and the format of the table string should
-reflect that of the internal tables.
-
- name "(\w+(; )?)+"
- name "(\w+(; )?)+"
- id \d+
- ncbieaa "\w+"
- sncbieaa "[M-]+"
- base1 [ACGT]+
- base2 [ACGT]+
- base3 [ACGT]+
+use Log::Log4perl qw(:easy);
+use Params::Validate;
+
+use JCVI::DNATools qw(
+  %degenerate_map
+  $degen_match
+  @nucs
+  $nuc_match
+  cleanDNA
+  reverse_complement
+);
+
+use JCVI::AATools qw(
+  %ambiguous_forward
+  $aa_match
+);
+
+# Defaults. Used by the validation functions.
+our $DEFAULT_ID        = 1;
+our $DEFAULT_TYPE      = 'id';
+our $DEFAULT_COMPLETE  = 0;
+our $DEFAULT_BOOTSTRAP = 1;
+our $DEFAULT_STRAND    = 1;
+our $DEFAULT_PARTIAL   = 0;
+our $DEFAULT_SANITIZED = 0;
+
+=head1 CONSTRUCTORS
 
-Examples:
+=cut
 
- $translator = new Translator(); # Default translator
- $translator = new Translator('id' => 4);
- $translator = new Translator('name' => 'mitochondrial');
- $translator = new Translator('table' =>
-               'name "All Alanines; All the Time"
-                id 9000
-                ncbieaa  "AAAAAAAA"
-                sncbieaa "----M---"
-                base1 AAAAAAAA
-                base2 AACCGGTT
-                base3 ACACACAC'
-               );
+=head2 new
+
+    my $translator = new JCVI::Translator();
+    my $translator = new JCVI::Translator( $id );
+    my $translator = new JCVI::Translator( $id, $type );
+
+This method creates a translator by loading a translation table from the
+internal list. Pass an ID and the type of ID. By default, it will load the
+tranlation table with id 1. The type of ID may be "id" or "name," which
+correspond to the numeric id of the translation table or the long name of the
+translation table. For instance, below are the headers for the first 3
+translation tables.
+
+    {
+    name "Standard" ,
+    name "SGC0" ,
+    id 1 ,
+    ...
+    },
+    {
+    name "Vertebrate Mitochondrial" ,
+    name "SGC1" ,
+    id 2 ,
+    ...
+    },
+    {
+    name "Yeast Mitochondrial" ,
+    name "SGC2" ,
+    id 3 ,
+    ...
+    },
+    ...
+
+By default, the "Standard" translation table will be loaded. You may create a
+translator with this table by calling any the following:
+
+    my $t = new JCVI::Translator();                     # default table
+    my $t = new JCVI::Translator(1);                    # explicitly set id
+    my $t = new JCVI::Translator( 1, 'id' );            # set id and type
+    my $t = new JCVI::Translator( 'Standard', 'name' ); # set name
+    my $t = new JCVI::Translator( 'SGC0', 'name' );     # alternate name
+    my $t = new JCVI::Translator( 'standard', 'name' ); # not case-sensitive
+    my $t = new JCVI::Translator( 'stan', 'name' );     # partial match ok
+
+For partial matches, JCVI::Translator will use the first matching translation
+table.
+
+    my $t = new JCVI::Translator( 'mitochondrial', 'name' );
+
+This will use translation table with ID 2, "Vertebrate Mitochondrial," because
+that is the first match.
 
 =cut
 
 sub new {
-    my $type = shift;
-
-    my %params = validate( @_,
-                           {  id => { default => $DEFAULT_ID,
-                                      regex   => qr/^\d+$/
-                              },
-                              name => { optional => 1,
-                                        type     => SCALAR
-                              },
-                              tableRef => { optional => 1,
-                                            type     => SCALARREF
-                              },
-                              complete => { default => 0,
-                                            regex   => qr/^[01]$/
-                              }
-                           }
-    );
-
-    my $self = { seqRef => undef };
-    bless $self, $type;
-
-    DEBUG("New $type");
-
-    TRACE("Params:");
-    map { TRACE("$_ => $params{$_}") } keys %params;
-
-    my $error;
-
-    ########################################
-    # If a tableref is passed, go directly
-    # to loadTable. Else, go to
-    # loadInternalTable which will then call
-    # loadTable.
-
-    ########################################
-    # Maybe this should be split into two
-    # constructors?
-
-    if ( $params{tableRef} ) {
-        DEBUG('Loading custom table');
-        $error = $self->_loadTable( @params{qw(tableRef complete)} );
-    }
-    else {
-        DEBUG('Loading internal table');
-        $error = $self->_loadInternalTable( @params{qw(id name)} );
-    }
-
-    if ($error) {
-        ERROR("Error: $error");
-        return $error;
-    }
-    else {
-        DEBUG("New $type successful");
-        return $self;
-    }
-}
-
-=item _loadInternalTable()
+    TRACE('new called');
 
-=item $err = $translator->_loadInternalTable($id, $name);
+    my $class = shift;
 
-This method loads a table from the internal list. Gets
-called from "new" if no table string is provided. Passed
-either $id or $name. Not recommend to pass both, but if both
-are passed, then whichever matches first is used.
-
-=cut
-
-sub _loadInternalTable {
-    my $self = shift;
-
-    my ( $id, $name ) = validate_pos( @_, { regex => qr/^\d+$/ }, 0 );
+    my ( $id, $type ) = validate_pos(
+        @_,
+        { default => $DEFAULT_ID },
+        { default => $DEFAULT_TYPE, regex => qr/id|name/ }
+    );
 
-    ########################################
-    # Set up regular expression match for
-    # searching.
+    TRACE( uc($type) . ': ' . $id );
 
-    my $match = $name ? qr/name ".*$name.*"/i : qr/id $id/i;
+    # Get the beginning DATA so that we can seek back to it
+    my $start_pos = tell DATA;
 
-    DEBUG("_loadInternalTable called");
-    TRACE("ID $id")     if ( defined $id );
-    TRACE("Name $name") if ( defined $name );
+    # Set up regular expression for searching.
+    my $match = ( $type eq 'id' ) ? qr/id $id\b/ : qr/name ".*$id.*"/i;
 
+    # Go through every internal table until it matches on id or name.
     my $found = 0;
-
-    # Get the beginning DATA input
-    my $startPos = tell DATA;
-
-    ########################################
-    # Go through every internal table until
-    # it matches on id or name.
-
     local $/ = "}";
     local $_;
     while (<DATA>) {
@@ -230,243 +187,380 @@ sub _loadInternalTable {
         }
     }
 
-    # Reset DATA input
-    seek DATA, $startPos, 0;
+    # Reset DATA
+    seek DATA, $start_pos, 0;
+
+    # Call custom with internal table. Complete is set to 1.
+    return $class->custom( \$_, 1 ) if ($found);
 
-    ########################################
-    # Call loadTable with internal table.
-    # Complete is set to 1.
-    return $found
-        ? $self->_loadTable( \$_, 1 )
-        : 'Translation table not found';
+    # Internal table not matched.
+    ERROR("Table with $type of $id not found");
+    return undef;
 }
 
-=item _loadTable()
+=head2 custom()
+
+    my $translator = $translator->custom( $table_ref );
+    my $translator = $translator->custom( $table_ref, $complete );
+
+Create a translator table based off a passed table reference for custom
+translation tables. Loads degenerate nucleotides if $complete isn't set (this
+can take a little time). The format of the translation table should reflect
+those of the internal tables:
+
+    name "Names separated; by semicolons"
+    name "May have multiple lines"
+    id 99
+    ncbieaa  "AMINOACIDS...",
+    sncbieaa "-M--------..."
+    -- Base1  AAAAAAAAAA...
+    -- Base2  AAAACCCCGG...
+    -- Base3  ACGTACTGAC...
 
-=item $err = $translator->loadTable($tableRef, $complete);
+JCVI::Translator is a bit more permissive, see the $TABLE_REGEX regular
+expression to see that actual format.
+
+Examples:
+
+    $translator = new Translator(
+        table_ref => \'name "All Alanines; All the Time"
+                       id 9000
+                       ncbieaa  "AAAAAAAA"
+                       sncbieaa "----M---"
+                       base1     AAAAAAAA
+                       base2     AACCGGTT
+                       base3     ACACACAC'
+    );
 
-Loads a table based off a passed table reference for custom
-translation tables. Gets called from "new" if a table string
-is provided. Loads degenerate nucs if $complete isn't set.
+    $translator = new Translator(
+        table_ref => \$table,
+        complete  => 1
+    );
 
 =cut
 
-sub _loadTable {
-    my $self = shift;
+# Regular expression which should match translation tables and also extracts
+# relevant information.
+our $TABLE_REGEX = qr/
+                        ( (?:name\s+".+?".*?) + )
+                        id\s+(\d+).*
+                        ncbieaa\s+"([a-z*]+)".*
+                        sncbieaa\s+"([a-z-]+)".*
+                        base1\s+([a-z]+).*
+                        base2\s+([a-z]+).*
+                        base3\s+([a-z]+).*
+                     /isx;
 
-    my ( $tableRef, $complete )
-        = validate_pos( @_,
-                        { type => SCALARREF },
-                        { default => 0,
-                          regex   => qr/^[01]$/
-                        }
-        );
+sub custom {
+    TRACE('custom called');
+
+    my $class = shift;
+
+    my ( $table_ref, $complete ) = validate_pos(
+        @_,
+        { type => Params::Validate::SCALARREF },
+        {
+            default => $DEFAULT_COMPLETE,
+            regex   => qr/^[01]$/
+        }
+    );
+
+    # Match the table or return undef.
+    unless ( $$table_ref =~ $TABLE_REGEX ) {
+        ERROR( 'Translation table is in invalid format', $$table_ref );
+        return undef;
+    }
+
+    # Store the data that has been stripped using descriptive names;
+    my $names    = $1;
+    my $id       = $2;
+    my $residues = $3;
+    my $starts   = $4;
+    my $base1    = $5;
+    my $base2    = $6;
+    my $base3    = $7;
 
-    DEBUG("_loadTable called");
+    my $self = $class->_new();
 
-    ( $$self{info}{id} ) = $$tableRef =~ /id\s+(\d+)/i;
+    $self->id($id);
 
-    ########################################
-    # Extract each name, massage, and push
-    # it onto names array
-    while ( $$tableRef =~ /name\s+"(.+?)"/gis ) {
+    # Extract each name, massage, and push it onto names array
+    while ( $names =~ /"(.+?)"/gis ) {
         my @names = split( /;/, $1 );
+        local $_;
         foreach (@names) {
             s/^\s+//;
             s/\s+$//;
             s/\n/ /g;
             s/\s{2,}/ /g;
-            push @{ $$self{info}{names} }, $_ if $_;
+            push @{ $self->names }, $_ if $_;
         }
     }
 
-    ########################################
-    # Pull each string to be used for
-    # translation table generation.
+    # Store all the hashes in $self so we don't have to keep using accessors
+    my $forward_hash    = $self->_table->[0];
+    my $rc_forward_hash = $self->_table->[1];
 
-    my ($residues) = $$tableRef =~ /ncbieaa.+?([a-z*]+)/i;
-    my ($starts)   = $$tableRef =~ /sncbieaa.+?([a-z-]+)/i;
-    my ($base1)    = $$tableRef =~ /base1.+?([a-z]+)/i;
-    my ($base2)    = $$tableRef =~ /base2.+?([a-z]+)/i;
-    my ($base3)    = $$tableRef =~ /base3.+?([a-z]+)/i;
+    my $starts_hash    = $self->_starts->[0];
+    my $rc_starts_hash = $self->_starts->[1];
 
-    ########################################
-    # Chop is used to efficiently get the
-    # last character from each string; like
-    # pop, but for strings.
+    my $reverse_hash    = $self->_reverse->[0];
+    my $rc_reverse_hash = $self->_reverse->[1];
 
+    # Chop is used to efficiently get the last character from each string
     while ( my $residue = uc( chop $residues ) ) {
         my $start = uc( chop $starts );
         my $codon = uc( chop($base1) . chop($base2) . chop($base3) );
 
-        my $rc_codon_ref = reverseComplement( \$codon );
+        my $rc_codon_ref = reverse_complement( \$codon );
 
+        # If the residue is valid, store it
         if ( $residue ne 'X' ) {
-            $$self{forward}{$codon}            = $residue;
-            $$self{rc_forward}{$$rc_codon_ref} = $residue;
+            $forward_hash->{$codon}            = $residue;
+            $rc_forward_hash->{$$rc_codon_ref} = $residue;
+
+            push @{ $reverse_hash->{$residue} },    $codon;
+            push @{ $rc_reverse_hash->{$residue} }, $$rc_codon_ref;
         }
+
+        # If the start is valid, store it
         if ( ( $start ne '-' ) ) {
-            $$self{starts}{$codon}            = $start;
-            $$self{rc_starts}{$$rc_codon_ref} = $start;
-        }
+            $starts_hash->{$codon}            = $start;
+            $rc_starts_hash->{$$rc_codon_ref} = $start;
 
-        push @{ $$self{reverse}{$residue} },    $codon;
-        push @{ $$self{rc_reverse}{$residue} }, $$rc_codon_ref;
+            push @{ $reverse_hash->{start} },    $codon;
+            push @{ $rc_reverse_hash->{start} }, $$rc_codon_ref;
+        }
     }
 
-    ########################################
-    # Use the printTable command to fill in
-    # the gaps in the translation table
-    # unless the table has been marked as
-    # complete.
+    # Unroll the translation table unless it has been marked complete
+    $self->bootstrap() unless ($complete);
 
-    $self->printTable() unless $complete;
+    return $self;
+}
 
-    return $$self{forward} ? 0 : 'Translation table could not be loaded';
+# Helper constructor. Instantiates the object with arrayrefs and hashrefs in
+# the right places
+sub _new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(
+        {
+            names    => [],
+            _table   => [],
+            _starts  => [],
+            _reverse => []
+        }
+    );
+
+    foreach my $func (qw( _table _starts _reverse )) {
+        foreach my $rc ( 0 .. 1 ) {
+            $self->$func->[$rc] = {};
+        }
+    }
+
+    return $self;
 }
 
-=item loadSequence
+=head1 METHODS
+
+=cut
+
+=head2 add_translation
+
+    $translator->add_translation( $codon, $residue );
+    $translator->add_translation( $codon, $residue, \%params );
+
+Add a codon-to-residue translation to the translation table. $start inidicates
+if this is a start codon.
 
-=item $translator->loadSequence(\$seqRef);
+Examples:
 
-Translator can cache sequences internally to be translated
-from later. This can save time when translating from the
-same sequence multiple times, but not all the ranges are
-available immediately.
+    # THESE AREN'T REAL!!!
+    $translator->add_translation( 'ABA', 'G' );
+    $translator->add_translation( 'ABA', 'M', 1 );
 
 =cut
 
-sub loadSequence {
+sub add_translation {
+    TRACE('add_translation called');
+
     my $self = shift;
 
-    my ( $seqRef, $sanitized ) = validate_pos(
+    my ( $codon, $residue, @p );
+
+    ( $codon, $residue, $p[0] ) = validate_pos(
         @_,
-        {  type     => SCALARREF,
-           callback => {
-               'Sequence contains invalid nucleotides' =>
-                   sub { ${ $_[0] } !~ /[^$nucMatch]/ }
-           }
-        },
-        0
+        { regex => qr/^${nuc_match}{3}$/ },
+        { regex => qr/^$aa_match$/ },
+        { type  => Params::Validate::HASHREF, default => {} }
+
     );
 
-    cleanDNA($seqRef) unless ($sanitized);
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => 1,
+                regex   => qr/^[+-]?1$/,
+                type    => Params::Validate::SCALAR
+            },
+            start => {
+                default => 0,
+                regex   => qr/^[01]$/,
+                type    => Params::Validate::SCALAR
+            }
+        }
+    );
+
+    my $codon_ref;
+    my $rc_codon_ref;
+
+    if ( $p{strand} == 1 ) {
+        $codon_ref    = \$codon;
+        $rc_codon_ref = reverse_complement( \$codon );
+    }
+    else {
+        $rc_codon_ref = \$codon;
+        $codon_ref    = reverse_complement( \$codon );
+    }
 
-    DEBUG('loadSequence called');
-    TRACE( 'Sequence starts with ' . substr $$seqRef, 0, 5 );
-    TRACE( 'Sequence length is ' . length $$seqRef );
+    # Store residue in the starts or regular translation table.
+    my $table = $p{start} ? '_starts' : '_table';
+    $self->$table->[0]->{$$codon_ref}    = $residue;
+    $self->$table->[1]->{$$rc_codon_ref} = $residue;
 
-    $$self{seqRef} = $seqRef;
+    # Store the reverse lookup
+    $residue = 'start' if ( $p{start} );
+    push @{ $self->_reverse->[0]->{$residue} }, $$codon_ref;
+    push @{ $self->_reverse->[1]->{$residue} }, $$rc_codon_ref;
 }
 
-=item clearSequence
+=head2 bootstrap
 
-=item $translator->clearSequence(\$seqRef);
+    $translator->bootstrap();
 
-Clear cached sequences.
+Bootstrap the translation table. Find every possible translation, even those
+that involve degenerate nucleotides or ambiguous amino acids.
 
 =cut
 
-sub clearSequence {
-    my $self = shift;
+sub bootstrap {
+    TRACE('bootstrap called');
 
-    DEBUG('clearSequence called');
+    my $self = shift;
 
-    undef $$self{seqRef};
+    # Loop through every nucleotide combination and run _translate_codon on
+    # each.
+    foreach my $n1 (@nucs) {
+        foreach my $n2 (@nucs) {
+            foreach my $n3 (@nucs) {
+                $self->_translate_codon( $n1 . $n2 . $n3, $self->_table->[0] );
+                $self->_translate_codon(
+                    $n1 . $n2 . $n3,
+                    $self->_starts->[0],
+                    { start => 1 }
+                );
+            }
+        }
+    }
 }
 
-=item printTable()
+=head2 table_string
+
+    my $table_string_ref = $translator->_table_string();
+    my $table_string_ref = $translator->_table_string( $bootstrap );
 
-=item $tableString = $translator->printTable();
+Returns the table string. $bootstrap specifies whether or not this table should
+try to bootstrap itself using the bootstrap function above. By default, it is
+1.
 
-Returns the complete, absolute version of the table string.
-Unrolls all degenerates and everything. Due to the caching
-nature of translateCodon, this routine will also store all
-possibilities for translation.
+Examples:
+
+    my $table_string_ref = $translator->_table_string();
+    my $table_string_ref = $translator->_table_string(0); # To not bootstrap
 
 =cut
 
-sub printTable {
+sub table_string {
+    TRACE('table_string called');
+
     my $self = shift;
 
-    DEBUG('printTable called');
-
-    my $names = join( '; ', @{ $$self{info}{names} } );
-    my ( $residues, $starts, @base, @b );
-
-    my @NUCS = split '', $nucs;
-
-    ########################################
-    # Rigorous loop unrolls the nucleotide
-    # table. This causes the subroutine to
-    # take a while to run.
-
-    foreach (@NUCS) {
-        $b[0] = $_;
-        foreach (@NUCS) {
-            $b[1] = $_;
-            foreach (@NUCS) {
-                $b[2] = $_;
-                my $aa = $self->translateCodon( join( '', @b ), 1 );
-                my $st = $self->translateCodon( join( '', @b ), 1, 1 );
-
-                unless (    ( $aa eq 'X' )
-                         && ( $st eq '-' ) )
-                {
-                    $residues .= $aa;
-                    $starts   .= $st;
-                    $base[$_] .= $b[$_] foreach ( 0 .. 2 );
-                }
-            }
-        }
+    my $bootstrap =
+      validate_pos( @_,
+        { default => $DEFAULT_BOOTSTRAP, regex => qr/^[01]$/ } );
+
+    # Bootstrap if necessary
+    $self->bootstrap() if ($bootstrap);
+
+    # Generate the names string
+    my $names = join( '; ', @{ $self->names } );
+
+    my ( $residues, $starts );    # starts/residues string
+    my @base = (undef) x 3;       # this will store the bases for the loop
+
+    # Loop over all stored codons. Sort the codons in the translation table and
+    # starts table, then use grep to get the unique ones with the help of $prev
+    # which stores the previous value
+    my $prev = '';
+    foreach my $codon (
+        grep ( ( $_ ne $prev ) && ( $prev = $_ ),
+            sort { $a cmp $b } (
+                keys( %{ $self->_table->[0] } ),
+                keys( %{ $self->_starts->[0] } )
+              ) )
+      )
+    {
+        $residues .= $self->_table->[0]->{$codon}  || 'X';
+        $starts   .= $self->_starts->[0]->{$codon} || '-';
+
+        # Chop up the codon because the bases are stored on separate lines
+        $base[ -$_ ] .= chop $codon foreach ( 1 .. 3 );
     }
 
-    return
-        join( "\n",
-              '{',
-              qq(name "$names" ,),
-              qq(id $$self{info}{id} ,),
-              qq(ncbieaa  "$residues",),
-              qq(sncbieaa "$starts"),
-              map( {"-- Base$_  $base[$_ - 1]"} ( 1 .. 3 ) ),
-              '}' );
+    # Generate the string
+    my $string = join( "\n",
+        '{',
+        qq(name "$names" ,),
+        qq(id $self->{id} ,),
+        qq(ncbieaa  "$residues",),
+        qq(sncbieaa "$starts"),
+        map( {"-- Base$_  $base[$_ - 1]"} ( 1 .. 3 ) ),
+        '}' );
+
+    return \$string;
 }
 
-=item translate()
+=head2 translate
 
-=item $pepRef = $translator->translate(%params);
+    $pep_ref = $translator->translate( $seq_ref, \%params );
 
-The basic function of this module. Translate the specified
-region of the sequence and return a reference to the
-translated string. The parameters are:
+The basic function of this module. Translate the specified region of the
+sequence (passed as $seq_ref) and return a reference to the translated string.
+The parameters are:
 
- strand - 1 or -1; mandatory
- lower  - integer between 0 and length; optional
-          defaults to 0
- upper  - integer between 0 and length; optional
-          defaults to length
- seqRef - reference to a string; optional
- sequence - string; optional
- partial - 0 or 1; optional
+    strand:     1 or -1; default = 1
+    lower:      integer between 0 and length; default = 0
+    upper:      integer between 0 and length; default = length
+    partial:    0 or 1; default = 0
+    sanitized:  0 or 1; default = 0
 
-Translator uses interbase coordinates. lower and upper are
-optional parameters such that:
+Translator uses interbase coordinates. lower and upper are optional parameters
+such that:
 
- 0 <= lower <= upper <= length
+    0 <= lower <= upper <= length
 
-Translator will log and die if those conditions are not
-satisfied. strand is the only mandatory parameter. sequence
-and seqRef are both optional. If both are provided, sequence
-takes priority. If neither is provided, then Translator will
-use a previously loaded sequence. If no sequence has been
-loaded, translator will log and die.
+Translator will log and die if those conditions are not satisfied.
 
-Partial sets whether or not the sequence is a 5' partial.By
-default, partial is taken to be false, and the translator
-will try to translate the first codon as if it is a start
-codon. If you specify partial, the translator will skip
-that step.
+partial sets whether or not the sequence is a 5' partial. By default, partial
+is taken to be false  and the translator will try to translate the first codon
+as if it were a start codon. You can specify that the sequence is 5' partial
+and the translator will skip that step.
 
+sanitized is a flag translator know that this sequence has been stripped of
+whitespace and that all the codons are capitalized. Otherwise, translator will
+do that in order to speed up the translation process
+(see JCVI::DNATools::cleanDNA).
 
 To translate the following:
 
@@ -474,705 +568,793 @@ To translate the following:
   C G C G C A G G A
     ---------->
 
- $pepRef = $translator->translate(seqRef => \$sequence,
-                                  strand => 1,
-                                  lower  => 1,
-                                  upper  => 7);
+    $pep_ref = $translator->translate(
+        \$sequence,
+        {
+            strand => 1,
+            lower  => 1,
+            upper  => 7
+        }
+    );
 
  0 1 2 3 4 5 6 7 8 9
   C G C G C A G G A
       <----------
 
- $pepRef = $translator->translate(seqRef => \$sequence,
-                                  strand => -1,
-                                  lower  => 2,
-                                  upper  => 8);
+    $pep_ref = $translator->translate(
+        \$sequence,
+        {
+            strand => -1,
+            lower  => 2,
+            upper  => 8
+        }
+    );
 
 Examples:
 
- $pepRef = $translator->translate(strand => -1);
+    my $pep_ref = $translator->translate( \'acttgacgt' );
 
- $pepRef = $translator->translate(strand => 1,
-                                  seqRef => \'acttgacgt');
+    my $pep_ref = $translator->translate( \'acttgacgt', { strand => -1 } );
 
- $pepRef = $translator->translate(strand => -1,
-                                  sequence => 'acttgacgt',
-                                  lower => 2,
-                                  upper => 5);
+    my $pep_ref = $translator->translate(
+        \'acttgacgt',
+        {
+            strand => -1,
+            lower  => 2,
+            upper  => 5
+        }
+    );
 
- $pepRef = $translator->translate(strand => +1,
-                                  seqRef => \'acttgacgt',
-                                  lower => 0,
-                                  upper => 8,
-                                  partial => 1);
+    my $pep_ref = $translator->translate(
+        \'acttgacgt',
+        {
+            strand  => 1,
+            lower   => 0,
+            upper   => 8,
+            partial => 0
+        }
+    );
 
 =cut
 
 sub translate {
-    my $self = shift;
+    TRACE('translate called');
 
-    DEBUG('translate called');
+    my $self = shift;
 
-    my %params = validate(
+    my ( $seq_ref, @p );
+    ( $seq_ref, $p[0] ) = validate_pos(
         @_,
-        {  strand => { default => 1,
-                       regex   => qr/^[+-]?1$/,
-                       type    => SCALAR
-           },
-           upper  => 0,
-           lower  => { default => 0 },
-           seqRef => {
-               optional => 1,
-               type     => SCALARREF,
-               callback => {
-                   'Sequence contains invalid nucleotides' => sub {
-                       ${ $_[0] } !~ /[^$nucMatch]/;
-                       }
-               }
-           },
-           partial   => 0,
-           sanitized => 0
+        { type => Params::Validate::SCALARREF, },
+        { type => Params::Validate::HASHREF, default => {} }
+    );
+
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => $DEFAULT_STRAND,
+                regex   => qr/^[+-]?1$/,
+                type    => Params::Validate::SCALAR
+            },
+            lower => {
+                default   => 0,
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'lower >= 0'          => sub { $_[0] >= 0 },
+                    'lower <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            upper => {
+                default   => length($$seq_ref),
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'upper >= 0'          => sub { $_[0] >= 0 },
+                    'upper <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            partial   => { default => $DEFAULT_PARTIAL },
+            sanitized => { default => $DEFAULT_SANITIZED }
         }
     );
 
-    unless ( defined $params{upper} ) {
-        $params{upper} =
-            length( defined $params{seqRef}
-                    ? ${ $params{seqRef} }
-                    : $self->{seqRef}
-            );
+    # Die if upper < lower
+    if ( $p{upper} < $p{lower} ) {
+        FATAL "Upper $p{upper} < Lower $p{lower}";
+        die "Upper $p{upper} < Lower $p{lower}";
     }
 
-    $params{exons} = [ [ $params{lower}, $params{upper} ] ];
-    delete $params{lower};
-    delete $params{upper};
+    $seq_ref = cleanDNA($seq_ref) unless ( $p{sanitized} );
+
+    # These are necessary for the _translate function
+    my $prep = $self->_prepare( $p{strand} );
+    my $ends = $self->_endpoints( @p{qw(strand lower upper)} );
+
+    my $peptide = '';
 
-    return $self->translateExons(%params);
+    $self->_start( $seq_ref, \$peptide, $ends, $prep ) unless ( $p{partial} );
+    $self->_translate( $seq_ref, \$peptide, $ends, $prep );
+
+    return \$peptide;
 }
 
-=item translate6()
+=head2 translate6
 
-=item $pepRefs = $translator->translate6($seqRef);
+    my $pep_refs = $translator->translate6( $seq_ref );
+    my $pep_refs = $translator->translate6( $seq_ref, \%params );
 
-Translate the sequence in every possible way. Returns an
-array reference of all the translations. The
-structure of the array is as follows:
+Translate the sequence in every possible way. Returns an array reference of all
+the translations. The structure of the array is as follows:
 
- 0: ---------->
- 1:  --------->
- 2:   -------->
-    NNNN...NNNN
- 3: <----------
- 4: <---------
- 5: <--------
+    0: ---------->
+    1:  --------->
+    2:   -------->
+       NNNN...NNNN
+    3: <----------
+    4: <---------
+    5: <--------
 
-I should stress that $$pepRefs[x] is not a sequence, but a
-reference to a sequence. So to access a sequence, you need
-to do the following:
+The parameters are similar to those use in translate:
 
- $sequence = ${$$pepRefs[x]}
+    lower:      integer between 0 and length; default = 0
+    upper:      integer between 0 and length; default = length
+    partial:    0 or 1; default = 0
+    sanitized:  0 or 1; default = 0
 
 Example:
 
- $pepRefs = $translator->translate6(\'acttgacgt');
+    $pep_refs = $translator->translate6(\'acttgacgt');
 
 Output:
 
- $pepRefs = [$pepRef0,
-             $pepRef1,
-             $pepRef2,
-             $reversePepRef0,
-             $reversePepRef1,
-             $reversePepRef2]
+    $pep_refs = [
+                    $pep1,
+                    $pep2,
+                    $pep3,
+                    $reverse_pep1,
+                    $reverse_pep2,
+                    $reverse_pep3
+                ]
 
 =cut
 
 sub translate6 {
-    my ( $self, $seqRef, $sanitized ) = @_;
+    TRACE('translate6 called');
+
+    my $self = shift;
 
-    DEBUG("translate6 called");
+    my ( $seq_ref, @p );
+    ( $seq_ref, $p[0] ) = validate_pos(
+        @_,
+        { type => Params::Validate::SCALARREF },
+        { type => Params::Validate::HASHREF, default => {} }
+    );
+
+    my %p = validate(
+        @p,
+        {
+            lower => {
+                default   => 0,
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'lower >= 0'          => sub { $_[0] >= 0 },
+                    'lower <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            upper => {
+                default   => length($$seq_ref),
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'upper >= 0'          => sub { $_[0] >= 0 },
+                    'upper <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            partial   => { default => $DEFAULT_PARTIAL },
+            sanitized => { default => $DEFAULT_SANITIZED }
+        }
+    );
+
+    $seq_ref = cleanDNA($seq_ref) unless ( $p{sanitized} );
+
+    my @peptides;
 
-    cleanDNA($seqRef) unless ($sanitized);
+    foreach my $strand ( -1, 1 ) {
 
-    my @pepRefs;
+        # We only need to calculate prep once for a given strand
+        my $prep = $self->_prepare($strand);
+        my $rc   = $prep->[0];                 # True if reverse complement
+        my $fw   = ( $rc + 1 ) % 2;            # True if forward strand
+        foreach ( 0 .. 2 ) {
 
-    push @pepRefs,
-        $self->translate( seqRef    => $seqRef,
-                          lower     => $_,
-                          strand    => 1,
-                          sanitized => 1
-        ) foreach ( 0 .. 2 );
-    push @pepRefs,
-        $self->translate( seqRef    => $seqRef,
-                          upper     => length($$seqRef) - $_,
-                          strand    => -1,
-                          sanitized => 1
-        ) foreach ( 0 .. 2 );
+            # Calculate endpoints and translate
+            my $ends = $self->_endpoints(
+                $strand,
+                $p{lower} + $fw * $_,
+                $p{upper} - $rc * $_
+            );
+            $self->_start( $seq_ref, \$peptides[ $rc * 3 + $_ ], $ends, $prep )
+              unless ( $p{partial} );
+            $self->_translate( $seq_ref, \$peptides[ $rc * 3 + $_ ],
+                $ends, $prep );
+        }
+    }
 
-    return \@pepRefs;
+    return \@peptides;
 }
 
-=item translateExons()
+=head2 translate_exons
 
-=item $pepRef = translateExons(%params);
+    my $pep_ref = translate_exons( $str_ref, $exons_array_ref );
+    my $pep_ref = translate_exons( $str_ref, $exons_array_ref, \%params );
 
 Translate a gene spanning multiple exons. Paramters are:
 
- strand: 1 or -1; mandatory
- seqRef: reference to sequence; optional if one is loaded
- partial: '0' or '1'; optional, defaults to '0'
+    strand:     1 or -1; default = 1
+    partial:    0 or 1;  default = 0
+    sanitized:  0 or 1;  default = 0
 
 Input:
 
- $exonRangesRef = [
-                   [$start0, $stop0],
-                   [$start1, $stop1],
-                    ...
-                  ]
+    $exons_array_ref = [
+                            [$start0, $stop0],
+                            [$start1, $stop1],
+                            ...
+                       ];
+
+The order of the exons in the array doesn't matter. translate_exons will sort
+the exons.
 
 Example:
 
- $pepRef = translateExons(\'actgcat', [ [0,2], [3,7] ], 1);
+    $pep_ref = translate_exons(\'actgcat', [ [0,2], [3,7] ]);
+    $pep_ref = translate_exons(\'actgcat', [ [0,2], [3,7] ], { strand => -1});
 
 =cut
 
-sub translateExons {
+sub translate_exons {
+    TRACE('translate_exons called');
+
     my $self = shift;
 
-    my %params = validate(
+    my ( $seq_ref, $exons, @p );
+    ( $seq_ref, $exons, $p[0] ) = validate_pos(
         @_,
-        {  strand => { regex => qr/^[+-]?1$/,
-                       type  => SCALAR
-           },
-           seqRef => {
-               default  => $$self{seqRef},
-               type     => SCALARREF,
-               callback => {
-                   'Sequence contains invalid nucleotides' => sub {
-                       ${ $_[0] } !~ /[^$nucMatch]/;
-                       }
-               }
-           },
-           exons => {
-               type      => ARRAYREF,
-               callbacks => {
-                   'Bound not an integer' => sub {
-                       foreach my $bounds ( @{ $_[0] } ) {
-                           foreach my $bound (@$bounds) {
-                               return 0 unless ( $bound =~ /^\d+$/ );
-                           }
-                       }
-                       return 1;
-                   },
-                   'Bound out of range' => sub {
-                       foreach my $bounds ( @{ $_[0] } ) {
-                           foreach my $bound (@$bounds) {
-                               return 0
-                                   unless ( ( $bound >= 0 )
-                                    && ( $bound <= length ${ $_[1]{seqRef} } )
-                                   );
-                           }
-                       }
-                       return 1;
-                       }
-               }
-           },
-           partial   => 0,
-           sanitized => 0
-        }
+        { type => Params::Validate::SCALARREF },
+        { type => Params::Validate::ARRAYREF },
+        { type => Params::Validate::HASHREF, default => {} }
     );
 
-    my $seqRef;
-
-    ########################################
-    # Do some further validation.
-
-VALIDATION: {
-        if ( $params{seqRef} ) {
-            $seqRef = $params{seqRef};
-            cleanDNA($seqRef) unless ( $params{sanitized} );
-        }
-        else {
-            $seqRef = $$self{seqRef};
-        }
+    validate_pos(
+        @$exons,
+        (
+            {
+                type      => Params::Validate::ARRAYREF,
+                callbacks => {
+                    'Bound not an integer' => sub {
+                        foreach my $bound ( @{ $_[0] } ) {
+                            return 0 unless ( $bound =~ /^\d+$/ );
+                        }
+                        return 1;
+                    },
+                    'Bound out of range' => sub {
+                        foreach my $bound ( @{ $_[0] } ) {
+                            return 0
+                              unless ( ( $bound >= 0 )
+                                && ( $bound <= length $$seq_ref ) );
+                        }
+                        return 1;
+                    },
+                    'lower <= upper' => sub {
+                        return $_[0][0] <= $_[0][1];
+                      }
+                }
+            }
+          ) x @$exons
+    );
 
-        unless ( defined $seqRef ) {
-            my $logger = get_logger();
-            $logger->logcroak('Sequence undefined');
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => $DEFAULT_STRAND,
+                regex   => qr/^[+-]?1$/,
+                type    => Params::Validate::SCALAR
+            },
+            partial   => { default => $DEFAULT_PARTIAL },
+            sanitized => { default => $DEFAULT_SANITIZED }
         }
-    }
-
-    DEBUG('translateExons called');
-    TRACE("Strand is $params{strand}");
-    TRACE("5' Partial") if ( $params{partial} );
-    TRACE( 'Sequence starts with ' . substr ${ $params{seqRef} }, 0, 5 );
-    TRACE( 'Sequence length is ' . length ${ $params{seqRef} } );
+    );
 
-    my $increment;
-    my $prefix;
-    my $offset;
+    my @exons =
+      sort { ( $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] ) * $p{strand} }
+      @$exons;
 
-    if ( $params{strand} == 1 ) {
-        $increment = 3;
-        $prefix    = '';
-        $offset    = 0;
-    }
-    else {
-        $increment = -3;
-        $prefix    = 'rc_';
-        $offset    = -3;
-    }
-
-    my $peptide  = '';
+    my $prep     = $self->_prepare( $p{strand} );
     my $leftover = '';
+    my $peptide;
 
-EXON: foreach my $i ( 0 .. $#{ $params{exons} } ) {
-        TRACE("Exon $i");
-
-        my ( $lower, $upper );
-
-    VALIDATE: {
-            my $exon
-                = $params{strand} == 1
-                ? $params{exons}[$i]
-                : $params{exons}[ -( $i + 1 ) ];
-
-            ( $lower, $upper ) = validate_pos(
-                @$exon,
-                {  'Lower out of range' => sub {
-                       ( ( 0 <= $_[0] ) && ( $_[0] <= $_[1][1] ) );
-                       }
-                },
-                {  'Upper out of range' => sub {
-                       (     ( $_[1][0] <= $_[0] )
-                          && ( $_[0] <= length $$seqRef ) );
-                       }
-                }
-            );
-
-            TRACE("Lower $lower");
-            TRACE("Upper $upper");
-        }
-
-    LEFTOVER: {
-
-            ########################################
-            # Deal with leftovers (exons that cut
-            # codons) and exons that are short.
-            # If the exon has fewer nucleotides than
-            # what is required to complete the
-            # codon, dump the nucleotides into the
-            # growing codon and then continue.
-            # Otherwise, complete the exon and
-            # increment the start index.
+  EXON: foreach my $exon (@exons) {
+        my ( $lower, $upper ) = @$exon;
 
-            my $togo = 3 - length $leftover;
+      LEFTOVER: {
 
-            if ( ( my $length = $upper - $lower ) <= $togo ) {
-                WARN("Exon very short: $length <= $togo");
+            # Deal with leftovers. These are codons that have been cut by
+            # splicing. In the event that no codon has been cut, the leftover
+            # will be the first codon of the exon.
 
-                ########################################
-                # For forward direction, append to
-                # $leftover, otherwise, prepend.
+            my $to_go = 3 - length($leftover);
 
-                unless ($prefix) {
-                    $leftover .= substr( $$seqRef, $lower, $length );
-                }
-                else {
-                    $leftover
-                        = substr( $$seqRef, $lower, $length ) . $leftover;
-                }
+            # If the exon has fewer nucleotides than what is required to
+            # complete the codon, set $to_go to be the length of that exon.
+            if ( ( my $length = $upper - $lower ) < $to_go ) {
+                $to_go = $length;
+            }
 
-                next EXON;
+            #  Complete the leftover and increment the start index.
+            unless ( $prep->[0] ) {
+                $leftover .= substr( $$seq_ref, $lower, $to_go );
+                $lower += $to_go;
             }
             else {
-                unless ($prefix) {
-                    $leftover .= substr( $$seqRef, $lower, $togo );
-                    $lower += $togo;
-                }
-                else {
-                    $upper -= $togo;
-                    $leftover = substr( $$seqRef, $upper, $togo ) . $leftover;
-                }
+                $upper -= $to_go;
+                $leftover = substr( $$seq_ref, $upper, $to_go ) . $leftover;
             }
+
+            # If leftover isn't long enough, then move to the next exon.
+            next EXON if ( length($leftover) < 3 );
         }
 
-    PARTIAL: {
+      START: {
 
-            ########################################
-            # Handle 5' partials. The first exon may
-            # be the actual start of the gene, so
-            # the option to have the start codon
-            # translated is left in there. Otherwise
-            # just translate the leftover codon like
-            # a regular codon.
+            # Handle the start codon. After the start codon has been
+            # translated, set the partial flag so we don't try it again.
 
-            if ( $params{partial} ) {
-                $peptide .= $$self{ $prefix . 'forward' }{$leftover};
-            }
-            else {
-                $peptide 
-                    = $$self{ $prefix . 'starts' }{$leftover}
-                    || $$self{ $prefix . 'forward' }{$leftover}
-                    || 'X';
-                $params{partial} = 1;
+            my $ends = [ 0, $prep->[1] ];
+            unless ( $p{partial} ) {
+                $self->_start( \$leftover, \$peptide, $ends, $prep );
+                $p{partial} = 1;
             }
+
+            $self->_translate( \$leftover, \$peptide, $ends, $prep );
         }
 
-        my $start;
-        my $stop;
+        my $ends = $self->_endpoints( $p{strand}, $lower, $upper );
+      BOUNDS: {
+            my $phase_diff = ( $upper - $lower ) % 3;
+            $leftover =
+              $prep->[0]
+              ? substr( $$seq_ref, $lower,     $phase_diff )
+              : substr( $$seq_ref, $ends->[1], $phase_diff );
+        }
 
-    BOUNDS: {
-            ########################################
-            # Similar to translate. Set up looping
-            # variables.
+        $self->_translate( $seq_ref, \$peptide, $ends, $prep );
+    }
 
-            my $phaseDiff = ( $upper - $lower ) % 3;
+    return \$peptide;
+}
 
-            unless ($prefix) {
-                $start    = $lower;
-                $stop     = $upper - $phaseDiff;
-                $leftover = substr( $$seqRef, $stop, $phaseDiff );
-            }
-            else {
-                $start    = $upper - 3;
-                $stop     = $lower + $phaseDiff - 3;
-                $leftover = substr( $$seqRef, $lower, $phaseDiff );
-            }
-        }
+# Returns [ $is_this_a_reverse_complement, $increment_for_loop ]
+sub _prepare {
+    my $self = shift;
+    my ($strand) = @_;
+    return [ ( $strand == 1 ? 0 : 1 ), $strand * 3 ];
+}
 
-        ########################################
-        # The guts of the translation routine.
-        # This routine is very fast and is what
-        # should be running over the course of
-        # most of the gene. Dealing with where
-        # the exons start and stop slows down
-        # the execution.
-
-        for ( $start = $start; $start != $stop; $start += $increment ) {
-            $peptide .= $$self{ $prefix . 'forward' }
-                { substr( $$seqRef, $start, 3 ) } || 'X';
-        }
+# Convert (lower, upper) into endpoints for a loop. For the + strand, we just
+# adjust upper so that it is in phase with lower. However, for the - strand,
+# we not only adjust lower for phase, but also subtract 3 from everything so
+# that we can take the substring properly. Here is a picture that might make
+# sense of this:
+
+# Positions:             0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4
+# Region of interest (-): . . . .4- - - - - - - - - - - - - - - -20 . . .
+# For + strand:                  4- - -|- - -|- - -|- - -|- - >19
+# For - strand:              2. . .|< - -|- - -|- - -|- - -17 - -|
+
+# For a + strand, it returns [4, 19]. For a - strand, it returns [17, 2]. To
+# get the first codon on the - strand, we take the substring starting at 17,
+# and the loop will end once we decrement our counter to 2.
+
+sub _endpoints {
+    my $self = shift;
+    my ( $strand, $lower, $upper ) = @_;
+
+    return $strand == 1
+      ? [ $lower, $upper - ( ( $upper - $lower ) % 3 ) ]
+      : [ $upper - 3, $lower - 3 + ( ( $upper - $lower ) % 3 ) ];
+}
+
+# The actual translation function. Goes from start to stop, appends to the
+# peptide sequence using the translation table provided.
+
+sub _translate {
+    my $self = shift;
+    my ( $seq_ref, $pep_ref, $ends, $prep ) = @_;
+
+    my $table = $self->_table->[ $prep->[0] ];
+    while ( $ends->[0] != $ends->[1] ) {
+        $$pep_ref .= $table->{ substr( $$seq_ref, $ends->[0], 3 ) } || 'X';
+        $ends->[0] += $prep->[1];
     }
+}
 
-    return \$peptide;
+# Perform translation for only one frame and adjusts the start only if it finds
+# a codon in the translation table.
+
+sub _start {
+    my $self = shift;
+    my ( $seq_ref, $pep_ref, $ends, $prep ) = @_;
+
+    return if ( $ends->[0] == $ends->[1] );
+
+    my $start =
+      $self->_starts->[ $prep->[0] ]->{ substr( $$seq_ref, $ends->[0], 3 ) };
+    if ($start) {
+        $$pep_ref = $start;
+        $ends->[0] += $prep->[1];
+    }
 }
 
-=item translateCodon()
+=head2 translate_codon
 
-=item $residue = $translator->translateCodon($codon, $start);
+    my $residue = $translator->translate_codon( $codon );
+    my $residue = $translator->translate_codon( $codon, \%params );
 
 Translate a codon. Return 'X' or '-' if it isn't in the
 codon table. Handles degenerate nucleotides, so if all
 possible codons for an ambiguity map to the same residue,
 return that residue. Will also handle ambiguous amino acids.
-$start dictates whether or not to translate this as a start
+start dictates whether or not to translate this as a start
 codon. Will also cache any new translations it finds.
 
-For those looking for the translateStart routine, it has
-been merged into translateCodon.
-
 Example:
 
- $residue = $translator->translateCodon('atg');
- $residue = $translator->translateCodon('tty', 1);
- $residue = $translator->translateCodon('cat', -1, 1);
+    $residue = $translator->translate_codon('atg');
+    $residue = $translator->translate_codon( 'tty', { strand => -1 } );
+    $residue = $translator->translate_codon( 'cat', { start => 1 } );
 
 =cut
 
-sub translateCodon {
+sub translate_codon {
+    TRACE("translate_codon called");
+
     my $self = shift;
 
-    my ( $codon, $strand, $start )
-        = validate_pos( @_,
-                        { regex => qr/^${nucMatch}{3}$/ },
-                        { default => 1,
-                          regex   => qr/^[+-]?1$/,
-                          type    => SCALAR
-                        },
-                        { default => 0,
-                          regex   => qr/^[01]$/,
-                          type    => SCALAR
-                        }
-        );
-    $codon = uc $codon;
+    my ( $codon, @p );
+
+    ( $codon, $p[0] ) = validate_pos(
+        @_,
+        { regex => qr/^${nuc_match}{3}$/ },
+        { type  => Params::Validate::HASHREF, default => {} }
+
+    );
 
-    my $prefix = $strand == 1 ? '' : 'rc_';
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => 1,
+                regex   => qr/^[+-]?1$/,
+                type    => Params::Validate::SCALAR
+            },
+            start => {
+                default => 0,
+                regex   => qr/^[01]$/,
+                type    => Params::Validate::SCALAR
+            }
+        }
+    );
 
-    DEBUG("translateCodon called");
-    TRACE("Codon $codon");
+    $codon = uc $codon;
+    my $rc = $p{strand} == 1 ? 0 : 1;
 
-    my $table;
-    my $notFound;
-    unless ($start) {
-        $table    = $prefix . 'forward';
-        $notFound = 'X';
+    my ( $table, $not_found );
+    unless ( $p{start} ) {
+        $table     = $self->_table->[$rc];
+        $not_found = 'X';
     }
     else {
-        $table    = $prefix . 'starts';
-        $notFound = '-';
+        $table     = $self->_starts->[$rc];
+        $not_found = '-';
     }
-    TRACE("Using $table table");
-
-    return $$self{$table}{$codon} if ( defined $$self{$table}{$codon} );
-
-    ########################################
-    # Handles codons with degenerate
-    # nucleotides: [RYMKWS] [BDHV] or N
-    # Several codons may map to the same
-    # amino acid. If all possible codons for
-    # an amibguity map to the same residue,
-    # return that residue rather than X
-
-    if ( my ($nuc) = $codon =~ /($degenMatch)/ ) {
-        my $consensus;
-
-        ########################################
-        # Replace the nucleotide with every
-        # possiblity from degenerate map hash.
-
-        foreach ( @{ $degenerateMap{$nuc} } ) {
-            my $newCodon = $codon;
-            $newCodon =~ s/$nuc/$_/;
-            my $residue = $self->translateCodon( $newCodon, $strand, $start );
-
-            ########################################
-            # If consensus isn't set, set it to the
-            # current residue.
-
-            $consensus = $residue unless $consensus;
-
-            ########################################
-            # If the returned residue was an 'X' or
-            # '-', we return that.
-
-            return $notFound if ( $residue eq $notFound );
-
-            ########################################
-            # This is an interesting step. If the
-            # residue isn't the same as the
-            # consensus, check to see if they map to
-            # the same ambiguous amino acid. If
-            # true, then change the consensus to
-            # that ambiguous acid and proceed.
-            # Otherwise, return $notFound.
-
-            if ( $residue ne $consensus ) {
-                if (    ( defined $ambiguousForward{$residue} )
-                     && ( defined $ambiguousForward{$consensus} )
-                     && ( $ambiguousForward{$residue} eq
-                          $ambiguousForward{$consensus} )
-                    )
-                {
-                    $consensus = $ambiguousForward{$consensus};
-                }
-                else {
-                    return $notFound;
-                }
+
+    #    return $table->{$codon} if ( defined $table->{$codon} );
+    return $self->_translate_codon( $codon, $table, \%p ) || $not_found;
+}
+
+# This is the helper function for translate_codon. It is designed to speed
+# things up because it doesn't perform validation or try to figure out which
+# tables to use, which can slow things down since this is a recursive function.
+# Handles codons with degenerate nucleotides: [RYMKWS] [BDHV] or N. Several
+# codons may map to the same amino acid. If all possible codons for an
+# amibguity map to the same residue, return that residue rather than X.
+
+sub _translate_codon {
+    my $self  = shift;
+    my $codon = shift;
+    my $table = shift;
+
+    # Check for base case: no degenerate nucleotides; we can't unroll further.
+    unless ( $codon =~ /($degen_match)/ ) {
+        return $table->{$codon};
+    }
+
+    # Check to see if this degenerate-containing codon has been computed
+    return $table->{$codon} if ( $table->{$codon} );
+
+    my $consensus;
+    my $nuc = $1;
+
+    # Replace the nucleotide with every possiblity from degenerate map hash.
+    foreach ( @{ $degenerate_map{$nuc} } ) {
+        my $new_codon = $codon;
+        $new_codon =~ s/$nuc/$_/;
+
+        # Recursively call this function
+        my $residue = $self->_translate_codon( $new_codon, $table, @_ );
+
+        # If the new_codon didn't come to a consensus, or if the translation
+        # isn't defined for new_codon in a custom translation table, return
+        # undef.
+        return undef unless ( defined $residue );
+
+        # If consensus isn't set, set it to the current residue.
+        $consensus = $residue unless ($consensus);
+
+        # This is an interesting step. If the residue isn't the same as the
+        # consensus, check to see if they map to the same ambiguous amino acid.
+        # If true, then change the consensus to that ambiguous acid and proceed.
+        # Otherwise, return undef (consensus could not be reached).
+        if ( $residue ne $consensus ) {
+            if (
+                   ( defined $ambiguous_forward{$residue} )
+                && ( defined $ambiguous_forward{$consensus} )
+                && ( $ambiguous_forward{$residue} eq
+                    $ambiguous_forward{$consensus} )
+              )
+            {
+                $consensus = $ambiguous_forward{$consensus};
+            }
+            else {
+                return undef;
             }
         }
+    }
+
+    # If we got this far, it means that we have a valid consensus sequence for
+    # a degenerate-nucleotide-containing codon. Cache and return results.
+    DEBUG("New codon translation found: $codon => $consensus");
+    $self->add_translation( $codon, $consensus, @_ );
+    return $consensus;
+}
+
+1;
+
+=head1 MISC
 
-        ########################################
-        # Cache the residue in the translation
-        # tables.
+These are the original translation tables. The translation tables used by this
+module have been boostrap - they include translations for degenerate
+nucleotides and allow ambiguous amino acids to be the targets of translation
+(e.g. every effort has been made to give a translation that isn't "X").
+
+    {
+    name "Standard" ,
+    name "SGC0" ,
+    id 1 ,
+    ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "---M---------------M---------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Vertebrate Mitochondrial" ,
+    name "SGC1" ,
+    id 2 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+    sncbieaa "--------------------------------MMMM---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Yeast Mitochondrial" ,
+    name "SGC2" ,
+    id 3 ,
+    ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "----------------------------------MM----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Mold Mitochondrial; Protozoan Mitochondrial;"
+    name "Coelenterate Mitochondrial; Mycoplasma; Spiroplasma" ,
+    name "SGC3" ,
+    id 4 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "--MM---------------M------------MMMM---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Invertebrate Mitochondrial" ,
+    name "SGC4" ,
+    id 5 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
+    sncbieaa "---M----------------------------MMMM---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
+    name "SGC5" ,
+    id 6 ,
+    ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
+    name "SGC8" ,
+    id 9 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Euplotid Nuclear" ,
+    name "SGC9" ,
+    id 10 ,
+    ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Bacterial and Plant Plastid" ,
+    id 11 ,
+    ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "---M---------------M------------MMMM---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Alternative Yeast Nuclear" ,
+    id 12 ,
+    ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-------------------M---------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Ascidian Mitochondrial" ,
+    id 13 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
+    sncbieaa "---M------------------------------MM---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    },
+    {
+    name "Alternative Flatworm Mitochondrial" ,
+    id 14 ,
+    ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    } ,
+    {
+    name "Blepharisma Macronuclear" ,
+    id 15 ,
+    ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    } ,
+    {
+    name "Chlorophycean Mitochondrial" ,
+    id 16 ,
+    ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    } ,
+    {
+    name "Trematode Mitochondrial" ,
+    id 21 ,
+    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    } ,
+    {
+    name "Scenedesmus obliquus Mitochondrial" ,
+    id 22 ,
+    ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "-----------------------------------M----------------------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    } ,
+    {
+    name "Thraustochytrium Mitochondrial" ,
+    id 23 ,
+    ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+    sncbieaa "--------------------------------M--M---------------M------------"
+    -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+    -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+    }
 
-        DEBUG("New codon translation found: $codon => $consensus");
+=head1 AUTHOR
 
-        my $rc_codon_ref = reverseComplement( \$codon );
+Kevin Galinsky, C<< <kgalinsk at jcvi.org> >>
 
-        $$self{$table}{$codon} = $consensus;
-        $$self{ 'rc_' . $table }{$$rc_codon_ref} = $consensus;
+=head1 BUGS
 
-        ########################################
-        # In the case of regular codons, push
-        # that codon onto the reverse
-        # translation array.
+Please report any bugs or feature requests to
+C<bug-jcvi-translator at rt.cpan.org>, or through the web interface at
+L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=JCVI-Translator>.
+I will be notified, and then you'll automatically be notified of progress on
+your bug as I make changes.
 
-        unless ($start) {
-            push @{ $$self{reverse}{$consensus} },    $codon;
-            push @{ $$self{rc_reverse}{$consensus} }, $$rc_codon_ref;
-        }
+=head1 SUPPORT
 
-        return $consensus;
-    }
+You can find documentation for this module with the perldoc command.
 
-    return $notFound;
-}
+    perldoc JCVI::Translator
+
+You can also look for information at:
+
+=over 4
+
+=item * AnnoCPAN: Annotated CPAN documentation
 
-1;    #end of module
+L<http://annocpan.org/dist/JCVI-Translator>
+
+=item * CPAN Ratings
+
+L<http://cpanratings.perl.org/d/JCVI-Translator>
+
+=item * RT: CPAN's request tracker
+
+L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=JCVI-Translator>
+
+=item * Search CPAN
+
+L<http://search.cpan.org/dist/JCVI-Translator>
 
 =back
 
-=cut
+=head1 ACKNOWLEDGEMENTS
 
-=head1 MISC
+Log::Log4perl
+Params::Validate
 
-These are the original translation tables. The translation
-tables below have been unrolled - degenerate nucleotides are
-in place as well as ambiguous amino acids.
+=head1 COPYRIGHT & LICENSE
 
-{
-name "Standard" ,
-name "SGC0" ,
-id 1 ,
-ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "---M---------------M---------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Vertebrate Mitochondrial" ,
-name "SGC1" ,
-id 2 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
-sncbieaa "--------------------------------MMMM---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Yeast Mitochondrial" ,
-name "SGC2" ,
-id 3 ,
-ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "----------------------------------MM----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Mold Mitochondrial; Protozoan Mitochondrial;"
-name "Coelenterate Mitochondrial; Mycoplasma; Spiroplasma" ,
-name "SGC3" ,
-id 4 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "--MM---------------M------------MMMM---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Invertebrate Mitochondrial" ,
-name "SGC4" ,
-id 5 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
-sncbieaa "---M----------------------------MMMM---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
-name "SGC5" ,
-id 6 ,
-ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
-name "SGC8" ,
-id 9 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Euplotid Nuclear" ,
-name "SGC9" ,
-id 10 ,
-ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Bacterial and Plant Plastid" ,
-id 11 ,
-ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "---M---------------M------------MMMM---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Alternative Yeast Nuclear" ,
-id 12 ,
-ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-------------------M---------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Ascidian Mitochondrial" ,
-id 13 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
-sncbieaa "---M------------------------------MM---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-},
-{
-name "Alternative Flatworm Mitochondrial" ,
-id 14 ,
-ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-} ,
-{
-name "Blepharisma Macronuclear" ,
-id 15 ,
-ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-} ,
-{
-name "Chlorophycean Mitochondrial" ,
-id 16 ,
-ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-} ,
-{
-name "Trematode Mitochondrial" ,
-id 21 ,
-ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-} ,
-{
-name "Scenedesmus obliquus Mitochondrial" ,
-id 22 ,
-ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "-----------------------------------M----------------------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-} ,
-{
-name "Thraustochytrium Mitochondrial" ,
-id 23 ,
-ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
-sncbieaa "--------------------------------M--M---------------M------------"
--- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
--- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
--- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-}
+Copyright 2008-2009 J. Craig Venter Institute, all rights reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
 
 =cut
 
@@ -1338,4 +1520,4 @@ sncbieaa "-----------------------------M----------------------------------------
 -- Base1  AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHMMMMMMMMMMSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTWYYY
 -- Base2  AAAAAACCCCCCCCCCCCCCCGGGGGGTTTTTTTTAAAAAACCCCCCCCCCCCCCCGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTAAAAAACCCCCCCCCCCCCCCGGGGGGGGGGGGGGGMMMTTTTTTTTTTTTTTTTGGGTTTTTTTAAAAAAAAACCCCCCCCCCCCCCCGGGGGRTTTTTTTTTT
 -- Base3  ACGRTYABCDGHKMNRSTVWYACGRTYACGHMTWYACGRTYABCDGHKMNRSTVWYABCDGHKMNRSTVWYABCDGHKMNRSTVWYACGRTYABCDGHKMNRSTVWYABCDGHKMNRSTVWYCTYABCDGHKMNRSTVWYAAGRACHMTWYAGRACGRTYABCDGHKMNRSTVWYACGTYAACGRTYAAGR
-}
\ No newline at end of file
+}
diff --git a/lib/JCVI/Translator/Utils.pm b/lib/JCVI/Translator/Utils.pm
index 13131ce..9d7cbcd 100644
--- a/lib/JCVI/Translator/Utils.pm
+++ b/lib/JCVI/Translator/Utils.pm
@@ -7,419 +7,614 @@
 
 =head1 NAME
 
-package Utils
+JCVI::Translator::Utils - Utilities that requrie a translation table
 
-=head1 SYNOPSES
+=head1 SYNOPSIS
 
- use JCVI::Translator::Utils;
+    use JCVI::Translator::Utils;
 
- my $translator = new JCVI::Translator::Utils(
-                           id => $id,
-                           name => $name,
-                           table => $table,
-                           tableRef => $tableRef
-                           );
+    # Same constructor as JCVI::Translator
+    my $utils = new JCVI::Translator::Utils();
+    my $utils = custom JCVI::Translator( \$custom_table );
 
-=head1 DESCRIPTION
+    my $codons = $utils->codons( $residue );
+    my $regex  = $utils->regex( $residue );
 
-See Translator for more info. Utils extends Translator and
-adds a few more functions that are normally not used.
-
-=head1 AUTHOR
+    my $orf = $utils->getORF( $seq_ref );
+    my $cds = $utils->getCDS( $seq_ref );
 
-Kevin Galinsky, <kgalinsk@jcvi.org>
+    my $frames = $utils->nonstop( $seq_ref );
 
-=head1 FUNCTIONS
+=head1 DESCRIPTION
 
-=over
+See Translator for more info. Utils extends Translator and
+adds a few more functions that are normally not used.
 
 =cut
 
 package JCVI::Translator::Utils;
-use base JCVI::Translator;
 
 use strict;
 use warnings;
 
-our $VERSION = '0.2.2';
+use base qw(JCVI::Translator);
+__PACKAGE__->mk_accessors(qw( _regexes ));
 
 use Log::Log4perl qw(:easy);
-use Params::Validate qw(:all);
+use Params::Validate;
+
+use JCVI::DNATools qw( cleanDNA );
+use JCVI::AATools qw( $aa_match );
+
+our $DEFAULT_STRAND    = 0;
+our $DEFAULT_SANITIZED = 0;
+
+=head1 METHODS
+
+=cut
+
+sub _new {
+    my $class = shift;
+    my $self  = $class->SUPER::_new();
+
+    $self->_regexes( [] );
+    foreach my $rc ( 0 .. 1 ) {
+        $self->_regexes->[$rc] = {};
+    }
+
+    return $self;
+}
 
-=item getORF()
+=head2 codons
 
-=item [$start, $stop] = $translator->getORF($seqRef, $strand);
+    my $codon_array = $translator->codons( $residue);
+    my $codon_array = $translator->codons( $residue, $strand );
 
-This will get the longest region between stops and return
-the strand, lower and upper bounds, inclusive:
+Returns a list of codons for a particular residue or start codon. For start
+codons, input "start" for the residue.
+
+=cut
+
+sub codons {
+    my $self = shift;
+    my ( $residue, $strand ) = validate_pos(
+        @_,
+        { regex => qr/^(?:$aa_match|start|lower|upper)$/ },
+        {
+            default => 1,
+            regex   => qr/^[+-]?1$/
+        }
+    );
+
+    if    ( $residue eq 'lower' ) { $residue = $strand == 1  ? 'start' : '*' }
+    elsif ( $residue eq 'upper' ) { $residue = $strand == -1 ? 'start' : '*' }
+    elsif ( $residue eq 'start' ) { $residue = 'start' }
+    else                          { $residue = uc $residue }
+
+    return [
+        @{ $self->_reverse->[ $strand == 1 ? 0 : 1 ]->{$residue} ||= [] } ];
+}
+
+=head2 regex
+
+    my $regex = $translator->regex( $residue );
+    my $regex = $translator->regex( $residue, $strand );
+
+Returns a regular expression matching codons for a particular amino acid
+residue. In addition, three special values are allowed:
+
+    start:  Start codons
+    lower:  Start or stop codons, depending up on strand
+    lower:  Start or stop codons, depending up on strand
+
+lower and upper match the respective ends of a CDS for a given strand (i.e. on
+the positive strand, lower matches the start, and upper matches the stop). The
+stop codon is stored as "*" by the translator.
+
+=cut
+
+sub regex {
+    my $self = shift;
+    my ( $residue, $strand ) = validate_pos(
+        @_, 1,
+        {
+            default => 1,
+            regex   => qr/^[+-]?1$/
+        }
+    );
+
+    my $rc = $strand == 1 ? 0 : 1;
+
+    my $regex = $self->_regexes->[$rc]->{residue};
+
+    return $regex if ( defined $regex );
+
+    $regex = join '|', @{ $self->codons(@_) };
+    $regex = qr/$regex/;
+
+    $self->_regexes->[$rc]->{residue} = $regex;
+    return $regex;
+}
+
+=head2 getORF
+
+    my $orf_hash = $translator->getORF( $seq_ref );
+    my $orf_hash = $translator->getORF( $seq_ref, \%params );
+
+This will get the longest region between stops and return the strand, lower and
+upper bounds, inclusive. The parameters are:
+
+    strand:     0, 1 or -1; default = 0 (meaning search both strands)
+    lower:      integer between 0 and length; default = 0
+    upper:      integer between 0 and length; default = length
+    sanitized:  0 or 1; default = 0
+
+Lower and upper are used to specify bounds between which you are searching.
+Suppose the following was the longest ORF:
 
  0 1 2 3 4 5 6 7 8 9 10
   T A A A T C T A A G
   *****       *****
         <--------->
 
-Will return [1, 3, 9]. You can also specify which strand
-you are looking for the ORF to be on.
+This will return:
 
-For ORFs starting at the very beginning of the strand or
-trailing off the end, but not in phase with the start or
-ends, this method will cut at the last complete codon.
+    {
+        strand => 1,
+        lower  => 3,
+        upper  => 9
+    }
 
- Eg:
+You can also specify which strand you are looking for the ORF to be on.
 
- 0 1 2 3 4 5 6 7 8 9 10
-  A C G T A G T T T A
-                *****
-    <--------->
+For ORFs starting at the very beginning of the strand or trailing off the end,
+but not in phase with the start or ends, this method will cut at the last
+complete codon.
 
-Will return [-1, 1, 7]. The distance between lower and
-upper will always be a multiple of 3. This is to make it
-clear which frame the ORF is in.
+    Eg:
 
-Example:
+    0 1 2 3 4 5 6 7 8 9 10
+     A C G T A G T T T A
+                   *****
+       <--------------->
+
+Will return:
 
- $ref = $translator->getORF(\'TAGAAATAG');
+    {
+        strand => 1,
+        lower  => 1,
+        upper  => 10
+    }
+
+The distance between lower and upper will always be a multiple of 3. This is to
+make it clear which frame the ORF is in. The resulting hash may be passed to
+the translate method.
 
-Output:
+Example:
 
- $ref = [$strand, $lower, $upper]
+    my $orf_ref = $translator->getORF( \'TAGAAATAG' );
+    my $orf_ref = $translator->getORF( \$seq, { strand => -1 } );
+    my $orf_ref = $translator->getORF(
+        \$seq,
+        {
+            lower => $lower,
+            upper => $upper
+        }
+    );
 
 =cut
 
 sub getORF {
+    TRACE('getORF called');
+
     my $self = shift;
 
-    my ( $seqRef, $strand )
-        = validate_pos( @_,
-                        { type => SCALARREF },
-                        { default => 0,
-                          regex   => qr/^[+-]?[01]$/
-                        }
-        );
-
-    DEBUG('getORF called');
-
-    my ( $best_strand, $lower, $upper ) = ( 1, 0, 0 );
-
-    foreach my $cur_strand ( $strand == 0 ? ( -1, 1 ) : ($strand) ) {
-        my @lowers = ( 0 .. 2 );
-        my $stopRegex = $self->regex( 'stop', $cur_strand );
-
-        ########################################
-        # Rather than using a regular expression
-        # to find regions between stops, it
-        # should be  more computationally
-        # efficient to find all the stops and
-        # compute from there. However, Perl's
-        # regular expression engine may be
-        # faster than code execution, so this
-        # may not be the case
-
-        ########################################
-        # A lookahead is used for two reasons:
-        # the main one is to get every position
-        # within two bases of the end of the
-        # sequence, and also to cope with the
-        # possibility of overlapping stop
-        # codons.
-
-        while (
-            $$seqRef =~ /(?=
-				($stopRegex)|.{0,2}$
-			    )/gx
-            )
+    my ( $seq_ref, @p );
+    ( $seq_ref, $p[0] ) = validate_pos(
+        @_,
+        { type => Params::Validate::SCALARREF },
+        { type => Params::Validate::HASHREF, default => {} }
+    );
+
+    my %p = validate(
+        @p,
         {
-            my $curUpper = pos $$seqRef;
-            my $frame    = $curUpper % 3;
-
-            $curUpper += length $1 if ( $1 && ( $cur_strand == 1 ) );
-
-            ########################################
-            # If the current distance between start
-            # and stop is greater than the distance
-            # between the stored start and stop,
-            # change the stored start and stop to be
-            # the current one.
-
-            if ( $upper - $lower < $curUpper - $lowers[$frame] ) {
-                $best_strand = $cur_strand;
-                $lower       = $lowers[$frame];
-                $upper       = $curUpper;
-            }
+            strand => {
+                default => 0,
+                regex   => qr/^[+-]?[01]$/,
+                type    => Params::Validate::SCALAR
+            },
+            lower => {
+                default   => 0,
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'lower >= 0'          => sub { $_[0] >= 0 },
+                    'lower <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            upper => {
+                default   => length($$seq_ref),
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'upper >= 0'          => sub { $_[0] >= 0 },
+                    'upper <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            sanitized => { default => $DEFAULT_SANITIZED }
+        }
+    );
+
+    return undef if ( $p{upper} < $p{lower} );
+
+    $seq_ref = cleanDNA($seq_ref) unless ( $p{sanitized} );
+
+    # Initialize the longest ORF.
+    my %ORF = (
+        strand => 0,
+        lower  => $p{lower},
+        upper  => $p{lower}
+    );
+
+    # Go through each strand which we are looking in
+    foreach my $strand ( $p{strand} == 0 ? ( -1, 1 ) : $p{strand} ) {
+
+        # Initialize lower bounds and regular expression for stop
+        my @lowers = map { $_ + $p{lower} } ( 0 .. 2 );
+        my $stop_regex = $self->regex( '*', $strand );
+
+        # Look for all the stops in our sequence using a regular expression. A
+        # lookahead is used to cope with the possibility of overlapping stop
+        # codons
+
+        pos($$seq_ref) = $p{lower};
 
-            $lowers[$frame] = $curUpper;
+        while ( $$seq_ref =~ /(?=stop_regex)/gx ) {
+
+            # Get the location of the upper bound. Add 3 for the length of the
+            # stop codon if we are on the + strand.
+            my $upper = pos($$seq_ref) + ( $strand == 1 ? 3 : 0 );
+
+            # End the iteration if we are out of range
+            last if ( $upper > $p{upper} );
+
+            # Call our helper function
+            $self->_getORF( $strand, \@lowers, $upper, $p{lower}, \%ORF );
+        }
+
+        # Now evaluate for the last three ORFS
+        foreach my $i ( 0 .. 2 ) {
+            my $upper = $p{upper} - $i;
+            $self->_getORF( $strand, \@lowers, $upper, $p{lower}, \%ORF );
         }
 
+        # NOTE: Perl's regular expression engine could be faster than code
+        # execution, so it may be faster to find ORFS using regular expression
+        # matching an entire ORF.
+        # m/(?=(^|$stop)((.{3})*)($stop|$))/g
     }
 
-    return [ $best_strand, $lower, $upper ];
+    return \%ORF;
+}
+
+# Helper function for getORF above.
+sub _getORF {
+    my $self = shift;
+    my ( $strand, $lowers, $upper, $offset, $longest ) = @_;
+
+    # Calculate the frame relative to the starting offset
+    my $frame = ( $upper - $offset ) % 3;
+
+    # Compare if this is better than the longest ORF
+    $self->_compare_regions(
+        $longest,
+        {
+            strand => $strand,
+            lower  => $lowers->[$frame],
+            upper  => $upper
+        }
+    );
+
+    # Mark the lower bound for this frame
+    $lowers->[$frame] = $upper;
 }
 
-=item getCDS()
+=head2 getCDS
 
-=item [$start, $stop] = $translator->getCDS($seqRef, $strand, $strict);
+    my $cds_ref = $translator->getCDS( $seq_ref );
+    my $cds_ref = $translator->getCDS( $seq_ref, \%params );
 
-This will return the strand and boundaries of the longest
-CDS.
+This will return the strand and boundaries of the longest CDS.
 
  0 1 2 3 4 5 6 7 8 9 10
   A T G A A A T A A G
   >>>>>       *****
   <--------------->
 
-Will return [1, 0, 9].
+Will return:
 
-Strict controls how strictly getCDS functions.
-There are 3 levels of strictness, enumerated 0, 1 and 2. 2 is the most strict,
-and in that mode, a region will only be considered a CDS if both the start and
-stop is found. In strict level 1, if a start is found, but no stop is present
-before the end of the sequence, the CDS will run until the end of the sequence.
-Strict level 0 assumes that start codon is present in each frame just before
-the start of the molecule.
+    {
+        strand => 1,
+        lower  => 0,
+        upper  => 9
+    }
 
-Example:
+It takes the following parameters:
+
+    strand:     0, 1 or -1; default = 0 (meaning search both strands)
+    strict:     0, 1 or 2;  default = 1
+    sanitized:  0 or 1; default = 0
 
- $ref = $translator->getCDSs(\'ATGAAATAG');
- $ref = $translator->getCDSs(\'ATGAAATAG', -1);
+Strict controls how strictly getCDS functions. There are 3 levels of
+strictness, enumerated 0, 1 and 2. 2 is the most strict, and in that mode, a
+region will only be considered a CDS if both the start and stop is found. In
+strict level 1, if a start is found, but no stop is present before the end of
+the sequence, the CDS will run until the end of the sequence. Strict level 0
+assumes that start codon is present in each frame just before the start of the
+molecule. Level 1 is a pretty safe bet, so that is the default.
 
-Output:
+Example:
 
- $ref = [$strand, $lower, $upper]
+    my $cds_ref = $translator->getCDS(\'ATGAAATAG');
+    my $cds_ref = $translator->getCDS(\$seq, { strand => -1 } );
+    my $cds_ref = $translator->getCDS(\$seq, { strict => 2 } );
 
 =cut
 
 sub getCDS {
+    TRACE('getCDS called');
+
     my $self = shift;
 
-    my ( $seqRef, $strand, $strict )
-        = validate_pos( @_,
-                        { type => SCALARREF },
-                        { default => 0,
-                          regex   => qr/^[+-]?[01]$/
-                        },
-                        { default => 1,
-                          regex   => qr/^[012]$/
-                        }
-        );
-
-    DEBUG('getCDS called');
-
-    my ( $best_strand, $lower, $upper ) = ( 1, 0, 0 );
-
-    foreach my $cur_strand ( $strand == 0 ? ( -1, 1 ) : ($strand) ) {
-        my $lowerRegex = $self->regex( 'lower', $cur_strand );
-        my $upperRegex = $self->regex( 'upper', $cur_strand );
-
-        ########################################
-        # Initialize
-        my @lowers;
-        if ( $cur_strand == 1 ) {
-            @lowers = ( $strict != 0 ? map {undef} ( 0 .. 2 ) : ( 0 .. 2 ) );
-        }
-        else {
-            @lowers = ( $strict == 2
-                        ? map {undef} ( 0 .. 2 )
-                        : ( 0 .. 2 )
-            );
-        }
+    my ( $seq_ref, @p );
+    ( $seq_ref, $p[0] ) = validate_pos(
+        @_,
+        { type => Params::Validate::SCALARREF, },
+        { type => Params::Validate::HASHREF, default => {} }
+    );
 
-        ########################################
-        # Similar to getORF, rather than
-        # using a regular expression to find
-        # entire regions, instead find
-        # individual starts and stops and react
-        # accordingly. It captures the starts
-        # and stops separately ($1 vs $2) so
-        # that it is easy to tell if a start or
-        # a stop was matched.
-        #
-        # If strict mode is at level 2, we don't
-        # tes is a newly added feature which t CDSs trailing off the end
-        # of the molecule
-
-        my $regex = qr/(?=($lowerRegex)|($upperRegex))/;
-        $regex = qr/$regex|(?=.{0,2}$)/ unless ( $strict == 2 );
-
-        while ( $$seqRef =~ /$regex/g ) {
-
-            my $position = pos $$seqRef;
-            my $frame    = $position % 3;
-
-            ########################################
-            # If we match the lower regex we:
-            #
-            # In the case that we are on the '-'
-            # strand, that means we found a stop,
-            # so we update the lower bound.
-            #
-            # Otherwise, we are on the positive
-            # strand, meaning we have found the
-            # start, so only set the lower bound if
-            # it is not already set (don't want to
-            # overwrite the location of a previous
-            # start codon).
-
-            if ( $1
-                 && (    ( $cur_strand eq '-' )
-                      || ( !defined $lowers[$frame] ) )
-                )
-            {
-                $lowers[$frame] = $position;
-            }
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => 0,
+                regex   => qr/^[+-]?1$/,
+            },
+            lower => {
+                default   => 0,
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'lower >= 0'          => sub { $_[0] >= 0 },
+                    'lower <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            upper => {
+                default   => length($$seq_ref),
+                regex     => qr/^[0-9]+$/,
+                type      => Params::Validate::SCALAR,
+                callbacks => {
+                    'upper >= 0'          => sub { $_[0] >= 0 },
+                    'upper <= seq_length' => sub { $_[0] <= length($$seq_ref) }
+                }
+            },
+            strict => {
+                default => 1,
+                regex   => qr/^[012]$/,
 
-            ########################################
-            # If we don't match the lower regex:
-            #
-            # If this is the positive strand, that
-            # means that this is a valid stop -
-            # either a stop codon or the end of the
-            # string. Reset the lower bound in this
-            # case.
-            #
-            # On the negative strand, we only care
-            # if we matched a start. In that case,
-            # do the compute and update.
-            #
-            # Another option would be to mark where
-            # the start is, and only do the compute
-            # when we find a stop.
+            },
+            sanitized => { default => $DEFAULT_SANITIZED }
+        }
+    );
 
-            elsif ( ( $cur_strand == 1 ) || $2 ) {
+    return undef if ( $p{upper} < $p{lower} );
 
-                # Move on if the lower is unset
-                next unless ( defined $lowers[$frame] );
+    $seq_ref = cleanDNA($seq_ref) unless ( $p{sanitized} );
 
-                $position += length $2 if ($2);
+    # Initialize the longest ORF. Length is -1.
+    my %CDS = (
+        strand => 0,
+        lower  => 0,
+        upper  => -1
+    );
 
-                if ( $upper - $lower < $position - $lowers[$frame] ) {
-                    $best_strand = $cur_strand;
-                    $lower       = $lowers[$frame];
-                    $upper       = $position;
-                }
+    foreach my $strand ( $p{strand} == 0 ? ( -1, 1 ) : $p{strand} ) {
+        my $lower_regex = $self->regex( 'lower', $strand );
+        my $upper_regex = $self->regex( 'upper', $strand );
 
-                # Reset lower if we found a stop
-                undef $lowers[$frame] if ( $cur_strand == 1 );
-            }
-        }
-    }
+        # Initialize lowers. On the + strand, we don't set the lower bounds
+        # unless strict is 0. On the - strand, we don't set the lower bounds if
+        # strict is 2. Otherwise, set the lower boudns to be the first bases.
+        my @lowers =
+          (      ( ( $strand == 1 ) && ( $p{strict} != 0 ) )
+              || ( ( $strand == -1 ) && ( $p{strict} == 2 ) ) )
+          ? (undef) x 3
+          : map { $p{lower} + $_ } ( 0 .. 2 );
 
-    return [ $best_strand, $lower, $upper ];
-}
+        # Similar to getORF, rather than using a regular expression to find
+        # entire coding regions, instead find individual starts and stops and
+        # react accordingly.
+        # The regular expression captures the starts and stops separately
+        # ($1 vs $2) so that it is easy to tell if a start or a stop was
+        # matched.
 
-=item find()
+        pos($$seq_ref) = $p{lower};
 
-=item $positions = $translator->find( $seqRef, $type, $strand )
+        while ( $$seq_ref =~ /(?=($lower_regex)|($upper_regex))/g ) {
+            my $position = pos $$seq_ref;
+            last if ( $position > $p{upper} );
 
-Find codons of given type (i.e. start or stop) in the sequence. Note, the
-second two parameters are passed directly to regex, so please look there for
-more information. Returns an arrayref containing all the locations of all those
-codons.
+            my $frame = $position % 3;
 
-=cut
+            # If the lower regex matches:
+            #
+            # In the case that it is on the '-' strand, that means a stop was
+            # found. CDSs always end on stops, so update the lower bound.
+            #
+            # Otherwise, it is on the positive strand, meaning a start was
+            # found. Internal start codons are allowed, so only set the lower
+            # bound if it is not already set.
+            if ($1) {
+                if (   ( $strand == -1 )
+                    || ( !defined $lowers[$frame] ) )
+
+                {
+                    $lowers[$frame] = $position;
+                }
+            }
 
-sub find {
-    my $self = shift;
+            # If the lower regex wasn't matched, the the upper one was.
+            #
+            # If this is the positive strand, that means that this is a stop
+            # codon. Compute the CDS, update if necessary, and reset the lower
+            # bound in this case.
+            #
+            # On the negative strand, that means that a start was matched.
+            # Compute the CDS, update if necessary, but don't reset the lower
+            # bound.
 
-    my @seqRef = splice @_, 0, 1;
-    my $seqRef = validate_pos( @seqRef, { type => SCALARREF } );
+            else {
+                $position += 3;
+                last if ( $position > $p{upper} );
+
+                $self->_getCDS( $strand, \@lowers, $position, $p{lower},
+                    \%CDS );
+            }
+        }
 
-    my $regex = $self->regex(@_);
+        # If strict mode is at level 2, we don't allow CDSs to trail off the
+        # end of the molecule. We also don't allow the end to trail off if we
+        # are on the - strand and strict isn't 0.
 
-    my @positions;
+        next
+          if ( ( $p{strict} == 2 )
+            || ( ( $strand == -1 ) && ( $p{strict} != 0 ) ) );
 
-    while ( $$seqRef =~ /(?=($regex))/g ) {
-        push @positions, pos $$seqRef;
+        foreach my $i ( 0 .. 2 ) {
+            my $upper = $p{upper} - $i;
+            $self->_getCDS( $strand, \@lowers, $upper, $p{lower}, \%CDS );
+        }
     }
 
-    return \@positions;
+    return \%CDS;
 }
 
-=item regex()
+# Helper function for getORF above.
+sub _getCDS {
+    my $self = shift;
+    my ( $strand, $lowers, $upper, $offset, $longest ) = @_;
 
-=item $regex = $translator->regex( $type, $strand )
+    # Calculate the frame relative to the starting offset
+    my $frame = ( $upper - $offset ) % 3;
 
-Returns a regular expression of a certain type for a given strand. These are
-'start', 'stop', 'lower' and 'upper.' Lower and upper match the lower and upper
-end of a CDS for a given strand (i.e. on the positive strand, lower matches
-the start, and upper matches the stop).
+    # Do nothing if lower bound wasn't defined
+    return unless ( defined $lowers->[$frame] );
 
-=cut
+    # Compare if this is better than the longest ORF
+    $self->_compare_regions(
+        $longest,
+        {
+            strand => $strand,
+            lower  => $lowers->[$frame],
+            upper  => $upper
+        }
+    );
 
-sub regex {
-    my $self = shift;
-    my ( $type, $strand )
-        = validate_pos( @_,
-                        { regex => qr/^(?:start|stop|lower|upper)$/ },
-                        { default => 1,
-                          regex   => qr/^[+-]?1$/
-                        }
-        );
-
-    my $prefix = $strand == 1 ? '' : 'rc_';
-
-    if    ( $type eq 'lower' ) { $type = $strand == 1  ? 'start' : 'stop' }
-    elsif ( $type eq 'upper' ) { $type = $strand == -1 ? 'start' : 'stop' }
-
-    unless ( defined $self->{"${prefix}${type}Regex"} ) {
-        my $regex = join '|',
-            ( $type eq 'start'
-              ? keys %{ $self->{"${prefix}starts"} }
-              : @{ $$self{"${prefix}reverse"}{'*'} }
-            );
-        $self->{"${prefix}${type}Regex"} = qr/$regex/;
-    }
+    # Mark the lower bound for this frame
+    undef $lowers->[$frame] if ( $strand == 1 );
+}
 
-    return $self->{"${prefix}${type}Regex"};
+# If the current ORF is longer than the previously stored longest bounds, store
+# the current ORF
+sub _compare_regions {
+    my $self = shift;
+    my ( $longest, $current ) = @_;
+    %$longest = %$current
+      if ( $longest->{upper} - $longest->{lower} <
+        $current->{upper} - $current->{lower} );
 }
 
-=item nonstop
+=head2 nonstop
 
-=item $frames = $translator->nonstop( $seqRef, $strand )
+    my $frames = $translator->nonstop( $seq_ref );
+    my $frames = $translator->nonstop( $seq_ref, \%params );
 
-Returns the frames that contain no stop codons for the sequence. $strand is
-optional and defaults to 0. Frames are 1, 2, 3, -1, -2, -3.
+Returns the frames that contain no stop codons for the sequence. Valid
+parameters are strand and sanitized. strand is defaults to 0. Frames are
+numbered -3, -2, -1, 1, 2 and 3.
 
- 3    ---->
- 2   ----->
- 1  ------>
-    -------
- -1 <------
- -2 <-----
- -3 <----
+     3   ---->
+     2  ----->
+     1 ------>
+       -------
+    -1 <------
+    -2 <-----
+    -3 <----
 
 Example:
 
- $frames = $translator->nonstop(\'TACGTTGGTTAAGTT');     # [-1, -3, 2, 3]
- $frames = $translator->nonstop(\'TACGTTGGTTAAGTT', 1);  # [2, 3]
- $frames = $translator->nonstop(\'TACGTTGGTTAAGTT', -1); # [-1, -3]
+    my $frames = $translator->nonstop(\'TACGTTGGTTAAGTT'); # [ 2, 3, -1, -3 ]
+    my $frames = $translator->nonstop(\$seq, { strand => 1 }  ); # [ 2, 3 ]
+    my $frames = $translator->nonstop(\$seq, { strand => -1 } ); # [ -1, -3 ]
 
 =cut
 
 sub nonstop {
+    TRACE('nonstop called');
+
     my $self = shift;
-    my ( $seqRef, $strand )
-        = validate_pos( @_,
-                        { type => SCALARREF },
-                        { default => 0,
-                          regex   => qr/^[+-]?1$/
-                        }
-        );
+
+    my ( $seq_ref, @p );
+    ( $seq_ref, $p[0] ) = validate_pos(
+        @_,
+        { type => Params::Validate::SCALARREF },
+        { type => Params::Validate::HASHREF, default => {} }
+    );
+
+    my %p = validate(
+        @p,
+        {
+            strand => {
+                default => 0,
+                regex   => qr/^[+-]?[01]$/,
+                type    => Params::Validate::SCALAR
+            },
+            sanitized => { default => $DEFAULT_SANITIZED }
+        }
+    );
+
+    $seq_ref = cleanDNA($seq_ref) unless ( $p{sanitized} );
 
     my @frames;
-    foreach my $cur_strand ( $strand == 0 ? ( -1, 1 ) : ($strand) ) {
-        my $stop = $self->regex( 'stop', $cur_strand );
+    foreach my $strand ( $p{strand} == 0 ? ( 1, -1 ) : $p{strand} ) {
+        my $stop = $self->regex( '*', $strand );
 
         foreach my $frame ( 0 .. 2 ) {
-            my $regex = $cur_strand == 1
-                ? qr/^.{$frame}(?:.{3})*$stop/
-                : qr/$stop(?:.{3})*.{$frame}$/;
+            my $regex =
+              $strand == 1
+              ? qr/^.{$frame}(?:.{3})*$stop/
+              : qr/$stop(?:.{3})*.{$frame}$/;
 
-            push @frames, ( $frame + 1 ) * $cur_strand
-                unless ( $$seqRef =~ m/$regex/ );
+            push @frames, ( $frame + 1 ) * $strand
+              unless ( $$seq_ref =~ m/$regex/ );
         }
     }
-    
+
     return \@frames;
 }
 
 1;
+
+=head1 AUTHOR
+
+Kevin Galinsky, <kgalinsk@jcvi.org>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2008-2009 J. Craig Venter Institute, all rights reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
diff --git a/t/00-load.t b/t/00-load.t
index 0cf46a0..ecfb6c0 100644
--- a/t/00-load.t
+++ b/t/00-load.t
@@ -1,9 +1,10 @@
 #!perl
 
-use Test::More tests => 1;
+use Test::More tests => 2;
 
 BEGIN {
 	use_ok( 'JCVI::Translator' );
+    use_ok( 'JCVI::Translator::Utils' );
 }
 
-diag( "Testing JCVI::Translator $JCVI::Translator::VERSION, Perl $], $^X" );
+diag( "Testing JCVI::Translator $JCVI::Translator::VERSION, Perl $], $^X" );
\ No newline at end of file
diff --git a/t/01-translate.t b/t/01-translate.t
new file mode 100644
index 0000000..f68254e
--- /dev/null
+++ b/t/01-translate.t
@@ -0,0 +1,42 @@
+#!perl
+
+use Test::More 'no_plan';
+
+use JCVI::Translator;
+
+my $seq = 'CTGATATCATGCATGCCATTCTCGACCGCTATGCGCCTCCTGTTCCTCGTGGGCCCAAAA';
+
+my $translator = new JCVI::Translator();
+
+is( ${ $translator->translate( \$seq ) },
+    'MISCMPFSTAMRLLFLVGPK', 'Translate frame 1' );
+
+is( ${ $translator->translate( \$seq, { lower => 1 } ) },
+    '*YHACHSRPLCASCSSWAQ', 'Translate frame 2' );
+
+is( ${ $translator->translate( \$seq, { lower => 2 } ) },
+    'DIMHAILDRYAPPVPRGPK', 'Translate frame 3' );
+
+is( ${ $translator->translate( \$seq, { strand => -1 } ) },
+    'FWAHEEQEAHSGREWHA*YQ', 'Translate frame -1' );
+
+is( ${ $translator->translate( \$seq, { strand => -1, upper => 59 } ) },
+    'FGPTRNRRRIAVENGMHDI', 'Translate frame -2' );
+
+is( ${ $translator->translate( \$seq, { strand => -1, upper => 58 } ) },
+    'MGPRGTGGA*RSRMACMIS', 'Translate frame -3' );
+
+is(
+    ${ $translator->translate( \$seq, { partial => 1 } ) },
+    'LISCMPFSTAMRLLFLVGPK',
+    q{Translate 5' partial frame 1}
+);
+
+is(
+    ${
+        $translator->translate( \$seq,
+            { strand => -1, upper => 58, partial => 1 } )
+      },
+    'LGPRGTGGA*RSRMACMIS',
+    q{Translate 5' partial frame -3}
+);
\ No newline at end of file
diff --git a/t/02-translate_exons.t b/t/02-translate_exons.t
new file mode 100644
index 0000000..d2ed94e
--- /dev/null
+++ b/t/02-translate_exons.t
@@ -0,0 +1,120 @@
+#!perl
+
+use Test::More 'no_plan';
+
+use JCVI::Translator;
+
+my $seq = 'CTGATATCATGCATGCCATTCTCGACCGCTATGCGCCTCCTGTTCCTCGTGGGCCCAAAA';
+
+my $translator = new JCVI::Translator();
+
+# Simple cases
+is( ${ $translator->translate_exons( \$seq, [ [ 0, 60 ] ] ) },
+    'MISCMPFSTAMRLLFLVGPK', 'Translate frame 1' );
+is(
+    ${
+        $translator->translate_exons( \$seq, [ [ 0, 60 ] ], { partial => 1 } )
+      },
+    'LISCMPFSTAMRLLFLVGPK',
+    'Translate frame 1 with partial flag'
+);
+
+is(
+    ${
+        $translator->translate_exons( \$seq, [ [ 0, 60 ] ], { strand => -1 } )
+      },
+    'FWAHEEQEAHSGREWHA*YQ',
+    'Translate frame -1'
+);
+
+# Break in the middle
+is( ${ $translator->translate_exons( \$seq, [ [ 0, 30 ], [ 30, 60 ] ] ) },
+    'MISCMPFSTAMRLLFLVGPK', 'Translate frame 1 with break' );
+
+is(
+    ${
+        $translator->translate_exons(
+            \$seq,
+            [ [ 0, 30 ], [ 30, 60 ] ],
+            { strand => -1 }
+        )
+      },
+    'FWAHEEQEAHSGREWHA*YQ',
+    'Translate frame -1 with break'
+);
+
+# Small codons
+is(
+    ${
+        $translator->translate_exons( \$seq,
+            [ map { [ $_, $_ + 1 ] } ( 0 .. 59 ) ] )
+      },
+    'MISCMPFSTAMRLLFLVGPK',
+    'Translate frame 1 with small breaks'
+);
+
+is(
+    ${
+        $translator->translate_exons(
+            \$seq,
+            [ map { [ $_, $_ + 1 ] } ( 0 .. 59 ) ],
+            { strand => -1 }
+        )
+      },
+    'FWAHEEQEAHSGREWHA*YQ',
+    'Translate frame -1 with break'
+);
+
+# Gaps
+is( ${ $translator->translate_exons( \$seq, [ [ 0, 18 ], [ 21, 60 ] ] ) },
+    'MISCMPSTAMRLLFLVGPK', 'Translate frame 1 with gap' );
+
+is(
+    ${
+        $translator->translate_exons(
+            \$seq,
+            [ [ 0, 6 ], [ 9, 60 ] ],
+            { strand => -1 }
+        )
+      },
+    'FWAHEEQEAHSGREWHAYQ',
+    'Translate frame -1 with gap'
+);
+
+# Traverse reading frames
+is( ${ $translator->translate_exons( \$seq, [ [ 0, 18 ], [ 28, 60 ] ] ) },
+    'MISCMPLCASCSSWAQ', 'Translate with gap going from frame 1 to 2' );
+
+is(
+    ${
+        $translator->translate_exons(
+            \$seq,
+            [ [ 6, 21 ], [ 31, 58 ] ],
+            { strand => -1 }
+        )
+      },
+    'MGPRGTGGAEWHA*',
+    'Translate with gap going from frame -3 to -1'
+);
+
+# Translate with random gaps
+is(
+    ${
+        $translator->translate_exons( \$seq,
+            [ [ 6, 23 ], [ 26, 32 ], [ 36, 54 ] ] )
+      },
+    'SCMPFSAISCSSW',
+    'Translate with random gaps on + strand'
+);
+
+is(
+    ${
+        $translator->translate_exons(
+            \$seq,
+            [ [ 6, 23 ], [ 26, 32 ], [ 36, 54 ] ],
+            { strand => -1 }
+        )
+      },
+    'AHEEQEIAENGMH',
+    'Translate with random gaps on - strand'
+);
diff --git a/t/03-translate6.t b/t/03-translate6.t
new file mode 100644
index 0000000..5ccd911
--- /dev/null
+++ b/t/03-translate6.t
@@ -0,0 +1,10 @@
+#!perl
+
+use Test::More 'no_plan';
+use JCVI::DNATools qw(randomDNA);
+
+use JCVI::Translator;
+
+my $translator = new JCVI::Translator;
+
+ok( $translator->translate6( randomDNA() ), 'translate6 ran' );
diff --git a/t/04-custom.t b/t/04-custom.t
new file mode 100644
index 0000000..bfccef0
--- /dev/null
+++ b/t/04-custom.t
@@ -0,0 +1,27 @@
+#!perl
+
+use Test::More 'no_plan';
+
+use JCVI::Translator;
+
+my $translator;
+
+ok(
+    $translator = custom JCVI::Translator(
+        \'{
+name "Standard" ,
+name "SGC0" ,
+id 1 ,
+ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+sncbieaa "---M---------------M---------------M----------------------------"
+-- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+-- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+-- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+}'
+    ),
+    'Custom translation table loaded'
+);
+
+ok(
+    $translator->table_string(), 'Table string'
+);
\ No newline at end of file
diff --git a/t/21-nonstop.t b/t/21-nonstop.t
new file mode 100644
index 0000000..ea85f66
--- /dev/null
+++ b/t/21-nonstop.t
@@ -0,0 +1,47 @@
+#!perl
+
+use Test::More 'no_plan';
+use List::Compare::Functional 'get_symdiff';
+
+use JCVI::Translator::Utils;
+
+my $utils = new JCVI::Translator::Utils;
+
+is(
+    scalar(
+        get_symdiff(
+            [
+                $utils->nonstop( \'TACGTTGGTTAAGTT' ),
+                [ 2, 3, -1, -3 ]
+            ]
+        )
+    ),
+    0,
+    'Nonstop on both strands'
+);
+
+is(
+    scalar(
+        get_symdiff(
+            [
+                $utils->nonstop( \'TACGTTGGTTAAGTT', { strand => 1 } ),
+                [ 2, 3 ]
+            ]
+        )
+    ),
+    0,
+    'Nonstop on + strand'
+);
+
+is(
+    scalar(
+        get_symdiff(
+            [
+                $utils->nonstop( \'TACGTTGGTTAAGTT', { strand => -1 } ),
+                [ -1, -3 ]
+            ]
+        )
+    ),
+    0,
+    'Nonstop on - strand'
+);
\ No newline at end of file
diff --git a/t/22-getORF.t b/t/22-getORF.t
new file mode 100644
index 0000000..801c768
--- /dev/null
+++ b/t/22-getORF.t
@@ -0,0 +1,10 @@
+#!perl
+
+use Test::More 'no_plan';
+use JCVI::DNATools qw(randomDNA);
+
+use JCVI::Translator::Utils;
+
+my $utils = new JCVI::Translator::Utils;
+
+ok( $utils->getORF( randomDNA() ), 'getORF ran' );
diff --git a/t/23-getCDS.t b/t/23-getCDS.t
new file mode 100644
index 0000000..ae6be27
--- /dev/null
+++ b/t/23-getCDS.t
@@ -0,0 +1,10 @@
+#!perl
+
+use Test::More 'no_plan';
+use JCVI::DNATools qw(randomDNA);
+
+use JCVI::Translator::Utils;
+
+my $utils = new JCVI::Translator::Utils;
+
+ok( $utils->getCDS( randomDNA() ), 'getCDS ran' );