diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000..18e5d4c
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,44 @@
+ARG UBUNTU_VERSION=20.04
+
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+LABEL org.opencontainers.image.source=https://github.com/cfe-lab/gotoh
+LABEL org.opencontainers.image.description="Gotoh dev container for development and testing"
+
+# Build-time only: keeps apt from prompting while the image is built.
+ARG DEBIAN_FRONTEND=noninteractive
+
+ENV TZ=America/Vancouver \
+    RUBY_VERSION=2.2.2 \
+    BUNDLER_VERSION=1.17.3
+
+# Run every RUN step under bash with pipefail so that a failed download in
+# one of the `curl ... | gpg` / `curl ... | bash` pipelines below fails the
+# build instead of being masked by the last command in the pipe.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y curl gpg
+
+# Add the Ubuntu bionic repositories and receive an apt key for them, then
+# install a pinned gawk build (presumably one only bionic provides -- confirm).
+# apt-get is used rather than apt, whose CLI is not meant for scripts.
+RUN echo "deb http://security.ubuntu.com/ubuntu/ bionic-security main" >> /etc/apt/sources.list && \
+    echo "deb http://security.ubuntu.com/ubuntu/ bionic main" >> /etc/apt/sources.list && \
+    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 && \
+    apt-get update && \
+    apt-get install -y \
+        gawk=1:4.1.4+dfsg-1build1
+
+# Import the GPG keys published at rvm.io, install RVM and add root to the
+# rvm group. -f makes curl fail on an HTTP error rather than pipe an error page.
+RUN curl -fsSL https://rvm.io/mpapis.asc | gpg --import - && \
+    curl -fsSL https://rvm.io/pkuczynski.asc | gpg --import - && \
+    curl -fsSL https://get.rvm.io | bash -s stable && \
+    usermod -a -G rvm root
+
+# Install OpenSSL through RVM, then install Ruby ${RUBY_VERSION} against it
+# (--with-openssl-dir) and make that Ruby the RVM default.
+RUN /bin/bash -l -c "source /etc/profile.d/rvm.sh && rvm pkg install openssl"
+RUN /bin/bash -l -c "source /etc/profile.d/rvm.sh && rvm requirements && rvm install ${RUBY_VERSION} --with-openssl-dir=/usr/local/rvm/usr && rvm --default use ${RUBY_VERSION}"
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..7e62f97 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,15 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu +{ + "name": "CfE Ubuntu Ruby", + + "build": { + "dockerfile": "Dockerfile" + }, + + "updateContentCommand": "cd /workspaces/gotoh/ruby && bundle install", + + "features": { + "ghcr.io/devcontainers/features/git:1": {} + } +} 
diff --git a/.github/workflows/build_and_publish_gem.yml b/.github/workflows/build_and_publish_gem.yml
new file mode 100644
index 0000000..2d4c1f1
--- /dev/null
+++ b/.github/workflows/build_and_publish_gem.yml
@@ -0,0 +1,59 @@
+name: Build and publish the Ruby package
+
+# Triggered when a GitHub release is published.
+on:
+  release:
+    types: [published]
+
+jobs:
+  build_gem:
+    runs-on: ubuntu-latest
+
+    # Request the GITHUB_TOKEN scopes this job uses explicitly instead of
+    # relying on the repository default: `packages: write` to push the gem
+    # to GitHub Packages and `contents: write` to upload the release asset.
+    permissions:
+      contents: write
+      packages: write
+
+    env:
+      # The release's ref name is passed to the build as the gem version.
+      CFE_GOTOH_VERSION: ${{ github.ref_name }}
+      BUILD_PATH: ${{ github.workspace }}/ruby
+      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - name: Checkout code from repo
+        uses: actions/checkout@v4
+
+      - name: Log into GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build devcontainer and build the package
+        uses: devcontainers/ci@v0.3
+        with:
+          imageName: ghcr.io/cfe-lab/gotoh_devcontainer
+          cacheFrom: ghcr.io/cfe-lab/gotoh_devcontainer
+          runCmd: cd /workspaces/gotoh/ruby && bash build_gem.bash
+          env: |
+            CFE_GOTOH_VERSION
+
+      - name: Publish gem to Github Packages
+        run: |
+          # Create the credentials file with mode 0600 before writing the token.
+          mkdir -p $HOME/.gem
+          touch $HOME/.gem/credentials
+          chmod 0600 $HOME/.gem/credentials
+          printf -- "---\n:github: Bearer ${GH_TOKEN}\n" > $HOME/.gem/credentials
+          gem push --KEY github --host https://rubygems.pkg.github.com/${OWNER} "${BUILD_PATH}"/*.gem
+        env:
+          OWNER: ${{ github.repository_owner }}
+
+      - name: Add gem as a release asset
+        run: gh release upload "$CFE_GOTOH_VERSION" "${BUILD_PATH}"/*.gem
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml new file mode 100644 index 0000000..0a4bb00 --- /dev/null +++ 
b/.github/workflows/unit_tests.yml @@ -0,0 +1,37 @@ +name: Automated Tests + +on: ["push"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout code from repo + uses: actions/checkout@v4 + + - name: Log into GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build devcontainer and run tests + uses: devcontainers/ci@v0.3 + with: + imageName: ghcr.io/cfe-lab/gotoh_devcontainer + cacheFrom: ghcr.io/cfe-lab/gotoh_devcontainer + runCmd: cd /workspaces/gotoh/ruby && rake test + env: | + CI_COMMIT_REF=${{ github.ref_name }} + CI_COMMIT_SHA=${{ github.sha }} + CI_PIPELINE_ID=${{ github.run_id }} + CI_COMMIT_BRANCH=${{ github.ref_name }} + CI_PROJECT=${{ github.repository }} + + - name: Upload coverage reports to Codecov with GitHub Action + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + root_dir: ${{ github.workspace }} + files: ${{ github.workspace }}/ruby/coverage/coverage.xml diff --git a/.gitignore b/.gitignore index 6431d81..82d8773 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ env/ __pycache__/ .pytest_cache/ +.DS_Store diff --git a/README.md b/README.md index 56a2c76..eba85c5 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Gotoh, Osamu. "Optimal alignment between groups of sequences and its application CABIOS 9.3 (1993): 361-370. ``` -# Launching the Docker container +# Launching the webserver Docker container To build the Docker container, use the included Makefile in the root directory. This is currently tested with `make build-base`. This builds the Docker image. @@ -56,6 +56,10 @@ to your `requirements.txt` file or similar method. # Ruby Bindings +The `/ruby` directory contains the directory structure required to build a +gem containing Gotoh's Ruby bindings. This gem, named `cfe_gotoh`, contains +a module called `CfeGotoh` that holds the `align_it` and `align_it_aa` functions. 
+ To build Gotoh's Ruby bindings (which are called "alignment" instead of "gotoh" for arbitrary historical reasons), you'll need to have the following installed: @@ -65,8 +69,26 @@ for arbitrary historical reasons), you'll need to have the following installed: [RVM](https://rvm.io/) on Linux) - A C++ compiler -With these installed, you should be able to build the extension module with +The "canonical" environment for testing and building this package is the +dev container defined in the `.devcontainer` directory. This system is derived +from the `cfe_ubuntu` Ruby image built on Ubuntu 20.04 (this old version is +required to run Ruby 2.2.2), and stripped down to only the parts necessary +for Ruby. This environment is also used by the CI/CD pipeline for testing +and building. + +In this environment (or another suitable environment), you should be able to +build the extension module using the `build_gem.bash` script. By default this +will make a gem with the version number `0.1.0.pre`; set the environment variable +`CFE_GOTOH_VERSION` before building to assign a proper version number. + +## Manually building the Ruby bindings + +If you want to manually build the bindings, e.g. for testing/debugging/development, +copy `/ruby/ext/cfe_gotoh/extconf.rb` to the `/alignment/gotoh/` directory and +change the `create_makefile('cfe_gotoh/cfe_gotoh')` line to `create_makefile('cfe_gotoh')`; +then run ``` - ruby extconf.rb + ruby extconf.rb # generates a Makefile and other supporting files make ``` +which will build `cfe_gotoh.so`, which can be `require`d from Ruby. 
diff --git a/alignment/gotoh/.gitignore b/alignment/gotoh/.gitignore new file mode 100644 index 0000000..4ba3832 --- /dev/null +++ b/alignment/gotoh/.gitignore @@ -0,0 +1,5 @@ +Makefile +*.so +*.o +extconf.h +build/* \ No newline at end of file diff --git a/alignment/gotoh/Makefile b/alignment/gotoh/Makefile deleted file mode 100644 index d60bbe7..0000000 --- a/alignment/gotoh/Makefile +++ /dev/null @@ -1,161 +0,0 @@ - -SHELL = /bin/sh - -#### Start of system configuration section. #### - -srcdir = . -topdir = /opt/local/lib/ruby/1.8/i686-darwin13 -hdrdir = $(topdir) -VPATH = $(srcdir):$(topdir):$(hdrdir) -prefix = $(DESTDIR)/opt/local -exec_prefix = $(prefix) -sitearchdir = $(sitelibdir)/$(sitearch) -libexecdir = $(exec_prefix)/libexec -dvidir = $(docdir) -mandir = $(DESTDIR)/opt/local/share/man -datadir = $(datarootdir) -pdfdir = $(docdir) -infodir = $(datarootdir)/info -oldincludedir = $(DESTDIR)/usr/include -sitelibdir = $(sitedir)/$(ruby_version) -bindir = $(exec_prefix)/bin -archdir = $(rubylibdir)/$(arch) -sbindir = $(exec_prefix)/sbin -sitedir = $(libdir)/ruby/site_ruby -localstatedir = $(prefix)/var -localedir = $(datarootdir)/locale -datarootdir = $(prefix)/share -libdir = $(exec_prefix)/lib -sysconfdir = $(prefix)/etc -docdir = $(datarootdir)/doc/$(PACKAGE) -sharedstatedir = $(prefix)/com -includedir = $(prefix)/include -vendorlibdir = $(vendordir)/$(ruby_version) -vendorarchdir = $(vendorlibdir)/$(sitearch) -rubylibdir = $(libdir)/ruby/$(ruby_version) -psdir = $(docdir) -vendordir = $(DESTDIR)/opt/local/lib/ruby/vendor_ruby -htmldir = $(docdir) - -CC = /usr/bin/clang -LIBRUBY = $(LIBRUBY_SO) -LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a -LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME) -LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static - -RUBY_EXTCONF_H = -CFLAGS = -fno-common -pipe -Os -arch x86_64 -fno-common -pipe -fno-common $(cflags) -arch x86_64 -INCFLAGS = -I. 
-I$(topdir) -I$(hdrdir) -I$(srcdir) -DEFS = -CPPFLAGS = -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags) -CXXFLAGS = $(CFLAGS) -ldflags = -L. -L/opt/local/lib -Wl,-headerpad_max_install_names -arch x86_64 -dldflags = -archflag = -arch x86_64 -DLDFLAGS = $(ldflags) $(dldflags) $(archflag) -LDSHARED = $(CC) -dynamic -bundle -undefined suppress -flat_namespace -AR = ar -EXEEXT = - -RUBY_INSTALL_NAME = ruby1.8 -RUBY_SO_NAME = ruby -arch = i686-darwin13 -sitearch = i686-darwin13 -ruby_version = 1.8 -ruby = /opt/local/bin/ruby1.8 -RUBY = $(ruby) -RM = rm -f -MAKEDIRS = mkdir -p -INSTALL = /usr/bin/install -c -INSTALL_PROG = $(INSTALL) -m 0755 -INSTALL_DATA = $(INSTALL) -m 644 -COPY = cp - -#### End of system configuration section. #### - -preload = - -libpath = . $(libdir) /opt/local/lib -LIBPATH = -L. -L$(libdir) -L/opt/local/lib -DEFFILE = - -CLEANFILES = mkmf.log -DISTCLEANFILES = - -extout = -extout_prefix = -target_prefix = -LOCAL_LIBS = -LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc -SRCS = alignment.cpp -OBJS = alignment.o -TARGET = alignment -DLLIB = $(TARGET).bundle -EXTSTATIC = -STATIC_LIB = - -BINDIR = $(bindir) -RUBYCOMMONDIR = $(sitedir)$(target_prefix) -RUBYLIBDIR = $(sitelibdir)$(target_prefix) -RUBYARCHDIR = $(sitearchdir)$(target_prefix) - -TARGET_SO = $(DLLIB) -CLEANLIBS = $(TARGET).bundle $(TARGET).il? 
$(TARGET).tds $(TARGET).map -CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak - -all: $(DLLIB) -static: $(STATIC_LIB) - -clean: - @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) - -distclean: clean - @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log - @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES) - -realclean: distclean -install: install-so install-rb - -install-so: $(RUBYARCHDIR) -install-so: $(RUBYARCHDIR)/$(DLLIB) -$(RUBYARCHDIR)/$(DLLIB): $(DLLIB) - $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR) -install-rb: pre-install-rb install-rb-default -install-rb-default: pre-install-rb-default -pre-install-rb: Makefile -pre-install-rb-default: Makefile -$(RUBYARCHDIR): - $(MAKEDIRS) $@ - -site-install: site-install-so site-install-rb -site-install-so: install-so -site-install-rb: install-rb - -.SUFFIXES: .c .m .cc .cxx .cpp .C .o - -.cc.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.cxx.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.cpp.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.C.o: - $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $< - -.c.o: - $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $< - -$(DLLIB): $(OBJS) Makefile - @-$(RM) $@ - $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS) - - - -$(OBJS): ruby.h defines.h - -### USED TO COMPILE .so FOR RUBY APPLICATIONS ### -# alignment: -# g++ gotoh.cpp -shared -DUSERUBY -DUSERUBY2 -I/usr/share/rvm/rubies/ruby-3.0.0/include/ruby-3.0.0 -I/usr/share/rvm/rubies/ruby-3.0.0/include/ruby-3.0.0/x86_64-linux -o alignment.X.r2.so -O3 -fPIC diff --git a/alignment/gotoh/extconf.rb b/alignment/gotoh/extconf.rb deleted file mode 100644 index a75b78f..0000000 --- a/alignment/gotoh/extconf.rb +++ /dev/null @@ -1,4 +0,0 @@ -require "mkmf" - -create_header -create_makefile("alignment") diff --git a/alignment/gotoh/gotoh.cpp b/alignment/gotoh/gotoh.cpp index fb9ffb5..eab96e9 100644 --- a/alignment/gotoh/gotoh.cpp +++ b/alignment/gotoh/gotoh.cpp @@ -852,11 +852,11 @@ void widen_gaps(string* 
seq) return ret; } - - extern "C" void Init_alignment() + extern "C" void Init_cfe_gotoh() { - rb_define_global_function("align_it", (VALUE(*)(...))align_it, 4); - rb_define_global_function("align_it_aa", (VALUE(*)(...))align_it_aa, 4); + VALUE gotoh = rb_define_module("CfeGotoh"); + rb_define_module_function(gotoh, "align_it", (VALUE(*)(...))align_it, 4); + rb_define_module_function(gotoh, "align_it_aa", (VALUE(*)(...))align_it_aa, 4); } #endif diff --git a/ruby/.gitignore b/ruby/.gitignore new file mode 100644 index 0000000..b79747d --- /dev/null +++ b/ruby/.gitignore @@ -0,0 +1,3 @@ +*.gem +coverage/* +.DS_Store diff --git a/ruby/Gemfile b/ruby/Gemfile new file mode 100644 index 0000000..5d71a3a --- /dev/null +++ b/ruby/Gemfile @@ -0,0 +1,9 @@ +source 'https://rubygems.org' +ruby '2.2.2' + +# To install all these gems, run "gem install bundler", then "bundle install". + +gem 'minitest', '5.15.0' +gem 'minitest-reporters', '1.7.1' +gem 'simplecov', '0.17.1' +gem 'simplecov-cobertura', '1.4.2' diff --git a/ruby/Gemfile.lock b/ruby/Gemfile.lock new file mode 100644 index 0000000..28a58a1 --- /dev/null +++ b/ruby/Gemfile.lock @@ -0,0 +1,36 @@ +GEM + remote: https://rubygems.org/ + specs: + ansi (1.5.0) + builder (3.3.0) + docile (1.3.5) + json (2.5.1) + minitest (5.15.0) + minitest-reporters (1.7.1) + ansi + builder + minitest (>= 5.0) + ruby-progressbar + ruby-progressbar (1.13.0) + simplecov (0.17.1) + docile (~> 1.1) + json (>= 1.8, < 3) + simplecov-html (~> 0.10.0) + simplecov-cobertura (1.4.2) + simplecov (~> 0.8) + simplecov-html (0.10.2) + +PLATFORMS + ruby + +DEPENDENCIES + minitest (= 5.15.0) + minitest-reporters (= 1.7.1) + simplecov (= 0.17.1) + simplecov-cobertura (= 1.4.2) + +RUBY VERSION + ruby 2.2.2p95 + +BUNDLED WITH + 1.17.3 
diff --git a/ruby/build_gem.bash b/ruby/build_gem.bash
new file mode 100644
index 0000000..e2d4c25
--- /dev/null
+++ b/ruby/build_gem.bash
@@ -0,0 +1,19 @@
+#! /usr/bin/env bash
+
+# Build the cfe_gotoh gem.
+#
+# The C++ source lives in ../alignment/gotoh/gotoh.cpp; it is copied into the
+# extension directory for the duration of the build and removed afterwards.
+
+# Abort on the first failing command so that a failed copy or `gem build`
+# makes the script (and the CI step that runs it) fail.
+set -euo pipefail
+
+# Resolve paths relative to this script so it works from any directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Remove the temporary copy of the source however the script exits.
+trap 'rm -f ext/cfe_gotoh/cfe_gotoh.cpp' EXIT
+
+cp ../alignment/gotoh/gotoh.cpp ext/cfe_gotoh/cfe_gotoh.cpp
+gem build cfe_gotoh.gemspec
diff --git a/ruby/cfe_gotoh.gemspec b/ruby/cfe_gotoh.gemspec new file mode 100644 index 0000000..918753e --- /dev/null +++ b/ruby/cfe_gotoh.gemspec @@ -0,0 +1,23 @@ +Gem::Specification.new do |s| + s.name = "cfe_gotoh" + s.version = ENV['CFE_GOTOH_VERSION'] || '0.1.0.pre' + s.summary = "CfE implementation of the Gotoh sequence alignment algorithm" + s.files = [ + 'lib/cfe_gotoh.rb', + 'ext/cfe_gotoh/cfe_gotoh.cpp' + ] + s.extensions = [ + 'ext/cfe_gotoh/extconf.rb', + ] + s.authors = [ + "Conan Woods", + "Jamie Kai", + "David Rickett", + "Richard Liang" + ] + # Associate this gem with a GitHub repo; this allows the gems to be + # automatically associated with the repo when pushed to GitHub Packages. + s.metadata = { + "github_repo" => "ssh://github.com/cfe-lab/gotoh" + } +end diff --git a/ruby/ext/.gitignore b/ruby/ext/.gitignore new file mode 100644 index 0000000..496ee2c --- /dev/null +++ b/ruby/ext/.gitignore @@ -0,0 +1 @@ +.DS_Store \ No newline at end of file diff --git a/ruby/ext/cfe_gotoh/.gitignore b/ruby/ext/cfe_gotoh/.gitignore new file mode 100644 index 0000000..c16485e --- /dev/null +++ b/ruby/ext/cfe_gotoh/.gitignore @@ -0,0 +1,6 @@ +cfe_gotoh.cpp +cfe_gotoh.o +cfe_gotoh.so +extconf.h +Makefile +mkmf.log diff --git a/ruby/ext/cfe_gotoh/extconf.rb b/ruby/ext/cfe_gotoh/extconf.rb new file mode 100644 index 0000000..18945cb --- /dev/null +++ b/ruby/ext/cfe_gotoh/extconf.rb @@ -0,0 +1,4 @@ +require "mkmf" + +create_header +create_makefile('cfe_gotoh/cfe_gotoh') diff --git a/ruby/lib/.gitignore b/ruby/lib/.gitignore new file mode 100644 index 0000000..162836e --- /dev/null +++ b/ruby/lib/.gitignore @@ -0,0 +1,2 @@ +cfe_gotoh/* +.DS_Store diff --git a/ruby/lib/cfe_gotoh.rb b/ruby/lib/cfe_gotoh.rb new file mode 100644 index 0000000..5229e7e --- /dev/null +++ b/ruby/lib/cfe_gotoh.rb @@ -0,0 +1,413 @@ +#TODO: Scoring 
algorithm to improve frame_align? + +require_relative 'cfe_gotoh/cfe_gotoh' + + +module CfeGotoh + class Error < RuntimeError + end + + class GapMergeError < Error + end + + sub_matrix = Array.new(127) {Array.new(127) {-1.0} } + ['A','T','G','C','R','Y','K','M','B','D','H','V','S','W','N'].each do |nuc| + sub_matrix[nuc.ord()][nuc.ord()] = 1.0 + sub_matrix[nuc.ord()]['X'.ord()]=sub_matrix['X'.ord()][nuc.ord()]=-6.0 if(nuc !='N') + end + #bi-mixtures + sub_matrix['A'.ord()]['R'.ord()]=sub_matrix['R'.ord()]['A'.ord()]=1.0 + sub_matrix['G'.ord()]['R'.ord()]=sub_matrix['R'.ord()]['G'.ord()]=1.0 + sub_matrix['C'.ord()]['Y'.ord()]=sub_matrix['Y'.ord()]['C'.ord()]=1.0 + sub_matrix['T'.ord()]['Y'.ord()]=sub_matrix['Y'.ord()]['T'.ord()]=1.0 + sub_matrix['G'.ord()]['K'.ord()]=sub_matrix['K'.ord()]['G'.ord()]=1.0 + sub_matrix['T'.ord()]['K'.ord()]=sub_matrix['K'.ord()]['T'.ord()]=1.0 + sub_matrix['C'.ord()]['M'.ord()]=sub_matrix['M'.ord()]['C'.ord()]=1.0 + sub_matrix['A'.ord()]['M'.ord()]=sub_matrix['M'.ord()]['A'.ord()]=1.0 + sub_matrix['C'.ord()]['S'.ord()]=sub_matrix['S'.ord()]['C'.ord()]=1.0 + sub_matrix['G'.ord()]['S'.ord()]=sub_matrix['S'.ord()]['G'.ord()]=1.0 + sub_matrix['T'.ord()]['W'.ord()]=sub_matrix['W'.ord()]['T'.ord()]=1.0 + sub_matrix['A'.ord()]['W'.ord()]=sub_matrix['W'.ord()]['A'.ord()]=1.0 + #tri-mixtures + sub_matrix['C'.ord()]['B'.ord()]=sub_matrix['B'.ord()]['C'.ord()]=1.0 + sub_matrix['G'.ord()]['B'.ord()]=sub_matrix['B'.ord()]['G'.ord()]=1.0 + sub_matrix['T'.ord()]['B'.ord()]=sub_matrix['B'.ord()]['T'.ord()]=1.0 + sub_matrix['A'.ord()]['D'.ord()]=sub_matrix['D'.ord()]['A'.ord()]=1.0 + sub_matrix['G'.ord()]['D'.ord()]=sub_matrix['D'.ord()]['G'.ord()]=1.0 + sub_matrix['T'.ord()]['D'.ord()]=sub_matrix['D'.ord()]['T'.ord()]=1.0 + sub_matrix['A'.ord()]['H'.ord()]=sub_matrix['H'.ord()]['A'.ord()]=1.0 + sub_matrix['C'.ord()]['H'.ord()]=sub_matrix['H'.ord()]['C'.ord()]=1.0 + sub_matrix['T'.ord()]['H'.ord()]=sub_matrix['H'.ord()]['T'.ord()]=1.0 + 
sub_matrix['A'.ord()]['V'.ord()]=sub_matrix['V'.ord()]['A'.ord()]=1.0 + sub_matrix['C'.ord()]['V'.ord()]=sub_matrix['V'.ord()]['C'.ord()]=1.0 + sub_matrix['G'.ord()]['V'.ord()]=sub_matrix['V'.ord()]['G'.ord()]=1.0 + #other + sub_matrix['$'.ord()]['$'.ord()]=50.0 + sub_matrix['T'.ord()]['U'.ord()] = sub_matrix['U'.ord()]['T'.ord()] = 1.0 + sub_matrix['N'.ord()]['N'.ord()] = 0.0 + sub_matrix['X'.ord()]['-'.ord()]=sub_matrix['-'.ord()]['X'.ord()]=3.0 + ['A','T','G','C'].each do |ch| + sub_matrix[ch.ord()]['*'.ord()]=sub_matrix['*'.ord()][ch.ord()]=1.0 + sub_matrix[ch.ord()]['&'.ord()]=sub_matrix['&'.ord()][ch.ord()]=0.7 + sub_matrix[ch.ord()]['$'.ord()]=sub_matrix['$'.ord()][ch.ord()]=0.0 + sub_matrix[ch.ord()]['.'.ord()]=sub_matrix['.'.ord()][ch.ord()]=-20.0 + sub_matrix[ch.ord()]['N'.ord()]=sub_matrix['N'.ord()][ch.ord()]=-3.0 + end + sub_matrix.each {|column| column.freeze} + sub_matrix.freeze + + NUCLEOTIDE_MATRIX = sub_matrix + + def self.score_alignment(standard, query) + sc = 0.0 + 0.upto(standard.size() - 1) do |i| + sc += NUCLEOTIDE_MATRIX[standard[i,1].upcase().ord()][query[i,1].upcase().ord()] + end + return sc + end + + def self.make_gap_list(seq) + list = [] + cur_ins = nil + prev_i = nil + 0.upto(seq.size() - 1) do |i| + if(seq[i,1] == '-') + if(prev_i and i == prev_i + 1) + cur_ins << i + prev_i = i + else + list << cur_ins if(cur_ins != nil and cur_ins != []) + cur_ins = [i] + prev_i = i + end + end + end + list << cur_ins if(cur_ins != nil and cur_ins != []) + return list + end + + def self.trim_leading_dashes(standard, query) + leading_dashes_match = /^(-+)[^-]/.match(standard) + if (leading_dashes_match.nil?) + return + end + leading_dashes = leading_dashes_match[1] + standard[0, leading_dashes.size()] = '' + query[0, leading_dashes.size()] = '' + end + + def self.trim_trailing_dashes(standard, query) + trailing_dashes_match = /[^-](-+)$/.match(standard) + if (trailing_dashes_match.nil?) 
+ return + end + trailing_dashes = trailing_dashes_match[1] + end_of_standard = standard.size() - trailing_dashes.size() + standard[end_of_standard, trailing_dashes.size()] = '' + query[end_of_standard, trailing_dashes.size()] = '' + end + + def self.fix_incomplete_edge_codon(query, side=:leading) + edge_idx = 0 + dash_regex = /^(-+)[^-]/ + incr = 1 + if (side != :leading) # fix the trailing edge + edge_idx = -1 + dash_regex = /[^-](-+)$/ + incr = -1 + end + + if (query[edge_idx] == '-') + dashes = dash_regex.match(query)[1] # we know there will be a match + + # If the length of the dashes aren't a multiple of 3, turn some + # of the query characters into dashes to force it to be a full + # codon of dashes. + if (dashes.size() % 3 >= 1) + first_non_dash_idx = dashes.size() + if (side != :leading) + first_non_dash_idx = query.size() - dashes.size() - 1 + end + query[first_non_dash_idx] = '-' + if (dashes.size() % 3 == 1) + query[first_non_dash_idx + incr] = '-' + end + end + end + end + + def self.merge_insertions_and_deletions_to_fix_oof_sequences( + standard, + query + ) + # Merge deletions and insertions until the sequences have a cogent length + # (i.e. have length divisible by 3). This helps fix poor insertions near + # the start of the sequence. + raise 'Standard and query should be the same length' if standard.size() != query.size() + if(standard.size() % 3 != 0) + dex = 0 + while(dex = standard.index(/-/, dex)) + [-1, 1, -2, 2].each do |offset| # look one base away, then two bases away + if ((dex + offset >= 0) and query[dex + offset] == '-') + standard[dex] = '' + query[dex + offset] = '' + dex = 0 + break + end + end + + # Stop if the sequences are now a cogent length. + if(standard.size() % 3 == 0) + break + end + dex += 1 + end + end + end + + def self.cluster_gaps(gaps, raise_errors=false) + # Merge adjacent gaps if they are not a codon-sized gap. 
+ new_gap_list = [] + gaps.each_with_index do |gap, i| + next if(gap.size() == 0) # we already ate this one + if(gap.size() % 3 == 0) # this gap is fine! + new_gap_list << gap + next + end + + gap2 = gaps[i + 1] # note: these could be nil, which we test for below + gap3 = gaps[i + 2] + # Can I merge with the next gap? + if (gap2 and (gap + gap2).size() % 3 == 0 and (gap2.first - gap.last) < 9) + if(gap2.size() > gap.size()) + new_gap_list << ((gap2.first - gap.size()) .. gap2.first - 1).to_a() + gap2 + else + new_gap_list << gap + ((gap.last + 1) .. (gap.last + gap2.size())).to_a() + end + gaps[i + 1] = [] + # Can I merge with the next two gaps? + elsif( + gap2 and gap3 and + (gap + gap2 + gap3).size() % 3 == 0 and + (gap3.first - gap.last) < 12 + ) + # Place the gap around the middle of the three merging gaps. + new_gap = ( + ((gap2.first - gap.size()) .. gap2.first - 1).to_a() + + gap2 + + ((gap2.last + 1) .. (gap2.last + gap3.size())).to_a() + ) + new_gap_list << new_gap + + gaps[i + 1] = [] + gaps[i + 2] = [] + else + # We can't merge the gaps; either raise an error or meekly proceed. + if (raise_errors) + raise GapMergeError + else + new_gap_list << gap # FIXME this behaviour differs between insertions and deletions + end + end + end + return new_gap_list + end + + def self.align_gaps_to_frame(gaps, common_gap_locations=nil) + # Align gaps to codon boundaries, giving preference to common + # gap locations if specified. + # Gaps must be listed in ascending order, i.e. from left to right. + + offset = 0 # offset created by previous gaps. + gaps.each do |gap| + # See if this gap is close to a common gap location (within 3 amino acids). + if (!common_gap_locations.nil?) + closest_common = common_gap_locations.min() do |a, b| + (3 * a - (gap[0] - offset)).abs() <=> (3 * b - (gap[0] - offset)).abs() + end + if(closest_common != nil and (3 * closest_common - (gap[0] - offset)).abs() <= 9) + # Align the gap to this position. 
+ new_gap = [] + 0.upto(gap.size() - 1) do |i| + new_gap << 3 * closest_common + i + offset + end + gap.replace(new_gap) + end + end + + # Align the gap to the nearest appropriate frame. + # Original comment from Conan: scoring would be good here + if(gap[0] % 3 == 1) # set back one base + new_gap = [] + gap.each do |i| + new_gap << i - 1 + end + gap.replace(new_gap) + elsif(gap[0] % 3 == 2) # set forward one base + new_gap = [] + gap.each do |i| + new_gap << i + 1 + end + gap.replace(new_gap) + end + + offset += gap.size() + end + return gaps + end + + def self.splice_gaps_into_sequence(seq, gaps) + # Place the specified gaps into the sequence. Note that the + # gaps are specified by their positions in an *aligned* sequence, + # and as such include "offsets" introduced by gaps placed earlier + # in the sequence. The gaps must be in left-to-right order. + seq = seq.gsub('-','') + gaps.each do |gap| + gap.each do |i| + if(i > seq.size()) + seq.insert(-1, '-') + else + seq.insert(i, '-') + end + end + end + return seq + end + + #common_insert_locations is based on amino acid locations starting at base 0. + #Assumes standard in the first base. + #Prealign lets you run a lot of the corrections and qc on a already aligned sequence. + def self.frame_align( + standard, + query, + gap_init=3, + gap_penalty=1, + common_insert_locations=nil, + trim=false, + raise_errors=false, + prealigned=false + ) + if (common_insert_locations.nil?) + common_insert_locations = [] + end + if(!prealigned) + elem = align_it(standard, query, gap_init, gap_penalty) + standard = elem[0] + query = elem[1] + end + raise "Standard and query should be the same length" if standard.size() != query.size() + + # Trim leading and trailing dashes if desired. 
+ if (trim) + trim_leading_dashes(standard, query) + trim_trailing_dashes(standard, query) + fix_incomplete_edge_codon(query, :leading) + fix_incomplete_edge_codon(query, :trailing) + end + + merge_insertions_and_deletions_to_fix_oof_sequences(standard, query) + + if(standard.gsub(/[^-]/,'').size() % 3 != 0 and raise_errors) + raise "Cannot frame align, #{standard.gsub(/[^-]/,'').size()} inserted bases not divisible by 3" + end + if(query.gsub(/[^-]/,'').size() % 3 != 0 and raise_errors) + raise "Cannot frame align, #{query.gsub(/[^-]/,'').size()} deleted bases not divisible by 3" + end + + # Build the insert/delete lists. These lists look like + # [[3,4,5], [9], [11,12]] + insert_list = make_gap_list(standard) + delete_list = make_gap_list(query) + + # Process the insertions. + if(insert_list.size() > 0) + new_ins_list = [] + + # Step 1: cluster the insertions. + begin + new_ins_list = cluster_gaps(insert_list, raise_errors=raise_errors) + rescue GapMergeError + raise "Cannot frame align insert" if raise_errors + end + + # Step 2: frame-align the insertions, shifting things to common insertion + # positions where appropriate. + align_gaps_to_frame(new_ins_list, common_gap_locations=common_insert_locations) + + # Put the insertions back into the standard. + standard = splice_gaps_into_sequence(standard, new_ins_list) + end + + # Process the deletions. + if(delete_list.size() > 0) + new_del_list = [] + + # As above, step 1 is to cluster the deletions. + # FIXME note that the original code behaved differently between + # insertions and deletions; confirm that this is the right + # way forward. + begin + new_del_list = cluster_gaps(delete_list, raise_errors=raise_errors) + rescue GapMergeError + raise "Cannot frame align deletion" if raise_errors + end + + # Again as above, frame-align the deletions; this time + # we don't worry about any common deletion positions. + align_gaps_to_frame(new_del_list) + + # Put the deletions back into the query. 
+ query = splice_gaps_into_sequence(query, new_del_list) + end + + return [standard, query] + end + + #Returns a [seq_sans_inserts, [list of inserts]] + def self.remove_inserts(elem) + return remove_insertions_from_query(elem[0], elem[1]) + end + + def self.remove_insertions_from_query(standard, query) + seq = '' + query + inserts = [] + + insert_list = [] + 0.upto(standard.size() - 1) do |i| + insert_list << i if(standard[i,1] == '-') + end + + big_insert_list = [] + if(standard.include?('-'))#Inserts first + #First step should be to cluster inserts + cur_ins = nil + prev_i = nil + insert_list.each do |i| + if(prev_i and i == prev_i + 1) + cur_ins << i + prev_i = i + else + big_insert_list << cur_ins if(cur_ins != nil and cur_ins != []) + cur_ins = [i] + prev_i = i + end + end + big_insert_list << cur_ins if(cur_ins != nil and cur_ins != []) + end + + offset = 0 + big_insert_list.each do |ins| + ins_seq = '' + ins.each do |i| + ins_seq += query[i,1] + end + inserts << [((ins[0] - offset) / 3), ins_seq] + offset += ins.size() + ins.each do |i| + seq[i,1] = '.' 
+ end + end + + return [seq.gsub('.',''), inserts] + end +end diff --git a/ruby/rakefile b/ruby/rakefile new file mode 100644 index 0000000..62a6c8a --- /dev/null +++ b/ruby/rakefile @@ -0,0 +1,44 @@ +require 'rake' +require 'rake/testtask' +require 'rake/file_utils' +require 'fileutils' + +task :default => :test + +base_dir = File.expand_path(".") + + +file 'lib/cfe_gotoh/cfe_gotoh.so' => ['ext/cfe_gotoh/cfe_gotoh.so'] do + FileUtils.mkdir_p('lib/cfe_gotoh') + FileUtils.copy('ext/cfe_gotoh/cfe_gotoh.so', 'lib/cfe_gotoh/cfe_gotoh.so') +end + + +file 'ext/cfe_gotoh/cfe_gotoh.so' => [ + 'ext/cfe_gotoh/Makefile', + 'ext/cfe_gotoh/cfe_gotoh.cpp' +] do + sh 'cd ext/cfe_gotoh && make' +end + + +file 'ext/cfe_gotoh/cfe_gotoh.cpp' => ['../alignment/gotoh/gotoh.cpp'] do + FileUtils.copy('../alignment/gotoh/gotoh.cpp', 'ext/cfe_gotoh/cfe_gotoh.cpp') +end + + +file 'ext/cfe_gotoh/Makefile' => [ + 'ext/cfe_gotoh/extconf.rb', + 'ext/cfe_gotoh/cfe_gotoh.cpp' +] do + Dir.chdir('ext/cfe_gotoh') do + ruby 'extconf.rb' + end +end + + +Rake::TestTask.new do |t| + t.libs = [base_dir] + t.pattern = "test/**/*_test.rb" +end +task :test => ['lib/cfe_gotoh/cfe_gotoh.so'] \ No newline at end of file diff --git a/ruby/test/.gitignore b/ruby/test/.gitignore new file mode 100644 index 0000000..ced4113 --- /dev/null +++ b/ruby/test/.gitignore @@ -0,0 +1,2 @@ +html_reports/* +reports/* diff --git a/ruby/test/cfe_gotoh_test.rb b/ruby/test/cfe_gotoh_test.rb new file mode 100644 index 0000000..ee93b5c --- /dev/null +++ b/ruby/test/cfe_gotoh_test.rb @@ -0,0 +1,1201 @@ +require_relative 'test_helper' +require_relative '../lib/cfe_gotoh' + + +class CfeGotohTest < Minitest::Test +end + + +class ScoreAlignmentTest < CfeGotohTest + REGULAR_BASES = ['A', 'C', 'G', 'T'].freeze + MIXTURES = { + 'R' => ['A', 'G'], + 'Y' => ['C', 'T'], + 'S' => ['G', 'C'], + 'W' => ['A', 'T'], + 'K' => ['G', 'T'], + 'M' => ['A', 'C'], + 'B' => ['C', 'G', 'T'], + 'D' => ['A', 'G', 'T'], + 'H' => ['A', 'C', 'T'], + 'V' => 
['A', 'C', 'G'], + 'N' => ['A', 'C', 'G', 'T'] + }.freeze + + def score_alignment_symmetric_test(expected, base1, base2) + assert_equal expected, CfeGotoh.score_alignment(base1, base2) + assert_equal expected, CfeGotoh.score_alignment(base2, base1) + end + + def test_regular_base_scores + REGULAR_BASES.each do |std_base| + REGULAR_BASES.each do |query_base| + expected = -1.0 + if (std_base == query_base) + expected = 1.0 + end + score_alignment_symmetric_test(expected, std_base, query_base) + end + end + end + + def test_mixture_scores + MIXTURES.each do |std_base, _| + MIXTURES.each do |query_base, __| + expected = -1.0 + if (std_base == query_base) + expected = 1.0 + if (std_base == 'N') + expected = 0.0 + end + end + score_alignment_symmetric_test(expected, std_base, query_base) + end + end + end + + def test_mixture_with_regular_scores + REGULAR_BASES.each do |base1| + MIXTURES.each do |base2, possible_bases| + expected = -1.0 + if (base2 == 'N') + expected = -3.0 + elsif (possible_bases.include?(base1)) + expected = 1.0 + end + score_alignment_symmetric_test(expected, base1, base2) + end + end + end + + def test_score_x + (REGULAR_BASES + MIXTURES.keys()).each do |base| + expected = -6.0 + if (base == 'N') + expected = -1.0 + end + score_alignment_symmetric_test(expected, base, 'X') + end + end + + def test_special_characters + assert_equal 50.0, CfeGotoh.score_alignment('$', '$') + score_alignment_symmetric_test(50.0, '$', '$') + score_alignment_symmetric_test(1.0, 'T', 'U') + assert_equal 0.0, CfeGotoh.score_alignment('N', 'N') + score_alignment_symmetric_test(3.0, 'X', '-') + REGULAR_BASES.each do |base| + score_alignment_symmetric_test(1.0, base, '*') + score_alignment_symmetric_test(0.7, base, '&') + score_alignment_symmetric_test(0.0, base, '$') + score_alignment_symmetric_test(-20.0, base, '.') + end + end + + def test_multiple_bases + seq1 = 'AGACTCTVC---' + seq2 = 'CGANNCTGCXXX' + expected = -1.0 + 2.0 - 3.0 - 3.0 + 4.0 + 9.0 + assert_equal expected, 
CfeGotoh.score_alignment(seq1, seq2) + end +end + + +class MakeGapListTest < CfeGotohTest + MAKE_GAP_LIST_TEST_CASES = [ + { + name: 'empty_sequence', + seq: '', + expected: [] + }, + { + name: 'no_gaps', + seq: 'ACAGAT', + expected: [] + }, + { + name: 'gap_in_middle', + seq: 'ACA--GATC', + expected: [[3, 4]] + }, + { + name: 'gap_at_start', + seq: '--ACAGATCC', + expected: [[0, 1]] + }, + { + name: 'gap_at_end', + seq: 'ACAGATCC----', + expected: [[8, 9, 10, 11]] + }, + { + name: 'multiple_gaps_in_middle', + seq: 'ACA--GA-TC', + expected: [[3, 4], [7]] + }, + { + name: 'multiple_gaps_throughout', + seq: '-ACA--GA-TC----', + expected: [[0], [4, 5], [8], [11, 12, 13, 14]] + } + ] + + MAKE_GAP_LIST_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + assert_equal test_entry[:expected], CfeGotoh.make_gap_list(test_entry[:seq]) + end + end +end + + +class TrimLeadingDashesTest < CfeGotohTest + TRIM_LEADING_DASHES_TEST_CASES = [ + { + name: 'no_leading_dashes', + std: 'ACAGAT', + query: 'ACACAT', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'one_leading_dash', + std: '-ACAGAT', + query: 'GACACAT', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'several_leading_dashes', + std: '----ACAGAT', + query: 'GGGGACACAT', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'no_leading_dashes_other_dashes_ignored', + std: 'ACA---CATGAT-', + query: 'ACAGGG---CATC', + expected_std: 'ACA---CATGAT-', + expected_query: 'ACAGGG---CATC' + }, + { + name: 'one_leading_dash_other_dashes_ignored', + std: '-ACA---CATGAT-', + query: '-ACAGGG---CATC', + expected_std: 'ACA---CATGAT-', + expected_query: 'ACAGGG---CATC' + }, + { + name: 'several_leading_dashes_other_dashes_ignored', + std: '----ACA---CATGAT-', + query: 'GGGGACAGGG---CATC', + expected_std: 'ACA---CATGAT-', + expected_query: 'ACAGGG---CATC' + } + ] + + TRIM_LEADING_DASHES_TEST_CASES.each do |test_entry| + 
define_method("test_#{test_entry[:name]}") do + std = test_entry[:std] + query = test_entry[:query] + CfeGotoh.trim_leading_dashes(std, query) + assert_equal test_entry[:expected_std], std + assert_equal test_entry[:expected_query], query + end + end +end + + +class TrimTrailingDashesTest < CfeGotohTest + TRIM_TRAILING_DASHES_TEST_CASES = [ + { + name: 'no_trailing_dashes', + std: 'ACAGAT', + query: 'ACACAT', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'one_trailing_dash', + std: 'ACAGAT-', + query: 'ACACATG', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'several_trailing_dashes', + std: 'ACAGAT----', + query: 'ACACATGGGG', + expected_std: 'ACAGAT', + expected_query: 'ACACAT' + }, + { + name: 'no_trailing_dashes_other_dashes_ignored', + std: '-ACA---CATGAT', + query: 'CACAGGG---CAT', + expected_std: '-ACA---CATGAT', + expected_query: 'CACAGGG---CAT' + }, + { + name: 'one_trailing_dash_other_dashes_ignored', + std: '-ACA---CATGAT-', + query: 'CACAGGG---CATC', + expected_std: '-ACA---CATGAT', + expected_query: 'CACAGGG---CAT' + }, + { + name: 'several_trailing_dashes_other_dashes_ignored', + std: '-ACA---CATGAT----', + query: 'CACAGGG---CATCGGC', + expected_std: '-ACA---CATGAT', + expected_query: 'CACAGGG---CAT' + } + ] + + TRIM_TRAILING_DASHES_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + std = test_entry[:std] + query = test_entry[:query] + CfeGotoh.trim_trailing_dashes(std, query) + assert_equal test_entry[:expected_std], std + assert_equal test_entry[:expected_query], query + end + end +end + + +class FixIncompleteEdgeCodonTest < CfeGotohTest + FIX_INCOMPLETE_EDGE_CODON_TEST_CASES = [ + { + name: 'no_leading_dashes', + seq: 'ACTAGG', + expected: 'ACTAGG' + }, + { + name: 'one_complete_blank_codon_leading', + seq: '---ACTAGG', + expected: '---ACTAGG' + }, + { + name: 'several_complete_blank_codons_leading', + seq: '---------ACTAGG', + expected: '---------ACTAGG' + }, + { + name: 
'no_leading_dashes_other_blanks_ignored', + seq: 'ACT---AGG---', + expected: 'ACT---AGG---' + }, + { + name: 'one_leading_dash', + seq: '-GGACTAGG', + expected: '---ACTAGG' + }, + { + name: 'two_leading_dashes', + seq: '--GACTAGG', + expected: '---ACTAGG' + }, + { + name: 'one_dash_plus_full_codon_leading', + seq: '----GGACTAGG', + expected: '------ACTAGG' + }, + { + name: 'two_dashes_plus_full_codons_leading', + seq: '--------GACTAGG', + expected: '---------ACTAGG' + }, + { + name: 'leading_dashes_other_blanks_ignored', + seq: '-----GACT---AGG---', + expected: '------ACT---AGG---' + }, + { + name: 'no_trailing_dashes', + seq: 'ACTAGG', + expected: 'ACTAGG', + side: :trailing + }, + { + name: 'one_complete_blank_codon_trailing', + seq: 'ACTAGG---', + expected: 'ACTAGG---', + side: :trailing + }, + { + name: 'several_complete_blank_codons_trailing', + seq: 'ACTAGG---------', + expected: 'ACTAGG---------', + side: :trailing + }, + { + name: 'no_trailing_dashes_other_blanks_ignored', + seq: '---ACT---AGG', + expected: '---ACT---AGG', + side: :trailing + }, + { + name: 'one_trailing_dash', + seq: 'ACTAGGTT-', + expected: 'ACTAGG---', + side: :trailing + }, + { + name: 'two_trailing_dashes', + seq: 'ACTAGGT--', + expected: 'ACTAGG---', + side: :trailing + }, + { + name: 'one_trailing_dash_plus_full_codon', + seq: 'ACTAGGTT----', + expected: 'ACTAGG------', + side: :trailing + }, + { + name: 'two_trailing_dashes_plus_full_codons', + seq: 'ACTAGGT--------', + expected: 'ACTAGG---------', + side: :trailing + }, + { + name: 'trailing_dashes_other_blanks_ignored', + seq: '------ACT---AGGT-----', + expected: '------ACT---AGG------', + side: :trailing + } + ] + + FIX_INCOMPLETE_EDGE_CODON_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + seq = test_entry[:seq] + side = test_entry[:side] + if (side.nil?) 
+ side = :leading + end + CfeGotoh.fix_incomplete_edge_codon(seq, side) + assert_equal test_entry[:expected], seq + end + end +end + + +class MergeInsertionsAndDeletionsToFixOofSequencesTest < CfeGotohTest + def test_standard_and_query_must_be_same_length + assert_raises RuntimeError do + CfeGotoh.merge_insertions_and_deletions_to_fix_oof_sequences('ACT', 'ACTACT') + end + end + + MERGE_INDELS_TEST_CASES = [ + { + name: 'no_need_for_merge', + std: 'ACTAAG', + query: 'ACTCAG', + expected_std: 'ACTAAG', + expected_query: 'ACTCAG' + }, + { + name: 'blanks_but_no_need_for_merge', + std: 'ACT---AGCTTT', + query: 'ACTAGC---TTC', + expected_std: 'ACT---AGCTTT', + expected_query: 'ACTAGC---TTC' + }, + { + name: 'bad_length_but_no_possible_merges', + std: 'ACTAAGC', + query: 'ACTCAGC', + expected_std: 'ACTAAGC', + expected_query: 'ACTCAGC' + }, + { + name: 'merge_one_base_ahead', + std: 'ACT-AAG', + query: 'ACTC-AG', + expected_std: 'ACTAAG', + expected_query: 'ACTCAG' + }, + { + name: 'merge_one_base_behind', + std: 'ACT-AAG', + query: 'AC-TCAG', + expected_std: 'ACTAAG', + expected_query: 'ACTCAG' + }, + { + name: 'merge_two_bases_ahead', + std: 'ACT-AAG', + query: 'ACTCA-G', + expected_std: 'ACTAAG', + expected_query: 'ACTCAG' + }, + { + name: 'merge_two_bases_behind', + std: 'ACT-AAG', + query: 'A-CTCAG', + expected_std: 'ACTAAG', + expected_query: 'ACTCAG' + }, + { + name: 'one_base_behind_preferred', + std: 'ACT-AAG', + query: 'AC-T-AG', + expected_std: 'ACTAAG', + expected_query: 'ACT-AG' + }, + { + name: 'one_base_ahead_preferred_over_two_bases_behind', + std: 'ACT-AAA', + query: 'A-CT-AA', + expected_std: 'ACTAAA', + expected_query: 'A-CTAA' + }, + { + name: 'two_bases_behind_preferred_over_two_bases_ahead', + std: 'ACT-AAA', + query: 'A-CTA-A', + expected_std: 'ACTAAA', + expected_query: 'ACTA-A' + }, + { + name: 'gaps_too_far_to_merge', + std: 'ACT-AAA', + query: 'ACTAAA-', + expected_std: 'ACT-AAA', + expected_query: 'ACTAAA-' + }, + { + name: 
'merges_stop_at_cogent_length', + std: 'ACT-AAAG-GG-CC', + query: 'AC-TAAAGGG--CC', + expected_std: 'ACTAAAGGG-CC', + expected_query: 'ACTAAAGGG-CC' + }, + { + name: 'impossible_merge_skipped_but_later_ones_happen', + std: 'ACT-AAAGGG-CC', + query: 'ACTAAA-GG-GCC', + expected_std: 'ACT-AAAGGGCC', + expected_query: 'ACTAAA-GGGCC' + } + ] + + MERGE_INDELS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + std = test_entry[:std] + query = test_entry[:query] + side = test_entry[:side] + CfeGotoh.merge_insertions_and_deletions_to_fix_oof_sequences(std, query) + assert_equal test_entry[:expected_std], std + assert_equal test_entry[:expected_query], query + end + end +end + + +class ClusterGapsTest < CfeGotohTest + CLUSTER_GAPS_TEST_CASES = [ + { + name: 'no_gaps', + gaps: [], + expected: [] + }, + { + name: 'empty_gap_causes_nothing', + gaps: [[]], + expected: [] + }, + { + name: 'good_size_gap', + gaps: [[3, 4, 5]], + expected: [[3, 4, 5]] + }, + { + name: 'bad_size_gap', + gaps: [[3, 4, 5, 6]], + expected: [[3, 4, 5, 6]] + }, + { + name: 'merge_two_close_gaps_to_first', + gaps: [[10, 11], [13]], + expected: [[10, 11, 12]] + }, + { + name: 'merge_two_close_gaps_to_second', + gaps: [[7], [10, 11]], + expected: [[9, 10, 11]] + }, + { + name: 'two_gaps_can_merge_edge_case', + gaps: [[2, 3, 4, 5, 6], [14]], + expected: [[2, 3, 4, 5, 6, 7]] + }, + { + name: 'two_gaps_too_far_edge_case', + gaps: [[2, 3, 4, 5, 6], [15]], + expected: [[2, 3, 4, 5, 6], [15]] + }, + { + name: 'two_gaps_too_far', + gaps: [[2, 3, 4, 5, 6], [21]], + expected: [[2, 3, 4, 5, 6], [21]] + }, + { + name: 'three_close_gaps_merge', + gaps: [[8], [12, 13, 14, 15], [18, 19, 20, 21]], + expected: [[11, 12, 13, 14, 15, 16, 17, 18, 19]] + }, + { + name: 'three_gaps_merge_edge_case', + gaps: [[8], [12, 13, 14, 15], [19, 20, 21, 22]], + expected: [[11, 12, 13, 14, 15, 16, 17, 18, 19]] + }, + { + name: 'three_gaps_too_far_edge_case', + gaps: [[8], [12, 13, 14, 15], [20, 21, 22, 23]], 
+ expected: [[8], [12, 13, 14, 15], [20, 21, 22, 23]] + }, + { + name: 'typical_case', + gaps: [[3, 4, 5], [8, 9], [13], [19, 20, 21, 22, 23, 24], [27], [32], [38, 39, 40, 41], [50, 51], [60], [70], [75, 76]], + expected: [[3, 4, 5], [8, 9, 10], [19, 20, 21, 22, 23, 24], [31, 32, 33, 34, 35, 36], [50, 51], [60], [74, 75, 76]] + } + ] + + CLUSTER_GAPS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + assert_equal test_entry[:expected], CfeGotoh.cluster_gaps(test_entry[:gaps]) + end + end + + def test_bad_gap_causes_error + assert_raises CfeGotoh::GapMergeError do + CfeGotoh.cluster_gaps([[3, 4]], raise_errors=true) + end + end + + def test_bad_gap_among_several_gaps_causes_error + assert_raises CfeGotoh::GapMergeError do + CfeGotoh.cluster_gaps([[3, 4, 5], [9, 10, 11, 12, 13, 14], [17]], raise_errors=true) + end + end +end + + +class AlignGapsToFrameTest < CfeGotohTest + NO_COMMON_POSITIONS_TEST_CASES = [ + { + name: 'no_gaps', + gaps: [], + expected: [] + }, + { + name: 'already_in_frame', + gaps: [[6, 7, 8]], + expected: [[6, 7, 8]] + }, + { + name: 'shift_toward_beginning', + gaps: [[7, 8, 9]], + expected: [[6, 7, 8]] + }, + { + name: 'shift_toward_end', + gaps: [[5, 6, 7]], + expected: [[6, 7, 8]] + }, + { + name: 'two_in_frame', + gaps: [[6, 7, 8], [15, 16, 17]], + expected: [[6, 7, 8], [15, 16, 17]] + }, + { + name: 'two_needing_shifts', + gaps: [[5, 6, 7, 8, 9, 10], [16, 17, 18]], + expected: [[6, 7, 8, 9, 10, 11], [15, 16, 17]] + }, + { + name: 'several_in_frame', + gaps: [[6, 7, 8], [15, 16, 17], [24, 25, 26]], + expected: [[6, 7, 8], [15, 16, 17], [24, 25, 26]] + }, + { + name: 'several_needing_shifts', + gaps: [[5, 6, 7, 8, 9, 10], [16, 17, 18], [31, 32, 33]], + expected: [[6, 7, 8, 9, 10, 11], [15, 16, 17], [30, 31, 32]] + } + ] + + NO_COMMON_POSITIONS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + assert_equal test_entry[:expected], CfeGotoh.align_gaps_to_frame(test_entry[:gaps]) + end + 
end + + WITH_COMMON_POSITIONS_TEST_CASES = [ + { + name: 'no_gaps_with_common', + gaps: [], + common: [7, 15], + expected: [] + }, + { + name: 'too_far_from_common', + gaps: [[3, 4, 5]], + common: [7], + expected: [[3, 4, 5]] + }, + { + name: 'too_far_from_common_but_needs_shift', + gaps: [[2, 3, 4]], + common: [7], + expected: [[3, 4, 5]] + }, + { + name: 'too_far_before_common_edge_case', + gaps: [[9, 10, 11]], + common: [7], + expected: [[9, 10, 11]] + }, + { + name: 'within_range_before_common_edge_case', + gaps: [[12, 13, 14]], + common: [7], + expected: [[21, 22, 23]] + }, + { + name: 'within_range_after_common_edge_case', + gaps: [[30, 31, 32]], + common: [7], + expected: [[21, 22, 23]] + }, + { + name: 'too_far_after_common_edge_case', + gaps: [[33, 34, 35]], + common: [7], + expected: [[33, 34, 35]] + }, + { + name: 'too_far_after_common', + gaps: [[45, 46, 47]], + common: [7], + expected: [[45, 46, 47]] + }, + { + name: 'too_far_after_common_but_needs_shift', + gaps: [[46, 47, 48]], + common: [7], + expected: [[45, 46, 47]] + }, + { + name: 'offset_from_first_shifted_to_common_is_factored_into_second_shifted_to_common', + # [57, 58, 59] is at codon 20 of the aligned sequence, which would be + # at position 54 of the "raw" sequence; this should be just in the + # "catchment area" of the common insertion at codon 15. 
+ gaps: [[15, 16, 17], [57, 58, 59]], + common: [7, 15], + expected: [[21, 22, 23], [48, 49, 50]] + }, + { + name: 'offset_from_first_not_shifted_to_common_is_factored_into_second_shifted_to_common', + gaps: [[4, 5, 6], [57, 58, 59]], + common: [15], + expected: [[3, 4, 5], [48, 49, 50]] + }, + { + name: 'offset_from_first_without_shifting_is_factored_into_second_shifted_to_common', + gaps: [[3, 4, 5], [57, 58, 59]], + common: [15], + expected: [[3, 4, 5], [48, 49, 50]] + }, + { + name: 'offset_from_first_shifted_to_common_is_factored_into_second', + # Even though [36, 37, 38] is in the "catchment area" of the common + # insertion at codon 15, that's in the coordinates of the aligned + # sequence; when the offset is accounted for, it should not be shifted. + gaps: [[15, 16, 17], [36, 37, 38]], + common: [7, 15], + expected: [[21, 22, 23], [36, 37, 38]] + }, + { + name: 'offset_from_first_not_shifted_to_common_is_factored_into_second', + gaps: [[3, 4, 5], [36, 37, 38]], + common: [7, 15], + expected: [[3, 4, 5], [36, 37, 38]] + }, + { + name: 'offset_from_first_already_at_common_is_factored_into_second', + gaps: [[21, 22, 23], [36, 37, 38]], + common: [7, 15], + expected: [[21, 22, 23], [36, 37, 38]] + }, + { + name: 'offsets_taken_into_account_in_shifting_to_common', + gaps: [[3, 4, 5, 6, 7, 8], [36, 37, 38], [111, 112, 113]], + common: [15, 31], + # [102, 103, 104] is codon 31 after the 9 base offset + expected: [[3, 4, 5, 6, 7, 8], [36, 37, 38], [102, 103, 104]] + }, + { + name: 'two_gaps_shifted_to_same_common_position', + gaps: [[14, 15, 16], [22, 23, 24]], + common: [7], + expected: [[21, 22, 23], [24, 25, 26]] + }, + { + name: 'typical_case', + gaps: [[3, 4, 5], [17, 18, 19, 20, 21, 22], [40, 41, 42], [45, 46, 47]], + common: [6, 14], + expected: [[3, 4, 5], [21, 22, 23, 24, 25, 26], [39, 40, 41], [54, 55, 56]] + } + ] + + WITH_COMMON_POSITIONS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + assert_equal( + 
test_entry[:expected], + CfeGotoh.align_gaps_to_frame(test_entry[:gaps], test_entry[:common]) + ) + end + end +end + + +class SpliceGapsIntoSequenceTest < CfeGotohTest + SPLICE_GAPS_TEST_CASES = [ + { + name: 'no_gaps', + seq: 'ACTAAG', + gaps: [], + expected: 'ACTAAG' + }, + { + name: 'no_sequence', + seq: '', + gaps: [], + expected: '' + }, + { + name: 'insert_at_beginning', + seq: '---ACTAAG', + gaps: [[0, 1, 2]], + expected: '---ACTAAG' + }, + { + name: 'insert_at_beginning_removed_from_elsewhere', + seq: 'ACT---AAG', + gaps: [[0, 1, 2]], + expected: '---ACTAAG' + }, + { + name: 'insert_at_end', + seq: 'ACTAAG---', + gaps: [[6, 7, 8]], + expected: 'ACTAAG---' + }, + { + name: 'insert_at_end_removed_from_elsewhere', + seq: 'ACT---AAG', + gaps: [[6, 7, 8]], + expected: 'ACTAAG---' + }, + { + name: 'insert_past_end', + seq: 'ACTAAG', + gaps: [[7, 8, 9, 10]], + expected: 'ACTAAG----' + }, + { + name: 'insert_in_middle_retained', + seq: 'ACT---AAG', + gaps: [[3, 4, 5]], + expected: 'ACT---AAG' + }, + { + name: 'insert_in_middle_corrected', + seq: 'AC--TA-AG', + gaps: [[3, 4, 5]], + expected: 'ACT---AAG' + }, + { + name: 'offsets_accounted_for', + seq: 'AACAT---GGG---G', + gaps: [[3, 4, 5], [9, 10, 11]], + expected: 'AAC---ATG---GGG' + }, + { + name: 'typical_case', + seq: '---AACAT---GGG---G------', + gaps: [[0, 1, 2], [6, 7, 8], [12, 13, 14], [18, 19, 20]], + expected: '---AAC---ATG---GGG---' + } + ] + + SPLICE_GAPS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + assert_equal( + test_entry[:expected], + CfeGotoh.splice_gaps_into_sequence(test_entry[:seq], test_entry[:gaps]) + ) + end + end +end + + +class FrameAlignTest < CfeGotohTest + def test_bad_inserted_bases_error + std = 'ACGTACGT-ACGT' + query = 'ACGTACGTAACGT' + assert_raises RuntimeError do + CfeGotoh.frame_align(std, query, 3, 1, nil, false, true, true) + end + end + + def test_bad_deleted_bases_error + std = 'ACGTACGTAACGT' + query = 'ACGTACGT-ACGT' + assert_raises 
RuntimeError do + CfeGotoh.frame_align(std, query, 3, 1, nil, false, true, true) + end + end + + def test_unmerged_inserts_raise_error + std = 'ACG--TACGTACGTAC-GT' + query = 'ACGGGTACGTACGTACCGT' + assert_raises RuntimeError do + CfeGotoh.frame_align(std, query, 3, 1, nil, false, true, true) + end + end + + def test_unmerged_deletions_raise_error + std = 'ACGGGTACGTACGTACCGT' + query = 'ACG--TACGTACGTAC-GT' + assert_raises RuntimeError do + CfeGotoh.frame_align(std, query, 3, 1, nil, false, true, true) + end + end + + PREALIGN_TEST_CASES = [ + { + name: 'edges_are_trimmed', + std: '------ACGTACGTACGT------', + query: 'AAAAAA-CGTACGTAC--AAAAAA', + expected_std: 'ACGTACGTACGT', + expected_query: '---TACGTA---' + }, + { + name: 'edges_not_trimmed_when_not_specified', + std: '------ACGTACGTACGT------', + query: 'AAAAAAACGTACGTACGTAAAAAA', + trim: false, + expected_std: '------ACGTACGTACGT------', + expected_query: 'AAAAAAACGTACGTACGTAAAAAA' + }, + { + name: 'indels_are_merged', + std: 'ACGT-ACGTACGT', + query: 'ACGTAC-GTACGT', + expected_std: 'ACGTACGTACGT', + expected_query: 'ACGTACGTACGT' + }, + { + name: 'insertions_are_clustered', + std: 'ACG--TA-CGTACGT', + query: 'ACGGGTAACGTACGT', + expected_std: 'ACG---TACGTACGT', + expected_query: 'ACGGGTAACGTACGT' + }, + { + name: 'deletions_are_clustered', + std: 'ACGGGTAACGTACGT', + query: 'ACG--T-ACGTACGT', + expected_std: 'ACGGGTAACGTACGT', + expected_query: 'ACG---TACGTACGT' + }, + { + name: 'insertions_are_frame_aligned', + std: 'ACGT---ACGTACGT', + query: 'ACGTTTTACGTACGT', + expected_std: 'ACG---TACGTACGT', + expected_query: 'ACGTTTTACGTACGT' + }, + { + name: 'deletions_are_frame_aligned', + std: 'ACGTTTTACGTACGT', + query: 'ACGT---ACGTACGT', + expected_std: 'ACGTTTTACGTACGT', + expected_query: 'ACG---TACGTACGT' + }, + { + name: 'insertions_moved_to_common_positions', + std: 'ACGT---ACGTACGT', + query: 'ACGTTTTACGTACGT', + common_insert_locations: [3], + expected_std: 'ACGTACGTA---CGT', + expected_query: 
'ACGTTTTACGTACGT', + }, + { + # The changes that should be made here: + # * edges trimmed + # * the insertions after position 3 and 4 should be merged + # * the deletions at 6-7 and 9 should be merged + # * the insertion after 12 should be merged with the deletion at 13 + # * the insertion after 19 should be frame-aligned by setting back 1 base + # * the deletion at 24-26 should be frame-aligned by setting forward 1 base + # * the insertion after 36 should be shifted over to common insert location 11 + name: 'typical_case', + std: '------ACG--T-ACAAGCTA-TCGTACG---TACGCCCTACGTACGTA---CGT------', + query: 'AAAAAAACGAATCAC--G-TAG-CGTACGCCCTACG---TACGTACGTATTTCGTAAAAAA', + common_insert_locations: [11], + expected_std: 'ACG---TACAAGCTATCGTAC---GTACGCCCTACGTAC---GTACGT', + expected_query: 'ACGAATCAC---GTAGCGTACGCCCTACGT---ACGTACGTATTTCGT' + } + ] + + PREALIGN_TEST_CASES.each do |test_entry| + define_method("test_without_alignment_#{test_entry[:name]}") do + trim = test_entry[:trim].nil? ? true : false + raise_errors = test_entry[:raise_errors].nil? ? true : false + result = CfeGotoh.frame_align( + test_entry[:std], + test_entry[:query], + 3, + 1, + test_entry[:common_insert_locations], + trim, + raise_errors, + true + ) + assert_equal(test_entry[:expected_std], result[0]) + assert_equal(test_entry[:expected_query], result[1]) + end + end + + ALIGNMENT_TEST_CASES = [ + { + name: 'edges_are_trimmed', + std: 'ACGTACGTACGT', + query: 'CCCCCCACGTACGTACGTAAAAAA', + expected_std: 'ACGTACGTACGT', + expected_query: 'ACGTACGTACGT' + }, + { + name: 'edges_not_trimmed_when_not_specified', + std: 'ACGTACGTACGT', + query: 'AAAAAAACGTACGTACGTAAAAAA', + trim: false, + expected_std: '------ACGTACGTACGT------', + expected_query: 'AAAAAAACGTACGTACGTAAAAAA' + }, + { + # X preferentially aligns to dashes. 
+ # std: TCTAAACCXC-GGGTTT + # qry: TCTAAACC-CXGGGTTT + name: 'indels_are_merged', + std: 'TCTAAACXCGGGTTT', + query: 'TCTAAACCXGGGTTT', + expected_std: 'TCTAAACXCGGGTTT', + expected_query: 'TCTAAACCXGGGTTT' + }, + { + # std: AAACC--CGG-GTTT + # qry: AAACCTTCGGAGTTT + name: 'insertions_are_clustered', + std: 'AAACCCGGGTTT', + query: 'AAACCTTCGGAGTTT', + expected_std: 'AAACCC---GGGTTT', + expected_query: 'AAACCTTCGGAGTTT' + }, + { + # std: AAACCTTCGGAGTTT + # qry: AAACC--CGG-GTTT + name: 'deletions_are_clustered', + std: 'AAACCTTCGGAGTTT', + query: 'AAACCCGGGTTT', + expected_std: 'AAACCTTCGGAGTTT', + expected_query: 'AAACCC---GGGTTT' + }, + { + # std: AAACC---CGGGTTT + # qry: AAACCAGACGGGTTT + name: 'insertions_are_frame_aligned', + std: 'AAACCCGGGTTT', + query: 'AAACCAGACGGGTTT', + expected_std: 'AAACCC---GGGTTT', + expected_query: 'AAACCAGACGGGTTT' + }, + { + # std: AAACCAGACGGGTTT + # qry: AAACC---CGGGTTT + name: 'deletions_are_frame_aligned', + std: 'AAACCAGACGGGTTT', + query: 'AAACCCGGGTTT', + expected_std: 'AAACCAGACGGGTTT', + expected_query: 'AAACCC---GGGTTT' + }, + { + # std: AAACC---CGGGTTT + # qry: AAACCAGACGGGTTT + name: 'insertions_moved_to_common_positions', + std: 'AAACCCGGGTTT', + query: 'AAACCAGACGGGTTT', + common_insert_locations: [3], + expected_std: 'AAACCCGGG---TTT', + expected_query: 'AAACCAGACGGGTTT', + }, + # Nov 13, 2024: having trouble cooking up a case that aligns the way I want it to. + # Skipping this for now as most of this logic is tested in the no-alignment + # tests anyway. 
+ # { + # # The changes that should be made here: + # # * edges trimmed + # # * the insertions after position 3 and 4 should be merged + # # * the deletions at 14-15 and 17 should be merged + # # * the insertion after 12 should be merged with the deletion at 13 + # # * the insertion after 19 should be frame-aligned by setting back 1 base + # # * the deletion at 24-26 should be frame-aligned by setting forward 1 base + # # * the insertion after 36 should be shifted over to common insert location 11 + # # std: ---TCT--A-AACCC-XGGGXXTXTT---AACCCGGGTTTAAACCC---GGG--- + # # qry: XXXTCTXXAXAACCCX-GGG--T-TTGGGAACCCGG---TAAACCCAAAGGGAAA + # name: 'typical_case', + # std: 'TCTAAACCCXGGGXXTXTTAACCCGGGTTTAAACCCGGG', + # query: 'XXXTCTXXAXAACCCXGGG--T-TTGGGAACCCGGTAAACCCAAAGGGAAA', + # common_insert_locations: [11], + # expected_std: 'TCT---AAACCCXGGGXXTX---TTAACCCGGGTTTAAA---CCCGGG', + # expected_query: 'TCTXXAXAACCCXGG---GTTTGGGAACCCGGT---AAACCCAAAGGG' + # } + ] + + ALIGNMENT_TEST_CASES.each do |test_entry| + define_method("test_with_alignment_#{test_entry[:name]}") do + trim = test_entry[:trim].nil? ? true : false + raise_errors = test_entry[:raise_errors].nil? ? 
true : false + result = CfeGotoh.frame_align( + test_entry[:std], + test_entry[:query], + 3, + 1, + test_entry[:common_insert_locations], + trim, + raise_errors, + false + ) + assert_equal(test_entry[:expected_std], result[0]) + assert_equal(test_entry[:expected_query], result[1]) + end + end +end + + +class RemoveInsertsTest < CfeGotohTest + REMOVE_INSERTS_TEST_CASES = [ + { + name: 'no_insertions', + std: 'AAACCCGGGTTT', + query: 'AAACCCGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [] + }, + { + name: 'no_insertions_deletions_ignored', + std: 'AAACCCGGGTTT', + query: 'AAACCC---TTT', + expected_seq: 'AAACCC---TTT', + expected_inserts: [] + }, + { + name: 'single_base_insertion_in_middle', + std: 'AAACCC-GGGTTT', + query: 'AAACCCTGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[2, 'T']] + }, + { + name: 'single_base_insertion_in_middle_mid_codon', + std: 'AAACCCGG-GTTT', + query: 'AAACCCGGTGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[2, 'T']] + }, + { + name: 'single_base_insertion_at_beginning', + std: '-AAACCCGGGTTT', + query: 'TAAACCCGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[0, 'T']] + }, + { + name: 'single_base_insertion_at_end', + std: 'AAACCCGGGTTT-', + query: 'AAACCCGGGTTTA', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[4, 'A']] + }, + { + name: 'several_single_base_insertions', + std: '-AAACCC-GGGT-TT-', + query: 'TAAACCCAG-GTCTTA', + expected_seq: 'AAACCCG-GTTT', + expected_inserts: [[0, 'T'], [2, 'A'], [3, 'C'], [4, 'A']] + }, + { + name: 'multiple_base_insertion_in_middle', + std: 'AAACCC--GGGTTT', + query: 'AAACCCCAGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[2, 'CA']] + }, + { + name: 'multiple_base_insertion_in_middle_mid_codon', + std: 'AAACC---CGGGTTT', + query: 'AAACCAGTCGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[1, 'AGT']] + }, + { + name: 'multiple_base_insertion_at_beginning', + std: '---AAACCCGGGTTT', + query: 'CGTAAACCCGGGTTT', + 
expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[0, 'CGT']] + }, + { + name: 'multiple_base_insertion_at_end', + std: 'AAACCCGGGTTT----', + query: 'AAACCCGGGTTTACGT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[4, 'ACGT']] + }, + { + name: 'distinct_insertions_at_same_codon', + std: 'AAA---C-CCGGGTTT', + query: 'AAAGTGCTCCGGGTTT', + expected_seq: 'AAACCCGGGTTT', + expected_inserts: [[1, 'GTG'], [1, 'T']] + }, + { + name: 'typical_case', + std: '---AAACC-CGGG---TT-T----', + query: 'CGTAAACGGCG-GACGTTATACGT', + expected_seq: 'AAACGCG-GTTT', + expected_inserts: [[0, 'CGT'], [1, 'G'], [3, 'ACG'], [3, 'A'], [4, 'ACGT']] + } + ] + + REMOVE_INSERTS_TEST_CASES.each do |test_entry| + define_method("test_#{test_entry[:name]}") do + result = CfeGotoh.remove_insertions_from_query( + test_entry[:std], + test_entry[:query] + ) + assert_equal(test_entry[:expected_seq], result[0]) + assert_equal(test_entry[:expected_inserts], result[1]) + + wrapper_result = CfeGotoh.remove_inserts([test_entry[:std], test_entry[:query]]) + assert_equal(test_entry[:expected_seq], wrapper_result[0]) + assert_equal(test_entry[:expected_inserts], wrapper_result[1]) + end + end +end diff --git a/ruby/test/templates/index.html.erb b/ruby/test/templates/index.html.erb new file mode 100644 index 0000000..114bb94 --- /dev/null +++ b/ruby/test/templates/index.html.erb @@ -0,0 +1,89 @@ + + +
++ Finished in <%= total_time_to_hms %>, <%= '%.2f tests/s' % (count / total_time) %>, <%= '%.2f assertions/s' % (assertions / total_time) %> +
++ + <%= '%d' % count %> tests, + <%= '%d' % assertions %> assertions, + <%= '%d' % failures %> failures, + <%= '%d' % errors %> errors, + <%= '%d' % skips %> skips + +
+ +<%= "#{location(test.failure)}\n\n#{test.failure.message}" %>+ <% end %> +