From 48ae9fe9b6a3943ae7472e6196e9887c1fbb3985 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Thu, 22 Jun 2023 15:19:44 +0200 Subject: [PATCH] Error when translating gaps DNA_Gap cannot be meaningfully translated, as it does not correspond to any nucleotide, not even an unknown one. In fact, it's dubious that it's even a nucleotide at all. This PR makes `translate(!)` error when run on sequences with gaps, whereas before this PR this was undefined behaviour (out-of-bounds access). Alternative solutions could be to silently skip gaps, which is biologically meaningful, but might lead to strange errors, or to insert AA_Gap, which can only be done if the gaps come in groups of three corresponding to a whole gap codon. In the future we could change behaviour to skip gaps. See issue #277. --- Project.toml | 2 +- src/geneticcode.jl | 10 ++++++---- test/translation.jl | 7 ++++++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index 4976b65e..1851fa5e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BioSequences" uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" authors = ["Sabrina Jaye Ward ", "Jakob Nissen "] -version = "3.1.4" +version = "3.1.5" [deps] BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" diff --git a/src/geneticcode.jl b/src/geneticcode.jl index 6b67538c..7fab9d42 100644 --- a/src/geneticcode.jl +++ b/src/geneticcode.jl @@ -372,7 +372,11 @@ function translate!(aaseq::LongAA, a = reinterpret(RNA, ntseq[3i-2]) b = reinterpret(RNA, ntseq[3i-1]) c = reinterpret(RNA, ntseq[3i]) - if isambiguous(a) | isambiguous(b) | isambiguous(c) + if isgap(a) | isgap(b) | isgap(c) + error("Cannot translate nucleotide sequences with gaps.") + elseif iscertain(a) & iscertain(b) & iscertain(c) + aaseq[i] = code[unambiguous_codon(a, b, c)] + else aa = try_translate_ambiguous_codon(code, a, b, c) if aa === nothing if allow_ambiguous_codons @@ -382,8 +386,6 @@ function translate!(aaseq::LongAA, end end aaseq[i] = aa 
- else - aaseq[i] = code[unambiguous_codon(a, b, c)] end end alternative_start && !isempty(aaseq) && (@inbounds aaseq[1] = AA_M) @@ -421,4 +423,4 @@ function try_translate_ambiguous_codon(code::GeneticCode, end end return found -end \ No newline at end of file +end diff --git a/test/translation.jl b/test/translation.jl index a10ac5ac..fcf71a97 100644 --- a/test/translation.jl +++ b/test/translation.jl @@ -75,6 +75,11 @@ # can't translate N @test_throws Exception translate(rna"ACGUACGNU", allow_ambiguous_codons=false) + # Can't translate gaps + @test_throws Exception translate(dna"A-G") + @test_throws Exception translate(dna"---") + @test_throws Exception translate(dna"AACGAT-A-") + # issue #133 @test translate(rna"GAN") == aa"X" -end \ No newline at end of file +end