diff --git a/CHANGELOG.md b/CHANGELOG.md
index eee698cc..ce99715b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [UNRELEASED]
+### Changed
+* Improved error message when encoding a `LongSequence` (e.g. `LongDNA`) from byte-like objects
 
 ## [3.1.0]
 ### Added
diff --git a/src/longsequences/copying.jl b/src/longsequences/copying.jl
index f939a811..b98e0360 100644
--- a/src/longsequences/copying.jl
+++ b/src/longsequences/copying.jl
@@ -152,9 +152,23 @@ end
 # This is used to effectively scan an array of UInt8 for invalid bytes, when one is detected
 @noinline function throw_encode_error(A::Alphabet, src::AbstractArray{UInt8}, soff::Integer)
     for i in 1:div(64, bits_per_symbol(A))
-        sym = src[soff+i-1]
-        ascii_encode(A, sym) & 0x80 == 0x80 && error("Cannot encode $(repr(sym)) to $A")
+        index = soff + i - 1
+        sym = src[index]
+        # The high bit of the encoded value flags a byte that cannot be encoded in `A`
+        if ascii_encode(A, sym) & 0x80 == 0x80
+            # For ASCII control escapes (\a through \r) and printable ASCII, also show
+            # the byte as a char. `repr` escapes it so the message stays on one line.
+            repr_char = if sym in UInt8('\a'):UInt8('\r') || sym in UInt8(' '):UInt8('~')
+                " (char $(repr(Char(sym))))"
+            else
+                ""
+            end
+            error("Cannot encode byte $(repr(sym))$(repr_char) at index $(index) to $A")
+        end
     end
+    # This function is only meant to run after an invalid byte was detected, so falling
+    # through is a broken invariant. Use `error`, not `@assert`, which may be disabled.
+    error("Expected error in encoding")
 end
 
 @inline function encode_chunk(A::Alphabet, src::AbstractArray{UInt8}, soff::Integer, N::Integer)
diff --git a/test/longsequences/basics.jl b/test/longsequences/basics.jl
index 02af359e..d0c71705 100644
--- a/test/longsequences/basics.jl
+++ b/test/longsequences/basics.jl
@@ -24,6 +24,15 @@
     @test LongSequence(SimpleSeq("AUCGU")) isa LongRNA{2}
     @test LongSequence(SimpleSeq("AUCGU")) == LongRNA{2}("AUCGU")
     LongDNA{4}(LongRNA{4}("AUCGUA")) == LongDNA{4}("ATCGTA")
+
+    # Displays a nice error when constructed from strings, substrings
+    # and bytearrays on encoding error
+    @static if VERSION >= v"1.8"
+        malformed = "ACWpNS"
+        @test_throws "Cannot encode byte $(repr(UInt8('p'))) (char 'p') at index 4 to BioSequences.DNAAlphabet{4}" LongDNA{4}(malformed)
+        malformed = "AGCUGUAGUCGGUAUAUAGGCGCGCUCGAUGAUGAUGCGUGCUGCUATDNANCUG"
+        @test_throws "Cannot encode byte $(repr(UInt8('T'))) (char 'T') at index $(length(malformed) - 7) to BioSequences.RNAAlphabet{2}" LongRNA{2}(malformed)
+    end
 end
 
 @testset "Copy sequence" begin