Skip to content

Commit

Permalink
Stop pirating BioSymbols (BioJulia#259)
Browse files Browse the repository at this point in the history
BioSequences pirated BioSymbols by overloading `gap`, `isambiguous`, `isgap` and
`iscertain`. Remove these overloads.
  • Loading branch information
jakobnissen authored Nov 12, 2022
1 parent e4c8997 commit db8a692
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 39 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BioSequences"
uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
authors = ["Sabrina Jaye Ward <[email protected]>", "Jakob Nissen <[email protected]>"]
version = "3.1.0"
version = "3.1.1"

[deps]
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
Expand All @@ -10,11 +10,11 @@ SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c"
Twiddle = "7200193e-83a8-5a55-b20d-5d36d44a0795"

[compat]
BioSymbols = "5.1.0"
BioSymbols = "5.1.2"
SnoopPrecompile = "1"
StableRNGs = "0.1, 1.0"
Twiddle = "1.1.1"
julia = "1.5"
SnoopPrecompile = "1"

[extras]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Expand Down
2 changes: 0 additions & 2 deletions src/BioSequences.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,6 @@ import Twiddle: enumerate_nibbles,
repeatpattern
using Random

BioSymbols.gap(::Type{Char}) = '-'

include("alphabet.jl")

# Load the bit-twiddling internals that optimised BioSequences methods depend on.
Expand Down
16 changes: 10 additions & 6 deletions src/biosequence/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,13 @@ Base.count(pred, seq::BioSequence) = count_naive(pred, seq)
Base.count(pred, seqa::BioSequence, seqb::BioSequence) = count_naive(pred, seqa, seqb)

# These functions are BioSequences-specific because they take two arguments
BioSymbols.isambiguous(x::T, y::T) where {T<:NucleicAcid} = isambiguous(x) | isambiguous(y)
BioSymbols.isgap(x::T, y::T) where {T<:NucleicAcid} = isgap(x) | isgap(y)
BioSymbols.iscertain(x::T, y::T) where {T<:NucleicAcid} = iscertain(x) & iscertain(y)
"""
    isambiguous_or(x::T, y::T) where {T<:NucleicAcid}

Return `isambiguous(x) | isambiguous(y)`, i.e. whether at least one of the two
symbols is ambiguous. Both symbols must have the same nucleic acid type `T`.
"""
isambiguous_or(x::T, y::T) where {T<:NucleicAcid} = isambiguous(x) | isambiguous(y)

"""
    isgap_or(x::T, y::T) where {T<:NucleicAcid}

Return `isgap(x) | isgap(y)`, i.e. whether at least one of the two symbols is a
gap. Both symbols must have the same nucleic acid type `T`.
"""
isgap_or(x::T, y::T) where {T<:NucleicAcid} = isgap(x) | isgap(y)

"""
    iscertain_and(x::T, y::T) where {T<:NucleicAcid}

Return `iscertain(x) & iscertain(y)`, i.e. whether both symbols are certain.
Both symbols must have the same nucleic acid type `T`.
"""
iscertain_and(x::T, y::T) where {T<:NucleicAcid} = iscertain(x) & iscertain(y)

# Counting `isambiguous` over two sequences of the same type `S` whose alphabet is a
# 2-bit nucleic acid alphabet always yields 0; the sequences are not inspected.
function Base.count(
    ::typeof(isambiguous), seqa::S, seqb::S
) where {S<:BioSequence{<:NucleicAcidAlphabet{2}}}
    return 0
end

# Same constant result for `isgap` with a shared 2-bit nucleic acid alphabet
# (presumably because such an alphabet cannot encode a gap; confirm against the alphabet).
function Base.count(
    ::typeof(isgap), seqa::S, seqb::S
) where {S<:BioSequence{<:NucleicAcidAlphabet{2}}}
    return 0
end
Expand All @@ -56,13 +60,13 @@ Calculate GC content of `seq`.
function gc_content(seq::NucleotideSeq)
    # An empty sequence short-circuits to 0.0 so the division below never sees a zero length.
    isempty(seq) && return 0.0
    # Fraction of symbols satisfying `isGC` relative to the sequence length.
    return count(isGC, seq) / length(seq)
end

n_ambiguous(seq) = count(isambiguous, seq)
n_ambiguous(seqa::BioSequence, seqb::BioSequence) = count(isambiguous, seqa, seqb)
n_ambiguous(seqa::BioSequence, seqb::BioSequence) = count(isambiguous_or, seqa, seqb)

n_certain(seq) = count(iscertain, seq)
n_certain(seqa::BioSequence, seqb::BioSequence) = count(iscertain, seqa, seqb)
n_certain(seqa::BioSequence, seqb::BioSequence) = count(iscertain_and, seqa, seqb)

n_gaps(seq::BioSequence) = count(isgap, seq)
n_gaps(seqa::BioSequence, seqb::BioSequence) = count(isgap, seqa, seqb)
n_gaps(seqa::BioSequence, seqb::BioSequence) = count(isgap_or, seqa, seqb)

# Count, via the two-sequence `count` method, the symbol pairs of `seqa` and `seqb`
# for which `!=` holds.
function mismatches(seqa::BioSequence, seqb::BioSequence)
    return count(!=, seqa, seqb)
end

# Count, via the two-sequence `count` method, the symbol pairs of `seqa` and `seqb`
# for which `==` holds.
function matches(seqa::BioSequence, seqb::BioSequence)
    return count(==, seqa, seqb)
end
12 changes: 6 additions & 6 deletions src/longsequences/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ Base.count(::typeof(isambiguous), seq::SeqOrView{<:NucleicAcidAlphabet{4}}) = co
# A pair of 2-bit encoded sequences will never have ambiguous bases.
Base.count(::typeof(isambiguous), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{2}} = 0
Base.count(::typeof(isambiguous), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_ambiguous_bitpar(seqa, seqb)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isambiguous, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isambiguous, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isambiguous_or, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isambiguous_or, promote(seqa, seqb)...)

# Counting certain sites
let
Expand All @@ -120,8 +120,8 @@ let
) |> eval
end
Base.count(::typeof(iscertain), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_certain_bitpar(seqa, seqb)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(iscertain, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(iscertain, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(iscertain_and, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(iscertain_and, promote(seqa, seqb)...)

# Counting gap sites
let
Expand Down Expand Up @@ -163,5 +163,5 @@ let
end
Base.count(::typeof(isgap), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_gap_bitpar(seqa, seqb)
Base.count(::typeof(isgap), seqa::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_gap_bitpar(seqa)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isgap, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isgap, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isgap_or, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isgap_or, promote(seqa, seqb)...)
44 changes: 23 additions & 21 deletions test/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
alias::Function,
seqa::BioSequence,
seqb::BioSequence,
singlearg::Bool
singlearg::Bool,
multi_alias::Function
)
# Test that order does not matter.
@test count(pred, seqa, seqb) == count(pred, seqb, seqa)
@test BioSequences.count_naive(pred, seqa, seqb) == BioSequences.count_naive(pred, seqb, seqa)
@test BioSequences.count_naive(multi_alias, seqa, seqb) == BioSequences.count_naive(multi_alias, seqb, seqa)
@test alias(seqa, seqb) == alias(seqb, seqa)
# Test that result is the same as counting naively.
@test count(pred, seqa, seqb) == BioSequences.count_naive(pred, seqa, seqb)
@test count(pred, seqb, seqa) == BioSequences.count_naive(pred, seqb, seqa)
@test count(pred, seqa, seqb) == BioSequences.count_naive(multi_alias, seqa, seqb)
@test count(pred, seqb, seqa) == BioSequences.count_naive(multi_alias, seqb, seqa)
# Test that the alias function works.
@test count(pred, seqa, seqb) == alias(seqa, seqb)
@test count(pred, seqb, seqa) == alias(seqb, seqa)
Expand All @@ -57,7 +58,8 @@
alphx::Type{<:Alphabet},
alphy::Type{<:Alphabet},
subset::Bool,
singlearg::Bool
singlearg::Bool,
multi_alias::Function
)
for _ in 1:10
seqA = random_seq(alphx, rand(10:100))
Expand All @@ -72,7 +74,7 @@
sa = subA
sb = subB
end
testcounter(pred, alias, sa, sb, singlearg)
testcounter(pred, alias, sa, sb, singlearg, multi_alias)
end
end

Expand All @@ -81,23 +83,23 @@
# Can't promote views
for sub in (true, false)
for n in (4, 2)
counter_random_tests(!=, mismatches, a{n}, a{n}, sub, false)
counter_random_tests(!=, mismatches, a{n}, a{n}, sub, false, !=)
end
end
counter_random_tests(!=, mismatches, a{4}, a{2}, false, false)
counter_random_tests(!=, mismatches, a{2}, a{4}, false, false)
counter_random_tests(!=, mismatches, a{4}, a{2}, false, false, !=)
counter_random_tests(!=, mismatches, a{2}, a{4}, false, false, !=)
end
end

@testset "Matches" begin
for a in (DNAAlphabet, RNAAlphabet)
for sub in (true, false)
for n in (4, 2)
counter_random_tests(==, matches, a{n}, a{n}, sub, false)
counter_random_tests(==, matches, a{n}, a{n}, sub, false, ==)
end
end
counter_random_tests(==, matches, a{4}, a{2}, false, false)
counter_random_tests(==, matches, a{2}, a{4}, false, false)
counter_random_tests(==, matches, a{4}, a{2}, false, false, ==)
counter_random_tests(==, matches, a{2}, a{4}, false, false, ==)
end
end

Expand All @@ -106,35 +108,35 @@
# Can't promote views
for n in (4, 2)
for sub in (true, false)
counter_random_tests(isambiguous, n_ambiguous, a{n}, a{n}, sub, true)
counter_random_tests(isambiguous, n_ambiguous, a{n}, a{n}, sub, false, BioSequences.isambiguous_or)
end
end
counter_random_tests(isambiguous, n_ambiguous, a{4}, a{2}, false, true)
counter_random_tests(isambiguous, n_ambiguous, a{2}, a{4}, false, true)
counter_random_tests(isambiguous, n_ambiguous, a{4}, a{2}, false, true, BioSequences.isambiguous_or)
counter_random_tests(isambiguous, n_ambiguous, a{2}, a{4}, false, true, BioSequences.isambiguous_or)
end
end

@testset "Certain" begin
for a in (DNAAlphabet, RNAAlphabet)
for n in (4, 2)
for sub in (true, false)
counter_random_tests(iscertain, n_certain, a{n}, a{n}, sub, true)
counter_random_tests(iscertain, n_certain, a{n}, a{n}, sub, true, BioSequences.iscertain_and)
end
end
counter_random_tests(iscertain, n_certain, a{4}, a{2}, false, true)
counter_random_tests(iscertain, n_certain, a{2}, a{4}, false, true)
counter_random_tests(iscertain, n_certain, a{4}, a{2}, false, true, BioSequences.iscertain_and)
counter_random_tests(iscertain, n_certain, a{2}, a{4}, false, true, BioSequences.iscertain_and)
end
end

@testset "Gap" begin
for a in (DNAAlphabet, RNAAlphabet)
for n in (4, 2)
for sub in (true, false)
counter_random_tests(isgap, n_gaps, a{n}, a{n}, sub, true)
counter_random_tests(isgap, n_gaps, a{n}, a{n}, sub, true, BioSequences.isgap_or)
end
end
counter_random_tests(isgap, n_gaps, a{4}, a{2}, false, true)
counter_random_tests(isgap, n_gaps, a{2}, a{4}, false, true)
counter_random_tests(isgap, n_gaps, a{4}, a{2}, false, true, BioSequences.isgap_or)
counter_random_tests(isgap, n_gaps, a{2}, a{4}, false, true, BioSequences.isgap_or)
end
end

Expand Down
1 change: 0 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ end
include("longsequences/randseq.jl")
include("longsequences/shuffle.jl")
end

include("translation.jl")
include("counting.jl")

Expand Down

0 comments on commit db8a692

Please sign in to comment.