Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement broadcasting for BioSequences #171

Open
wants to merge 4 commits into
base: v3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/biosequence/biosequence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,17 @@ Base.keys(seq::BioSequence) = eachindex(seq)
Base.nextind(::BioSequence, i::Integer) = Int(i) + 1
Base.prevind(::BioSequence, i::Integer) = Int(i) - 1
Base.size(x::BioSequence) = (length(x),)
Base.ndims(::Type{<:BioSequence}) = 1
Base.ndims(::T) where {T<:BioSequence} = 1
Base.IndexStyle(x::BioSequence) = Base.IndexStyle(typeof(x))
Base.IndexStyle(::Type{<:BioSequence}) = Base.IndexLinear()
Base.eltype(::Type{<:BioSequence{A}}) where {A <: Alphabet} = eltype(A)
Base.eltype(x::BioSequence) = eltype(typeof(x))
Alphabet(::Type{<:BioSequence{A}}) where {A <: Alphabet} = A()
Alphabet(x::BioSequence) = Alphabet(typeof(x))
Base.isempty(x::BioSequence) = iszero(length(x))
Base.empty(::Type{T}) where {T <: BioSequence} = T(eltype(T)[])
Base.empty(x::BioSequence) = empty(typeof(x))
Alphabet(::Type{<:BioSequence{A}}) where {A <: Alphabet} = A()
Alphabet(x::BioSequence) = Alphabet(typeof(x))
BitsPerSymbol(x::BioSequence) = BitsPerSymbol(Alphabet(typeof(x)))

function Base.similar(seq::BioSequence, len::Integer=length(seq))
Expand Down Expand Up @@ -153,3 +157,4 @@ include("printing.jl")
include("transformations.jl")
include("counting.jl")
include("copying.jl")
include("seqbroadcast.jl")
2 changes: 1 addition & 1 deletion src/biosequence/predicates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ function hasambiguity(seq::BioSequence)
return false
end
# 2 Bit specialization:
@inline hasambiguity(seq::BioSequence{<:NucleicAcidAlphabet{2}}) = false
@inline hasambiguity(seq::NucSeq{2}) = false

"""
iscanonical(seq::NucleotideSeq)
Expand Down
269 changes: 269 additions & 0 deletions src/biosequence/seqbroadcast.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
###
### Broadcasting with BioSequences.
###
### Customising the broadcasting machinery for BioSequences.
###
### This file is a part of BioJulia.
### License is MIT: https://github.com/BioJulia/BioSequences.jl/blob/master/LICENSE.md


# First we ensure that BioSequence types are actually passed to the broadcast
# machinery.
#
# By default, this would otherwise `collect` the sequence into a `Vector` of
# `DNA` or `RNA` etc. This wastes time and memory, when broadcasting should
# just be able to use the BioSequence itself as if it were an array with a
# a single dimension, akin to Vector.
Base.broadcastable(x::BioSequence) = x



#=

Let's have a look at the example of broadcasting addition with a vector.

@less broadcast(+, [1,2,3], 1)
===
broadcast(f::Tf, As...) where {Tf} = materialize(broadcasted(f, As...))


@less Base.broadcasted(+, [1,2,3], 1)
===
@inline function broadcasted(f, arg1, arg2, args...)
arg1β€² = broadcastable(arg1)
arg2β€² = broadcastable(arg2)
argsβ€² = map(broadcastable, args)
broadcasted(combine_styles(arg1β€², arg2β€², argsβ€²...), f, arg1β€², arg2β€², argsβ€²...)
end
@inline broadcasted(::S, f, args...) where S<:BroadcastStyle = Broadcasted{S}(f, args)

bc = Base.broadcasted(Base.Broadcast.combine_styles(arg1, arg2), +, arg1, arg2)
Base.Broadcast.Broadcasted(+, ([1, 2, 3], 1))

typeof(bc)
Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Vector{Int64}, Int64}}


@less Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}}(+, (arg1, arg2))
===
function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Args<:Tuple}
# using Core.Typeof rather than F preserves inferrability when f is a type
Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes)
end


@less materialize(bc)
===
@inline materialize(bc::Broadcasted) = copy(instantiate(bc))
materialize(x) = x


@less Base.Broadcast.instantiate(bc)
===
@inline function instantiate(bc::Broadcasted{Style}) where {Style}
if bc.axes isa Nothing # Not done via dispatch to make it easier to extend instantiate(::Broadcasted{Style})
axes = combine_axes(bc.args...)
else
axes = bc.axes
check_broadcast_axes(axes, bc.args...)
end
return Broadcasted{Style}(bc.f, bc.args, axes)
end


@less Base.Broadcast.combine_axes(bc.args...)
===
@inline combine_axes(A, B...) = broadcast_shape(axes(A), combine_axes(B...))
@inline combine_axes(A, B) = broadcast_shape(axes(A), axes(B))


@less @less Base.Broadcast.broadcast_shape(axes([1,2,3]), axes(1))
===
broadcast_shape(shape::Tuple) = shape
broadcast_shape(shape::Tuple, shape1::Tuple, shapes::Tuple...) = broadcast_shape(_bcs(shape, shape1), shapes...)


@less Base.Broadcast._bcs(axes([1,2,3]), axes(1))
===
_bcs(shape::Tuple, ::Tuple{}) = (shape[1], _bcs(tail(shape), ())...)

_bcs consolidates two shapes into a single output shape

shape = axes([1,2,3])
@less Base.Broadcast._bcs(Base.tail(shape), ())
===
_bcs(::Tuple{}, ::Tuple{}) = ()
()

(shape[1], _bcs(tail(shape), ())...)


@less copy(Base.Broadcast.instantiate(bc))
===
@inline function copy(bc::Broadcasted{Style}) where {Style}
ElType = combine_eltypes(bc.f, bc.args)
if Base.isconcretetype(ElType)
# We can trust it and defer to the simpler `copyto!`
return copyto!(similar(bc, ElType), bc)
end
# When ElType is not concrete, use narrowing. Use the first output
# value to determine the starting output eltype; copyto_nonleaf!
# will widen `dest` as needed to accommodate later values.
bcβ€² = preprocess(nothing, bc)
iter = eachindex(bcβ€²)
y = iterate(iter)
if y === nothing
# if empty, take the ElType at face value
return similar(bcβ€², ElType)
end
# Initialize using the first value
I, state = y
@inbounds val = bcβ€²[I]
dest = similar(bcβ€², typeof(val))
@inbounds dest[I] = val
# Now handle the remaining values
# The typeassert gives inference a helping hand on the element type and dimensionality
# (work-around for #28382)
ElTypeβ€² = ElType === Union{} ? Any : ElType <: Type ? Type : ElType
RT = dest isa AbstractArray ? AbstractArray{<:ElTypeβ€², ndims(dest)} : Any
return copyto_nonleaf!(dest, bcβ€², iter, state, 1)::RT
end


@less Base.Broadcast.combine_eltypes(bc.f, bc.args)
===
combine_eltypes(f, args::Tuple) = promote_typejoin_union(Base._return_type(f, eltypes(args)))

@less Base.Broadcast.eltypes(bc.args)

@less Base._return_type(+, Base.Broadcast.eltypes(bc.args))

@less Base.Broadcast.promote_typejoin_union(Base._return_type(+, Base.Broadcast.eltypes(bc.args)))
===
function promote_typejoin_union(::Type{T}) where T
if T === Union{}
return Union{}
elseif T isa UnionAll
return Any # TODO: compute more precise bounds
elseif T isa Union
return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
elseif T <: Tuple
return typejoin_union_tuple(T)
else
return T
end
end


@less similar(bc, Base.Broadcast.combine_eltypes(bc.f, bc.args))
===
Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc))
Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} = similar(Array{ElType}, dims)
Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N = similar(BitArray, dims)

@less copyto!(similar(bc, Base.Broadcast.combine_eltypes(bc.f, bc.args)), bc)
===
## general `copyto!` methods
# The most general method falls back to a method that replaces Style->Nothing
# This permits specialization on typeof(dest) without introducing ambiguities
@inline copyto!(dest::AbstractArray, bc::Broadcasted) = copyto!(dest, convert(Broadcasted{Nothing}, bc))


@less copyto!(dest, convert(Base.Broadcast.Broadcasted{Nothing}, bc))
===
@inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
# Performance optimization: broadcast!(identity, dest, A) is equivalent to copyto!(dest, A) if indices match
if bc.f === identity && bc.args isa Tuple{AbstractArray} # only a single input argument to broadcast!
A = bc.args[1]
if axes(dest) == axes(A)
return copyto!(dest, A)
end
end
bcβ€² = preprocess(dest, bc)
# Performance may vary depending on whether `@inbounds` is placed outside the
# for loop or not. (cf. https://github.com/JuliaLang/julia/issues/38086)
@inbounds @simd for I in eachindex(bcβ€²)
dest[I] = bcβ€²[I]
end
return dest
end


@less Base.Broadcast.preprocess(dest, bc)
===
# Preprocessing a `Broadcasted` does two things:
# * unaliases any arguments from `dest`
# * "extrudes" the arguments where it is advantageous to pre-compute the broadcasted indices
@inline preprocess(dest, bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, preprocess_args(dest, bc.args), bc.axes)
preprocess(dest, x) = extrude(broadcast_unalias(dest, x))

@inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...)
preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
preprocess_args(dest, args::Tuple{}) = ()


Base.Broadcast.preprocess(dest, bc.args[1])
@less Base.Broadcast.broadcast_unalias(dest, bc.args[1])
===
# For broadcasted assignments like `broadcast!(f, A, ..., A, ...)`, where `A`
# appears on both the LHS and the RHS of the `.=`, then we know we're only
# going to make one pass through the array, and even though `A` is aliasing
# against itself, the mutations won't affect the result as the indices on the
# LHS and RHS will always match. This is not true in general, but with the `.op=`
# syntax it's fairly common for an argument to be `===` a source.
broadcast_unalias(dest, src) = dest === src ? src : unalias(dest, src)
broadcast_unalias(::Nothing, src) = src


@less Base.Broadcast.unalias(dest, bc.args[1])
===
## rudimentary aliasing detection ##
"""
Base.unalias(dest, A)

Return either `A` or a copy of `A` in a rough effort to prevent modifications to `dest` from
affecting the returned object. No guarantees are provided.

Custom arrays that wrap or use fields containing arrays that might alias against other
external objects should provide a [`Base.dataids`](@ref) implementation.

This function must return an object of exactly the same type as `A` for performance and type
stability. Mutable custom arrays for which [`copy(A)`](@ref) is not `typeof(A)` should
provide a [`Base.unaliascopy`](@ref) implementation.

See also [`Base.mightalias`](@ref).
"""
unalias(dest, A::AbstractArray) = mightalias(dest, A) ? unaliascopy(A) : A
unalias(dest, A::AbstractRange) = A
unalias(dest, A) = A

"""
Base.mightalias(A::AbstractArray, B::AbstractArray)

Perform a conservative test to check if arrays `A` and `B` might share the same memory.

By default, this simply checks if either of the arrays reference the same memory
regions, as identified by their [`Base.dataids`](@ref).
"""
mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !_isdisjoint(dataids(A)
, dataids(B))
mightalias(x, y) = false

"""
Base.dataids(A::AbstractArray)

Return a tuple of `UInt`s that represent the mutable data segments of an array.

Custom arrays that would like to opt-in to aliasing detection of their component
parts can specialize this method to return the concatenation of the `dataids` of
their component parts. A typical definition for an array that wraps a parent is
`Base.dataids(C::CustomArray) = dataids(C.parent)`.
"""
dataids(A::AbstractArray) = (UInt(objectid(A)),)
dataids(A::Array) = (UInt(pointer(A)),)
dataids(::AbstractRange) = ()
dataids(x) = ()

=#

14 changes: 14 additions & 0 deletions src/longsequences/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,17 @@ end
@inbounds data[vi] = (v_ << off) | (bits & ~(bitmask(i) << off))
return s
end

# Broadcasting

#Base.axes(x::BioSymbol) = ()
#Base.ndims(::Type{<:BioSymbol}) = 0
#Base.ndims(x::BioSymbol) = 0
#Base.broadcastable(x::BioSymbol) = x

#Base.broadcastable(x::LongSequence) = x
#Base.getindex(x::LongSequence, i::Base.CartesianIndex{1}) = x[first(i.I)]

#Base.BroadcastStyle(::Type{T}) where {T<:LongSequence} = Broadcast.Style{T}()

#Base.BroadcastStyle(::Broadcast.Style{<:LongSequence}, ::Broadcast.DefaultArrayStyle{N}) where {N} = Broadcast.DefaultArrayStyle{N}()