From d3fdd66b204183a87c327b442d7604f9515de861 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 12 Jun 2021 12:52:04 +0800 Subject: [PATCH 1/3] add Vectorization implementation --- Project.toml | 4 +++- src/Distances.jl | 1 + src/generic.jl | 21 +++++++++++++++++++++ src/metrics.jl | 42 +++++++++++++++++++++++++++++------------- test/test_dists.jl | 9 ++++++++- 5 files changed, 62 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 3cfde3b..b518e32 100644 --- a/Project.toml +++ b/Project.toml @@ -3,13 +3,15 @@ uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" version = "0.10.3" [deps] +ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" [compat] -julia = "1" +ArrayInterface = "3.1.17" StatsAPI = "1" +julia = "1" [extras] OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" diff --git a/src/Distances.jl b/src/Distances.jl index 2bee2f1..ee2420e 100644 --- a/src/Distances.jl +++ b/src/Distances.jl @@ -1,5 +1,6 @@ module Distances +using ArrayInterface: device, AbstractDevice, GPU using LinearAlgebra using Statistics import StatsAPI: pairwise, pairwise! diff --git a/src/generic.jl b/src/generic.jl index 66f01c8..27d015f 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -43,6 +43,27 @@ _eltype(::Type{Union{Missing, T}}) where {T} = Union{Missing, T} __eltype(::Base.HasEltype, a) = _eltype(eltype(a)) __eltype(::Base.EltypeUnknown, a) = _eltype(typeof(first(a))) + +abstract type AbstractEvaluateStrategy end +struct Vectorization <: AbstractEvaluateStrategy end +struct ScalarMapReduce <: AbstractEvaluateStrategy end + +# Infer the optimal evaluation strategy based on given array types and distance type. +function infer_evaluate_strategy(d::PreMetric, a, b) + da, db = device(a), device(b) + return _infer_evaluate_strategy(d::PreMetric, da, db) +end +@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::AbstractDevice) = ScalarMapReduce() +# when one of the input are scalar types +@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::Nothing) = ScalarMapReduce() +@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::AbstractDevice) = ScalarMapReduce() +@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::Nothing) = ScalarMapReduce() +# It is way slower to use scalar indexing if any of the given array is GPU array +@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::GPU) = Vectorization() +@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::AbstractDevice) = Vectorization() +@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::GPU) = Vectorization() + + # Generic column-wise evaluation """ diff --git a/src/metrics.jl b/src/metrics.jl index 693c76f..3b8b52e 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -220,13 +220,26 @@ result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, ::Nothing) where {Ta,Tb} result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} = typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(_eltype(p)))) -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b) - _evaluate(d, a, b, parameters(d)) +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p=parameters(d)) + _evaluate(infer_evaluate_strategy(d, a, b), d, a, b, p) +end +for M in (metrics..., weightedmetrics...) + @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b) end # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end +function _evaluate(::Vectorization, d::UnionMetrics, a, b, ::Nothing) + map_op(x,y) = eval_op(d, x, y) + reduce_op(x, y) = eval_reduce(d, x, y) + eval_end(d, reduce(reduce_op, map_op.(a, b); init=eval_start(d, a, b))) +end +function _evaluate(::Vectorization, d::UnionMetrics, a, b, p) + map_op(x,y,p) = eval_op(d, x, y, p) + reduce_op(x, y) = eval_reduce(d, x, y) + eval_end(d, reduce(reduce_op, map_op.(a, b, p); init=eval_start(d, a, b))) +end -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, ::Nothing) +Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -239,7 +252,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, ::Nothing) end return eval_end(d, s) end -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) +Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -263,7 +276,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b end end -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p) +Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, p) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -279,7 +292,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p) end return eval_end(d, s) end -Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) +Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -308,8 +321,8 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b end end -_evaluate(dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b)) -function _evaluate(dist::UnionMetrics, a::Number, b::Number, p) +_evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b)) +function _evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, p) length(p) != 1 && throw(DimensionMismatch("inputs are scalars but parameters have length $(length(p)).")) eval_end(dist, eval_op(dist, a, b, first(p))) end @@ -324,10 +337,6 @@ _eval_start(d::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} = eval_reduce(::UnionMetrics, s1, s2) = s1 + s2 eval_end(::UnionMetrics, s) = s -for M in (metrics..., weightedmetrics...) - @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b, parameters(dist)) -end - # Euclidean @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi) eval_end(::Euclidean, s) = sqrt(s) @@ -373,7 +382,14 @@ totalvariation(a, b) = TotalVariation()(a, b) @inline eval_op(::Chebyshev, ai, bi) = abs(ai - bi) @inline eval_reduce(::Chebyshev, s1, s2) = max(s1, s2) # if only NaN, will output NaN -Base.@propagate_inbounds eval_start(::Chebyshev, a, b) = abs(first(a) - first(b)) +Base.@propagate_inbounds function eval_start(d::Chebyshev, a, b) + T = result_type(d, a, b) + if any(isnan, a) || any(isnan, b) + return convert(T, NaN) + else + zero(T) # lower bound of chebyshev distance + end +end chebyshev(a, b) = Chebyshev()(a, b) # Minkowski diff --git a/test/test_dists.jl b/test/test_dists.jl index fedb397..d82b761 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -23,7 +23,14 @@ end function test_metricity(dist, x, y, z) @testset "Test metricity of $(typeof(dist))" begin - @test dist(x, y) == evaluate(dist, x, y) + d = dist(x, y) + @test d == evaluate(dist, x, y) + if d isa Distances.UnionMetrics + # currently only UnionMetrics supports this strategy trait + d_vec = Distances._evaluate(Distances.Vectorization(), dist, x, y, Distances.parameters(dist)) + d_scalar = Distances._evaluate(Distances.ScalarMapReduce(), dist, x, y, Distances.parameters(dist)) + @test d_vec ≈ d_scalar + end dxy = dist(x, y) dxz = dist(x, z) From 8949547ef0216363e5731df44e7dbea04ed49183 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 12 Jun 2021 21:52:39 +0800 Subject: [PATCH 2/3] remove ArrayInterface --- Project.toml | 2 -- src/Distances.jl | 1 - src/generic.jl | 62 +++++++++++++++++++++++++++++++++++----------- src/metrics.jl | 19 +++++++------- test/test_dists.jl | 4 +-- 5 files changed, 59 insertions(+), 29 deletions(-) diff --git a/Project.toml b/Project.toml index b518e32..e51fa0c 100644 --- a/Project.toml +++ b/Project.toml @@ -3,13 +3,11 @@ uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" version = "0.10.3" [deps] -ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" [compat] -ArrayInterface = "3.1.17" StatsAPI = "1" julia = "1" diff --git a/src/Distances.jl b/src/Distances.jl index ee2420e..2bee2f1 100644 --- a/src/Distances.jl +++ b/src/Distances.jl @@ -1,6 +1,5 @@ module Distances -using ArrayInterface: device, AbstractDevice, GPU using LinearAlgebra using Statistics import StatsAPI: pairwise, pairwise! diff --git a/src/generic.jl b/src/generic.jl index 27d015f..0702919 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -45,24 +45,56 @@ __eltype(::Base.EltypeUnknown, a) = _eltype(typeof(first(a))) abstract type AbstractEvaluateStrategy end -struct Vectorization <: AbstractEvaluateStrategy end -struct ScalarMapReduce <: AbstractEvaluateStrategy end +struct Broadcasting <: AbstractEvaluateStrategy end +struct MapReduce1 <: AbstractEvaluateStrategy end -# Infer the optimal evaluation strategy based on given array types and distance type. -function infer_evaluate_strategy(d::PreMetric, a, b) - da, db = device(a), device(b) - return _infer_evaluate_strategy(d::PreMetric, da, db) +""" + evaluate_strategy(d::PreMetric, a, b) + +Infer the optimal strategy to evaluate `d(a, b)`. + +Two strategies are provided in Distances, take `Euclidean` as an example: + +- `Broadcasting`: evaluate each pair by broadcasting, this is usually performant for arrays + that has slow scalar indexing, e.g., `CUDA.CuArray`. But it introduces an extra memory + allocation due to large intermediate result. For `Euclidean`, this is almost equivalent + to `sqrt(sum(abs2, a - b))`. +- `MapReduce1`: use a single-thread version of mapreduce. This has minimal memory allocation. + For `Euclidean`, this is almost equivalent to + `sqrt(mapreduce(ab->abs2(ab[1]-ab[2]), +, zip(a, b); init=0))`. + +# Example + +This function is non-exported. Packages that provides custom array types can provide +specializations for this function and could implement their own evaluation strategy +for specific (pre)metric types. + +For example, these delegates distance evaluation for `CuArray` to the `Broadcasting` strategy. + +```julia +evaluate_strategy(d::Distances.UnionMetrics, a::CuArray, b::CuArray) = Distances.Broadcasting() +evaluate_strategy(d::Distances.UnionMetrics, a, b::CuArray) = Distances.Broadcasting() +evaluate_strategy(d::Distances.UnionMetrics, a::CuArray, b) = Distances.Broadcasting() +``` + +!!! note + Currently, only `Distances.UnionMetrics` respect the result of this function. + +Adding a new implementation strategy for `UnionMetrics` types can be done by adding new methods +to `Distances._evaluate`. For example, + +```julia +struct AnotherFancyStrategy <: Distances.AbstractEvaluateStrategy +function Distances._evaluate(::AnotherFancyStrategy, d::UnionMetrics, a, b, p) + # implementation details end -@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::AbstractDevice) = ScalarMapReduce() -# when one of the input are scalar types -@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::Nothing) = ScalarMapReduce() -@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::AbstractDevice) = ScalarMapReduce() -@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::Nothing) = ScalarMapReduce() -# It is way slower to use scalar indexing if any of the given array is GPU array -@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::GPU) = Vectorization() -@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::AbstractDevice) = Vectorization() -@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::GPU) = Vectorization() +``` +The `_evaluate` function belongs to implementation detail that normal users shouldn't +call directly. But it is considered as a stable API so package developer can add new +strategy implementation to it. +""" +evaluate_strategy(::PreMetric, ::Any, ::Any) = MapReduce1() # Generic column-wise evaluation diff --git a/src/metrics.jl b/src/metrics.jl index 3b8b52e..067711e 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -221,25 +221,26 @@ result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} = typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(_eltype(p)))) Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p=parameters(d)) - _evaluate(infer_evaluate_strategy(d, a, b), d, a, b, p) + _evaluate(evaluate_strategy(d, a, b), d, a, b, p) end for M in (metrics..., weightedmetrics...) @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b) end # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end -function _evaluate(::Vectorization, d::UnionMetrics, a, b, ::Nothing) +function _evaluate(::Broadcasting, d::UnionMetrics, a, b, ::Nothing) map_op(x,y) = eval_op(d, x, y) reduce_op(x, y) = eval_reduce(d, x, y) eval_end(d, reduce(reduce_op, map_op.(a, b); init=eval_start(d, a, b))) end -function _evaluate(::Vectorization, d::UnionMetrics, a, b, p) +function _evaluate(::Broadcasting, d::UnionMetrics, a, b, p) map_op(x,y,p) = eval_op(d, x, y, p) reduce_op(x, y) = eval_reduce(d, x, y) eval_end(d, reduce(reduce_op, map_op.(a, b, p); init=eval_start(d, a, b))) end +_evaluate(::AbstractEvaluateStrategy, d::UnionMetrics, a, b, p) = error("Not implemented.") -Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, ::Nothing) +Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a, b, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -252,7 +253,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, end return eval_end(d, s) end -Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) +Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -276,7 +277,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, end end -Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, p) +Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a, b, p) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -292,7 +293,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, end return eval_end(d, s) end -Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) +Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -321,8 +322,8 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, end end -_evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b)) -function _evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, p) +_evaluate(::MapReduce1, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b)) +function _evaluate(::MapReduce1, dist::UnionMetrics, a::Number, b::Number, p) length(p) != 1 && throw(DimensionMismatch("inputs are scalars but parameters have length $(length(p)).")) eval_end(dist, eval_op(dist, a, b, first(p))) end diff --git a/test/test_dists.jl b/test/test_dists.jl index d82b761..c1c6eda 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -27,8 +27,8 @@ function test_metricity(dist, x, y, z) @test d == evaluate(dist, x, y) if d isa Distances.UnionMetrics # currently only UnionMetrics supports this strategy trait - d_vec = Distances._evaluate(Distances.Vectorization(), dist, x, y, Distances.parameters(dist)) - d_scalar = Distances._evaluate(Distances.ScalarMapReduce(), dist, x, y, Distances.parameters(dist)) + d_vec = Distances._evaluate(Distances.Broadcasting(), dist, x, y, Distances.parameters(dist)) + d_scalar = Distances._evaluate(Distances.MapReduce1(), dist, x, y, Distances.parameters(dist)) @test d_vec ≈ d_scalar end From d7d31cf11096f69d8f76b6aa2e30041c76a4634a Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 12 Jun 2021 22:17:55 +0800 Subject: [PATCH 3/3] word tweaks --- src/generic.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/generic.jl b/src/generic.jl index 0702919..a74c8ef 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -53,15 +53,14 @@ struct MapReduce1 <: AbstractEvaluateStrategy end Infer the optimal strategy to evaluate `d(a, b)`. -Two strategies are provided in Distances, take `Euclidean` as an example: +Currently, two strategies are provided in Distances: - `Broadcasting`: evaluate each pair by broadcasting, this is usually performant for arrays that has slow scalar indexing, e.g., `CUDA.CuArray`. But it introduces an extra memory allocation due to large intermediate result. For `Euclidean`, this is almost equivalent to `sqrt(sum(abs2, a - b))`. - `MapReduce1`: use a single-thread version of mapreduce. This has minimal memory allocation. - For `Euclidean`, this is almost equivalent to - `sqrt(mapreduce(ab->abs2(ab[1]-ab[2]), +, zip(a, b); init=0))`. + For `Euclidean`, this is almost equivalent to `sqrt(mapreduce((x,y)->abs2(x-y), +, a, b))`. # Example