From d3fdd66b204183a87c327b442d7604f9515de861 Mon Sep 17 00:00:00 2001
From: Johnny Chen <johnnychen94@hotmail.com>
Date: Sat, 12 Jun 2021 12:52:04 +0800
Subject: [PATCH 1/3] add Vectorization implementation

---
 Project.toml       |  4 +++-
 src/Distances.jl   |  1 +
 src/generic.jl     | 21 +++++++++++++++++++++
 src/metrics.jl     | 42 +++++++++++++++++++++++++++++-------------
 test/test_dists.jl |  9 ++++++++-
 5 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3cfde3b..b518e32 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,13 +3,15 @@ uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 version = "0.10.3"
 
 [deps]
+ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 
 [compat]
-julia = "1"
+ArrayInterface = "3.1.17"
 StatsAPI = "1"
+julia = "1"
 
 [extras]
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
diff --git a/src/Distances.jl b/src/Distances.jl
index 2bee2f1..ee2420e 100644
--- a/src/Distances.jl
+++ b/src/Distances.jl
@@ -1,5 +1,6 @@
 module Distances
 
+using ArrayInterface: device, AbstractDevice, GPU
 using LinearAlgebra
 using Statistics
 import StatsAPI: pairwise, pairwise!
diff --git a/src/generic.jl b/src/generic.jl
index 66f01c8..27d015f 100644
--- a/src/generic.jl
+++ b/src/generic.jl
@@ -43,6 +43,27 @@ _eltype(::Type{Union{Missing, T}}) where {T} = Union{Missing, T}
 __eltype(::Base.HasEltype, a) = _eltype(eltype(a))
 __eltype(::Base.EltypeUnknown, a) = _eltype(typeof(first(a)))
 
+
+abstract type AbstractEvaluateStrategy end
+struct Vectorization <: AbstractEvaluateStrategy end
+struct ScalarMapReduce <: AbstractEvaluateStrategy end
+
+# Infer the optimal evaluation strategy based on given array types and distance type.
+function infer_evaluate_strategy(d::PreMetric, a, b)
+    da, db = device(a), device(b)
+    return _infer_evaluate_strategy(d::PreMetric, da, db)
+end
+@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::AbstractDevice) = ScalarMapReduce()
+# when one of the input are scalar types
+@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::Nothing) = ScalarMapReduce()
+@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::AbstractDevice) = ScalarMapReduce()
+@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::Nothing) = ScalarMapReduce()
+# It is way slower to use scalar indexing if any of the given array is GPU array
+@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::GPU) = Vectorization()
+@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::AbstractDevice) = Vectorization()
+@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::GPU) = Vectorization()
+
+
 # Generic column-wise evaluation
 
 """
diff --git a/src/metrics.jl b/src/metrics.jl
index 693c76f..3b8b52e 100644
--- a/src/metrics.jl
+++ b/src/metrics.jl
@@ -220,13 +220,26 @@ result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, ::Nothing) where {Ta,Tb}
 result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} =
     typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(_eltype(p))))
 
-Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b)
-    _evaluate(d, a, b, parameters(d))
+Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p=parameters(d))
+    _evaluate(infer_evaluate_strategy(d, a, b), d, a, b, p)
+end
+for M in (metrics..., weightedmetrics...)
+    @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b)
 end
 
 # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end
+function _evaluate(::Vectorization, d::UnionMetrics, a, b, ::Nothing)
+    map_op(x,y) = eval_op(d, x, y)
+    reduce_op(x, y) = eval_reduce(d, x, y)
+    eval_end(d, reduce(reduce_op, map_op.(a, b); init=eval_start(d, a, b)))
+end
+function _evaluate(::Vectorization, d::UnionMetrics, a, b, p)
+    map_op(x,y,p) = eval_op(d, x, y, p)
+    reduce_op(x, y) = eval_reduce(d, x, y)
+    eval_end(d, reduce(reduce_op, map_op.(a, b, p); init=eval_start(d, a, b)))
+end
 
-Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, ::Nothing)
+Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, ::Nothing)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -239,7 +252,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, ::Nothing)
     end
     return eval_end(d, s)
 end
-Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing)
+Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -263,7 +276,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b
     end
 end
 
-Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p)
+Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, p)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -279,7 +292,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p)
     end
     return eval_end(d, s)
 end
-Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray)
+Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -308,8 +321,8 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b
     end
 end
 
-_evaluate(dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b))
-function _evaluate(dist::UnionMetrics, a::Number, b::Number, p)
+_evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b))
+function _evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, p)
     length(p) != 1 && throw(DimensionMismatch("inputs are scalars but parameters have length $(length(p))."))
     eval_end(dist, eval_op(dist, a, b, first(p)))
 end
@@ -324,10 +337,6 @@ _eval_start(d::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} =
 eval_reduce(::UnionMetrics, s1, s2) = s1 + s2
 eval_end(::UnionMetrics, s) = s
 
-for M in (metrics..., weightedmetrics...)
-    @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b, parameters(dist))
-end
-
 # Euclidean
 @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi)
 eval_end(::Euclidean, s) = sqrt(s)
@@ -373,7 +382,14 @@ totalvariation(a, b) = TotalVariation()(a, b)
 @inline eval_op(::Chebyshev, ai, bi) = abs(ai - bi)
 @inline eval_reduce(::Chebyshev, s1, s2) = max(s1, s2)
 # if only NaN, will output NaN
-Base.@propagate_inbounds eval_start(::Chebyshev, a, b) = abs(first(a) - first(b))
+Base.@propagate_inbounds function eval_start(d::Chebyshev, a, b)
+    T = result_type(d, a, b)
+    if any(isnan, a) || any(isnan, b)
+        return convert(T, NaN)
+    else
+        zero(T) # lower bound of chebyshev distance
+    end
+end
 chebyshev(a, b) = Chebyshev()(a, b)
 
 # Minkowski
diff --git a/test/test_dists.jl b/test/test_dists.jl
index fedb397..d82b761 100644
--- a/test/test_dists.jl
+++ b/test/test_dists.jl
@@ -23,7 +23,14 @@ end
 
 function test_metricity(dist, x, y, z)
     @testset "Test metricity of $(typeof(dist))" begin
-        @test dist(x, y) == evaluate(dist, x, y)
+        d = dist(x, y)
+        @test d == evaluate(dist, x, y)
+        if d isa Distances.UnionMetrics
+            # currently only UnionMetrics supports this strategy trait
+            d_vec = Distances._evaluate(Distances.Vectorization(), dist, x, y, Distances.parameters(dist))
+            d_scalar = Distances._evaluate(Distances.ScalarMapReduce(), dist, x, y, Distances.parameters(dist))
+            @test d_vec ≈ d_scalar
+        end
 
         dxy = dist(x, y)
         dxz = dist(x, z)

From 8949547ef0216363e5731df44e7dbea04ed49183 Mon Sep 17 00:00:00 2001
From: Johnny Chen <johnnychen94@hotmail.com>
Date: Sat, 12 Jun 2021 21:52:39 +0800
Subject: [PATCH 2/3] remove ArrayInterface

---
 Project.toml       |  2 --
 src/Distances.jl   |  1 -
 src/generic.jl     | 62 +++++++++++++++++++++++++++++++++++-----------
 src/metrics.jl     | 19 +++++++-------
 test/test_dists.jl |  4 +--
 5 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/Project.toml b/Project.toml
index b518e32..e51fa0c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,13 +3,11 @@ uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 version = "0.10.3"
 
 [deps]
-ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 
 [compat]
-ArrayInterface = "3.1.17"
 StatsAPI = "1"
 julia = "1"
 
diff --git a/src/Distances.jl b/src/Distances.jl
index ee2420e..2bee2f1 100644
--- a/src/Distances.jl
+++ b/src/Distances.jl
@@ -1,6 +1,5 @@
 module Distances
 
-using ArrayInterface: device, AbstractDevice, GPU
 using LinearAlgebra
 using Statistics
 import StatsAPI: pairwise, pairwise!
diff --git a/src/generic.jl b/src/generic.jl
index 27d015f..0702919 100644
--- a/src/generic.jl
+++ b/src/generic.jl
@@ -45,24 +45,56 @@ __eltype(::Base.EltypeUnknown, a) = _eltype(typeof(first(a)))
 
 
 abstract type AbstractEvaluateStrategy end
-struct Vectorization <: AbstractEvaluateStrategy end
-struct ScalarMapReduce <: AbstractEvaluateStrategy end
+struct Broadcasting <: AbstractEvaluateStrategy end
+struct MapReduce1 <: AbstractEvaluateStrategy end
 
-# Infer the optimal evaluation strategy based on given array types and distance type.
-function infer_evaluate_strategy(d::PreMetric, a, b)
-    da, db = device(a), device(b)
-    return _infer_evaluate_strategy(d::PreMetric, da, db)
+"""
+    evaluate_strategy(d::PreMetric, a, b)
+
+Infer the optimal strategy to evaluate `d(a, b)`.
+
+Two strategies are provided in Distances, take `Euclidean` as an example:
+
+- `Broadcasting`: evaluate each pair by broadcasting, this is usually performant for arrays
+  that has slow scalar indexing, e.g., `CUDA.CuArray`. But it introduces an extra memory
+  allocation due to large intermediate result. For `Euclidean`, this is almost equivalent
+  to `sqrt(sum(abs2, a - b))`. 
+- `MapReduce1`: use a single-thread version of mapreduce. This has minimal memory allocation.
+  For `Euclidean`, this is almost equivalent to
+  `sqrt(mapreduce(ab->abs2(ab[1]-ab[2]), +, zip(a, b); init=0))`.
+
+# Example
+
+This function is non-exported. Packages that provides custom array types can provide
+specializations for this function and could implement their own evaluation strategy
+for specific (pre)metric types.
+
+For example, these delegates distance evaluation for `CuArray` to the `Broadcasting` strategy.
+
+```julia
+evaluate_strategy(d::Distances.UnionMetrics, a::CuArray, b::CuArray) = Distances.Broadcasting()
+evaluate_strategy(d::Distances.UnionMetrics, a, b::CuArray) = Distances.Broadcasting()
+evaluate_strategy(d::Distances.UnionMetrics, a::CuArray, b) = Distances.Broadcasting()
+```
+
+!!! note
+    Currently, only `Distances.UnionMetrics` respect the result of this function.
+
+Adding a new implementation strategy for `UnionMetrics` types can be done by adding new methods
+to `Distances._evaluate`. For example,
+
+```julia
+struct AnotherFancyStrategy <: Distances.AbstractEvaluateStrategy
+function Distances._evaluate(::AnotherFancyStrategy, d::UnionMetrics, a, b, p)
+    # implementation details
 end
-@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::AbstractDevice) = ScalarMapReduce()
-# when one of the input are scalar types
-@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::Nothing) = ScalarMapReduce()
-@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::AbstractDevice) = ScalarMapReduce()
-@inline _infer_evaluate_strategy(d::PreMetric, ::Nothing, ::Nothing) = ScalarMapReduce()
-# It is way slower to use scalar indexing if any of the given array is GPU array
-@inline _infer_evaluate_strategy(d::PreMetric, ::AbstractDevice, ::GPU) = Vectorization()
-@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::AbstractDevice) = Vectorization()
-@inline _infer_evaluate_strategy(d::PreMetric, ::GPU, ::GPU) = Vectorization()
+```
 
+The `_evaluate` function belongs to implementation detail that normal users shouldn't
+call directly. But it is considered as a stable API so package developer can add new
+strategy implementation to it.
+"""
+evaluate_strategy(::PreMetric, ::Any, ::Any) = MapReduce1()
 
 # Generic column-wise evaluation
 
diff --git a/src/metrics.jl b/src/metrics.jl
index 3b8b52e..067711e 100644
--- a/src/metrics.jl
+++ b/src/metrics.jl
@@ -221,25 +221,26 @@ result_type(dist::UnionMetrics, ::Type{Ta}, ::Type{Tb}, p) where {Ta,Tb} =
     typeof(_evaluate(dist, oneunit(Ta), oneunit(Tb), oneunit(_eltype(p))))
 
 Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a, b, p=parameters(d))
-    _evaluate(infer_evaluate_strategy(d, a, b), d, a, b, p)
+    _evaluate(evaluate_strategy(d, a, b), d, a, b, p)
 end
 for M in (metrics..., weightedmetrics...)
     @eval @inline (dist::$M)(a, b) = _evaluate(dist, a, b)
 end
 
 # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end
-function _evaluate(::Vectorization, d::UnionMetrics, a, b, ::Nothing)
+function _evaluate(::Broadcasting, d::UnionMetrics, a, b, ::Nothing)
     map_op(x,y) = eval_op(d, x, y)
     reduce_op(x, y) = eval_reduce(d, x, y)
     eval_end(d, reduce(reduce_op, map_op.(a, b); init=eval_start(d, a, b)))
 end
-function _evaluate(::Vectorization, d::UnionMetrics, a, b, p)
+function _evaluate(::Broadcasting, d::UnionMetrics, a, b, p)
     map_op(x,y,p) = eval_op(d, x, y, p)
     reduce_op(x, y) = eval_reduce(d, x, y)
     eval_end(d, reduce(reduce_op, map_op.(a, b, p); init=eval_start(d, a, b)))
 end
+_evaluate(::AbstractEvaluateStrategy, d::UnionMetrics, a, b, p) = error("Not implemented.")
 
-Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, ::Nothing)
+Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a, b, ::Nothing)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -252,7 +253,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics,
     end
     return eval_end(d, s)
 end
-Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing)
+Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -276,7 +277,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics,
     end
 end
 
-Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a, b, p)
+Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a, b, p)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first collection has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -292,7 +293,7 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics,
     end
     return eval_end(d, s)
 end
-Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray)
+Base.@propagate_inbounds function _evaluate(::MapReduce1, d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray)
     @boundscheck if length(a) != length(b)
         throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b))."))
     end
@@ -321,8 +322,8 @@ Base.@propagate_inbounds function _evaluate(::ScalarMapReduce, d::UnionMetrics,
     end
 end
 
-_evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b))
-function _evaluate(::ScalarMapReduce, dist::UnionMetrics, a::Number, b::Number, p)
+_evaluate(::MapReduce1, dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b))
+function _evaluate(::MapReduce1, dist::UnionMetrics, a::Number, b::Number, p)
     length(p) != 1 && throw(DimensionMismatch("inputs are scalars but parameters have length $(length(p))."))
     eval_end(dist, eval_op(dist, a, b, first(p)))
 end
diff --git a/test/test_dists.jl b/test/test_dists.jl
index d82b761..c1c6eda 100644
--- a/test/test_dists.jl
+++ b/test/test_dists.jl
@@ -27,8 +27,8 @@ function test_metricity(dist, x, y, z)
         @test d == evaluate(dist, x, y)
         if d isa Distances.UnionMetrics
             # currently only UnionMetrics supports this strategy trait
-            d_vec = Distances._evaluate(Distances.Vectorization(), dist, x, y, Distances.parameters(dist))
-            d_scalar = Distances._evaluate(Distances.ScalarMapReduce(), dist, x, y, Distances.parameters(dist))
+            d_vec = Distances._evaluate(Distances.Broadcasting(), dist, x, y, Distances.parameters(dist))
+            d_scalar = Distances._evaluate(Distances.MapReduce1(), dist, x, y, Distances.parameters(dist))
             @test d_vec ≈ d_scalar
         end
 

From d7d31cf11096f69d8f76b6aa2e30041c76a4634a Mon Sep 17 00:00:00 2001
From: Johnny Chen <johnnychen94@hotmail.com>
Date: Sat, 12 Jun 2021 22:17:55 +0800
Subject: [PATCH 3/3] word tweaks

---
 src/generic.jl | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/generic.jl b/src/generic.jl
index 0702919..a74c8ef 100644
--- a/src/generic.jl
+++ b/src/generic.jl
@@ -53,15 +53,14 @@ struct MapReduce1 <: AbstractEvaluateStrategy end
 
 Infer the optimal strategy to evaluate `d(a, b)`.
 
-Two strategies are provided in Distances, take `Euclidean` as an example:
+Currently, two strategies are provided in Distances:
 
 - `Broadcasting`: evaluate each pair by broadcasting, this is usually performant for arrays
   that has slow scalar indexing, e.g., `CUDA.CuArray`. But it introduces an extra memory
   allocation due to large intermediate result. For `Euclidean`, this is almost equivalent
   to `sqrt(sum(abs2, a - b))`. 
 - `MapReduce1`: use a single-thread version of mapreduce. This has minimal memory allocation.
-  For `Euclidean`, this is almost equivalent to
-  `sqrt(mapreduce(ab->abs2(ab[1]-ab[2]), +, zip(a, b); init=0))`.
+  For `Euclidean`, this is almost equivalent to `sqrt(mapreduce((x,y)->abs2(x-y), +, a, b))`.
 
 # Example