JuliaStats · NicoACloutier · Nov 25, 2023 · Nov 26, 2023 · Nov 27, 2023 · Nov 29, 2023
diff --git a/docs/src/nonparametric.md b/docs/src/nonparametric.md
@@ -81,3 +81,9 @@ ApproximatePermutationTest
 ```@docs
 FlignerKilleenTest
 ```
+
+## Friedman two-way ANOVA by ranks
+
+```@docs
+FriedmanTest
+```
diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl
@@ -148,5 +148,6 @@ include("diebold_mariano.jl")
 include("clark_west.jl")
 include("white.jl")
 include("var_equality.jl")
+include("friedman.jl")
 
 end
diff --git a/src/friedman.jl b/src/friedman.jl
@@ -0,0 +1,117 @@
+# Friedman.jl
+# Friedman two-way ANOVA by ranks
+#
+# Copyright (C) 2012   Simon Kornblith
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+export FriedmanTest
+
+struct FriedmanTest <: HypothesisTest
+    n::Int                    # number of observations
+    k::Int                    # number of treatments
+    df::Int                   # degrees of freedom
+    rank_sums::Vector{<:Real} # rank sums vector
+    Q::Real                   # Q statistic
+end
+
+"""
+    FriedmanTest(groups::AbstractVector{<:Real}...)
+
+Perform the Friedman two-way ANOVA by ranks, a rank sum test to test the difference of ``k``
+treatments across ``N`` repeated tests. This is a non-parametric test similar to the
+Kruskall-Wallis one-way ANOVA by ranks. It is a special case of the Durbin test.
+
+The p-value is computed using a ``Q`` statistic, as follows:
+```math
+    Q = \\frac{12}{kN(k+1)} \\sum_{j = 1}^{k}(R_j^2) - 3n(k+1)
+```
+where ``N`` is the number of tests, ``k`` is the number of treatments, and ``R`` is the rank
+sum vector.
+
+Implements: [`pvalue`](@ref)
+
+# References
+
+  * Daniel, W.W., Friedman two-way analysis of variance by ranks. Applied Nonparametric Statistics
+    (2nd ed.). Boston: PWS-Kent. pp. 262–74, 1990.
+
+# External links
+
+  * [Friedman test on Wikipedia
+    ](https://en.wikipedia.org/wiki/Friedman_test)
+"""
+function FriedmanTest(groups::AbstractVector{T}...) where {T<:Real}
+    x = mapreduce(permutedims, vcat, groups) |> permutedims
+    n = size(x, 1)
+    k = size(x, 2)
+    df = k - 1
+    rank_sums = sum.(convert_rank(x) |> eachcol)
+    tie_correction = get_tie_correction(x, n, k)
+    Q = ((12 / (k*n*(k+1))) * sum(rank_sum^2 for rank_sum in rank_sums) - 3*n*(k+1)) / tie_correction
+    FriedmanTest(n, k, df, rank_sums, Q)
+end
+
+testname(::FriedmanTest) = "Friedman two-way ANOVA by ranks"
+population_param_of_interest(x::FriedmanTest) = ("location parameter", "all equal", NaN) # parameter of interest: name, value under h0, point estimate
+default_tail(x::FriedmanTest) = :right
+
+function show_params(io::IO, x::FriedmanTest, ident)
+    println(io, ident, "number of observations: ", x.n)
+    println(io, ident, "number of treatments:   ", x.k)
+    println(io, ident, "degrees of freedom:     ", x.df)
+    println(io, ident, "rank sums vector:       ", x.rank_sums)
+    println(io, ident, "Q statistic:            ", x.Q)
+end
+
+StatsAPI.pvalue(x::FriedmanTest; tail=:right) = pvalue(Chisq(x.df), x.Q, tail=tail)
+
+# helper functions
+
+# find the average index of an item in a vector
+function avg_index(item::T, x::AbstractVector{T}) where {T<:Real}
+    equal = item .== x
+    sum([i for (i, value) in enumerate(equal) if value]) / sum(equal) # get average rank (for ties)
+end
+
+# find the ranks for a single test/row
+function row_rank(row::AbstractVector{T}) where {T<:Real}
+    sorted_row = sort(row)
+    map(x -> avg_index(x, sorted_row), row)
+end
+
+# convert an input matrix into ranks
+function convert_rank(x::AbstractMatrix{T}) where {T<:Real}
+    x = map(row_rank, eachrow(x))
+    mapreduce(permutedims, vcat, x)
+end
+
+# get the row count vector for a single row to be used for the tie correction
+function row_tie_count(x::AbstractVector{T}) where {T<:Real}
+    counts = map(item -> sum(x.==item), unique(x))
+    counts = map(item -> item^3, counts) - counts
+    sum(counts)
+end
+
+# get the tie correction
+function get_tie_correction(x::AbstractMatrix{T}, n::Int64, k::Int64) where {T<:Real}
+    row_ties = map(row_tie_count, eachrow(x)) |> sum
+    1 - row_ties / (k * (k^2 - 1)*n)
+end
diff --git a/test/friedman.jl b/test/friedman.jl
@@ -0,0 +1,41 @@
+using HypothesisTests, Test
+using HypothesisTests: default_tail
+
+@testset "Friedman" begin
+a1 = [4, 6, 3, 4, 3, 2, 2, 7, 6, 5, 3, 8, 6, 1, 5, 7]
+a2 = [5, 6, 8, 7, 7, 8, 4, 6, 4, 5, 2, 4, 4, 3, 8, 9]
+a3 = [2, 4, 4, 3, 2, 2, 1, 4, 3, 2, 5, 3, 5, 7, 6, 1]
+t = FriedmanTest(a1, a2, a3)
+@test t.n == length(a1) == length(a2) == length(a3)
+@test t.df == 2
+@test t.rank_sums == [33.5, 39, 23.5]
+@test t.Q ≈ 8.098360655737705
+@test pvalue(t) ≈ 0.017436661128762587
+@test default_tail(t) == :right
+show(IOBuffer(), t)
+
+# https://github.com/scipy/scipy/blob/main/scipy/stats/_stats_py.py
+b1 = [72, 96, 88, 92, 74, 76, 82]
+b2 = [120, 120, 132, 120, 101, 96, 112]
+b3 = [76, 95, 104, 96, 84, 72, 76]
+t = FriedmanTest(b1, b2, b3)
+@test t.n == length(b1) == length(b2) == length(b3)
+@test t.df == 2
+@test t.rank_sums == [10, 21, 11]
+@test t.Q ≈ 10.57142857142857
+@test pvalue(t) ≈ 0.005063414171757498
+show(IOBuffer(), t)
+
+c1 = [-0.263, 0.216, 0.667, 0.487, 0.376, 0.333, 0.353, 0.132, -0.864, 0.567]
+c2 = [0.384, -0.432, 0.987, 0.084, 0.974, 0.415, 0.121, -0.369, 0.847, 0.076]
+c3 = [0.129, 0.312, -0.453, 0.927, 0.594, 0.767, -0.009, 0.735, 0.853, 0.697]
+c4 = [0.292, 0.203, 0.983, -0.690, 0.906, -0.035, 0.209, 0.309, 0.516, 0.850]
+c5 = [0.256, 0.941, 0.260, 0.628, -0.189, 0.687, 0.443, 0.767, 0.636, -0.931]
+t = FriedmanTest(c1, c2, c3, c4, c5)
+@test t.n == length(c1) == length(c2) == length(c3) == length(c4) == length(c5)
+@test t.df == 4
+@test t.rank_sums == [24, 30, 34, 29, 33]
+@test t.Q ≈ 2.4799999999999999898
+@test pvalue(t) ≈ 0.6482206481834754
+show(IOBuffer(), t)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -40,3 +40,4 @@ include("diebold_mariano.jl")
 include("clark_west.jl")
 include("white.jl")
 include("var_equality.jl")
+include("friedman.jl")