Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Friedman test #312

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/src/nonparametric.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,9 @@ ApproximatePermutationTest
```@docs
FlignerKilleenTest
```

## Friedman two-way ANOVA by ranks

```@docs
FriedmanTest
```
1 change: 1 addition & 0 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,5 +148,6 @@ include("diebold_mariano.jl")
include("clark_west.jl")
include("white.jl")
include("var_equality.jl")
include("friedman.jl")

end
117 changes: 117 additions & 0 deletions src/friedman.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Friedman.jl
# Friedman two-way ANOVA by ranks
#
# Copyright (C) 2012 Simon Kornblith
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

export FriedmanTest

struct FriedmanTest <: HypothesisTest
n::Int # number of observations
k::Int # number of treatments
df::Int # degrees of freedom
rank_sums::Vector{<:Real} # rank sums vector
Q::Real # Q statistic
end

"""
FriedmanTest(groups::AbstractVector{<:Real}...)

Perform the Friedman two-way ANOVA by ranks, a rank sum test to test the difference of ``k``
treatments across ``N`` repeated tests. This is a non-parametric test similar to the
Kruskall-Wallis one-way ANOVA by ranks. It is a special case of the Durbin test.

The p-value is computed using a ``Q`` statistic, as follows:
```math
Q = \\frac{12}{kN(k+1)} \\sum_{j = 1}^{k}(R_j^2) - 3n(k+1)
```
where ``N`` is the number of tests, ``k`` is the number of treatments, and ``R`` is the rank
sum vector.

Implements: [`pvalue`](@ref)

# References

* Daniel, W.W., Friedman two-way analysis of variance by ranks. Applied Nonparametric Statistics
(2nd ed.). Boston: PWS-Kent. pp. 262–74, 1990.

# External links

* [Friedman test on Wikipedia
](https://en.wikipedia.org/wiki/Friedman_test)
"""
function FriedmanTest(groups::AbstractVector{T}...) where {T<:Real}
x = mapreduce(permutedims, vcat, groups) |> permutedims
n = size(x, 1)
k = size(x, 2)
df = k - 1
rank_sums = sum.(convert_rank(x) |> eachcol)
tie_correction = get_tie_correction(x, n, k)
Q = ((12 / (k*n*(k+1))) * sum(rank_sum^2 for rank_sum in rank_sums) - 3*n*(k+1)) / tie_correction
FriedmanTest(n, k, df, rank_sums, Q)
end

testname(::FriedmanTest) = "Friedman two-way ANOVA by ranks"
population_param_of_interest(x::FriedmanTest) = ("location parameter", "all equal", NaN) # parameter of interest: name, value under h0, point estimate
default_tail(x::FriedmanTest) = :right

function show_params(io::IO, x::FriedmanTest, ident)
println(io, ident, "number of observations: ", x.n)
println(io, ident, "number of treatments: ", x.k)
println(io, ident, "degrees of freedom: ", x.df)
println(io, ident, "rank sums vector: ", x.rank_sums)
println(io, ident, "Q statistic: ", x.Q)
end

StatsAPI.pvalue(x::FriedmanTest; tail=:right) = pvalue(Chisq(x.df), x.Q, tail=tail)

# helper functions

# find the average index of an item in a vector
function avg_index(item::T, x::AbstractVector{T}) where {T<:Real}
equal = item .== x
sum([i for (i, value) in enumerate(equal) if value]) / sum(equal) # get average rank (for ties)
end

# find the ranks for a single test/row
function row_rank(row::AbstractVector{T}) where {T<:Real}
sorted_row = sort(row)
map(x -> avg_index(x, sorted_row), row)
end

# convert an input matrix into ranks
function convert_rank(x::AbstractMatrix{T}) where {T<:Real}
x = map(row_rank, eachrow(x))
mapreduce(permutedims, vcat, x)
end

# get the row count vector for a single row to be used for the tie correction
function row_tie_count(x::AbstractVector{T}) where {T<:Real}
counts = map(item -> sum(x.==item), unique(x))
counts = map(item -> item^3, counts) - counts
sum(counts)
end

# get the tie correction
function get_tie_correction(x::AbstractMatrix{T}, n::Int64, k::Int64) where {T<:Real}
row_ties = map(row_tie_count, eachrow(x)) |> sum
1 - row_ties / (k * (k^2 - 1)*n)
end
41 changes: 41 additions & 0 deletions test/friedman.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using HypothesisTests, Test
using HypothesisTests: default_tail

@testset "Friedman" begin
a1 = [4, 6, 3, 4, 3, 2, 2, 7, 6, 5, 3, 8, 6, 1, 5, 7]
a2 = [5, 6, 8, 7, 7, 8, 4, 6, 4, 5, 2, 4, 4, 3, 8, 9]
a3 = [2, 4, 4, 3, 2, 2, 1, 4, 3, 2, 5, 3, 5, 7, 6, 1]
t = FriedmanTest(a1, a2, a3)
@test t.n == length(a1) == length(a2) == length(a3)
@test t.df == 2
@test t.rank_sums == [33.5, 39, 23.5]
@test t.Q ≈ 8.098360655737705
@test pvalue(t) ≈ 0.017436661128762587
@test default_tail(t) == :right
show(IOBuffer(), t)

# https://github.com/scipy/scipy/blob/main/scipy/stats/_stats_py.py
b1 = [72, 96, 88, 92, 74, 76, 82]
b2 = [120, 120, 132, 120, 101, 96, 112]
b3 = [76, 95, 104, 96, 84, 72, 76]
t = FriedmanTest(b1, b2, b3)
@test t.n == length(b1) == length(b2) == length(b3)
@test t.df == 2
@test t.rank_sums == [10, 21, 11]
@test t.Q ≈ 10.57142857142857
@test pvalue(t) ≈ 0.005063414171757498
show(IOBuffer(), t)

c1 = [-0.263, 0.216, 0.667, 0.487, 0.376, 0.333, 0.353, 0.132, -0.864, 0.567]
c2 = [0.384, -0.432, 0.987, 0.084, 0.974, 0.415, 0.121, -0.369, 0.847, 0.076]
c3 = [0.129, 0.312, -0.453, 0.927, 0.594, 0.767, -0.009, 0.735, 0.853, 0.697]
c4 = [0.292, 0.203, 0.983, -0.690, 0.906, -0.035, 0.209, 0.309, 0.516, 0.850]
c5 = [0.256, 0.941, 0.260, 0.628, -0.189, 0.687, 0.443, 0.767, 0.636, -0.931]
t = FriedmanTest(c1, c2, c3, c4, c5)
@test t.n == length(c1) == length(c2) == length(c3) == length(c4) == length(c5)
@test t.df == 4
@test t.rank_sums == [24, 30, 34, 29, 33]
@test t.Q ≈ 2.4799999999999999898
@test pvalue(t) ≈ 0.6482206481834754
show(IOBuffer(), t)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ include("diebold_mariano.jl")
include("clark_west.jl")
include("white.jl")
include("var_equality.jl")
include("friedman.jl")
Loading