From af410385c6232d27bda9ef8d6ea3fec47c20c1b4 Mon Sep 17 00:00:00 2001 From: stanlazic Date: Sat, 28 Dec 2024 19:23:20 -0500 Subject: [PATCH 1/2] added D'Agostino test for skewness --- src/HypothesisTests.jl | 1 + src/agostino_test.jl | 77 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 src/agostino_test.jl diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index cc33ed8c..9090d0ab 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -122,6 +122,7 @@ end include("common.jl") +include("agostino_test.jl") include("binomial.jl") include("circular.jl") include("fisher.jl") diff --git a/src/agostino_test.jl b/src/agostino_test.jl new file mode 100644 index 00000000..a53d4bab --- /dev/null +++ b/src/agostino_test.jl @@ -0,0 +1,77 @@ +# D'Agostino test of skewness + +export AgostinoTest + +struct AgostinoTest <: HypothesisTest + nobs::Int # number of observations + skewness::Float64 # skewness (3rd moment) + z_statistic::Float64 # z-statistic +end + + +function agostino_stat(x::AbstractVector{<:Real}) + + #x = sort(x) + n = length(x) + + if n < 8 || n > 46340 + throw(ArgumentError("Sample size must be ≥ 8 and ≤ 46340")) + end + + g₁ = skewness(x) + Y = g₁ * sqrt((n + 1) * (n + 3) / (6 * (n - 2))) + β₂ = 3 * (n^2 + 27 * n - 70) * (n + 1) * (n + 3) / ((n - 2) * (n + 5) * (n + 7) * (n + 9)) + W² = -1 + sqrt(2 * (β₂ - 1)) + δ = 1 / sqrt(log(sqrt(W²))) + α = sqrt(2 / (W² - 1)) + Z = δ * log(Y / α + sqrt((Y / α)^2 + 1)) + + return (n, g₁, Z) +end + + +""" + AgostinoTest(x::AbstractVector{<:Real}) + +Performs D'Agostino's test of the null hypothesis that the data in vector `x` are normally distributed, against the alternative that the data are skewed. The test is only applicable for N ≥ 8, and large sample sizes (e.g. N > 46340) can result in √-1 in the calculations. Hence, the length of `x` is restricted to be between 8 and 46340 (inclusive). `x` should not contain any missing values. + +# References +D'Agostino, R.B. (1970). Transformation to Normality of the Null Distribution of g_1. Biometrika, 57(3) 679-681. + +Implements: [`pvalue`](@ref) +""" +function AgostinoTest(x::AbstractVector{<:Real}) + AgostinoTest(agostino_stat(x)...) +end + + +testname(::AgostinoTest) = "D'Agostino's test for skewness" + +# parameter of interest: name, value under h0, point estimate +population_param_of_interest(x::AgostinoTest) = ("Skewness", 0.0, x.skewness) + +function show_params(io::IO, x::AgostinoTest, ident = "") + println(io, ident, "number of observations: $(x.nobs)") + println(io, ident, "z-statistic: $(round(x.z_statistic, digits = 3))") +end + +StatsAPI.nobs(x::AgostinoTest) = x.nobs + +function StatsAPI.pvalue(x::AgostinoTest; tail = :both) + + check_tail(tail) + + left_tail = cdf(Normal(), x.z_statistic) + right_tail = ccdf(Normal(), x.z_statistic) + + if tail == :both + p = minimum(2 * [left_tail, right_tail]) + elseif tail == :left + p = left_tail + else tail == :right + p = right_tail + end + + return p +end + From 817986a04418df9e6ae2192d60f588144faab485 Mon Sep 17 00:00:00 2001 From: stanlazic Date: Sat, 28 Dec 2024 19:24:01 -0500 Subject: [PATCH 2/2] added tests for D'Agostino test for skewness Updated docs for D'Agostino test added test, plus code formatting --- docs/src/parametric.md | 5 +++++ src/agostino_test.jl | 16 +++++++--------- test/agostino_test.jl | 23 +++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 test/agostino_test.jl diff --git a/docs/src/parametric.md b/docs/src/parametric.md index db445286..cb538904 100644 --- a/docs/src/parametric.md +++ b/docs/src/parametric.md @@ -56,3 +56,8 @@ LeveneTest ```@docs BrownForsytheTest ``` + +## D'Agostino Test +```@docs +AgostinoTest +``` diff --git a/src/agostino_test.jl b/src/agostino_test.jl index a53d4bab..521c53a6 100644 --- a/src/agostino_test.jl +++ b/src/agostino_test.jl @@ -10,14 +10,13 @@ end function agostino_stat(x::AbstractVector{<:Real}) - - #x = sort(x) + n = length(x) if n < 8 || n > 46340 throw(ArgumentError("Sample size must be ≥ 8 and ≤ 46340")) end - + g₁ = skewness(x) Y = g₁ * sqrt((n + 1) * (n + 3) / (6 * (n - 2))) β₂ = 3 * (n^2 + 27 * n - 70) * (n + 1) * (n + 3) / ((n - 2) * (n + 5) * (n + 7) * (n + 9)) @@ -26,7 +25,7 @@ function agostino_stat(x::AbstractVector{<:Real}) α = sqrt(2 / (W² - 1)) Z = δ * log(Y / α + sqrt((Y / α)^2 + 1)) - return (n, g₁, Z) + return (n, g₁, Z) end @@ -41,7 +40,7 @@ D'Agostino, R.B. (1970). Transformation to Normality of the Null Distribution of Implements: [`pvalue`](@ref) """ function AgostinoTest(x::AbstractVector{<:Real}) - AgostinoTest(agostino_stat(x)...) + return AgostinoTest(agostino_stat(x)...) end @@ -60,18 +59,17 @@ StatsAPI.nobs(x::AgostinoTest) = x.nobs function StatsAPI.pvalue(x::AgostinoTest; tail = :both) check_tail(tail) - + left_tail = cdf(Normal(), x.z_statistic) right_tail = ccdf(Normal(), x.z_statistic) - + if tail == :both p = minimum(2 * [left_tail, right_tail]) elseif tail == :left p = left_tail - else tail == :right + else p = right_tail end return p end - diff --git a/test/agostino_test.jl b/test/agostino_test.jl new file mode 100644 index 00000000..d60e435a --- /dev/null +++ b/test/agostino_test.jl @@ -0,0 +1,23 @@ +using HypothesisTests, Test + +@testset "D'Agostino Test" begin + # Check errors + @test_throws ArgumentError AgostinoTest(rand(5)) # too few samples + @test_throws ArgumentError AgostinoTest(rand(50000)) # too many samples + + # Check correctness of results (benchmarked against `moments` package in R) + y = [ + 2.44816, 1.086045, 0.154618, 0.778503, 0.923376, + 1.18671, -0.495009, -0.151178, 1.662281, 2.615421, + ] + + result = @inferred AgostinoTest(y) + @test result.z_statistic ≈ 0.28356 atol = 1.0e-6 + @test result.skewness ≈ 0.158581 atol = 1.0e-6 + @test nobs(result) == 10 + @test pvalue(result) ≈ 0.776748 atol = 1.0e-6 + @test pvalue(result, tail = :left) ≈ 0.611626 atol = 1.0e-6 + @test pvalue(result, tail = :right) ≈ 0.388374 atol = 1.0e-6 + @test AgostinoTest(exp.(y)).z_statistic ≈ 2.10965 atol = 1.0e-5 + @test AgostinoTest(-1 .* exp.(y)).z_statistic ≈ -2.10965 atol = 1.0e-5 +end diff --git a/test/runtests.jl b/test/runtests.jl index 5cbbfd76..aed5b7fe 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,6 +12,7 @@ macro test_ci_approx(x::Expr, y::Expr) end end +include("agostino_test.jl") include("anderson_darling.jl") include("augmented_dickey_fuller.jl") include("bartlett.jl")