JuliaStats · JaredSchwartz · May 24, 2024 · May 24, 2024 · May 24, 2024 · May 24, 2024
diff --git a/docs/src/time_series.md b/docs/src/time_series.md
@@ -39,3 +39,8 @@ DieboldMarianoTest
 WhiteTest
 BreuschPaganTest
 ```
+
+## KPSS test
+```@docs
+KPSSTest
+```
diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl
@@ -148,5 +148,6 @@ include("diebold_mariano.jl")
 include("clark_west.jl")
 include("white.jl")
 include("var_equality.jl")
+include("kpss.jl")
 
 end
diff --git a/src/kpss.jl b/src/kpss.jl
@@ -0,0 +1,180 @@
+# KPSS.jl
+# Kwiatkowski–Phillips–Schmidt–Shin unit root test
+#
+# Copyright (C) 2024 Jared Schwartz
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+export KPSSTest
+
+struct KPSSTest <: HypothesisTest
+    stat::Float64  # test statistic
+    regression::Symbol  # regression type
+    lag::Int   # number of lags  
+end
+
+function autolag(resids::AbstractVector{<:Real}, T::Int)
+    # Computes the optimal number of lags using the Bartlett kernel as described in Hobijn et al. (2004).
+    # Reference: Table 3 p.489 of Hobijn et al. (2004)
+
+    n = trunc(Int, T^(2.0 / 9.0))
+
+    ŝ_0 = sum(resids .^ 2) / T
+    ŝ_j = 0.0
+    for i in 1:n
+        resids_prod = dot(resids[i+1:end], resids[1:T-i])
+        resids_prod /= T / 2.0
+        ŝ_0 += resids_prod
+        ŝ_j += i * resids_prod
+    end
+
+    γ̂ = 1.1447 * ((ŝ_j / ŝ_0)^2)^(1.0 / 3.0)
+    m̂ₜ = min(T, trunc(Int,γ̂ * T^(1.0 / 3.0)))
+    return m̂ₜ
+
+end
+
+function s²_l(ϵ::AbstractVector{<:Real}, T::Int, l::Int)
+    # calculate weighted s² using Bartlett kernel weighting as in Kwiatkowski et al. (1992) and Hobijn et al. (2004)
+    # Reference: Equation (10), p. 164 (Kwiatkowski et al., 1992).
+
+    SSE = sum(ϵ.^2)
+
+    weighted_sum = 0
+    for s in 1:l
+        weight = 1.0 - (s / (l + 1.0)) 
+        resids_prod = dot(ϵ[s+1:end], ϵ[1:T-s])
+        weighted_sum += 2 * weight * resids_prod
+    end
+
+    ŝ² = (SSE + weighted_sum) / T
+
+    return ŝ²
+end
+
+"""
+    KPSSTest(y::AbstractVector{<:Real}, regression::Symbol, lag::Union{Symbol, Int})
+
+Compute the Kwiatkowski-Phillips-Schmidt-Shin unit root test.
+
+`y` is the time series to be tested, `regression` indicates constant or constant+trend
+(options: `:constant`, `:trend`) and `lag` is the number of lagged
+first-differences included in the test regression. `lag` can also be configured to automatically find 
+the find the optimal number of lags for the data using `:auto` for the the Hobijn et al. data-dependent method
+or `:legacy` for the the KPSS data-independent method.
+
+# References
+
+  * Hobijn, Bart, Philip Hans Franses, and Marius Ooms. "Generalizations of the KPSS-Test for Stationarity." 
+    Statistica Neerlandica 58, no. 4 (2004): 483-502. https://doi.org/10.1111/j.1467-9574.2004.00272.x.
+
+  * Kwiatkowski, Denis, Peter C. B. Phillips, Peter Schmidt, and Yongcheol Shin. "Testing the Null Hypothesis of
+    Stationarity against the Alternative of a Unit Root: How Sure Are We That Economic Time Series Have a Unit Root?" 
+    Journal of Econometrics 54, no. 1 (October 1, 1992): 159-78. https://doi.org/10.1016/0304-4076(92)90104-Y.
+
+# External links
+
+  * [KPSS test on Wikipedia](https://en.wikipedia.org/wiki/KPSS_test)
+"""
+function KPSSTest(y::AbstractVector{<:Real}, regression::Symbol= :constant, lag::Union{Symbol, Int}= :auto)
+
+    # Number of observations
+    T = length(y)
+
+    # Handle regression argument and detrend based on regression type
+    if regression == :constant
+        ϵ = y .- mean(y)
+    elseif regression == :trend
+        t = collect(1:T)
+        X = [ones(T) t]
+        β = (X'X)\(X'y)
+        ϵ = y .- X*β
+    else
+        throw(ArgumentError("regression = $(regression) is invalid. Choose :constant for constant or :trend for constant and trend."))
+    end
+
+    # Handle lags argument
+    if lag == :auto
+        # data dependent method defined in Hobijn et al. (2004)
+        lag = autolag(y, T)
+    elseif lag == :legacy
+        # data independent method used for Kwiatkowski et al. (1992)
+        lag = trunc(Int, ceil(12.0 * (T / 100.0)^(1 / 4.0)))  
+    elseif typeof(lag) == Int
+        lag = lag
+    else
+        throw(ArgumentError("lag = $(lag) is invalid. lag must be :legacy, :auto, or an Int"))
+    end
+
+    lag = min(lag, T) # lags cannot be greater than the length of the series
+
+    η  = sum(cumsum(ϵ).^2) / (T^2)
+    ŝ² = s²_l(ϵ, T, lag)
+
+    stat = η / ŝ²
+    return KPSSTest(stat, regression, lag)
+end
+
+
+function interp_p(kpss_stat::Real, regression::Symbol)
+
+    # Approximate p-values in Table 1 p.166 (Kwiatkowski et al., 1992)
+    p_values = [0.10, 0.05, 0.025, 0.01]
+    critical_values = Dict(
+        :constant => [0.347, 0.463, 0.574, 0.739],
+        :trend => [0.119, 0.146, 0.176, 0.216]
+    )
+
+    if kpss_stat > maximum(critical_values[regression])
+        @warn "p-value is outside of interpolation table. It is smaller than the returned p-value"
+        return 0.01
+    elseif kpss_stat < minimum(critical_values[regression])
+        @warn "p-value is outside of interpolation table. It is greater than the returned p-value"
+        return 0.10
+    else
+       # Manual linear interpolation
+       x = kpss_stat
+       xp = critical_values[regression]
+       fp = p_values
+       function interp(x::d, xp::Vector{d}, fp::Vector{d}) where d <: Real
+            for i in 1:length(xp)-1
+                if xp[i] <= x <= xp[i+1]
+                    p_value =  fp[i] + (fp[i+1] - fp[i]) * (x - xp[i]) / (xp[i+1] - xp[i])
+                end
+            end
+
+            return p_value
+        end
+
+        p_value = interp(x, xp, fp)
+    end
+    return p_value
+end
+
+StatsAPI.pvalue(x::KPSSTest) = interp_p(x.stat, x.regression)
+
+testname(::KPSSTest) = "Kwiatkowski-Phillips-Schmidt-Shin unit root test"
+
+function show_params(io::IO, x::KPSSTest, ident)
+    println(io, ident, "Number of lags:                     ", x.lag)
+    println(io, ident, "Regression type:                    ", x.regression)
+    println(io, ident, "KPSS statistic:                     ", x.stat)
+    println(io)
+end
diff --git a/test/kpss.jl b/test/kpss.jl
@@ -0,0 +1,143 @@
+using Test
+using Random
+
+@testset "KPSS" begin
+    sim_data_h0 = [
+        0.3823959677906078, -0.4064364928329272, -0.21366349105383925, -0.9458585999156837,
+        -0.161817961459508, 2.2141788431075566, -1.1599969272467523, -0.05003288745663004,
+        0.406405082694597, 0.7869108289160771, 1.3567270194962293, 1.1371544648018315,
+        0.04624047497940731, 0.4315160758144559, 0.16530680797356267, -0.6110002398171043,
+        -2.079333373316608, -0.9189007217208647, -1.21708339858096, -2.338297880681166,
+        -0.3732003183359671, 0.48346182208807886, 0.7925832885448003, 0.33291705184283954,
+        1.503401308172342, 0.6785520207484317, -0.4061883062665398, -1.4231492816679225,
+        -0.7647519783616538, -0.5475123469836606, -2.3891264219191415, -1.2613311974661598,
+        0.5917962497349819, 0.8635935846059344, 1.1952957206263544, 0.9762467401665272,
+        -0.15747368655333344, -0.7433830876676981, -2.174717868044743, -2.376406537872116,
+        -1.5234733484541754, -0.6912690728659298, -0.003840769053141846, 1.7332452815278112,
+        2.1665386050740976, 1.2896333839396656, -0.3640432635677706, -1.032077302099283,
+        0.6133707848346852, -1.0418480640744843, 0.7193668683918175, 0.3040363409885845,
+        0.911408384439752, 0.4254373055731135, -0.1483640149695369, -2.081485210017331,
+        -1.6096017323553033, -1.9544283905746491, 0.9034079793438605, -1.0189205517154498,
+        -1.0446712460234568, -1.4858795828372295, -2.1280478590945284, -0.9297236919859942,
+        -1.0809785120018163, -2.26047861232065, -0.8094700261729145, -1.8521079109679686,
+        -1.4183251435535507, -1.1222499557599581, 0.4408035345759739, 1.395083093263098,
+        -0.6294287984304979, -1.907929267259316, -1.3926746093884548, 0.5294601933036396,
+        1.7538203747214345, 3.0363433744615707, 0.6086926691932826, 1.4521666352091431,
+        0.22101112108219845, -0.15656147467703047, 1.4208016565922137, 1.5077049884714973,
+        0.5822120509286517, -0.1779735844657705, 0.12863722897571492, 0.42346502209020714,
+        0.5317572267531613, 0.5250949414322668, 0.7222434557577614, 0.5711080879210234,
+        1.8087294351377763, 2.1641593211810846, 0.9167965594762817, 0.6145927746257767,
+        -0.5070189070638104, -1.9633776595364965, 0.5182490463457843, 0.13020888464287564,
+        -0.3850795292384054
+    ]
+
+
+    @testset "Constant Regression" begin
+        t = KPSSTest(sim_data_h0, :constant, :legacy)
+
+        @test t.stat ≈ 0.13987897604307584
+        @test t.regression == :constant
+        @test t.lag == 13
+        @test pvalue(t) ≈ 0.1
+        show(IOBuffer(), t)
+    end
+
+    @testset "Trend Regression" begin
+        t = KPSSTest(sim_data_h0, :trend, :legacy)
+
+        @test t.stat ≈ 0.09890587815850194
+        @test t.regression == :trend
+        @test t.lag == 13
+        @test pvalue(t) ≈  0.1
+        show(IOBuffer(), t)
+    end
+
+    @testset "Specified Lags" begin
+        t = KPSSTest(sim_data_h0, :constant, 7)
+
+        @test t.stat ≈ 0.1658918837769353
+        @test t.regression == :constant
+        @test t.lag == 7
+        @test pvalue(t) ≈  0.1
+        show(IOBuffer(), t)
+    end
+
+    @testset "Automatic Lags" begin
+        t = KPSSTest(sim_data_h0, :constant, :auto)
+
+        @test t.stat ≈ 0.17245866753792385
+        @test t.regression == :constant
+        @test t.lag == 4
+        @test pvalue(t) ≈  0.1
+        show(IOBuffer(), t)
+    end
+
+    sim_data_h1 = [
+        0.3823959677906078, -0.2152385089376233, -0.22568375357499895, -1.064710607963763,
+        -0.7535992694654291, 1.5414885543718815, -0.7255977944286491, -0.19563221826190302,
+        0.23578930816100901, 0.8194975957297876, 1.7827692007679783, 2.2415601558216953,
+        1.719223398400187, 2.1276192367249394, 2.077168006791274, 1.3835143629873885,
+        -0.39031889042066714, -0.26955292548322785, -1.0271859632037557, -2.7569421445944418,
+        -1.9609935225898258, -1.2909315413337634, -0.7400791638330025, -0.8034537562625632,
+        0.5334890259883591, 0.46034039265061977, -0.28512392399013586, -1.5051790525247886,
+        -1.558356390052481, -1.7234927478553148, -3.838862996282626, -3.9056309827892153,
+        -2.6831691343211537, -2.1154736745827103, -1.351974746259323, -0.973375866405973,
+        -1.61897292304257, -2.2836191674336015, -4.086645491644496, -5.37569309549424,
+        -5.710963175012358, -5.6404955736512, -5.298701806271377, -3.563536140216995,
+        -2.263620175906803, -2.057256094504186, -3.0661160500417894, -3.9161717203571875,
+        -2.786762284472861, -4.135295740964688, -2.8950048405356283, -2.9506519337429524,
+        -2.1912617197974926, -2.221528606444255, -2.5826112742003486, -4.589914476732911,
+        -5.158773604079549, -6.308401128476547, -4.427778953845362, -5.898403495232742,
+        -6.433614465398474, -7.397158425223975, -8.78226649289989, -8.64796625533862,
+        -9.26408292134744, -10.984072277667181, -10.66330299767977, -12.11067589556128,
+        -12.602947083630847, -13.016034467614029, -12.014105955158076, -10.839424629182965,
+        -12.166394974245012, -13.759609842289079, -14.198319818047874, -12.972522320050007,
+        -11.483432041980393, -9.32399885487954, -10.233477872917042, -9.085657572304541,
+        -9.590729768826915, -9.857796804045044, -8.358714410114315, -7.561410249938924,
+        -7.733050693246021, -8.202130303176117, -7.984506281967517, -7.625359874365167,
+        -7.30533515865711, -7.046118830601424, -6.586422845559796, -6.376436485517654,
+        -4.8532610943403895, -3.5934664907281935, -3.758749591842454, -3.602555096954818,
+        -4.416870391331517, -6.126738597336108, -4.626800721222075, -4.755716359752092,
+        -5.205900331311935
+    ]
+
+    @testset "Constant Regression 2" begin
+        t = KPSSTest(sim_data_h1, :constant, 4)
+
+        @test t.stat ≈ 1.4173729681925817
+        @test t.regression == :constant
+        @test t.lag == 4
+        @test pvalue(t) ≈  0.01
+        show(IOBuffer(), t)
+    end
+
+    @testset "Trend Regression 2" begin
+        t = KPSSTest(sim_data_h1, :trend, :legacy)
+
+        @test t.stat ≈ 0.10863791731446652
+        @test t.regression == :trend
+        @test t.lag == 13
+        @test pvalue(t) ≈  0.1
+        show(IOBuffer(), t)
+    end
+
+    @testset "Specified Lags 2" begin
+        t = KPSSTest(sim_data_h1, :constant, 7)
+
+        @test t.stat ≈ 0.9223695420686719
+        @test t.regression == :constant
+        @test t.lag == 7
+        @test pvalue(t) ≈  0.01
+        show(IOBuffer(), t)
+    end
+
+    @testset "Automatic Lags 2" begin
+        t = KPSSTest(sim_data_h1, :constant, :auto)
+
+        @test t.stat ≈ 1.1975763349807966
+        @test t.regression == :constant
+        @test t.lag == 5
+        @test pvalue(t) ≈  0.01
+        show(IOBuffer(), t)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -40,3 +40,4 @@ include("diebold_mariano.jl")
 include("clark_west.jl")
 include("white.jl")
 include("var_equality.jl")
+include("kpss.jl")