Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding KPSS unit root test #317

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/src/time_series.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,8 @@ DieboldMarianoTest
WhiteTest
BreuschPaganTest
```

## KPSS test
```@docs
KPSSTest
```
1 change: 1 addition & 0 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,5 +148,6 @@ include("diebold_mariano.jl")
include("clark_west.jl")
include("white.jl")
include("var_equality.jl")
include("kpss.jl")

end
180 changes: 180 additions & 0 deletions src/kpss.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# KPSS.jl
# Kwiatkowski–Phillips–Schmidt–Shin unit root test
#
# Copyright (C) 2024 Jared Schwartz
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

export KPSSTest

struct KPSSTest <: HypothesisTest
stat::Float64 # test statistic
regression::Symbol # regression type
lag::Int # number of lags
end

function autolag(resids::AbstractVector{<:Real}, T::Int)
# Computes the optimal number of lags using the Bartlett kernel as described in Hobijn et al. (2004).
# Reference: Table 3 p.489 of Hobijn et al. (2004)

n = trunc(Int, T^(2.0 / 9.0))

ŝ_0 = sum(resids .^ 2) / T
ŝ_j = 0.0
for i in 1:n
resids_prod = dot(resids[i+1:end], resids[1:T-i])
resids_prod /= T / 2.0
ŝ_0 += resids_prod
ŝ_j += i * resids_prod
end

γ̂ = 1.1447 * ((ŝ_j / ŝ_0)^2)^(1.0 / 3.0)
m̂ₜ = min(T, trunc(Int,γ̂ * T^(1.0 / 3.0)))
return m̂ₜ

end

function s²_l(ϵ::AbstractVector{<:Real}, T::Int, l::Int)
# calculate weighted s² using Bartlett kernel weighting as in Kwiatkowski et al. (1992) and Hobijn et al. (2004)
# Reference: Equation (10), p. 164 (Kwiatkowski et al., 1992).

SSE = sum(ϵ.^2)

weighted_sum = 0
for s in 1:l
weight = 1.0 - (s / (l + 1.0))
resids_prod = dot(ϵ[s+1:end], ϵ[1:T-s])
weighted_sum += 2 * weight * resids_prod
end

ŝ² = (SSE + weighted_sum) / T

return ŝ²
end

"""
KPSSTest(y::AbstractVector{<:Real}, regression::Symbol, lag::Union{Symbol, Int})

Compute the Kwiatkowski-Phillips-Schmidt-Shin unit root test.

`y` is the time series to be tested, `regression` indicates constant or constant+trend
(options: `:constant`, `:trend`) and `lag` is the number of lagged
first-differences included in the test regression. `lag` can also be configured to automatically find
the find the optimal number of lags for the data using `:auto` for the the Hobijn et al. data-dependent method
or `:legacy` for the the KPSS data-independent method.

# References

* Hobijn, Bart, Philip Hans Franses, and Marius Ooms. "Generalizations of the KPSS-Test for Stationarity."
Statistica Neerlandica 58, no. 4 (2004): 483-502. https://doi.org/10.1111/j.1467-9574.2004.00272.x.

* Kwiatkowski, Denis, Peter C. B. Phillips, Peter Schmidt, and Yongcheol Shin. "Testing the Null Hypothesis of
Stationarity against the Alternative of a Unit Root: How Sure Are We That Economic Time Series Have a Unit Root?"
Journal of Econometrics 54, no. 1 (October 1, 1992): 159-78. https://doi.org/10.1016/0304-4076(92)90104-Y.

# External links

* [KPSS test on Wikipedia](https://en.wikipedia.org/wiki/KPSS_test)
"""
function KPSSTest(y::AbstractVector{<:Real}, regression::Symbol= :constant, lag::Union{Symbol, Int}= :auto)

# Number of observations
T = length(y)

# Handle regression argument and detrend based on regression type
if regression == :constant
ϵ = y .- mean(y)
elseif regression == :trend
t = collect(1:T)
X = [ones(T) t]
β = (X'X)\(X'y)
ϵ = y .- X*β
else
throw(ArgumentError("regression = $(regression) is invalid. Choose :constant for constant or :trend for constant and trend."))
end

# Handle lags argument
if lag == :auto
# data dependent method defined in Hobijn et al. (2004)
lag = autolag(y, T)
elseif lag == :legacy
# data independent method used for Kwiatkowski et al. (1992)
lag = trunc(Int, ceil(12.0 * (T / 100.0)^(1 / 4.0)))
elseif typeof(lag) == Int
lag = lag
else
throw(ArgumentError("lag = $(lag) is invalid. lag must be :legacy, :auto, or an Int"))
end

lag = min(lag, T) # lags cannot be greater than the length of the series

η = sum(cumsum(ϵ).^2) / (T^2)
ŝ² = s²_l(ϵ, T, lag)

stat = η / ŝ²
return KPSSTest(stat, regression, lag)
end


function interp_p(kpss_stat::Real, regression::Symbol)

# Approximate p-values in Table 1 p.166 (Kwiatkowski et al., 1992)
p_values = [0.10, 0.05, 0.025, 0.01]
critical_values = Dict(
:constant => [0.347, 0.463, 0.574, 0.739],
:trend => [0.119, 0.146, 0.176, 0.216]
)

if kpss_stat > maximum(critical_values[regression])
@warn "p-value is outside of interpolation table. It is smaller than the returned p-value"
return 0.01
elseif kpss_stat < minimum(critical_values[regression])
@warn "p-value is outside of interpolation table. It is greater than the returned p-value"
return 0.10
else
# Manual linear interpolation
x = kpss_stat
xp = critical_values[regression]
fp = p_values
function interp(x::d, xp::Vector{d}, fp::Vector{d}) where d <: Real
for i in 1:length(xp)-1
if xp[i] <= x <= xp[i+1]
p_value = fp[i] + (fp[i+1] - fp[i]) * (x - xp[i]) / (xp[i+1] - xp[i])
end
end

return p_value
end

p_value = interp(x, xp, fp)
end
return p_value
end

StatsAPI.pvalue(x::KPSSTest) = interp_p(x.stat, x.regression)

testname(::KPSSTest) = "Kwiatkowski-Phillips-Schmidt-Shin unit root test"

function show_params(io::IO, x::KPSSTest, ident)
println(io, ident, "Number of lags: ", x.lag)
println(io, ident, "Regression type: ", x.regression)
println(io, ident, "KPSS statistic: ", x.stat)
println(io)
end
143 changes: 143 additions & 0 deletions test/kpss.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
using Test
using Random

@testset "KPSS" begin
sim_data_h0 = [
0.3823959677906078, -0.4064364928329272, -0.21366349105383925, -0.9458585999156837,
-0.161817961459508, 2.2141788431075566, -1.1599969272467523, -0.05003288745663004,
0.406405082694597, 0.7869108289160771, 1.3567270194962293, 1.1371544648018315,
0.04624047497940731, 0.4315160758144559, 0.16530680797356267, -0.6110002398171043,
-2.079333373316608, -0.9189007217208647, -1.21708339858096, -2.338297880681166,
-0.3732003183359671, 0.48346182208807886, 0.7925832885448003, 0.33291705184283954,
1.503401308172342, 0.6785520207484317, -0.4061883062665398, -1.4231492816679225,
-0.7647519783616538, -0.5475123469836606, -2.3891264219191415, -1.2613311974661598,
0.5917962497349819, 0.8635935846059344, 1.1952957206263544, 0.9762467401665272,
-0.15747368655333344, -0.7433830876676981, -2.174717868044743, -2.376406537872116,
-1.5234733484541754, -0.6912690728659298, -0.003840769053141846, 1.7332452815278112,
2.1665386050740976, 1.2896333839396656, -0.3640432635677706, -1.032077302099283,
0.6133707848346852, -1.0418480640744843, 0.7193668683918175, 0.3040363409885845,
0.911408384439752, 0.4254373055731135, -0.1483640149695369, -2.081485210017331,
-1.6096017323553033, -1.9544283905746491, 0.9034079793438605, -1.0189205517154498,
-1.0446712460234568, -1.4858795828372295, -2.1280478590945284, -0.9297236919859942,
-1.0809785120018163, -2.26047861232065, -0.8094700261729145, -1.8521079109679686,
-1.4183251435535507, -1.1222499557599581, 0.4408035345759739, 1.395083093263098,
-0.6294287984304979, -1.907929267259316, -1.3926746093884548, 0.5294601933036396,
1.7538203747214345, 3.0363433744615707, 0.6086926691932826, 1.4521666352091431,
0.22101112108219845, -0.15656147467703047, 1.4208016565922137, 1.5077049884714973,
0.5822120509286517, -0.1779735844657705, 0.12863722897571492, 0.42346502209020714,
0.5317572267531613, 0.5250949414322668, 0.7222434557577614, 0.5711080879210234,
1.8087294351377763, 2.1641593211810846, 0.9167965594762817, 0.6145927746257767,
-0.5070189070638104, -1.9633776595364965, 0.5182490463457843, 0.13020888464287564,
-0.3850795292384054
]


@testset "Constant Regression" begin
t = KPSSTest(sim_data_h0, :constant, :legacy)

@test t.stat ≈ 0.13987897604307584
@test t.regression == :constant
@test t.lag == 13
@test pvalue(t) ≈ 0.1
show(IOBuffer(), t)
end

@testset "Trend Regression" begin
t = KPSSTest(sim_data_h0, :trend, :legacy)

@test t.stat ≈ 0.09890587815850194
@test t.regression == :trend
@test t.lag == 13
@test pvalue(t) ≈ 0.1
show(IOBuffer(), t)
end

@testset "Specified Lags" begin
t = KPSSTest(sim_data_h0, :constant, 7)

@test t.stat ≈ 0.1658918837769353
@test t.regression == :constant
@test t.lag == 7
@test pvalue(t) ≈ 0.1
show(IOBuffer(), t)
end

@testset "Automatic Lags" begin
t = KPSSTest(sim_data_h0, :constant, :auto)

@test t.stat ≈ 0.17245866753792385
@test t.regression == :constant
@test t.lag == 4
@test pvalue(t) ≈ 0.1
show(IOBuffer(), t)
end

sim_data_h1 = [
0.3823959677906078, -0.2152385089376233, -0.22568375357499895, -1.064710607963763,
-0.7535992694654291, 1.5414885543718815, -0.7255977944286491, -0.19563221826190302,
0.23578930816100901, 0.8194975957297876, 1.7827692007679783, 2.2415601558216953,
1.719223398400187, 2.1276192367249394, 2.077168006791274, 1.3835143629873885,
-0.39031889042066714, -0.26955292548322785, -1.0271859632037557, -2.7569421445944418,
-1.9609935225898258, -1.2909315413337634, -0.7400791638330025, -0.8034537562625632,
0.5334890259883591, 0.46034039265061977, -0.28512392399013586, -1.5051790525247886,
-1.558356390052481, -1.7234927478553148, -3.838862996282626, -3.9056309827892153,
-2.6831691343211537, -2.1154736745827103, -1.351974746259323, -0.973375866405973,
-1.61897292304257, -2.2836191674336015, -4.086645491644496, -5.37569309549424,
-5.710963175012358, -5.6404955736512, -5.298701806271377, -3.563536140216995,
-2.263620175906803, -2.057256094504186, -3.0661160500417894, -3.9161717203571875,
-2.786762284472861, -4.135295740964688, -2.8950048405356283, -2.9506519337429524,
-2.1912617197974926, -2.221528606444255, -2.5826112742003486, -4.589914476732911,
-5.158773604079549, -6.308401128476547, -4.427778953845362, -5.898403495232742,
-6.433614465398474, -7.397158425223975, -8.78226649289989, -8.64796625533862,
-9.26408292134744, -10.984072277667181, -10.66330299767977, -12.11067589556128,
-12.602947083630847, -13.016034467614029, -12.014105955158076, -10.839424629182965,
-12.166394974245012, -13.759609842289079, -14.198319818047874, -12.972522320050007,
-11.483432041980393, -9.32399885487954, -10.233477872917042, -9.085657572304541,
-9.590729768826915, -9.857796804045044, -8.358714410114315, -7.561410249938924,
-7.733050693246021, -8.202130303176117, -7.984506281967517, -7.625359874365167,
-7.30533515865711, -7.046118830601424, -6.586422845559796, -6.376436485517654,
-4.8532610943403895, -3.5934664907281935, -3.758749591842454, -3.602555096954818,
-4.416870391331517, -6.126738597336108, -4.626800721222075, -4.755716359752092,
-5.205900331311935
]

@testset "Constant Regression 2" begin
t = KPSSTest(sim_data_h1, :constant, 4)

@test t.stat ≈ 1.4173729681925817
@test t.regression == :constant
@test t.lag == 4
@test pvalue(t) ≈ 0.01
show(IOBuffer(), t)
end

@testset "Trend Regression 2" begin
t = KPSSTest(sim_data_h1, :trend, :legacy)

@test t.stat ≈ 0.10863791731446652
@test t.regression == :trend
@test t.lag == 13
@test pvalue(t) ≈ 0.1
show(IOBuffer(), t)
end

@testset "Specified Lags 2" begin
t = KPSSTest(sim_data_h1, :constant, 7)

@test t.stat ≈ 0.9223695420686719
@test t.regression == :constant
@test t.lag == 7
@test pvalue(t) ≈ 0.01
show(IOBuffer(), t)
end

@testset "Automatic Lags 2" begin
t = KPSSTest(sim_data_h1, :constant, :auto)

@test t.stat ≈ 1.1975763349807966
@test t.regression == :constant
@test t.lag == 5
@test pvalue(t) ≈ 0.01
show(IOBuffer(), t)
end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ include("diebold_mariano.jl")
include("clark_west.jl")
include("white.jl")
include("var_equality.jl")
include("kpss.jl")
Loading