From 131dab2daf0d02ef5c5be33d79b380af0e461fff Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 22 Jan 2023 12:57:16 +0530 Subject: [PATCH 01/11] Add general CI in mean --- Project.toml | 3 ++- src/Survey.jl | 2 ++ src/ci.jl | 18 ++++++++++++++++++ src/mean.jl | 10 ++++++++-- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 src/ci.jl diff --git a/Project.toml b/Project.toml index f288d231..fc2e35a3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Survey" uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c" -authors = ["Ayush Patnaik"] +authors = ["Ayush Patnaik ", "Iulia Dmitru ", "Shikhar Mishra ", "Sayantika Sengupta "] version = "0.11.1" [deps] @@ -9,6 +9,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/src/Survey.jl b/src/Survey.jl index 66de8042..b5ac0cd3 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -5,6 +5,7 @@ using Statistics import Statistics: quantile using StatsBase import StatsBase: mean,quantile +using Distributions using CSV using LinearAlgebra using CairoMakie @@ -25,6 +26,7 @@ include("boxplot.jl") include("show.jl") include("ratio.jl") include("by.jl") +include("ci.jl") export load_data export AbstractSurveyDesign, SurveyDesign, ReplicateDesign diff --git a/src/ci.jl b/src/ci.jl new file mode 100644 index 00000000..4351bc24 --- /dev/null +++ b/src/ci.jl @@ -0,0 +1,18 @@ +""" + Calculate confidence intervals for given estimates. + Supports normal, margin of error and t-distribution based CI. +""" +function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) + # Parse type of CI, calc critical value + if type == "normal" + critical_value = quantile(Normal(),1-alpha/2) + elseif type == "margin" + critical_value = margin + elseif type == "t" + critical_value = quantile(TDist(dof),1-alpha/2) + end + # Calculate upper and lower estimates + ci_lower = estimate - critical_value * se + ci_upper = estimate + critical_value * se + return ci_lower, ci_upper +end \ No newline at end of file diff --git a/src/mean.jl b/src/mean.jl index 5b87ffdf..4bc61a72 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -24,11 +24,17 @@ julia> mean([:api00, :enroll], clus_one_stage) 2 │ enroll 549.716 46.2597 ``` """ -function mean(x::Symbol, design::ReplicateDesign) +function mean(x::Symbol, design::ReplicateDesign; ci_type::Union{Nothing,String}=nothing, kwargs...) X = mean(design.data[!, x], weights(design.data[!,design.weights])) Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates] variance = sum((Xt .- X).^2) / design.replicates - DataFrame(mean = X, SE = sqrt(variance)) + SE = sqrt(variance) + if !isnothing(ci_type) + ci_lower, ci_upper = _ci(X, SE, ci_type; kwargs...) + return DataFrame(mean = X, SE = SE, ci_lower = ci_lower, ci_upper = ci_upper ) + else + return DataFrame(mean = X, SE = SE) + end end function mean(x::Vector{Symbol}, design::ReplicateDesign) From 357ba2329b48f51697e3bfacb33546edfd90e464 Mon Sep 17 00:00:00 2001 From: smishr Date: Sat, 4 Feb 2023 11:40:44 +0530 Subject: [PATCH 02/11] add multiple dispatch CI --- src/ci.jl | 8 +++++--- src/mean.jl | 21 +++++++++++++-------- test/ci.jl | 3 +++ 3 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 test/ci.jl diff --git a/src/ci.jl b/src/ci.jl index 4351bc24..d238f68a 100644 --- a/src/ci.jl +++ b/src/ci.jl @@ -2,7 +2,9 @@ Calculate confidence intervals for given estimates. Supports normal, margin of error and t-distribution based CI. """ -function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) +function _ci(df::DataFrame, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) + estimate = select(df,1) # est should be in first column + se = select(df,2) # se should be in second column # Parse type of CI, calc critical value if type == "normal" critical_value = quantile(Normal(),1-alpha/2) @@ -12,7 +14,7 @@ function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.0 critical_value = quantile(TDist(dof),1-alpha/2) end # Calculate upper and lower estimates - ci_lower = estimate - critical_value * se - ci_upper = estimate + critical_value * se + ci_lower = estimate .- critical_value .* se + ci_upper = estimate .+ critical_value .* se return ci_lower, ci_upper end \ No newline at end of file diff --git a/src/mean.jl b/src/mean.jl index c9723c08..08d106af 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -45,17 +45,22 @@ julia> mean(:api00, bclus1) 1 │ 644.169 23.4107 ``` """ -function mean(x::Symbol, design::ReplicateDesign; ci_type::Union{Nothing,String}=nothing, kwargs...) +function mean(x::Symbol, design::ReplicateDesign) X = mean(design.data[!, x], weights(design.data[!,design.weights])) Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates] variance = sum((Xt .- X).^2) / design.replicates - SE = sqrt(variance) - if !isnothing(ci_type) - ci_lower, ci_upper = _ci(X, SE, ci_type; kwargs...) - return DataFrame(mean = X, SE = SE, ci_lower = ci_lower, ci_upper = ci_upper ) - else - return DataFrame(mean = X, SE = SE) - end + DataFrame(mean = X, SE = sqrt(variance)) +end + +""" +Add confidence intervals for mean, using multiple dispatch +""" +function mean(x::Symbol, design::ReplicateDesign; ci_type::String="normal", kwargs...) + df_mean = mean(x,design) + ci_lower, ci_upper = _ci(df_mean, ci_type; kwargs...) + df_mean[!,:ci_lower] = ci_lower + df_mean[!,:ci_upper] = ci_upper + return df_mean end """ diff --git a/test/ci.jl b/test/ci.jl new file mode 100644 index 00000000..99c3d715 --- /dev/null +++ b/test/ci.jl @@ -0,0 +1,3 @@ +@testset "ci.jl" begin + +end \ No newline at end of file From 5bfb1c1a4b6a852bd741a2d2764acdb7c0423e16 Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 5 Feb 2023 09:55:48 +0530 Subject: [PATCH 03/11] integrate ci with mean --- src/ci.jl | 4 +--- src/mean.jl | 6 +++--- test/ci.jl | 9 ++++++++- test/runtests.jl | 1 + 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/ci.jl b/src/ci.jl index d238f68a..a8713fd6 100644 --- a/src/ci.jl +++ b/src/ci.jl @@ -2,9 +2,7 @@ Calculate confidence intervals for given estimates. Supports normal, margin of error and t-distribution based CI. """ -function _ci(df::DataFrame, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) - estimate = select(df,1) # est should be in first column - se = select(df,2) # se should be in second column +function _ci(estimate::AbstractVector,se::AbstractVector, type::String, alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) # Parse type of CI, calc critical value if type == "normal" critical_value = quantile(Normal(),1-alpha/2) diff --git a/src/mean.jl b/src/mean.jl index 08d106af..549189cf 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -55,9 +55,9 @@ end """ Add confidence intervals for mean, using multiple dispatch """ -function mean(x::Symbol, design::ReplicateDesign; ci_type::String="normal", kwargs...) - df_mean = mean(x,design) - ci_lower, ci_upper = _ci(df_mean, ci_type; kwargs...) +function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) + df_mean = mean(x, design) + ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin) df_mean[!,:ci_lower] = ci_lower df_mean[!,:ci_upper] = ci_upper return df_mean diff --git a/test/ci.jl b/test/ci.jl index 99c3d715..d6e30b56 100644 --- a/test/ci.jl +++ b/test/ci.jl @@ -1,3 +1,10 @@ @testset "ci.jl" begin - + mean(:api00, dclus1_boot, "normal") + mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.1) + mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.15) + mean(:api00, dclus1_boot, ci_type="margin") + mean(:api00, dclus1_boot, ci_type="margin",margin=3.0) # 3 - sigma + mean(:api00, dclus1_boot, ci_type="margin",margin=6.0) # Six-sigma + mean(:api00, dclus1_boot) + mean([:api00, :enroll], dclus1_boot, ci_type="normal", alpha = 0.1) end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 2d6c9d20..50ea90ff 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -35,3 +35,4 @@ include("hist.jl") include("boxplot.jl") include("ratio.jl") include("show.jl") +include("ci.jl") \ No newline at end of file From d594daf9e37144b6fabd1f9c10a7822161ebed6b Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 5 Feb 2023 10:24:01 +0530 Subject: [PATCH 04/11] fix merge conflict --- src/Survey.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Survey.jl b/src/Survey.jl index b8643ac8..5fba7fcf 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -4,12 +4,8 @@ using DataFrames using Statistics import Statistics: quantile using StatsBase -<<<<<<< HEAD -import StatsBase: mean,quantile using Distributions -======= import StatsBase: mean, quantile ->>>>>>> main using CSV using LinearAlgebra using CairoMakie From 7dcb21d4d147dd396a38b8fa85fe6599de2f3e79 Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 5 Feb 2023 10:36:23 +0530 Subject: [PATCH 05/11] add ci for domain mean, vector symbols --- src/mean.jl | 19 ++++++++++++++++--- test/ci.jl | 12 ++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/mean.jl b/src/mean.jl index d5b5d565..1a9edd85 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -55,9 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign) DataFrame(mean = X, SE = sqrt(variance)) end -""" -Add confidence intervals for mean, using multiple dispatch -""" function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) df_mean = mean(x, design) ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin) @@ -97,6 +94,14 @@ function mean(x::Vector{Symbol}, design::AbstractSurveyDesign) return df end +function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) + df_mean = mean(x, design) + ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns + df_mean[!,:ci_lower] = ci_lower + df_mean[!,:ci_upper] = ci_upper + return df_mean +end + """ mean(var, domain, design) @@ -147,3 +152,11 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign) rename!(df, :statistic => :mean) return df end + +function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) + df_mean = mean(x, design) + ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns + df_mean[!,:ci_lower] = ci_lower + df_mean[!,:ci_upper] = ci_upper + return df_mean +end \ No newline at end of file diff --git a/test/ci.jl b/test/ci.jl index d6e30b56..b9f885e4 100644 --- a/test/ci.jl +++ b/test/ci.jl @@ -1,10 +1,10 @@ @testset "ci.jl" begin mean(:api00, dclus1_boot, "normal") - mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.1) - mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.15) - mean(:api00, dclus1_boot, ci_type="margin") - mean(:api00, dclus1_boot, ci_type="margin",margin=3.0) # 3 - sigma - mean(:api00, dclus1_boot, ci_type="margin",margin=6.0) # Six-sigma + mean(:api00, dclus1_boot, "normal", alpha = 0.1) + mean(:api00, dclus1_boot, "normal", alpha = 0.15) + mean(:api00, dclus1_boot, "margin") + mean(:api00, dclus1_boot, "margin",margin=3.0) # 3 - sigma + mean(:api00, dclus1_boot, "margin",margin=6.0) # Six-sigma mean(:api00, dclus1_boot) - mean([:api00, :enroll], dclus1_boot, ci_type="normal", alpha = 0.1) + mean([:api00, :enroll], dclus1_boot, "normal", alpha = 0.1) end \ No newline at end of file From 753707baf3d08510a8974fd0324107ba55e7ce00 Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 5 Feb 2023 11:38:25 +0530 Subject: [PATCH 06/11] add tests ci.jl --- src/ci.jl | 2 +- src/mean.jl | 37 +++++++++++++++++++++---------------- test/ci.jl | 43 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/src/ci.jl b/src/ci.jl index a8713fd6..8c2de3dc 100644 --- a/src/ci.jl +++ b/src/ci.jl @@ -2,7 +2,7 @@ Calculate confidence intervals for given estimates. Supports normal, margin of error and t-distribution based CI. """ -function _ci(estimate::AbstractVector,se::AbstractVector, type::String, alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) +function _ci(estimate::AbstractVector, se::AbstractVector, type::String, alpha::Float64, dof::Int64, margin::Float64) # Parse type of CI, calc critical value if type == "normal" critical_value = quantile(Normal(),1-alpha/2) diff --git a/src/mean.jl b/src/mean.jl index 1a9edd85..45f77aef 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -55,13 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign) DataFrame(mean = X, SE = sqrt(variance)) end -function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) - df_mean = mean(x, design) - ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin) - df_mean[!,:ci_lower] = ci_lower - df_mean[!,:ci_upper] = ci_upper - return df_mean -end """ Estimate the mean of a list of variables. @@ -94,14 +87,6 @@ function mean(x::Vector{Symbol}, design::AbstractSurveyDesign) return df end -function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) - df_mean = mean(x, design) - ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns - df_mean[!,:ci_lower] = ci_lower - df_mean[!,:ci_upper] = ci_upper - return df_mean -end - """ mean(var, domain, design) @@ -153,8 +138,28 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign) return df end -function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0) +""" + +Confidence intervals for `mean` +""" +function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) + df_mean = mean(x, design) + ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin) + df_mean[!,:ci_lower] = ci_lower + df_mean[!,:ci_upper] = ci_upper + return df_mean +end + +function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) df_mean = mean(x, design) + ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns + df_mean[!,:ci_lower] = ci_lower + df_mean[!,:ci_upper] = ci_upper + return df_mean +end + +function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) + df_mean = mean(x, domain, design) ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns df_mean[!,:ci_lower] = ci_lower df_mean[!,:ci_upper] = ci_upper diff --git a/test/ci.jl b/test/ci.jl index b9f885e4..1d27fc85 100644 --- a/test/ci.jl +++ b/test/ci.jl @@ -1,10 +1,37 @@ @testset "ci.jl" begin - mean(:api00, dclus1_boot, "normal") - mean(:api00, dclus1_boot, "normal", alpha = 0.1) - mean(:api00, dclus1_boot, "normal", alpha = 0.15) - mean(:api00, dclus1_boot, "margin") - mean(:api00, dclus1_boot, "margin",margin=3.0) # 3 - sigma - mean(:api00, dclus1_boot, "margin",margin=6.0) # Six-sigma - mean(:api00, dclus1_boot) - mean([:api00, :enroll], dclus1_boot, "normal", alpha = 0.1) + #### Each of the 3 options with default keyword arguments + # 95% CI - normal + @test mean(:api00, dclus1_boot, "normal").ci_lower[1] ≈ 598.28529 atol = 1e-4 + # 95% CI, with dof=Infinity - t + @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.361 atol = 1e-4 + # margin of 2 SE + @test mean(:api00, dclus1_boot, "margin").ci_upper[1] ≈ 690.99077 atol = 1e-4 + + #### Test "normal" keyword options + # 90% CI + @test mean(:api00, dclus1_boot, "normal", alpha = 0.1).ci_upper[1] ≈ 682.67655 atol = 1e-4 + # 85% CI + @test mean(:api00, dclus1_boot, "normal", alpha = 0.15).ci_lower[1] ≈ 610.469 atol = 1e-4 + + #### Test "t" keyword options + #### For illustration purposes, dclus1_boot is actually a 'large' sample + # 90% CI + @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.98 atol = 1e-4 + # 85% CI + @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 683.403 atol = 1e-4 + + #### Test "t" keyword options + # 3 - sigma + @test mean(:api00, dclus1_boot, "margin", margin=3.0).ci_upper[1] ≈ 714.40146 atol = 1e-4 + # Six-sigma + @test mean(:api00, dclus1_boot, "margin", margin=6.0).ci_lower[1] ≈ 503.70526 atol = 1e-4 + + #### Test Vector of Symbols + @test mean([:api00, :enroll], dclus1_boot, "normal").ci_lower[2] ≈ 459.98174 atol = 1e-4 + @test mean([:api00, :enroll], dclus1_boot, "normal").ci_upper[2] ≈ 639.44995 atol = 1e-4 + + #### Test domain estimation + mn = mean(:api00, :cname, dclus1_boot, "normal") + @test filter(:cname => ==("Los Angeles"), mn).ci_lower[1] ≈ 553.92680 atol = 1e-4 + @test filter(:cname => ==("Santa Clara"), mn).ci_upper[1] ≈ 846.17990 atol = 1e-4 end \ No newline at end of file From e5641c0348508f6db325f24cceb0393bc7d9f570 Mon Sep 17 00:00:00 2001 From: smishr Date: Sun, 5 Feb 2023 12:04:33 +0530 Subject: [PATCH 07/11] add documentation --- src/mean.jl | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/mean.jl b/src/mean.jl index 45f77aef..72c933ab 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -55,7 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign) DataFrame(mean = X, SE = sqrt(variance)) end - """ Estimate the mean of a list of variables. @@ -139,8 +138,32 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign) end """ + mean(x, design, ci_type; kwargs) + mean(x, domain, design, ci_type; kwargs) + +Confidence intervals for `mean`. Three options for ci_type: +```julia +ci_type="normal" # use Normal distribution critical values +ci_type="t" # use Student t distribution critical values +ci_type="margin" # use margin of error +```` + +```math +\left[ \bar{x} - critical value * SE , \bar{x} + critical value * SE \right] +``` +Keyword arguments for each type of CI +```julia +alpha # Significance level. Confidence level is 100*(1 - alpha)% +dof # Degrees of freedom when ci_type="t" +margin # Margin of error when ci_type="margin" +``` +Also works when `Vector{Symbol}` and `domain` are specified. +```julia +# TODO example +``` +External links +[Confidence intervals on Wikipedia](https://en.wikipedia.org/wiki/Confidence_interval) -Confidence intervals for `mean` """ function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) df_mean = mean(x, design) From a58101ca64d53af9dc5188182e0c790707587ab9 Mon Sep 17 00:00:00 2001 From: smishr Date: Thu, 23 Feb 2023 18:59:34 +0530 Subject: [PATCH 08/11] Update Project.toml --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index fe895297..3a767c6f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Survey" uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c" -authors = ["Ayush Patnaik ", "Iulia Dmitru ", "Shikhar Mishra ", "Sayantika Sengupta "] -version = "0.1.1" +authors = ["Ayush Patnaik "] +version = "0.1.0" [deps] AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67" From de6c41c12dff83c27eba0bdd65829e11ac9597db Mon Sep 17 00:00:00 2001 From: smishr Date: Thu, 23 Feb 2023 19:00:01 +0530 Subject: [PATCH 09/11] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3a767c6f..9234e7b9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Survey" uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c" -authors = ["Ayush Patnaik "] +authors = ["Ayush Patnaik"] version = "0.1.0" [deps] From 551731aa5b623d3daacbcf5da1aed3e7c7ddbab0 Mon Sep 17 00:00:00 2001 From: smishr Date: Thu, 23 Feb 2023 20:51:24 +0530 Subject: [PATCH 10/11] fix math mode error docstring --- src/mean.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/mean.jl b/src/mean.jl index 72c933ab..f281473d 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -146,11 +146,13 @@ Confidence intervals for `mean`. Three options for ci_type: ci_type="normal" # use Normal distribution critical values ci_type="t" # use Student t distribution critical values ci_type="margin" # use margin of error -```` +``` +Mathematically, the confidence interval is the range ```math -\left[ \bar{x} - critical value * SE , \bar{x} + critical value * SE \right] +\\left[\\bar{x} - critical value * SE , \\bar{x} + critical value * SE \\right] ``` + Keyword arguments for each type of CI ```julia alpha # Significance level. Confidence level is 100*(1 - alpha)% @@ -161,11 +163,11 @@ Also works when `Vector{Symbol}` and `domain` are specified. ```julia # TODO example ``` -External links +## External links [Confidence intervals on Wikipedia](https://en.wikipedia.org/wiki/Confidence_interval) - """ -function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) +function mean(x::Symbol, design::ReplicateDesign, ci_type::String; + alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) df_mean = mean(x, design) ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin) df_mean[!,:ci_lower] = ci_lower @@ -173,7 +175,8 @@ function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float6 return df_mean end -function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) +function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; + alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) df_mean = mean(x, design) ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns df_mean[!,:ci_lower] = ci_lower @@ -181,7 +184,8 @@ function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha return df_mean end -function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) +function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; + alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0) df_mean = mean(x, domain, design) ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns df_mean[!,:ci_lower] = ci_lower From 731f6b17e7d3121fcf777555e2206127c261d2b8 Mon Sep 17 00:00:00 2001 From: smishr Date: Thu, 23 Feb 2023 21:30:03 +0530 Subject: [PATCH 11/11] fix ci tests --- test/ci.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/ci.jl b/test/ci.jl index 1d27fc85..6bc5c9f3 100644 --- a/test/ci.jl +++ b/test/ci.jl @@ -3,7 +3,7 @@ # 95% CI - normal @test mean(:api00, dclus1_boot, "normal").ci_lower[1] ≈ 598.28529 atol = 1e-4 # 95% CI, with dof=Infinity - t - @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.361 atol = 1e-4 + @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.3606 atol = 1e-4 # margin of 2 SE @test mean(:api00, dclus1_boot, "margin").ci_upper[1] ≈ 690.99077 atol = 1e-4 @@ -16,9 +16,9 @@ #### Test "t" keyword options #### For illustration purposes, dclus1_boot is actually a 'large' sample # 90% CI - @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.98 atol = 1e-4 + @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.9804 atol = 1e-4 # 85% CI - @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 683.403 atol = 1e-4 + @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 604.9353 atol = 1e-4 #### Test "t" keyword options # 3 - sigma