From 131dab2daf0d02ef5c5be33d79b380af0e461fff Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 22 Jan 2023 12:57:16 +0530
Subject: [PATCH 01/11] Add general CI in mean

---
 Project.toml  |  3 ++-
 src/Survey.jl |  2 ++
 src/ci.jl     | 18 ++++++++++++++++++
 src/mean.jl   | 10 ++++++++--
 4 files changed, 30 insertions(+), 3 deletions(-)
 create mode 100644 src/ci.jl

diff --git a/Project.toml b/Project.toml
index f288d231..fc2e35a3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Survey"
 uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
-authors = ["Ayush Patnaik"]
+authors = ["Ayush Patnaik <ayushpatnaik@gmail.com>", "Iulia Dmitru <iuliadmtru@gmail.com>", "Shikhar Mishra <sm.oz@outlook.com>", "Sayantika Sengupta <sayantikasenguptassg@gmail.com>"]
 version = "0.11.1"
 
 [deps]
@@ -9,6 +9,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/src/Survey.jl b/src/Survey.jl
index 66de8042..b5ac0cd3 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -5,6 +5,7 @@ using Statistics
 import Statistics: quantile
 using StatsBase
 import StatsBase: mean,quantile
+using Distributions
 using CSV
 using LinearAlgebra
 using CairoMakie
@@ -25,6 +26,7 @@ include("boxplot.jl")
 include("show.jl")
 include("ratio.jl")
 include("by.jl")
+include("ci.jl")
 
 export load_data
 export AbstractSurveyDesign, SurveyDesign, ReplicateDesign
diff --git a/src/ci.jl b/src/ci.jl
new file mode 100644
index 00000000..4351bc24
--- /dev/null
+++ b/src/ci.jl
@@ -0,0 +1,18 @@
+"""
+    Calculate confidence intervals for given estimates.
+    Supports normal, margin of error and t-distribution based CI.
+"""
+function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+    # Parse type of CI, calc critical value
+    if type == "normal"
+        critical_value = quantile(Normal(),1-alpha/2)
+    elseif type == "margin"
+        critical_value = margin
+    elseif type == "t"
+        critical_value = quantile(TDist(dof),1-alpha/2)
+    end
+    # Calculate upper and lower estimates
+    ci_lower = estimate - critical_value * se
+    ci_upper = estimate + critical_value * se
+    return ci_lower, ci_upper
+end
\ No newline at end of file
diff --git a/src/mean.jl b/src/mean.jl
index 5b87ffdf..4bc61a72 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -24,11 +24,17 @@ julia> mean([:api00, :enroll], clus_one_stage)
    2 │ enroll  549.716  46.2597
 ```
 """
-function mean(x::Symbol, design::ReplicateDesign)
+function mean(x::Symbol, design::ReplicateDesign; ci_type::Union{Nothing,String}=nothing, kwargs...)
     X = mean(design.data[!, x], weights(design.data[!,design.weights]))
     Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
-    DataFrame(mean = X, SE = sqrt(variance))
+    SE = sqrt(variance)
+    if !isnothing(ci_type)
+        ci_lower, ci_upper = _ci(X, SE, ci_type; kwargs...)
+        return DataFrame(mean = X, SE = SE, ci_lower = ci_lower, ci_upper = ci_upper )
+    else
+        return DataFrame(mean = X, SE = SE)
+    end
 end
 
 function mean(x::Vector{Symbol}, design::ReplicateDesign)

From 357ba2329b48f51697e3bfacb33546edfd90e464 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sat, 4 Feb 2023 11:40:44 +0530
Subject: [PATCH 02/11] add multiple dispatch CI

---
 src/ci.jl   |  8 +++++---
 src/mean.jl | 21 +++++++++++++--------
 test/ci.jl  |  3 +++
 3 files changed, 21 insertions(+), 11 deletions(-)
 create mode 100644 test/ci.jl

diff --git a/src/ci.jl b/src/ci.jl
index 4351bc24..d238f68a 100644
--- a/src/ci.jl
+++ b/src/ci.jl
@@ -2,7 +2,9 @@
     Calculate confidence intervals for given estimates.
     Supports normal, margin of error and t-distribution based CI.
 """
-function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+function _ci(df::DataFrame, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+    estimate = select(df,1) # est should be in first column
+    se = select(df,2) # se should be in second column
     # Parse type of CI, calc critical value
     if type == "normal"
         critical_value = quantile(Normal(),1-alpha/2)
@@ -12,7 +14,7 @@ function _ci(estimate::Real, se::Real, type::String="normal"; alpha::Float64=0.0
         critical_value = quantile(TDist(dof),1-alpha/2)
     end
     # Calculate upper and lower estimates
-    ci_lower = estimate - critical_value * se
-    ci_upper = estimate + critical_value * se
+    ci_lower = estimate .- critical_value .* se
+    ci_upper = estimate .+ critical_value .* se
     return ci_lower, ci_upper
 end
\ No newline at end of file
diff --git a/src/mean.jl b/src/mean.jl
index c9723c08..08d106af 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -45,17 +45,22 @@ julia> mean(:api00, bclus1)
    1 │ 644.169  23.4107
 ```
 """
-function mean(x::Symbol, design::ReplicateDesign; ci_type::Union{Nothing,String}=nothing, kwargs...)
+function mean(x::Symbol, design::ReplicateDesign)
     X = mean(design.data[!, x], weights(design.data[!,design.weights]))
     Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
-    SE = sqrt(variance)
-    if !isnothing(ci_type)
-        ci_lower, ci_upper = _ci(X, SE, ci_type; kwargs...)
-        return DataFrame(mean = X, SE = SE, ci_lower = ci_lower, ci_upper = ci_upper )
-    else
-        return DataFrame(mean = X, SE = SE)
-    end
+    DataFrame(mean = X, SE = sqrt(variance))
+end
+
+"""
+Add confidence intervals for mean, using multiple dispatch
+"""
+function mean(x::Symbol, design::ReplicateDesign; ci_type::String="normal", kwargs...)
+    df_mean = mean(x,design)
+    ci_lower, ci_upper = _ci(df_mean, ci_type; kwargs...)
+    df_mean[!,:ci_lower] = ci_lower
+    df_mean[!,:ci_upper] = ci_upper
+    return df_mean
 end
 
 """
diff --git a/test/ci.jl b/test/ci.jl
new file mode 100644
index 00000000..99c3d715
--- /dev/null
+++ b/test/ci.jl
@@ -0,0 +1,3 @@
+@testset "ci.jl" begin
+    
+end
\ No newline at end of file

From 5bfb1c1a4b6a852bd741a2d2764acdb7c0423e16 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 5 Feb 2023 09:55:48 +0530
Subject: [PATCH 03/11] integrate ci with mean

---
 src/ci.jl        | 4 +---
 src/mean.jl      | 6 +++---
 test/ci.jl       | 9 ++++++++-
 test/runtests.jl | 1 +
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/ci.jl b/src/ci.jl
index d238f68a..a8713fd6 100644
--- a/src/ci.jl
+++ b/src/ci.jl
@@ -2,9 +2,7 @@
     Calculate confidence intervals for given estimates.
     Supports normal, margin of error and t-distribution based CI.
 """
-function _ci(df::DataFrame, type::String="normal"; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
-    estimate = select(df,1) # est should be in first column
-    se = select(df,2) # se should be in second column
+function _ci(estimate::AbstractVector,se::AbstractVector, type::String, alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
     # Parse type of CI, calc critical value
     if type == "normal"
         critical_value = quantile(Normal(),1-alpha/2)
diff --git a/src/mean.jl b/src/mean.jl
index 08d106af..549189cf 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -55,9 +55,9 @@ end
 """
 Add confidence intervals for mean, using multiple dispatch
 """
-function mean(x::Symbol, design::ReplicateDesign; ci_type::String="normal", kwargs...)
-    df_mean = mean(x,design)
-    ci_lower, ci_upper = _ci(df_mean, ci_type; kwargs...)
+function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+    df_mean = mean(x, design)
+    ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin)
     df_mean[!,:ci_lower] = ci_lower
     df_mean[!,:ci_upper] = ci_upper
     return df_mean
diff --git a/test/ci.jl b/test/ci.jl
index 99c3d715..d6e30b56 100644
--- a/test/ci.jl
+++ b/test/ci.jl
@@ -1,3 +1,10 @@
 @testset "ci.jl" begin
-    
+    mean(:api00, dclus1_boot, "normal")
+    mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.1)
+    mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.15)
+    mean(:api00, dclus1_boot, ci_type="margin")
+    mean(:api00, dclus1_boot, ci_type="margin",margin=3.0) # 3 - sigma
+    mean(:api00, dclus1_boot, ci_type="margin",margin=6.0) # Six-sigma
+    mean(:api00, dclus1_boot)
+    mean([:api00, :enroll], dclus1_boot, ci_type="normal", alpha = 0.1)
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 2d6c9d20..50ea90ff 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -35,3 +35,4 @@ include("hist.jl")
 include("boxplot.jl")
 include("ratio.jl")
 include("show.jl")
+include("ci.jl")
\ No newline at end of file

From d594daf9e37144b6fabd1f9c10a7822161ebed6b Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 5 Feb 2023 10:24:01 +0530
Subject: [PATCH 04/11] fix merge conflict

---
 src/Survey.jl | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Survey.jl b/src/Survey.jl
index b8643ac8..5fba7fcf 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -4,12 +4,8 @@ using DataFrames
 using Statistics
 import Statistics: quantile
 using StatsBase
-<<<<<<< HEAD
-import StatsBase: mean,quantile
 using Distributions
-=======
 import StatsBase: mean, quantile
->>>>>>> main
 using CSV
 using LinearAlgebra
 using CairoMakie

From 7dcb21d4d147dd396a38b8fa85fe6599de2f3e79 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 5 Feb 2023 10:36:23 +0530
Subject: [PATCH 05/11] add ci for domain mean, vector symbols

---
 src/mean.jl | 19 ++++++++++++++++---
 test/ci.jl  | 12 ++++++------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/mean.jl b/src/mean.jl
index d5b5d565..1a9edd85 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -55,9 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign)
     DataFrame(mean = X, SE = sqrt(variance))
 end
 
-"""
-Add confidence intervals for mean, using multiple dispatch
-"""
 function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
     df_mean = mean(x, design)
     ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin)
@@ -97,6 +94,14 @@ function mean(x::Vector{Symbol}, design::AbstractSurveyDesign)
     return df
 end
 
+function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+    df_mean = mean(x, design)
+    ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns
+    df_mean[!,:ci_lower] = ci_lower
+    df_mean[!,:ci_upper] = ci_upper
+    return df_mean
+end
+
 """
     mean(var, domain, design)
 
@@ -147,3 +152,11 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign)
     rename!(df, :statistic => :mean)
     return df
 end
+
+function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+    df_mean = mean(x, design)
+    ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns
+    df_mean[!,:ci_lower] = ci_lower
+    df_mean[!,:ci_upper] = ci_upper
+    return df_mean
+end
\ No newline at end of file
diff --git a/test/ci.jl b/test/ci.jl
index d6e30b56..b9f885e4 100644
--- a/test/ci.jl
+++ b/test/ci.jl
@@ -1,10 +1,10 @@
 @testset "ci.jl" begin
     mean(:api00, dclus1_boot, "normal")
-    mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.1)
-    mean(:api00, dclus1_boot, ci_type="normal", alpha = 0.15)
-    mean(:api00, dclus1_boot, ci_type="margin")
-    mean(:api00, dclus1_boot, ci_type="margin",margin=3.0) # 3 - sigma
-    mean(:api00, dclus1_boot, ci_type="margin",margin=6.0) # Six-sigma
+    mean(:api00, dclus1_boot, "normal", alpha = 0.1)
+    mean(:api00, dclus1_boot, "normal", alpha = 0.15)
+    mean(:api00, dclus1_boot, "margin")
+    mean(:api00, dclus1_boot, "margin",margin=3.0) # 3 - sigma
+    mean(:api00, dclus1_boot, "margin",margin=6.0) # Six-sigma
     mean(:api00, dclus1_boot)
-    mean([:api00, :enroll], dclus1_boot, ci_type="normal", alpha = 0.1)
+    mean([:api00, :enroll], dclus1_boot, "normal", alpha = 0.1)
 end
\ No newline at end of file

From 753707baf3d08510a8974fd0324107ba55e7ce00 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 5 Feb 2023 11:38:25 +0530
Subject: [PATCH 06/11] add tests ci.jl

---
 src/ci.jl   |  6 +++++-
 src/mean.jl | 37 +++++++++++++++++++++----------------
 test/ci.jl  | 43 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/src/ci.jl b/src/ci.jl
index a8713fd6..8c2de3dc 100644
--- a/src/ci.jl
+++ b/src/ci.jl
@@ -2,7 +2,11 @@
     Calculate confidence intervals for given estimates.
     Supports normal, margin of error and t-distribution based CI.
 """
-function _ci(estimate::AbstractVector,se::AbstractVector, type::String, alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+function _ci(estimate::AbstractVector, se::AbstractVector, type::String, alpha::Float64, dof::Int64, margin::Float64)
+    # Fail fast on an unsupported `type`: otherwise no branch below assigns
+    # `critical_value` and the function dies later with an opaque UndefVarError.
+    type in ("normal", "margin", "t") ||
+        throw(ArgumentError("unsupported CI type: $type (expected \"normal\", \"margin\" or \"t\")"))
     # Parse type of CI, calc critical value
     if type == "normal"
         critical_value = quantile(Normal(),1-alpha/2)
diff --git a/src/mean.jl b/src/mean.jl
index 1a9edd85..45f77aef 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -55,13 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign)
     DataFrame(mean = X, SE = sqrt(variance))
 end
 
-function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
-    df_mean = mean(x, design)
-    ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin)
-    df_mean[!,:ci_lower] = ci_lower
-    df_mean[!,:ci_upper] = ci_upper
-    return df_mean
-end
 
 """
 Estimate the mean of a list of variables.
@@ -94,14 +87,6 @@ function mean(x::Vector{Symbol}, design::AbstractSurveyDesign)
     return df
 end
 
-function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
-    df_mean = mean(x, design)
-    ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns
-    df_mean[!,:ci_lower] = ci_lower
-    df_mean[!,:ci_upper] = ci_upper
-    return df_mean
-end
-
 """
     mean(var, domain, design)
 
@@ -153,8 +138,28 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign)
     return df
 end
 
-function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Float64=Inf64, margin::Float64=2.0)
+"""
+
+Confidence intervals for `mean`
+"""
+function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
+    df_mean = mean(x, design)
+    ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin)
+    df_mean[!,:ci_lower] = ci_lower
+    df_mean[!,:ci_upper] = ci_upper
+    return df_mean
+end
+
+function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
     df_mean = mean(x, design)
+    ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns
+    df_mean[!,:ci_lower] = ci_lower
+    df_mean[!,:ci_upper] = ci_upper
+    return df_mean
+end
+
+function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
+    df_mean = mean(x, domain, design)
     ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns
     df_mean[!,:ci_lower] = ci_lower
     df_mean[!,:ci_upper] = ci_upper
diff --git a/test/ci.jl b/test/ci.jl
index b9f885e4..1d27fc85 100644
--- a/test/ci.jl
+++ b/test/ci.jl
@@ -1,10 +1,37 @@
 @testset "ci.jl" begin
-    mean(:api00, dclus1_boot, "normal")
-    mean(:api00, dclus1_boot, "normal", alpha = 0.1)
-    mean(:api00, dclus1_boot, "normal", alpha = 0.15)
-    mean(:api00, dclus1_boot, "margin")
-    mean(:api00, dclus1_boot, "margin",margin=3.0) # 3 - sigma
-    mean(:api00, dclus1_boot, "margin",margin=6.0) # Six-sigma
-    mean(:api00, dclus1_boot)
-    mean([:api00, :enroll], dclus1_boot, "normal", alpha = 0.1)
+    #### Each of the 3 options with default keyword arguments
+    # 95% CI - normal
+    @test mean(:api00, dclus1_boot, "normal").ci_lower[1] ≈ 598.28529 atol = 1e-4
+    # 95% CI, with dof=Infinity - t
+    @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.361 atol = 1e-4
+    # margin of 2 SE
+    @test mean(:api00, dclus1_boot, "margin").ci_upper[1] ≈ 690.99077 atol = 1e-4
+
+    #### Test "normal" keyword options 
+    # 90% CI
+    @test mean(:api00, dclus1_boot, "normal", alpha = 0.1).ci_upper[1] ≈ 682.67655 atol = 1e-4
+    # 85% CI
+    @test mean(:api00, dclus1_boot, "normal", alpha = 0.15).ci_lower[1] ≈ 610.469 atol = 1e-4
+
+    #### Test "t" keyword options
+    #### For illustration purposes, dclus1_boot is actually a 'large' sample
+    # 90% CI
+    @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.98 atol = 1e-4
+    # 85% CI
+    @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 683.403 atol = 1e-4
+
+    #### Test "t" keyword options
+    # 3 - sigma
+    @test mean(:api00, dclus1_boot, "margin", margin=3.0).ci_upper[1] ≈ 714.40146 atol = 1e-4 
+    # Six-sigma
+    @test mean(:api00, dclus1_boot, "margin", margin=6.0).ci_lower[1] ≈ 503.70526 atol = 1e-4
+
+    #### Test Vector of Symbols
+    @test mean([:api00, :enroll], dclus1_boot, "normal").ci_lower[2] ≈ 459.98174 atol = 1e-4
+    @test mean([:api00, :enroll], dclus1_boot, "normal").ci_upper[2] ≈ 639.44995 atol = 1e-4
+
+    #### Test domain estimation
+    mn = mean(:api00, :cname, dclus1_boot, "normal")
+    @test filter(:cname => ==("Los Angeles"), mn).ci_lower[1] ≈ 553.92680 atol = 1e-4
+    @test filter(:cname => ==("Santa Clara"), mn).ci_upper[1] ≈ 846.17990 atol = 1e-4
 end
\ No newline at end of file

From e5641c0348508f6db325f24cceb0393bc7d9f570 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 5 Feb 2023 12:04:33 +0530
Subject: [PATCH 07/11] add documentation

---
 src/mean.jl | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/mean.jl b/src/mean.jl
index 45f77aef..72c933ab 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -55,7 +55,6 @@ function mean(x::Symbol, design::ReplicateDesign)
     DataFrame(mean = X, SE = sqrt(variance))
 end
 
-
 """
 Estimate the mean of a list of variables.
 
@@ -139,8 +138,32 @@ function mean(x::Symbol, domain::Symbol, design::AbstractSurveyDesign)
 end
 
 """
+    mean(x, design, ci_type; kwargs)
+    mean(x, domain, design, ci_type; kwargs)
+
+Confidence intervals for `mean`. Three options for ci_type:
+```julia
+ci_type="normal"    # use Normal distribution critical values
+ci_type="t"         # use Student t distribution critical values
+ci_type="margin"    # use margin of error
+````
+
+```math
+\left[ \bar{x} - critical value * SE , \bar{x} + critical value * SE  \right]
+```
+Keyword arguments for each type of CI
+```julia
+alpha         # Significance level. Confidence level is 100*(1 - alpha)%
+dof           # Degrees of freedom when ci_type="t". Default: nrow(design.data) - 1
+margin        # Number of SEs on each side of the estimate when ci_type="margin". Default: 2.0
+```
+Also works when `Vector{Symbol}` and `domain` are specified.
+```julia
+# TODO example
+```
+External links
+[Confidence intervals on Wikipedia](https://en.wikipedia.org/wiki/Confidence_interval)
 
-Confidence intervals for `mean`
 """
 function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
     df_mean = mean(x, design)

From a58101ca64d53af9dc5188182e0c790707587ab9 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Thu, 23 Feb 2023 18:59:34 +0530
Subject: [PATCH 08/11] Update Project.toml

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index fe895297..3a767c6f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Survey"
 uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
-authors = ["Ayush Patnaik <ayushpatnaik@gmail.com>", "Iulia Dmitru <iuliadmtru@gmail.com>", "Shikhar Mishra <sm.oz@outlook.com>", "Sayantika Sengupta <sayantikasenguptassg@gmail.com>"]
-version = "0.1.1"
+authors = ["Ayush Patnaik <ayushpatnaik@gmail.com>"]
+version = "0.1.0"
 
 [deps]
 AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"

From de6c41c12dff83c27eba0bdd65829e11ac9597db Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Thu, 23 Feb 2023 19:00:01 +0530
Subject: [PATCH 09/11] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 3a767c6f..9234e7b9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Survey"
 uuid = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
-authors = ["Ayush Patnaik <ayushpatnaik@gmail.com>"]
+authors = ["Ayush Patnaik"]
 version = "0.1.0"
 
 [deps]

From 551731aa5b623d3daacbcf5da1aed3e7c7ddbab0 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Thu, 23 Feb 2023 20:51:24 +0530
Subject: [PATCH 10/11] fix math mode error docstring

---
 src/mean.jl | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/mean.jl b/src/mean.jl
index 72c933ab..f281473d 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -146,11 +146,13 @@ Confidence intervals for `mean`. Three options for ci_type:
 ci_type="normal"    # use Normal distribution critical values
 ci_type="t"         # use Student t distribution critical values
 ci_type="margin"    # use margin of error
-````
+```
 
+Mathematically, the confidence interval is the range
 ```math
-\left[ \bar{x} - critical value * SE , \bar{x} + critical value * SE  \right]
+\\left[\\bar{x} - \\text{critical value} \\times \\text{SE},\\; \\bar{x} + \\text{critical value} \\times \\text{SE}\\right]
 ```
+
 Keyword arguments for each type of CI
 ```julia
 alpha         # Significance level. Confidence level is 100*(1 - alpha)%
@@ -161,11 +163,11 @@ Also works when `Vector{Symbol}` and `domain` are specified.
 ```julia
 # TODO example
 ```
-External links
+## External links
 [Confidence intervals on Wikipedia](https://en.wikipedia.org/wiki/Confidence_interval)
-
 """
-function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
+function mean(x::Symbol, design::ReplicateDesign, ci_type::String;
+    alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
     df_mean = mean(x, design)
     ci_lower, ci_upper = _ci(df_mean[!,1], df_mean[!,2], ci_type, alpha, dof, margin)
     df_mean[!,:ci_lower] = ci_lower
@@ -173,7 +175,8 @@ function mean(x::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float6
     return df_mean
 end
 
-function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
+function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String;
+    alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
     df_mean = mean(x, design)
     ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # mean and SE are in 2nd and 3rd columns
     df_mean[!,:ci_lower] = ci_lower
@@ -181,7 +184,8 @@ function mean(x::Vector{Symbol}, design::ReplicateDesign, ci_type::String; alpha
     return df_mean
 end
 
-function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
+function mean(x::Symbol, domain::Symbol, design::ReplicateDesign, ci_type::String; 
+    alpha::Float64=0.05, dof::Int64=nrow(design.data)-1, margin::Float64=2.0)
     df_mean = mean(x, domain, design)
     ci_lower, ci_upper = _ci(df_mean[!,2], df_mean[!,3], ci_type, alpha, dof, margin) # domain mean and SE are in 2nd and 3rd columns
     df_mean[!,:ci_lower] = ci_lower

From 731f6b17e7d3121fcf777555e2206127c261d2b8 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Thu, 23 Feb 2023 21:30:03 +0530
Subject: [PATCH 11/11] fix ci tests

---
 test/ci.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/ci.jl b/test/ci.jl
index 1d27fc85..6bc5c9f3 100644
--- a/test/ci.jl
+++ b/test/ci.jl
@@ -3,7 +3,7 @@
     # 95% CI - normal
     @test mean(:api00, dclus1_boot, "normal").ci_lower[1] ≈ 598.28529 atol = 1e-4
-    # 95% CI, with dof=Infinity - t
-    @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.361 atol = 1e-4
+    # 95% CI, with default dof = nrow(design.data) - 1 - t
+    @test mean(:api00, dclus1_boot, "t").ci_upper[1] ≈ 690.3606 atol = 1e-4
     # margin of 2 SE
     @test mean(:api00, dclus1_boot, "margin").ci_upper[1] ≈ 690.99077 atol = 1e-4
 
@@ -16,9 +16,9 @@
     #### Test "t" keyword options
     #### For illustration purposes, dclus1_boot is actually a 'large' sample
-    # 90% CI
-    @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.98 atol = 1e-4
+    # 95% CI (default alpha = 0.05), dof = 30
+    @test mean(:api00, dclus1_boot, "t", dof = 30).ci_upper[1] ≈ 691.9804 atol = 1e-4
-    # 85% CI
-    @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 683.403 atol = 1e-4
+    # 90% CI (alpha = 0.1), dof = 50
+    @test mean(:api00, dclus1_boot, "t", alpha = 0.1, dof = 50).ci_lower[1] ≈ 604.9353 atol = 1e-4
 
-    #### Test "t" keyword options
+    #### Test "margin" keyword options
     # 3 - sigma