From 4b74a48fbde97c75e0b811063f8a8b4f201d7b20 Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Wed, 23 Aug 2023 23:16:42 +0300 Subject: [PATCH 1/5] update doc --- .github/workflows/Documenter.yml | 2 +- docs/src/index.md | 8 ++--- src/descriptive.jl | 61 +++++++++++++++++--------------- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index faf28a2..bd915ca 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-docdeploy@latest env: diff --git a/docs/src/index.md b/docs/src/index.md index ad74d8c..15b0985 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,7 +4,7 @@ CurrentModule = MetidaStats ``` -Metida descriptive statistics. +Metida descriptive statistics - provides tables with categorized descriptive statistics from tabular data. *This program comes with absolutely no warranty. No liability is accepted for any loss and risk to public health resulting from use of this software. 
@@ -37,19 +37,19 @@ ds[1:5, :] ### Import: -``` +```@example dsexample di = MetidaStats.dataimport(ds, vars = [:var1, :var2], sort = [:col, :row]) ``` ### Statistics: -``` +```@example dsexample des = MetidaStats.descriptives(di; skipmissing = true, skipnonpositive = true, stats = MetidaStats.STATLIST) ``` ### Make DataFrame -``` +```@example dsexample df = DataFrame(des) ``` diff --git a/src/descriptive.jl b/src/descriptive.jl index 29f36a1..6a53e41 100644 --- a/src/descriptive.jl +++ b/src/descriptive.jl @@ -82,38 +82,41 @@ end * kwargs: - `skipmissing` - drop NaN and Missing values, default = true; - `skipnonpositive` - drop non-positive values (and NaN, Missing) for "log-statistics" - :geom, :geomean, :logmean, :logvar, :geocv; -- `stats` - default set `stats = [:n, :mean, :sd, :se, :median, :min, :max]` +- `stats` - default set `stats = [:n, :mean, :sd, :se, :median, :min, :max]`; +- `corrected` - use corrected var (true); +- `level` - level for confidence intervals (0.95); Possible values for `stats` is: + * :n - number of observbations; -:posn - positive (non-negative) number of observations; -:mean - arithmetic mean; -:var - variance; -:bvar - variance with no correction; -:geom - geometric mean; -:logmean - arithmetic mean for log-transformed data; -:logvar - variance for log-transformed data ``σ^2_{log}``; -:sd - standard deviation (or σ); -:se - standard error; -:cv - coefficient of variation; -:geocv - coefficient of variation for log-transformed data (``CV = sqrt{exp(σ^2_{log})-1}``); -:lci - lower confidence interval; -:uci - upper confidence interval; -:lmeanci - lower confidence interval for mean; -:umeanci - lower confidence interval for mean; -:median - median,; -:min - minimum; -:max - maximum; -:range - range; -:q1 - lower quartile; -:q3, -:iqr, -:kurt, -:skew, -:harmmean, -:ses, -:sek, -:sum +* :posn - positive (non-negative) number of observations; +* :mean - arithmetic mean; +* :var - variance; +* :bvar - variance with no correction; +* :geom 
- geometric mean; +* :logmean - arithmetic mean for log-transformed data; +* :logvar - variance for log-transformed data ``σ^2_{log}``; +* :sd - standard deviation (or σ); +* :se - standard error; +* :cv - coefficient of variation; +* :geocv - coefficient of variation for log-transformed data (``CV = sqrt{exp(σ^2_{log})-1}``); +* :lci - lower confidence interval; +* :uci - upper confidence interval; +* :lmeanci - lower confidence interval for mean; +* :umeanci - upper confidence interval for mean; +* :median - median; +* :min - minimum; +* :max - maximum; +* :range - range; +* :q1 - lower quartile; +* :q3 - upper quartile; +* :iqr - inter quartile range; +* :kurt - kurtosis; +* :skew - skewness; +* :harmmean - harmonic mean; +* :ses - standard error of skewness; +* :sek - standard error of kurtosis; +* :sum - sum. """ function descriptives(data, vars, sort = nothing; kwargs...) From 4c271aa544a52968773908a89b3262a24d867c1d Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:21:12 +0300 Subject: [PATCH 2/5] update --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/README.md b/README.md index f3e8419..bf9d56f 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,69 @@ Metida descriptive statistics. 
``` import Pkg; Pkg.add(url = "https://github.com/PharmCat/MetidaStats.jl.git") ``` + +## Import DataFrame + +``` +data = CSV.File("somedata.csv") |> DataFrame + +# variables to analyze +vars = [:Cmax, :AUClast] + +# sorting variables +sort = [:form, :period] + +ds = dataimport(data; vars = vars, sort = sort) +``` + +## Get descriptive statistics + +``` +descriptives(ds, stats = [:n, :mean, :var]) +``` + +## Or without dataimport step + +``` +descriptives(data; vars = vars, sort = sort, stats = [:n, :mean, :var]) +``` + +Keywords: + +- `skipmissing` - drop NaN and Missing values, default = true; +- `skipnonpositive` - drop non-positive values (and NaN, Missing) for "log-statistics" - :geom, :geomean, :logmean, :logvar, :geocv; +- `stats` - default set `stats = [:n, :mean, :sd, :se, :median, :min, :max]`; +- `corrected` - use corrected var (true); +- `level` - level for confidence intervals (0.95); + +Possible values for `stats` are: + +* :n - number of observations; +* :posn - positive (non-negative) number of observations; +* :mean - arithmetic mean; +* :var - variance; +* :bvar - variance with no correction; +* :geom - geometric mean; +* :logmean - arithmetic mean for log-transformed data; +* :logvar - variance for log-transformed data; +* :sd - standard deviation (or σ); +* :se - standard error; +* :cv - coefficient of variation; +* :geocv - coefficient of variation for log-transformed data; +* :lci - lower confidence interval; +* :uci - upper confidence interval; +* :lmeanci - lower confidence interval for mean; +* :umeanci - upper confidence interval for mean; +* :median - median; +* :min - minimum; +* :max - maximum; +* :range - range; +* :q1 - lower quartile; +* :q3 - upper quartile; +* :iqr - inter quartile range; +* :kurt - kurtosis; +* :skew - skewness; +* :harmmean - harmonic mean; +* :ses - standard error of skewness; +* :sek - standard error of kurtosis; +* :sum - sum. 
\ No newline at end of file From 700b7c236ea3fc6b959774444e65146bf00d288d Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:02:19 +0300 Subject: [PATCH 3/5] minor changes --- Project.toml | 2 +- src/descriptive.jl | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index 54ed630..05a9a84 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MetidaStats" uuid = "75cdad26-409a-4e43-8ad7-d54b4fa665a0" authors = ["PharmCat "] -version = "0.2.1" +version = "0.2.2" [deps] diff --git a/src/descriptive.jl b/src/descriptive.jl index 6a53e41..97e5c0a 100644 --- a/src/descriptive.jl +++ b/src/descriptive.jl @@ -214,10 +214,10 @@ function descriptives_(obsvec, kwargs, logstats, cicalk) end n_ = length(vec) if cicalk - if n_ > 1 q = quantile(TDist(n_ - 1), 1 - (1-kwargs[:level])/2) end + if n_ > 1 q = quantile(TDist(n_ - 1), 1 - (1 - kwargs[:level]) / 2) end # add tdist / normal option # add multiple CI ? 
end # skipnonpositive - #logstats = makelogvec #calk logstats + # logstats = makelogvec #calk logstats if logstats if kwargs[:skipnonpositive] logvec = log.(skipnonpositive(obsvec)) @@ -275,21 +275,21 @@ function descriptives_(obsvec, kwargs, logstats, cicalk) elseif s == :uci haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end - result[s] = result[:mean] + q*result[:sd] + result[s] = result[:mean] + q * result[:sd] elseif s == :lci haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end - result[s] = result[:mean] - q*result[:sd] + result[s] = result[:mean] - q * result[:sd] elseif s == :umeanci haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end haskey(result, :se) || begin result[:se] = result[:sd] / sqrt(n_) end - result[s] = result[:mean] + q*result[:se] + result[s] = result[:mean] + q * result[:se] elseif s == :lmeanci haskey(result, :mean) || begin result[:mean] = sum(vec) / n_ end haskey(result, :sd) || begin result[:sd] = std(vec; corrected = kwargs[:corrected], mean = result[:mean]) end haskey(result, :se) || begin result[:se] = result[:sd] / sqrt(n_) end - result[s] = result[:mean] - q*result[:se] + result[s] = result[:mean] - q * result[:se] elseif s == :median result[s] = median(vec) elseif s == :min @@ -406,13 +406,13 @@ function MetidaBase.metida_table_(obj::DataSet{DS}; sort = nothing, stats = noth stats ⊆ STATLIST || error("Some statistics not known!") if isa(stats, Symbol) stats = [stats] end if isnothing(sort) - ressetl = collect(intersect(resset, stats)) + ressetl = sortbyvec!(collect(intersect(resset, stats)), collect(keys(first(obj).result))) else ressetl = 
sortbyvec!(collect(intersect(resset, stats)), sort) end else if isnothing(sort) - ressetl = collect(resset) + ressetl = sortbyvec!(collect(resset), collect(keys(first(obj).result))) else ressetl = sortbyvec!(collect(resset), sort) end From 1ccf58a23769e412b1d56c3a0ebbeea2ae9767e8 Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Thu, 18 Jul 2024 01:20:13 +0300 Subject: [PATCH 4/5] minor changes --- Project.toml | 2 +- src/descriptive.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 05a9a84..3bcfa46 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MetidaStats" uuid = "75cdad26-409a-4e43-8ad7-d54b4fa665a0" authors = ["PharmCat "] -version = "0.2.2" +version = "0.2.3" [deps] diff --git a/src/descriptive.jl b/src/descriptive.jl index 97e5c0a..5b62384 100644 --- a/src/descriptive.jl +++ b/src/descriptive.jl @@ -127,6 +127,7 @@ function descriptives(data, vars, sort = nothing; kwargs...) if eltype(vars) <: Integer vars = Tables.columnnames(data)[vars] end if !isnothing(sort) vars = setdiff(vars, sort) + if length(sort) == 0 sort = nothing end end descriptives(dataimport_(data, vars, sort); kwargs...) end From d26332c1775744834baaf41eba2244dfe71e112b Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Thu, 18 Jul 2024 01:21:10 +0300 Subject: [PATCH 5/5] fix --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3bcfa46..05a9a84 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MetidaStats" uuid = "75cdad26-409a-4e43-8ad7-d54b4fa665a0" authors = ["PharmCat "] -version = "0.2.3" +version = "0.2.2" [deps]