From 4521606ee001cc04bd0874928b77e0010f550cf7 Mon Sep 17 00:00:00 2001 From: kagalenko-m-b <16374215+kagalenko-m-b@users.noreply.github.com> Date: Tue, 6 Jul 2021 18:00:21 +0300 Subject: [PATCH 1/3] Avoid an extra call to f in Statistic.mean(f, A) --- src/Statistics.jl | 70 ++++++++++++++++++++++++++--------------------- test/runtests.jl | 11 +++++++- 2 files changed, 49 insertions(+), 32 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 29aae101..00d39e7e 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -165,16 +165,26 @@ mean(A::AbstractArray; dims=:) = _mean(identity, A, dims) _mean_promote(x::T, y::S) where {T,S} = convert(promote_type(T, S), y) + +function _promoted_sum(f, A::AbstractArray; dims) # calls f() length(x) +1 times + x1 = f(first(A)) / 1 + result = sum(x -> _mean_promote(x1, f(x)), A; dims) +end +function _promoted_sum(f, A::AbstractVector; dims) # calls f() length(x) times + x1 = f(first(A)) / 1 + result = sum(x -> _mean_promote(x1, f(x)), @view A[begin+1:end]; dims, + init = x1) +end + # ::Dims is there to force specializing on Colon (as it is a Function) function _mean(f, A::AbstractArray, dims::Dims=:) where Dims - isempty(A) && return sum(f, A, dims=dims)/0 + isempty(A) && return sum(f, A; dims)/0 if dims === (:) n = length(A) else n = mapreduce(i -> size(A, i), *, unique(dims); init=1) end - x1 = f(first(A)) / 1 - result = sum(x -> _mean_promote(x1, f(x)), A, dims=dims) + result = _promoted_sum(f, A; dims) if dims === (:) return result / n else @@ -316,7 +326,7 @@ whereas the sum is scaled with `n` if `corrected` is If `itr` is an `AbstractArray`, `dims` can be provided to compute the variance over dimensions. In that case, `mean` must be an array with the same shape as -`mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). +`mean(itr; dims)` (additional trailing singleton dimensions are allowed). !!! note If array contains `NaN` or [`missing`](@ref) values, the result is also @@ -327,7 +337,7 @@ over dimensions. In that case, `mean` must be an array with the same shape as varm(A::AbstractArray, m::AbstractArray; corrected::Bool=true, dims=:) = _varm(A, m, corrected, dims) _varm(A::AbstractArray{T}, m, corrected::Bool, region) where {T} = - varm!(Base.reducedim_init(t -> abs2(t)/2, +, A, region), A, m; corrected=corrected) + varm!(Base.reducedim_init(t -> abs2(t)/2, +, A, region), A, m; corrected) varm(A::AbstractArray, m; corrected::Bool=true) = _varm(A, m, corrected, :) @@ -356,7 +366,7 @@ If `itr` is an `AbstractArray`, `dims` can be provided to compute the variance over dimensions. A pre-computed `mean` may be provided. When `dims` is specified, `mean` must be -an array with the same shape as `mean(itr, dims=dims)` (additional trailing +an array with the same shape as `mean(itr; dims)` (additional trailing singleton dimensions are allowed). !!! note @@ -369,16 +379,16 @@ var(A::AbstractArray; corrected::Bool=true, mean=nothing, dims=:) = _var(A, corr function _var(A::AbstractArray, corrected::Bool, mean, dims) if mean === nothing - mean = Statistics.mean(A, dims=dims) + mean = Statistics.mean(A; dims) end - return varm(A, mean; corrected=corrected, dims=dims) + return varm(A, mean; corrected, dims) end function _var(A::AbstractArray, corrected::Bool, mean, ::Colon) if mean === nothing mean = Statistics.mean(A) end - return real(varm(A, mean; corrected=corrected)) + return real(varm(A, mean; corrected)) end varm(iterable, m; corrected::Bool=true) = _var(iterable, corrected, m) @@ -419,8 +429,7 @@ function sqrt!(A::AbstractArray) A end -stdm(A::AbstractArray, m; corrected::Bool=true) = - sqrt.(varm(A, m; corrected=corrected)) +stdm(A::AbstractArray, m; corrected::Bool=true) = sqrt.(varm(A, m; corrected)) """ std(itr; corrected::Bool=true, mean=nothing[, dims]) @@ -440,7 +449,7 @@ If `itr` is an `AbstractArray`, `dims` can be provided to compute the standard d over dimensions, and `means` may contain means for each dimension of `itr`. A pre-computed `mean` may be provided. When `dims` is specified, `mean` must be -an array with the same shape as `mean(itr, dims=dims)` (additional trailing +an array with the same shape as `mean(itr; dims)` (additional trailing singleton dimensions are allowed). !!! note @@ -452,19 +461,19 @@ singleton dimensions are allowed). std(A::AbstractArray; corrected::Bool=true, mean=nothing, dims=:) = _std(A, corrected, mean, dims) _std(A::AbstractArray, corrected::Bool, mean, dims) = - sqrt.(var(A; corrected=corrected, mean=mean, dims=dims)) + sqrt.(var(A; corrected, mean, dims)) _std(A::AbstractArray, corrected::Bool, mean, ::Colon) = - sqrt.(var(A; corrected=corrected, mean=mean)) + sqrt.(var(A; corrected, mean)) _std(A::AbstractArray{<:AbstractFloat}, corrected::Bool, mean, dims) = - sqrt!(var(A; corrected=corrected, mean=mean, dims=dims)) + sqrt!(var(A; corrected, mean, dims)) _std(A::AbstractArray{<:AbstractFloat}, corrected::Bool, mean, ::Colon) = - sqrt.(var(A; corrected=corrected, mean=mean)) + sqrt.(var(A; corrected, mean)) std(iterable; corrected::Bool=true, mean=nothing) = - sqrt(var(iterable, corrected=corrected, mean=mean)) + sqrt(var(iterable; corrected, mean)) """ stdm(itr, mean; corrected::Bool=true) @@ -482,7 +491,7 @@ whereas the sum is scaled with `n` if `corrected` is If `itr` is an `AbstractArray`, `dims` can be provided to compute the standard deviation over dimensions. In that case, `mean` must be an array with the same shape as -`mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). +`mean(itr; dims)` (additional trailing singleton dimensions are allowed). !!! note If array contains `NaN` or [`missing`](@ref) values, the result is also @@ -490,8 +499,7 @@ over dimensions. In that case, `mean` must be an array with the same shape as Use the [`skipmissing`](@ref) function to omit `missing` entries and compute the standard deviation of non-missing values. """ -stdm(iterable, m; corrected::Bool=true) = - std(iterable, corrected=corrected, mean=m) +stdm(iterable, mean; corrected::Bool=true) = std(iterable; corrected, mean) ###### covariance ###### @@ -553,13 +561,13 @@ end ## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} ## which can't be handled by broadcast covm(x::AbstractVector, xmean; corrected::Bool=true) = - covzm(map(t -> t - xmean, x); corrected=corrected) + covzm(map(t -> t - xmean, x); corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = - covzm(x .- xmean, vardim; corrected=corrected) + covzm(x .- xmean, vardim; corrected) covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = - covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) + covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected) covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = - covzm(x .- xmean, y .- ymean, vardim; corrected=corrected) + covzm(x .- xmean, y .- ymean, vardim; corrected) # cov (API) """ @@ -568,7 +576,7 @@ covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corr Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`. """ -cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected) +cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected) """ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) @@ -578,7 +586,7 @@ is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is sc if `corrected` is `false` where `n = size(X, dims)`. """ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = - covm(X, _vmean(X, dims), dims; corrected=corrected) + covm(X, _vmean(X, dims), dims; corrected) """ cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) @@ -589,7 +597,7 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = - covm(x, mean(x), y, mean(y); corrected=corrected) + covm(x, mean(x), y, mean(y); corrected) """ cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; dims::Int=1, corrected::Bool=true) @@ -599,7 +607,7 @@ Compute the covariance between the vectors or matrices `X` and `Y` along the dim the sum is scaled with `n` if `corrected` is `false` where `n = size(X, dims) = size(Y, dims)`. """ cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; dims::Int=1, corrected::Bool=true) = - covm(X, _vmean(X, dims), Y, _vmean(Y, dims), dims; corrected=corrected) + covm(X, _vmean(X, dims), Y, _vmean(Y, dims), dims; corrected) ##### correlation ##### @@ -986,9 +994,9 @@ end require_one_based_indexing(v) n = length(v) - + @assert n > 0 # this case should never happen here - + m = alpha + p * (one(alpha) - alpha - beta) aleph = n*p + oftype(p, m) j = clamp(trunc(Int, aleph), 1, n-1) @@ -1001,7 +1009,7 @@ end a = v[j] b = v[j + 1] end - + if isfinite(a) && isfinite(b) return a + γ*(b-a) else diff --git a/test/runtests.jl b/test/runtests.jl index 00cdad10..997a9eed 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -152,7 +152,7 @@ end ≈ float(typemax(Int))) end let x = rand(10000) # mean should use sum's accurate pairwise algorithm - @test mean(x) == sum(x) / length(x) + @test mean(x) == sum((@view x[begin + 1:end]), init=x[1]) / length(x) end @test mean(Number[1, 1.5, 2+3im]) === 1.5+1im # mixed-type array @test mean(v for v in Number[1, 1.5, 2+3im]) === 1.5+1im @@ -162,6 +162,15 @@ end @test (@inferred mean(Iterators.filter(x -> true, Int[]))) === 0/0 @test (@inferred mean(Iterators.filter(x -> true, Float32[]))) === 0.f0/0 @test (@inferred mean(Iterators.filter(x -> true, Float64[]))) === 0/0 + # Check that mean does not call function argument an extra time + let _cnt = 0, N = 100, x = rand(Int, N) + f(x) = begin; _cnt += 1; x; end + @test mean(1:N) == mean(f, 1:N) + @test _cnt == N + _cnt = 0 + @test mean(x) == mean(f, x) + @test _cnt == N + end end @testset "mean/median for ranges" begin From 26b7dd12b34bc8e6743b14ab3ceb1767ad49b708 Mon Sep 17 00:00:00 2001 From: kagalenko-m-b <16374215+kagalenko-m-b@users.noreply.github.com> Date: Wed, 7 Jul 2021 18:27:41 +0300 Subject: [PATCH 2/3] Removed unrelated changes and improved code formatting --- src/Statistics.jl | 66 +++++++++++++++++++++++------------------------ test/runtests.jl | 4 +-- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 00d39e7e..30246050 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -165,16 +165,11 @@ mean(A::AbstractArray; dims=:) = _mean(identity, A, dims) _mean_promote(x::T, y::S) where {T,S} = convert(promote_type(T, S), y) - -function _promoted_sum(f, A::AbstractArray; dims) # calls f() length(x) +1 times - x1 = f(first(A)) / 1 - result = sum(x -> _mean_promote(x1, f(x)), A; dims) -end -function _promoted_sum(f, A::AbstractVector; dims) # calls f() length(x) times - x1 = f(first(A)) / 1 - result = sum(x -> _mean_promote(x1, f(x)), @view A[begin+1:end]; dims, - init = x1) -end +# calls f(A[1]) twice +_promoted_sum(f, A::AbstractArray; init, dims) = sum(x -> _mean_promote(init, f(x)), A; dims) + # calls f(A[1]) once +_promoted_sum(f, A::AbstractVector; init, dims) = + sum(x -> _mean_promote(x1, f(x)), @view A[begin+1:end]; init, dims) # ::Dims is there to force specializing on Colon (as it is a Function) function _mean(f, A::AbstractArray, dims::Dims=:) where Dims @@ -184,7 +179,8 @@ function _mean(f, A::AbstractArray, dims::Dims=:) where Dims else n = mapreduce(i -> size(A, i), *, unique(dims); init=1) end - result = _promoted_sum(f, A; dims) + init = f(first(A)) / 1 + result = _promoted_sum(f, A; init, dims) if dims === (:) return result / n else @@ -326,7 +322,7 @@ whereas the sum is scaled with `n` if `corrected` is If `itr` is an `AbstractArray`, `dims` can be provided to compute the variance over dimensions. In that case, `mean` must be an array with the same shape as -`mean(itr; dims)` (additional trailing singleton dimensions are allowed). +`mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). !!! note If array contains `NaN` or [`missing`](@ref) values, the result is also @@ -337,7 +333,7 @@ over dimensions. In that case, `mean` must be an array with the same shape as varm(A::AbstractArray, m::AbstractArray; corrected::Bool=true, dims=:) = _varm(A, m, corrected, dims) _varm(A::AbstractArray{T}, m, corrected::Bool, region) where {T} = - varm!(Base.reducedim_init(t -> abs2(t)/2, +, A, region), A, m; corrected) + varm!(Base.reducedim_init(t -> abs2(t)/2, +, A, region), A, m; corrected=corrected) varm(A::AbstractArray, m; corrected::Bool=true) = _varm(A, m, corrected, :) @@ -366,7 +362,7 @@ If `itr` is an `AbstractArray`, `dims` can be provided to compute the variance over dimensions. A pre-computed `mean` may be provided. When `dims` is specified, `mean` must be -an array with the same shape as `mean(itr; dims)` (additional trailing +an array with the same shape as `mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). !!! note @@ -379,16 +375,16 @@ var(A::AbstractArray; corrected::Bool=true, mean=nothing, dims=:) = _var(A, corr function _var(A::AbstractArray, corrected::Bool, mean, dims) if mean === nothing - mean = Statistics.mean(A; dims) + mean = Statistics.mean(A, dims=dims) end - return varm(A, mean; corrected, dims) + return varm(A, mean; corrected=corrected, dims=dims) end function _var(A::AbstractArray, corrected::Bool, mean, ::Colon) if mean === nothing mean = Statistics.mean(A) end - return real(varm(A, mean; corrected)) + return real(varm(A, mean; corrected=corrected)) end varm(iterable, m; corrected::Bool=true) = _var(iterable, corrected, m) @@ -429,7 +425,8 @@ function sqrt!(A::AbstractArray) A end -stdm(A::AbstractArray, m; corrected::Bool=true) = sqrt.(varm(A, m; corrected)) +stdm(A::AbstractArray, m; corrected::Bool=true) = + sqrt.(varm(A, m; corrected=corrected)) """ std(itr; corrected::Bool=true, mean=nothing[, dims]) @@ -449,7 +446,7 @@ If `itr` is an `AbstractArray`, `dims` can be provided to compute the standard d over dimensions, and `means` may contain means for each dimension of `itr`. A pre-computed `mean` may be provided. When `dims` is specified, `mean` must be -an array with the same shape as `mean(itr; dims)` (additional trailing +an array with the same shape as `mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). !!! note @@ -461,19 +458,19 @@ singleton dimensions are allowed). std(A::AbstractArray; corrected::Bool=true, mean=nothing, dims=:) = _std(A, corrected, mean, dims) _std(A::AbstractArray, corrected::Bool, mean, dims) = - sqrt.(var(A; corrected, mean, dims)) + sqrt.(var(A; corrected=corrected, mean=mean, dims=dims)) _std(A::AbstractArray, corrected::Bool, mean, ::Colon) = - sqrt.(var(A; corrected, mean)) + sqrt.(var(A; corrected=corrected, mean=mean)) _std(A::AbstractArray{<:AbstractFloat}, corrected::Bool, mean, dims) = - sqrt!(var(A; corrected, mean, dims)) + sqrt!(var(A; corrected=corrected, mean=mean, dims=dims)) _std(A::AbstractArray{<:AbstractFloat}, corrected::Bool, mean, ::Colon) = - sqrt.(var(A; corrected, mean)) + sqrt.(var(A; corrected=corrected, mean=mean)) std(iterable; corrected::Bool=true, mean=nothing) = - sqrt(var(iterable; corrected, mean)) + sqrt(var(iterable, corrected=corrected, mean=mean)) """ stdm(itr, mean; corrected::Bool=true) @@ -491,7 +488,7 @@ whereas the sum is scaled with `n` if `corrected` is If `itr` is an `AbstractArray`, `dims` can be provided to compute the standard deviation over dimensions. In that case, `mean` must be an array with the same shape as -`mean(itr; dims)` (additional trailing singleton dimensions are allowed). +`mean(itr, dims=dims)` (additional trailing singleton dimensions are allowed). !!! note If array contains `NaN` or [`missing`](@ref) values, the result is also @@ -499,7 +496,8 @@ over dimensions. In that case, `mean` must be an array with the same shape as Use the [`skipmissing`](@ref) function to omit `missing` entries and compute the standard deviation of non-missing values. """ -stdm(iterable, mean; corrected::Bool=true) = std(iterable; corrected, mean) +stdm(iterable, m; corrected::Bool=true) = + std(iterable, corrected=corrected, mean=m) ###### covariance ###### @@ -561,13 +559,13 @@ end ## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} ## which can't be handled by broadcast covm(x::AbstractVector, xmean; corrected::Bool=true) = - covzm(map(t -> t - xmean, x); corrected) + covzm(map(t -> t - xmean, x); corrected=corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = - covzm(x .- xmean, vardim; corrected) + covzm(x .- xmean, vardim; corrected=corrected) covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = - covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected) + covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = - covzm(x .- xmean, y .- ymean, vardim; corrected) + covzm(x .- xmean, y .- ymean, vardim; corrected=corrected) # cov (API) """ @@ -576,7 +574,7 @@ covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corr Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`. """ -cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected) +cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected) """ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) @@ -586,7 +584,7 @@ is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is sc if `corrected` is `false` where `n = size(X, dims)`. """ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = - covm(X, _vmean(X, dims), dims; corrected) + covm(X, _vmean(X, dims), dims; corrected=corrected) """ cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) @@ -597,7 +595,7 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = - covm(x, mean(x), y, mean(y); corrected) + covm(x, mean(x), y, mean(y); corrected=corrected) """ cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; dims::Int=1, corrected::Bool=true) @@ -607,7 +605,7 @@ Compute the covariance between the vectors or matrices `X` and `Y` along the dim the sum is scaled with `n` if `corrected` is `false` where `n = size(X, dims) = size(Y, dims)`. """ cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; dims::Int=1, corrected::Bool=true) = - covm(X, _vmean(X, dims), Y, _vmean(Y, dims), dims; corrected) + covm(X, _vmean(X, dims), Y, _vmean(Y, dims), dims; corrected=corrected) ##### correlation ##### diff --git a/test/runtests.jl b/test/runtests.jl index 997a9eed..e5118ffa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -163,8 +163,8 @@ end @test (@inferred mean(Iterators.filter(x -> true, Float32[]))) === 0.f0/0 @test (@inferred mean(Iterators.filter(x -> true, Float64[]))) === 0/0 # Check that mean does not call function argument an extra time - let _cnt = 0, N = 100, x = rand(Int, N) - f(x) = begin; _cnt += 1; x; end + let _cnt = 0, N = 100, x = rand(Int, N) + f(x) = (_cnt += 1; x) @test mean(1:N) == mean(f, 1:N) @test _cnt == N _cnt = 0 From 38a952facceb00452fe61ef60c13b7022e022fd0 Mon Sep 17 00:00:00 2001 From: kagalenko-m-b <16374215+kagalenko-m-b@users.noreply.github.com> Date: Wed, 7 Jul 2021 18:40:34 +0300 Subject: [PATCH 3/3] Bug fix --- src/Statistics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 30246050..0b6946e3 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -169,7 +169,7 @@ _mean_promote(x::T, y::S) where {T,S} = convert(promote_type(T, S), y) _promoted_sum(f, A::AbstractArray; init, dims) = sum(x -> _mean_promote(init, f(x)), A; dims) # calls f(A[1]) once _promoted_sum(f, A::AbstractVector; init, dims) = - sum(x -> _mean_promote(x1, f(x)), @view A[begin+1:end]; init, dims) + sum(x -> _mean_promote(init, f(x)), @view A[begin+1:end]; init, dims) # ::Dims is there to force specializing on Colon (as it is a Function) function _mean(f, A::AbstractArray, dims::Dims=:) where Dims