diff --git a/src/basicfuns.jl b/src/basicfuns.jl
index e013adf..859db5b 100644
--- a/src/basicfuns.jl
+++ b/src/basicfuns.jl
@@ -165,9 +165,14 @@ Return `log(1+exp(x))` evaluated carefully for largish `x`.
 
 This is also called the ["softplus"](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) transformation, being a smooth approximation to `max(0,x)`. Its inverse is [`logexpm1`](@ref).
 
+The generalized `softplus` function (Wiemann et al., 2024) takes an additional optional parameter `a` that controls
+the approximation error with respect to the linear spline `max(0,x)`. It defaults to `a=1.0`, in which case `softplus`
+is equivalent to `log1pexp`.
+
 See:
  * Martin Maechler (2012) [“Accurately Computing log(1 − exp(− |a|))”](http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf)
-"""
+ * Wiemann, P. F., Kneib, T., & Hambuckers, J. (2024). Using the softplus function to construct alternative link functions in generalized linear models and beyond. Statistical Papers, 65(5), 3155-3180.
+"""
 log1pexp(x::Real) = _log1pexp(float(x)) # ensures that BigInt/BigFloat, Int/Float64 etc. dispatch to the same algorithm
 
 # Approximations based on Maechler (2012)
@@ -255,10 +260,22 @@ Return `log(exp(x) - 1)` or the “invsoftplus” function. It is the inverse of
 [`log1pexp`](@ref) (aka “softplus”).
 """
 logexpm1(x::Real) = x <= 18.0 ? log(_expm1(x)) : x <= 33.3 ? x - exp(-x) : oftype(exp(-x), x)
-logexpm1(x::Float32) = x <= 9f0 ? log(expm1(x)) : x <= 16f0 ? x - exp(-x) : oftype(exp(-x), x)
+logexpm1(x::Float32) = x <= 9.0f0 ? log(expm1(x)) : x <= 16.0f0 ? x - exp(-x) : oftype(exp(-x), x)
+
+function softplus(x; a::Real=1.0)
+    if a == 1.0
+        return log1pexp(x)
+    end
+    return log1pexp(a * x) / a
+end
+
+function invsoftplus(y; a::Real=1.0)
+    if a == 1.0
+        return logexpm1(y)
+    end
+    return logexpm1(a * y) / a
+end
 
-const softplus = log1pexp
-const invsoftplus = logexpm1
 
 """
 $(SIGNATURES)
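
For reference, a minimal sketch of how the new keyword argument behaves; this is not part of the patch. The `log1pexp`/`logexpm1` stand-ins below are naive one-liners rather than the package's range-reduced implementations, and the sample values are illustrative only.

```julia
# Self-contained sketch of the generalized softplus/invsoftplus added above.
# Naive stand-ins for the package's carefully range-reduced functions:
log1pexp(x) = log1p(exp(x))   # log(1 + exp(x)), i.e. the classic softplus
logexpm1(y) = log(expm1(y))   # log(exp(y) - 1), its inverse

softplus(x; a::Real=1.0)    = a == 1.0 ? log1pexp(x) : log1pexp(a * x) / a
invsoftplus(y; a::Real=1.0) = a == 1.0 ? logexpm1(y) : logexpm1(a * y) / a

x = 0.3
@assert softplus(x) == log1pexp(x)                          # default a = 1 keeps the old behaviour
@assert invsoftplus(softplus(x; a=4.0); a=4.0) ≈ x          # softplus/invsoftplus remain inverses for any a > 0
@assert isapprox(softplus(x; a=20.0), max(0, x); atol=1e-2) # larger a hugs the linear spline max(0, x) more tightly
```

Since `softplus(x; a) = log(1 + exp(a*x)) / a`, increasing `a` shrinks the gap to `max(0, x)` (the gap is at most `log(2)/a`, attained at `x = 0`), which is the "approximation error with respect to the linear spline" mentioned in the docstring.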