diff --git a/NAMESPACE b/NAMESPACE index baab2f7..b2a21db 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,8 +12,7 @@ export(pbs, pgs,pls, pmand, pmzsm, ppareto, ppoilog, ppower, prbs, pvolkov, pzip export(qbs, qgs, qls, qmand, qmzsm, qpareto, qpoilog, qpower, qrbs, qvolkov, qzipf, qpoig, qpoix) ## Fitting functions for each distribution export(fitbs, fitgamma, fitgeom, fitgs, fitlnorm, fitls, fitmand, fitmzsm, - fitnbinom, fitpareto, fitpoilog, fitpower, fitrbs, fitvolkov, fitweibull, fitzipf, - fitpoig, fitpoix) + fitnbinom, fitpareto, fitpoilog, fitpower, fitrbs, fitvolkov, fitweibull, fitzipf) ## General fitting and ploting functions export(fitrad, fitsad, octav, octavpred, radpred, pprad, ppsad, qqrad, qqsad, rad) ## Accessory functions @@ -22,7 +21,7 @@ export(plotprofmle, pred.logser, dtrunc, ptrunc, qtrunc, rsad, trueLL, updatesad exportClasses(rad, octav, fitsad, fitrad) exportMethods(plot, points, lines, octavpred, radpred, qqsad, qqrad, trueLL) # Methods that override bbmle counterparts: -exportMethods(nobs, AIC, AICc) +exportMethods(show, nobs, AIC, AICc) ## IMPORTS ## ## Import specific functions from other packages used by new methods importFrom("graphics", plot, points, lines) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..fbe8efc --- /dev/null +++ b/NEWS.md @@ -0,0 +1,25 @@ +# Version 0.2.0 (Milestone) +### Major changes +- Reworked the Volkov distribution for performance. +- Reworked density, quantile and distribution functions for more consistent behaviour, +error handling and large performance gains. +- Added some missing functions for mzsm, poig and poix families. +- New function *updatesad* to update a fitsad with better fit from profile. +- *radpred* now uses exact solutions for extreme data points. +- Reimplemented the *AIC* and *AICc* functions for better integration with fitsad and fitrad classes. + +### Enhancements +- Improved grammar and clearer text in some manual pages and vignette. +- Improved starting values for several distributions. +- Replaced table(cut(x)) for hist for improved performance. +- octav and rad classes now have validation functions and are more consistent. +- More informative calls in *show* and *summary*. +- Reduced code redundancy and improved readability. + +### Bugfixes +- Fixed the Broken-Stick family to behave like a Discrete distribution. +- Small but importante fixes in *octavpred*. +- Workarounds for bugs and issues of the bbmle package. + +# Version 0.1.10 +- Initial release on CRAN. diff --git a/R/dpoig.R b/R/dpoig.R index 261ff47..0383cd5 100644 --- a/R/dpoig.R +++ b/R/dpoig.R @@ -1,5 +1,4 @@ dpoig <- function(x, frac, rate, shape, log=FALSE) { - x[ ! is.wholenumber(x) | x < 0 ] <- NaN frac[ !is.finite(frac) | frac <= 0 ] <- NaN rate[ !is.finite(rate) | rate <= 0 ] <- NaN shape[ !is.finite(shape) | shape <= 0 ] <- NaN @@ -7,5 +6,7 @@ dpoig <- function(x, frac, rate, shape, log=FALSE) { c <- lfactorial(x)+lgamma(shape)+(x+shape)*log(frac+rate) vals<-b-c if (any(is.nan(vals))) warning ("NaNs produced") + if (any(!is.wholenumber(x))) warning("non integer values in x") + vals[ ! is.wholenumber(x) | x < 0] <- -Inf if(log)vals else exp(vals) } diff --git a/R/dpoix.R b/R/dpoix.R index a4180db..26e38be 100644 --- a/R/dpoix.R +++ b/R/dpoix.R @@ -1,5 +1,4 @@ dpoix <- function(x, frac, rate, log=FALSE) { - x[ ! is.wholenumber(x) | x < 0 ] <- NaN frac[ !is.finite(frac) | frac <= 0 ] <- NaN rate[ !is.finite(rate) | rate <= 0 ] <- NaN b <- x*log(frac) @@ -7,6 +6,8 @@ dpoix <- function(x, frac, rate, log=FALSE) { n <- (x+1)*log(rate+frac) vals <- b+m-n if (any(is.nan(vals))) warning ("NaNs produced") + if (any(!is.wholenumber(x))) warning("non integer values in x") + vals[ ! is.wholenumber(x) | x < 0] <- -Inf if(log) vals else exp(vals) } diff --git a/R/fitpoig.R b/R/fitpoig.R deleted file mode 100644 index 43b8016..0000000 --- a/R/fitpoig.R +++ /dev/null @@ -1,24 +0,0 @@ -fitpoig <- function(x, trunc = 0, start.value, ...){ - if (missing(start.value)){ - ## NEEDS BETTER START VALUE!! - frac = 0.5 - rate = 1/mean(x) - shape = 1 - } - else{ - frac <- start.value[1] - rate <- start.value[2] - shape <- start.value[3] - } - if (!missing(trunc)){ - if (min(x)<=trunc) stop("truncation point should be lower than the lowest data value") - else{ - LL <- function(frac, rate, shape) -sum(dtrunc("poig", x = x, coef = list(frac = frac, rate = rate, shape=shape), trunc = trunc, log = TRUE)) - } - } - if (missing(trunc)){ - LL <- function(frac, rate, shape) -sum(dpoig(x, frac, rate, shape, log = TRUE)) - } - result <- do.call("mle2", c(list(LL, start = list(frac = frac, rate=rate, shape=shape), data = list(x = x)), ...)) - new("fitsad", result, sad = "poig", distr = "D", trunc = ifelse(missing(trunc), NaN, trunc)) -} diff --git a/R/fitpoix.R b/R/fitpoix.R deleted file mode 100644 index 0d2aee9..0000000 --- a/R/fitpoix.R +++ /dev/null @@ -1,22 +0,0 @@ -fitpoix <- function(x, trunc = 0, start.value, ...){ - if (missing(start.value)){ - ## NEEDS BETTER START VALUE!! - frac = 0.5 - rate = 1/mean(x) - } - else{ - frac <- start.value[1] - rate <- start.value[2] - } - if (!missing(trunc)){ - if (min(x)<=trunc) stop("truncation point should be lower than the lowest data value") - else{ - LL <- function(frac, rate) -sum(dtrunc("poix", x = x, coef = list(frac = frac, rate = rate), trunc = trunc, log = TRUE)) - } - } - if (missing(trunc)){ - LL <- function(frac, rate) -sum(dpoix(x, frac, rate, log = TRUE)) - } - result <- do.call("mle2", c(list(LL, start = list(frac=frac, rate=rate), data = list(x = x)), ...)) - new("fitsad", result, sad = "poix", distr = "D", trunc = ifelse(missing(trunc), NaN, trunc)) -} diff --git a/R/ppoig.R b/R/ppoig.R index 3e2a370..ce86dd4 100644 --- a/R/ppoig.R +++ b/R/ppoig.R @@ -1,4 +1,5 @@ ppoig <- function(q, frac, rate, shape, lower.tail=TRUE, log.p=FALSE) { + if (any(!is.wholenumber(q))) warning("non integer values in q") y <- suppressWarnings(cumsumW("poig", q, list(frac=frac, rate=rate, shape=shape), lower.tail, log.p, pad=FALSE)) if(any(is.nan(y))) warning("NaNs produced") return(y) diff --git a/R/ppoix.R b/R/ppoix.R index f259560..54d0571 100644 --- a/R/ppoix.R +++ b/R/ppoix.R @@ -1,4 +1,5 @@ ppoix <- function(q, frac, rate, lower.tail=TRUE, log.p=FALSE) { + if (any(!is.wholenumber(q))) warning("non integer values in q") y <- suppressWarnings(cumsumW("poix", q, list(frac=frac, rate=rate), lower.tail, log.p, pad=FALSE)) if(any(is.nan(y))) warning("NaNs produced") return(y) diff --git a/R/sads-methods.R b/R/sads-methods.R index 72f1347..da15010 100644 --- a/R/sads-methods.R +++ b/R/sads-methods.R @@ -196,6 +196,40 @@ setMethod("nobs", "fitrad", function(object) length(object@rad.tab$abund) ) +### Copy of functions from mle2, including some error-checking, slots specific to fitrad/fitsad and +### truncating the display of the call +showmle2 <- function(object) { + cat("Maximum likelihood estimation\nType:") + if (object@distr == "C") cat (" continuous ") + else cat (" discrete ") + if (inherits(object, "fitsad")) cat ("species abundance distribution") + else cat ("rank abundance distribution") + cat("\nCall:\n") + # Summarizes the call to avoid printing pages of data + d <- object@call.orig$data$x + if (length(d) > 6) { + d <- c(as.list(as.numeric(d[1:5])), "etc") + object@call.orig$data$x <- d + } + print(object@call.orig) + cat("\nCoefficients:\n") + print(coef(object)) + if(!is.nan(object@trunc)) { + cat(paste("\nTruncation point:", object@trunc, "\n")) + } + cat("\nLog-likelihood: ") + cat(round(as.numeric(logLik(object)),2),"\n") + if (object@optimizer=="optimx" && length(object@method)>1) { + cat("Best method:",object@details$method.used,"\n") + } + if (!is.null(object@details$convergence)) + if(object@details$convergence > 0) + cat("\nWarning: optimization did not converge (code ", + object@details$convergence,": ",object@details$message,")\n",sep="") + } +setMethod("show", "fitsad", function(object){showmle2(object)}) +setMethod("show", "fitrad", function(object){showmle2(object)}) + ## radpred generic functions and methods ### setGeneric("radpred", def = function(object, sad, rad, coef, trunc, distr, S, N) standardGeneric("radpred") diff --git a/README.md b/README.md index 27f2f24..9201d24 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ### R package for fitting species abundance distributions -**Paulo I. Prado and Murilo Miranda** +**Paulo I. Prado, Murilo Miranda and Andre Chalom** ### Installation diff --git a/man/dpoig.Rd b/man/dpoig.Rd index e99f707..1eb8066 100644 --- a/man/dpoig.Rd +++ b/man/dpoig.Rd @@ -56,7 +56,7 @@ qpoig(p, frac, rate, shape, S=30, lower.tail=TRUE, log.p=FALSE) A compound Poisson-gamma distribution is a Poisson probability distribution where its single parameter, the process mean rate, is frac*n, at which n is a random variable with gamma distribution. The density function - is given by Green & Plotkin (2007) + is given by Green & Plotkin (2007). In ecology, this distribution gives the probability that a species has an abundance of x individuals in a random sample of a fraction frac of @@ -75,6 +75,8 @@ qpoig(p, frac, rate, shape, S=30, lower.tail=TRUE, log.p=FALSE) Binomial explicitly as a compound distribution. The Fisher log-series (Fisher 1943) is a limiting case where the dispersion parameter of the Negative Binomial tends to zero. + As in the case of the Poisson-exponential, the user should not fit to + the Poisson-gamma directly, and should use instead the \code{fitls} function. } \value{ diff --git a/man/dpoix.Rd b/man/dpoix.Rd index 83ee338..643f269 100644 --- a/man/dpoix.Rd +++ b/man/dpoix.Rd @@ -72,6 +72,10 @@ qpoix(p, frac, rate, S=30, lower.tail=TRUE, log.p=FALSE) fraction of total individuals sampled is small enough to approximate a sample with replacement. See Engen (1977) and Alonso et al. (2008) for critic evaluations. + Notice that the Poisson-exponential can be seen as a different form for the + MacArthur's Broken stick model (Baczkowski, 2000), so instead of fitting to a + Poisson-exponential distribution directly, the user should use \code{fitbs}. + } \value{ (log) density of the (zero-truncated) density. diff --git a/man/fitrad-class.Rd b/man/fitrad-class.Rd index 36c224d..8557bc7 100644 --- a/man/fitrad-class.Rd +++ b/man/fitrad-class.Rd @@ -5,6 +5,7 @@ \alias{fitrad-class} %\alias{octavpred,fitrad,missing,missing,missing,missing,ANY,missing,missing-method} \alias{plot,fitrad,ANY-method} +\alias{show,fitrad-method} %\alias{pprad,fitrad-method} \alias{pprad,fitrad,missing,missing-method} \alias{pprad,fitrad,missing,missing,missing,missing-method} @@ -73,6 +74,7 @@ abundance octave, see \code{\link{octav}} and \code{\link{octavpred}}.} \item{plot}{\code{signature(x = "fitrad", y = "ANY")}: diagnostic plots of the fitted model. } + \item{show}{\code{signature(object = "fitrad")}: Displays object.} \item{pprad}{\code{signature(x = "fitrad", sad = "missing", coef = "missing", trunc = "missing")}: plot of observed vs predicted percentiles of the abundance distribution, details in diff --git a/man/fitsad-class.Rd b/man/fitsad-class.Rd index 9804c23..ff6aa1c 100644 --- a/man/fitsad-class.Rd +++ b/man/fitsad-class.Rd @@ -4,6 +4,7 @@ \alias{fitsad-class} \alias{plot,fitsad,ANY-method} +\alias{show,fitsad-method} %\alias{ppsad,fitsad-method} \alias{ppsad,fitsad,missing,missing-method} \alias{ppsad,fitsad,missing,missing,missing-method} @@ -68,6 +69,7 @@ abundance octave, see \code{\link{octav}} and \code{\link{octavpred}}.} \item{plot}{\code{signature(x = "fitsad", y = "ANY")}: diagnostic plots of the fitted model. } + \item{show}{\code{signature(object = "fitsad")}: Displays object.} \item{ppsad}{\code{signature(x = "fitsad", sad = "missing", coef = "missing", trunc = "missing")}: plot of observed vs predicted percentiles of the abundance distribution, details in diff --git a/man/fitsad.Rd b/man/fitsad.Rd index 1ed6685..e7e7c28 100644 --- a/man/fitsad.Rd +++ b/man/fitsad.Rd @@ -12,8 +12,6 @@ \alias{fitpower} \alias{fitvolkov} \alias{fitweibull} -\alias{fitpoig} -\alias{fitpoix} \title{ML fitting of species abundance distributions} @@ -48,10 +46,6 @@ fitpower(x, trunc, start.value, upper = 20, \dots) fitvolkov(x, trunc, start.value, \dots) fitweibull(x, trunc, start.value, \dots) - -fitpoig(x, trunc = 0, start.value, \dots) - -fitpoix(x, trunc = 0, start.value, \dots) } \arguments{ @@ -208,21 +202,19 @@ fitpoix(x, trunc = 0, start.value, \dots) diagnostic plots and so on (see \code{\link{fitsad-class}}). - Functions \code{fitpoilog}, \code{fitpoix} and \code{fitpoig} - fit the Poisson-lognormal, Poisson-exponential and Poisson-gamma distributions. - These are compound distributions that describe the abundances - of species in a Poisson sample of community that follows a - lognormal, exponential or gamma SAD. - The Poisson-lognormal is + Function \code{fitpoilog} fits the Poisson-lognormal distribution. + This is a compound distributions that describes the abundances + of species in Poisson sample of community that follows a + lognormal SAD. This is a sampling model of SAD, which is approximated by the \sQuote{veil line} truncation of the lognormal (Preston 1948). The Poisson-lognormal is the analytic solution for this sampling model, as Fisher's log-series - is an analytic limit case for a Poisson-gamma + is a analytic limit case for a Poisson-gamma (a.k.a negative binomial) distribution. As geometric and - negative binomial distributions, these compound distributions - include zero, but the fit is zero-truncated - by default. - + negative binomial distributions, the Poisson-lognormal + includes zero, but the fit is zero-truncated + by default, as for \code{fitgeom}, \code{fitnbinom}. + \code{fitmzsm} fits the metacommunity Zero-sum multinomial distribution \code{\link{dmzsm}} from the Neutral Theory of Biodiversity (Alonso and McKane 2004). The mZSM describes the SAD of a sample diff --git a/vignettes/sads_intro.Rnw b/vignettes/sads_intro.Rnw index ea3a6e4..5758b93 100644 --- a/vignettes/sads_intro.Rnw +++ b/vignettes/sads_intro.Rnw @@ -7,6 +7,7 @@ \usepackage{Sweave} \usepackage{natbib} \usepackage{framed, color} +\usepackage{xspace} \definecolor{shadecolor}{rgb}{0.9, 0.9, 0.9} \setlength{\parindent}{0pt} \setlength{\hoffset}{-0.5in} @@ -14,7 +15,9 @@ \setlength{\voffset}{-0.1in} %\pdfpagewidth=\paperwidth %\pdfpageheight=\paperheight -\newcommand{\R}{{\sf R}} +\newcommand{\R}{\textnormal{\sffamily\bfseries R}\xspace} +% altered bc \sf is obsolete, see: +% http://tex.stackexchange.com/questions/74478/latex-command-incantation-for-r \newcommand{\code}[1]{\texttt{#1}} \SweaveOpts{eval=TRUE, keep.source=TRUE, echo=TRUE} %\VignetteIndexEntry{Introduction to sads} @@ -22,10 +25,10 @@ \begin{document} \title{Fitting species abundance models with maximum likelihood \\ Quick reference for \code{sads} package} -\author{Paulo In\'acio Prado and Murilo Dantas Miranda \\ Theoretical Ecology Lab \\ LAGE at the Dep of Ecology, USP, Brazil \\ +\author{Paulo In\'acio Prado, Murilo Dantas Miranda and Andre Chalom \\ Theoretical Ecology Lab \\ LAGE at the Dep of Ecology, USP, Brazil \\ \url{http://ecologia.ib.usp.br/let/} \\ \url{prado@ib.usp.br}} -\date{February, 08, 2015} +\date{May, 21, 2015} \maketitle @@ -56,7 +59,6 @@ The package is available on CRAN and can be installed in \R with the command: @ <>= install.packages('sads') -library(sads) @ %def then loaded by @@ -76,9 +78,6 @@ library(devtools) install_github(repo = 'piklprado/sads', ref= 'dev') @ - - - \section{Exploratory analyses} \label{sec:analise-exploratoria} @@ -320,6 +319,11 @@ legend("topright", lty=1, col=c("blue","red", "green")) @ %def +\textbf{NOTICE} that the information criterion methods do not differentiate between +\code{fitsad} and \code{fitrad} methods. Because of this, it is possible to include +\code{fitsad} and \code{fitrad} objects in the same IC-table without generating an error, +but the result will be meaningless. + \section{Simulations} The function \code{rsad} returns random samples of a community with $S$ species. The mean abundances of the species in the communities