diff --git a/NAMESPACE b/NAMESPACE index b2a21db..d4252cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,7 +16,7 @@ export(fitbs, fitgamma, fitgeom, fitgs, fitlnorm, fitls, fitmand, fitmzsm, ## General fitting and ploting functions export(fitrad, fitsad, octav, octavpred, radpred, pprad, ppsad, qqrad, qqsad, rad) ## Accessory functions -export(plotprofmle, pred.logser, dtrunc, ptrunc, qtrunc, rsad, trueLL, updatesad, updaterad) +export(plotprofmle, pred.logser, dtrunc, ptrunc, qtrunc, rsad, updatesad, updaterad) ## Explicit classes and methods export exportClasses(rad, octav, fitsad, fitrad) exportMethods(plot, points, lines, octavpred, radpred, qqsad, qqrad, trueLL) diff --git a/NEWS.md b/NEWS.md index fbe8efc..567dd5b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ error handling and large performance gains. - New function *updatesad* to update a fitsad with better fit from profile. - *radpred* now uses exact solutions for extreme data points. - Reimplemented the *AIC* and *AICc* functions for better integration with fitsad and fitrad classes. +- Reworked the way in which *trueLL* is implemented ### Enhancements - Improved grammar and clearer text in some manual pages and vignette. diff --git a/R/sads-classes.R b/R/sads-classes.R index 51b1088..634f074 100644 --- a/R/sads-classes.R +++ b/R/sads-classes.R @@ -21,4 +21,6 @@ setClass("rad", representation("data.frame"), validity = function(object) { setClass("fitsad", representation("mle2", sad="character", distr="character", trunc="numeric")) setClass("fitrad", representation("mle2", rad="character", distr="character", trunc="numeric", rad.tab="rad")) +# "Promoting" histogram to S4 in order to declare it as a valid object for trueLL methods +setClass("histogram", representation("list")) #setClass("fitsadlist", representation("list")) diff --git a/R/sads-methods.R b/R/sads-methods.R index da15010..c208ba5 100644 --- a/R/sads-methods.R +++ b/R/sads-methods.R @@ -748,37 +748,3 @@ setMethod("pprad", pprad(x=y, rad=rad, coef=coef, trunc=trunc, plot=plot, line=line, ...) } ) - -## Generic function and methods for trueLL ## -setGeneric("trueLL", - def = function(x, dens, coef, trunc, dec.places = 0, log = TRUE, ...) standardGeneric("trueLL") - ) - -## If x is numeric arguments dens, coef and decimal places should be provided -setMethod("trueLL", - signature(x="numeric", dens="character", coef="list", dec.places = "numeric", log="ANY"), - function(x, dens, coef, trunc, dec.places = 0, log = TRUE, ...){ - dots <- list(...) - D <- 10^(-dec.places)/2 - x <- round(x, dec.places) - if(missing(trunc)||is.na(trunc)){ - cdf <- get(paste("p", dens, sep=""), mode = "function") - probs <- do.call(cdf, c(list(q = x+D), as.list(coef), dots)) - do.call(cdf, c(list(q = x-D), as.list(coef), dots)) - } - else{ - probs <- do.call(ptrunc, c(list(dens, q = x+D, coef = as.list(coef), trunc=trunc), dots))- - do.call(ptrunc, c(list(dens, q = x-D, coef = as.list(coef), trunc=trunc), dots)) - } - if(log) sum(log(probs)) else prod(probs) - } - ) - -## If x is of class fitsad only argument dec.places should be provided -setMethod("trueLL", - signature(x="fitsad", coef="missing", trunc="missing", dec.places = "numeric", log="ANY"), - function(x, dens, coef, trunc, dec.places, log, ...){ - if(x@distr != "C") stop("trueLL only makes sense for continuous distributions models") - trueLL(x = x@data$x, dens = x@sad, coef = as.list(x@coef), - trunc=x@trunc, dec.places = dec.places, log, ...) - } - ) diff --git a/R/trueLL.R b/R/trueLL.R index 53af6b3..76af024 100644 --- a/R/trueLL.R +++ b/R/trueLL.R @@ -1,28 +1,54 @@ -trueLL <- function(x, dens, coef, trunc, dec.places = 0, breaks, counts, log = TRUE, ...){ - dots <- list(...) - if(missing(breaks)){ - D <- 10^(-dec.places)/2 - x <- round(x, dec.places) - if(missing(trunc)){ - cdf <- get(paste("p", dens, sep=""), mode = "function") - probs <- do.call(cdf, c(list(q = x+D), as.list(coef), dots)) - do.call(cdf, c(list(q = x-D), as.list(coef), dots)) - } - else{ - probs <- do.call(ptrunc, c(list(dens, q = x+D, coef = as.list(coef), trunc=trunc), dots))- - do.call(ptrunc, c(list(dens, q = x-D, coef = as.list(coef), trunc=trunc), dots)) - } - } - else{ - h1 <- hist(x, breaks=breaks, plot=FALSE) - if(missing(trunc)){ - cdf <- get(paste("p", dens, sep=""), mode = "function") - probs <- diff(do.call(cdf, c(list(q = h1$breaks), as.list(coef), dots))) - } - else{ - probs <- diff(do.call(ptrunc, c(list(dens, q = h1$breaks, coef = as.list(coef), trunc=trunc), dots))) - } - if(missing(counts)) counts <- h1$counts - probs <- rep(probs, counts) - } - if(log) sum(log(probs)) else prod(probs) -} +# trueLL generic and methods +setGeneric("trueLL", + def=function(object, dist, coef, trunc, dec.places=0, breaks, ...) + standardGeneric("trueLL") + ) +setMethod("trueLL", signature(object="fitsad", dist="missing", coef="missing", trunc="missing", + dec.places="ANY", breaks="ANY"), + function(object, dec.places=0, breaks, ...){ + if(missing(breaks)) + return(trueLL(object=object@data$x, dist=object@sad, coef=as.list(bbmle::coef(object)), + trunc=object@trunc, dec.places=dec.places, ...)) + else + return(trueLL(object=object@data$x, dist=object@sad, coef=as.list(bbmle::coef(object)), + trunc=object@trunc, breaks=breaks, ...)) + }) +setMethod("trueLL", signature(object="fitrad", dist="missing", coef="missing", trunc="missing", + dec.places="ANY", breaks="ANY"), + function(object, dec.places=0, breaks, ...){ + if(missing(breaks)) + return(trueLL(object=object@rad.tab$abund, dist=object@rad, coef=as.list(bbmle::coef(object)), + trunc=object@trunc, dec.places=dec.places, breaks=breaks, ...)) + else + return(trueLL(object=object@rad.tab$abund, dist=object@rad, coef=as.list(bbmle::coef(object)), + trunc=object@trunc, breaks=breaks, ...)) + }) +# object is numeric +setMethod("trueLL", signature(object="numeric", dist="character", coef="list", trunc="ANY", + dec.places="ANY", breaks="ANY"), + function(object, dist, coef, trunc, dec.places=0, breaks, ...){ + dots <- list(...) + if(missing(breaks)){ + D <- 10^(-dec.places)/2 + breaks <- sort(unique(c(object - D, object + D))) + } + h1 <- hist(object, breaks=breaks, plot=FALSE) + if(missing(trunc)) trunc <- NaN + trueLL(object=h1, dist=dist, coef=coef, trunc=trunc) + }) +# workhorse method, all other methods redirect here +setMethod("trueLL", signature(object="histogram", dist="character", coef="list", trunc="ANY", + dec.places="missing", breaks="missing"), + function(object, dist, coef, trunc, ...){ + dots <- list(...) + if(missing(trunc)) trunc <- NaN + if(is.nan(trunc)){ + cdf <- get(paste("p", dist, sep=""), mode = "function") + probs <- diff(do.call(cdf, c(list(q = object$breaks), as.list(coef), dots))) + } + else{ + probs <- diff(do.call(ptrunc, c(list(dist, q = object$breaks, coef = as.list(coef), trunc=trunc), dots))) + } + probs <- rep(probs, object$counts) + return(sum(log(probs))) + }) diff --git a/R/updatesad.R b/R/updatesad.R index eb607cb..1eaad8a 100644 --- a/R/updatesad.R +++ b/R/updatesad.R @@ -1,7 +1,7 @@ ## updates a fitsad/fitrad object by running the optimizer again starting ## on better fit returned by profile updatesad <- function(object, ...) { - dots <- as.list(...) + dots <- list(...) prof <- profile(object) if(class(prof) == "profile.mle2") stop("Cannot update, profile did not find a better fit!") newcall <- as.list(object@call) @@ -9,9 +9,15 @@ updatesad <- function(object, ...) { newcall[["control"]] <- NULL # removes the "control" slot # merges the original call with arguments supplied by ... for (v in names(dots)) newcall[[v]] <- dots[[v]] - names(newcall$start) -> name # bugfix? profile returns coefficient "prob" name as "prob.prob" - newcall$start <- list(prof@fullcoef) # TODO: might be coef instead? how to deal with fixed? - names(newcall$start) <- name + # bugfix? profile returns coefficient "prob" name as "prob.prob", sometimes changes parameters + # from start to fixed... + names(newcall$start) -> name + for (v in name) { + if (v %in% names(prof@coef)) + newcall$start[[v]] <- as.numeric(prof@coef[[v]]) + else # then it must be in fixed + newcall$start[[v]] <- as.numeric(prof@call.orig$fixed[v]) + } newobj <- do.call("mle2", newcall) if(class(object) == "fitsad") return (new("fitsad", newobj, sad=object@sad, distr=object@distr, trunc=object@trunc)) diff --git a/man/trueLL.Rd b/man/trueLL.Rd index 177e45d..b90299c 100644 --- a/man/trueLL.Rd +++ b/man/trueLL.Rd @@ -1,76 +1,73 @@ \name{trueLL-methods} - \docType{methods} - +\alias{trueLL} \alias{trueLL-methods} -\alias{trueLL,fitsad,ANY,missing,missing,numeric-method} -\alias{trueLL,numeric,character,list,ANY,numeric-method} +\alias{trueLL,fitsad,missing,missing,missing,ANY,ANY-method} +\alias{trueLL,fitrad,missing,missing,missing,ANY,ANY-method} +\alias{trueLL,numeric,character,list,ANY,ANY,ANY-method} +\alias{trueLL,histogram,character,list,ANY,missing,missing-method} \title{True likelihood for continuous variables} \description{ - Calculates the correct likelihood for independent observations of a + Calculates the corrected likelihood for independent observations of a continuous variable that follows a given (truncated) density function, - given the measurement precision. + given a measurement precision. } \section{Methods}{ \describe{ - - \item{\code{signature(x = "fitsad", dens = "ANY", coef = "missing", trunc = "missing", dec.places = "numeric")}}{ - (log) likelihood value of an abundance distribution model \code{x} fitted with + \item{\code{signature(object = "fitsad", dist = "missing", coef = "missing", + trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{ + log-likelihood value of an abundance distribution model \code{object} fitted with function \code{fitsad}. } - - \item{\code{signature(x = "numeric", dens = "character", coef = "list", - trunc = "ANY", dec.places = "numeric")}}{ - (log) likelihood value of a vector of abundances \code{x}, fitted to a - continuous distribution named by \code{dens} with parameters defined in \code{coef}. + \item{\code{signature(object = "fitrad", dist = "missing", coef = "missing", + trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{ + log-likelihood value of an abundance distribution model \code{object} fitted with + function \code{fitrad}. + } + \item{\code{signature(x = "numeric", dist = "character", coef = "list", + trunc = "ANY", dec.places = "ANY", breaks="ANY", \dots)}}{ + log-likelihood value of a vector of abundances \code{object}, fitted to a + continuous distribution named by \code{dist} with parameters defined in \code{coef}. + } + \item{\code{signature(object = "histogram", dist = "character", coef = "list", + trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{ + log-likelihood value of the abundances given by histogram \code{object}, fitted to a + continuous distribution named by \code{dist} with parameters defined in \code{coef}. } }} \arguments{ - \item{x}{ - vector of observed values or an object of \code{\link{fitsad-class}} + \item{object}{ + vector or histogram of observed values, or an object of \code{\link{fitsad-class}} + or \code{\link{fitrad-class}} } - \item{dens}{ + \item{dist}{ character; root name of the continuous density distribution of the variable - e.g., \kbd{lnorm} for the lognormal distribution; \kbd{gamma} for the gamma distribution. } \item{coef}{named list; values of the coefficients of the continuous density distribution, in the same order and with the same names as in - the probability function defined by the argument \code{dens}. + the probability function defined by the argument \code{dist}. } - \item{trunc}{ real number \code{trunc <= min(x)}; value from which the density distribution is truncated. Currently only lower-tail truncation (or right-truncation) is implemented. If this argument is omitted the whole distribution is used (default). } - \item{dec.places}{ positive integer; number of decimal places used in the measurement of the observed values. Observed values will be rounded to this number of decimals. This argument defines the measurement precision, see details. + If neither \code{dec.places} nor \code{breaks} is given, this defaults to zero. } - \item{breaks}{ - if \code{counts} is provided a numeric vector giving the breakpoints - between count classes. Else, any valid object to be passed to the - argument \code{breaks} of the function \code{\link{hist}}. - } - - \item{counts}{ - integer vector; number of values in \code{x} in each class interval - defined by \code{breaks}. - } - - \item{log}{ - logical; convert probabilities to logs and sum them to get - log-likelihoods? The log-likelihood value is returned if \kbd{TRUE} and the likelihood - value if \kbd{FALSE} (quickly tends to zero as number of observation increases). + A numeric vector giving the breakpoints between count classes. Either + \code{dec.places} or \code{breaks} can be provided, but not both. } \item{\dots}{ named arguments to be passed to the probability function defined by the argument \code{dens}. @@ -118,7 +115,13 @@ } \author{ - Paulo I. Prado \email{prado@ib.usp.br} + Paulo I. Prado \email{prado@ib.usp.br} and Andre Chalom +} + +\note{ + WARNING: this function is extremely sensitive to the interval breaks + provided (or to the decimal.places), which may result in surprising results. + Use with great caution. } \seealso{ @@ -135,20 +138,18 @@ logLik(fitdistr(x, "lognormal")) ## Which is the sum of log of densities sum( dlnorm(x, meanlog=mean(log(x)), sdlog=sd(log(x)), log=TRUE) ) ## Correct log-likelihood - trueLL(x, "lnorm", coef=list(meanlog=mean(log(x)), sdlog=sd(log(x))), dec.places=1) +# Alternative invocation +trueLL(fitlnorm(x)) ## Data in classes xoc <- octav(x) xc <- as.numeric(as.character(xoc$octave)) xb <- 2^(c(min(xc)-1, xc)) xh <- hist(x, breaks=xb, plot=FALSE) -xll <- trueLL(x, dens="lnorm", breaks = xb, counts = xoc$Freq, - coef = list(meanlog=mean(log(x)), sd=sd(log(x)))) +xll <- trueLL(xh, dist="lnorm", coef = list(meanlog=mean(log(x)), sd=sd(log(x)))) xp <- diff(plnorm(xh$breaks, mean(log(x)), sd(log(x)))) xll2 <- sum( rep(log(xp), xh$counts)) all.equal(xll, xll2) # should be TRUE } - \keyword{methods} -