Merge pull request #51 from andrechalom/trueLL

True LL
piLaboratory · May 29, 2015 · b84b078 · b84b078
2 parents dd0bba8 + 0ed5061
commit b84b078
Show file tree

Hide file tree

Showing 6 changed files with 100 additions and 104 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -16,7 +16,7 @@ export(fitbs, fitgamma, fitgeom, fitgs, fitlnorm, fitls, fitmand, fitmzsm,
 ## General fitting and ploting  functions
 export(fitrad, fitsad, octav, octavpred, radpred, pprad, ppsad, qqrad, qqsad, rad)
 ## Accessory functions
-export(plotprofmle, pred.logser, dtrunc, ptrunc, qtrunc, rsad, trueLL, updatesad, updaterad)
+export(plotprofmle, pred.logser, dtrunc, ptrunc, qtrunc, rsad, updatesad, updaterad)
 ## Explicit classes and methods export
 exportClasses(rad, octav, fitsad, fitrad)
 exportMethods(plot, points, lines, octavpred, radpred, qqsad, qqrad, trueLL)

diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,7 @@ error handling and large performance gains.
 - New function *updatesad* to update a fitsad with better fit from profile.
 - *radpred* now uses exact solutions for extreme data points.
 - Reimplemented the *AIC* and *AICc* functions for better integration with fitsad and fitrad classes.
+- Reworked the way in which *trueLL* is implemented
 
 ### Enhancements
 - Improved grammar and clearer text in some manual pages and vignette.

diff --git a/R/sads-classes.R b/R/sads-classes.R
@@ -21,4 +21,6 @@ setClass("rad", representation("data.frame"), validity = function(object) {
 
 setClass("fitsad", representation("mle2", sad="character", distr="character", trunc="numeric"))
 setClass("fitrad", representation("mle2", rad="character", distr="character", trunc="numeric", rad.tab="rad"))
+# "Promoting" histogram to S4 in order to declare it as a valid object for trueLL methods
+setClass("histogram", representation("list")) 
 #setClass("fitsadlist", representation("list"))
diff --git a/R/sads-methods.R b/R/sads-methods.R
@@ -748,37 +748,3 @@ setMethod("pprad",
               pprad(x=y, rad=rad, coef=coef, trunc=trunc, plot=plot, line=line, ...)
           }
           )
-
-## Generic function and methods for trueLL ##
-setGeneric("trueLL",
-           def = function(x, dens, coef, trunc, dec.places = 0, log = TRUE, ...) standardGeneric("trueLL")
-           )
-
-## If x is numeric arguments dens, coef and decimal places should be provided
-setMethod("trueLL",
-          signature(x="numeric", dens="character", coef="list", dec.places = "numeric", log="ANY"),
-          function(x, dens, coef, trunc, dec.places = 0, log = TRUE, ...){
-              dots <- list(...)
-              D <- 10^(-dec.places)/2
-              x <- round(x, dec.places)
-              if(missing(trunc)||is.na(trunc)){
-                  cdf <- get(paste("p", dens, sep=""), mode = "function")
-                  probs <- do.call(cdf, c(list(q = x+D), as.list(coef), dots)) - do.call(cdf, c(list(q = x-D), as.list(coef), dots))
-              }
-              else{
-                  probs <- do.call(ptrunc, c(list(dens, q = x+D, coef = as.list(coef), trunc=trunc), dots))-
-                      do.call(ptrunc, c(list(dens, q = x-D, coef = as.list(coef), trunc=trunc), dots))
-              }
-              if(log) sum(log(probs)) else prod(probs)
-          }
-          )
-
-## If x is of class fitsad only argument dec.places should be provided
-setMethod("trueLL",
-          signature(x="fitsad", coef="missing", trunc="missing", dec.places = "numeric", log="ANY"),
-          function(x, dens, coef, trunc, dec.places, log, ...){
-              if(x@distr != "C") stop("trueLL only makes sense for continuous distributions models")
-              trueLL(x = x@data$x, dens = x@sad, coef = as.list(x@coef),
-                               trunc=x@trunc, dec.places = dec.places, log, ...)
-          }
-          )
diff --git a/R/trueLL.R b/R/trueLL.R
@@ -1,28 +1,54 @@
-trueLL <- function(x, dens, coef, trunc, dec.places = 0, breaks, counts, log = TRUE, ...){
-    dots <- list(...)
-    if(missing(breaks)){
-        D <- 10^(-dec.places)/2
-        x <- round(x, dec.places)
-        if(missing(trunc)){
-            cdf <- get(paste("p", dens, sep=""), mode = "function")
-            probs <- do.call(cdf, c(list(q = x+D), as.list(coef), dots)) - do.call(cdf, c(list(q = x-D), as.list(coef), dots))
-        }
-        else{
-            probs <- do.call(ptrunc, c(list(dens, q = x+D, coef = as.list(coef), trunc=trunc), dots))-
-                do.call(ptrunc, c(list(dens, q = x-D, coef = as.list(coef), trunc=trunc), dots))
-        }
-    }
-    else{
-        h1 <- hist(x, breaks=breaks, plot=FALSE)
-        if(missing(trunc)){
-            cdf <- get(paste("p", dens, sep=""), mode = "function")
-            probs <- diff(do.call(cdf, c(list(q = h1$breaks), as.list(coef), dots)))
-        }
-        else{
-            probs <- diff(do.call(ptrunc, c(list(dens, q = h1$breaks, coef = as.list(coef), trunc=trunc), dots)))
-        }
-        if(missing(counts)) counts <- h1$counts
-        probs <- rep(probs, counts)
-    }
-    if(log) sum(log(probs)) else prod(probs)
-}
+# trueLL generic and methods
+setGeneric("trueLL", 
+		   def=function(object, dist, coef, trunc, dec.places=0, breaks, ...)
+		   standardGeneric("trueLL")
+		   )
+setMethod("trueLL", signature(object="fitsad", dist="missing", coef="missing", trunc="missing", 
+							  dec.places="ANY", breaks="ANY"),
+		  function(object, dec.places=0, breaks, ...){
+			  if(missing(breaks))
+					  return(trueLL(object=object@data$x, dist=object@sad, coef=as.list(bbmle::coef(object)),
+							 trunc=object@trunc, dec.places=dec.places, ...))
+			  else 
+					  return(trueLL(object=object@data$x, dist=object@sad, coef=as.list(bbmle::coef(object)),
+							 trunc=object@trunc, breaks=breaks, ...))
+		  })
+setMethod("trueLL", signature(object="fitrad", dist="missing", coef="missing", trunc="missing", 
+							  dec.places="ANY", breaks="ANY"),
+		  function(object, dec.places=0, breaks, ...){
+			  if(missing(breaks))
+				  return(trueLL(object=object@rad.tab$abund, dist=object@rad, coef=as.list(bbmle::coef(object)),
+						 trunc=object@trunc, dec.places=dec.places, breaks=breaks, ...))
+			  else
+				  return(trueLL(object=object@rad.tab$abund, dist=object@rad, coef=as.list(bbmle::coef(object)),
+						 trunc=object@trunc, breaks=breaks, ...))
+		  })
+# object is numeric
+setMethod("trueLL", signature(object="numeric", dist="character", coef="list", trunc="ANY", 
+							  dec.places="ANY", breaks="ANY"),
+		  function(object, dist, coef, trunc, dec.places=0, breaks, ...){
+			  dots <- list(...)
+			  if(missing(breaks)){
+				  D <- 10^(-dec.places)/2
+				  breaks <- sort(unique(c(object - D, object + D)))
+			  }
+			  h1 <- hist(object, breaks=breaks, plot=FALSE)
+			  if(missing(trunc)) trunc <- NaN
+			  trueLL(object=h1, dist=dist, coef=coef, trunc=trunc)
+		  })
+# workhorse method, all other methods redirect here
+setMethod("trueLL", signature(object="histogram", dist="character", coef="list", trunc="ANY", 
+							  dec.places="missing", breaks="missing"),
+		  function(object, dist, coef, trunc, ...){
+			  dots <- list(...)
+			  if(missing(trunc)) trunc <- NaN
+			  if(is.nan(trunc)){
+				  cdf <- get(paste("p", dist, sep=""), mode = "function")
+				  probs <- diff(do.call(cdf, c(list(q = object$breaks), as.list(coef), dots)))
+			  }
+			  else{
+				  probs <- diff(do.call(ptrunc, c(list(dist, q = object$breaks, coef = as.list(coef), trunc=trunc), dots)))
+			  }
+			  probs <- rep(probs, object$counts)
+			  return(sum(log(probs)))
+		  })
diff --git a/man/trueLL.Rd b/man/trueLL.Rd
@@ -1,76 +1,73 @@
 \name{trueLL-methods}
-
 \docType{methods}
-
+\alias{trueLL}
 \alias{trueLL-methods}
-\alias{trueLL,fitsad,ANY,missing,missing,numeric-method}
-\alias{trueLL,numeric,character,list,ANY,numeric-method}
+\alias{trueLL,fitsad,missing,missing,missing,ANY,ANY-method}
+\alias{trueLL,fitrad,missing,missing,missing,ANY,ANY-method}
+\alias{trueLL,numeric,character,list,ANY,ANY,ANY-method}
+\alias{trueLL,histogram,character,list,ANY,missing,missing-method}
 
 \title{True likelihood for continuous variables}
 
 \description{
-  Calculates the correct likelihood for independent observations of a
+  Calculates the corrected likelihood for independent observations of a
   continuous variable that follows a given (truncated) density function,
-  given the measurement precision.
+  given a measurement precision.
 }
 
 \section{Methods}{
   \describe{
-
-    \item{\code{signature(x = "fitsad", dens = "ANY", coef = "missing", trunc = "missing", dec.places = "numeric")}}{
-      (log) likelihood value of an abundance distribution model \code{x} fitted with
+    \item{\code{signature(object = "fitsad", dist = "missing", coef = "missing", 
+			trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{
+      log-likelihood value of an abundance distribution model \code{object} fitted with
       function \code{fitsad}.
     }
-
-    \item{\code{signature(x = "numeric", dens = "character", coef = "list",
-	trunc = "ANY", dec.places = "numeric")}}{
-      (log) likelihood value of a vector of abundances \code{x}, fitted to a
-      continuous distribution named by \code{dens} with parameters defined in \code{coef}.
+    \item{\code{signature(object = "fitrad", dist = "missing", coef = "missing", 
+			trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{
+      log-likelihood value of an abundance distribution model \code{object} fitted with
+      function \code{fitrad}.
+    }
+    \item{\code{signature(x = "numeric", dist = "character", coef = "list",
+	trunc = "ANY", dec.places = "ANY", breaks="ANY", \dots)}}{
+      log-likelihood value of a vector of abundances \code{object}, fitted to a
+      continuous distribution named by \code{dist} with parameters defined in \code{coef}.
+    }
+    \item{\code{signature(object = "histogram", dist = "character", coef = "list", 
+			trunc = "missing", dec.places = "ANY", breaks="ANY", \dots)}}{
+      log-likelihood value of the abundances given by histogram \code{object}, fitted to a
+      continuous distribution named by \code{dist} with parameters defined in \code{coef}.
     }
 }}
 
 \arguments{
-  \item{x}{
-    vector of observed values or an object of \code{\link{fitsad-class}}
+  \item{object}{
+    vector or histogram of observed values, or an object of \code{\link{fitsad-class}} 
+	or \code{\link{fitrad-class}}
   }
-  \item{dens}{
+  \item{dist}{
     character; root name of the continuous density distribution of the variable -
     e.g., \kbd{lnorm} for the lognormal distribution; \kbd{gamma} for the gamma
     distribution.
   }
   \item{coef}{named list; values of the coefficients of the continuous
     density distribution, in the same order and with the same names as in
-    the probability function defined by the argument \code{dens}.
+    the probability function defined by the argument \code{dist}.
   }
-
   \item{trunc}{
     real number \code{trunc <= min(x)}; value from which the density distribution is
     truncated. Currently only lower-tail truncation (or right-truncation)
     is implemented. If this argument is omitted the whole
     distribution is used (default).
   }
-
   \item{dec.places}{
     positive integer; number of decimal places used in the measurement of
     the observed values. Observed values will be rounded to this number of
     decimals. This argument defines the measurement precision, see details. 
+	If neither \code{dec.places} nor \code{breaks} is given, this defaults to zero.
   }
-
   \item{breaks}{
-    if \code{counts} is provided a numeric vector giving the breakpoints
-    between count classes. Else, any valid object to be passed to the
-    argument \code{breaks} of the function \code{\link{hist}}.
-  }
-
-  \item{counts}{
-    integer vector; number of values in \code{x} in each class interval
-    defined by \code{breaks}.
-  }
-
-  \item{log}{
-    logical; convert probabilities to logs and sum them to get
-    log-likelihoods? The log-likelihood value is returned if \kbd{TRUE} and the likelihood
-    value if \kbd{FALSE} (quickly tends to zero as number of observation increases).
+    A numeric vector giving the breakpoints between count classes. Either 
+	\code{dec.places} or \code{breaks} can be provided, but not both.
   }
   \item{\dots}{
     named arguments to be passed to the probability function defined by the argument \code{dens}.
@@ -118,7 +115,13 @@
 }
 
 \author{
-  Paulo I. Prado \email{prado@ib.usp.br}
+  Paulo I. Prado \email{prado@ib.usp.br} and Andre Chalom
+}
+
+\note{
+	WARNING: this function is extremely sensitive to the interval breaks
+	provided (or to the decimal.places), which may result in surprising results.
+	Use with great caution.
 }
 
 \seealso{
@@ -135,20 +138,18 @@ logLik(fitdistr(x, "lognormal"))
 ## Which is the sum of log of densities
 sum( dlnorm(x, meanlog=mean(log(x)), sdlog=sd(log(x)), log=TRUE) )
 ## Correct log-likelihood
-
 trueLL(x, "lnorm", coef=list(meanlog=mean(log(x)), sdlog=sd(log(x))), dec.places=1)
+# Alternative invocation
+trueLL(fitlnorm(x))
 
 ## Data in classes
 xoc <- octav(x)
 xc <- as.numeric(as.character(xoc$octave))
 xb <- 2^(c(min(xc)-1, xc))
 xh <- hist(x, breaks=xb, plot=FALSE)
-xll <- trueLL(x, dens="lnorm", breaks = xb, counts = xoc$Freq,
-   coef = list(meanlog=mean(log(x)), sd=sd(log(x))))
+xll <- trueLL(xh, dist="lnorm", coef = list(meanlog=mean(log(x)), sd=sd(log(x))))
 xp <- diff(plnorm(xh$breaks, mean(log(x)), sd(log(x))))
 xll2 <- sum( rep(log(xp), xh$counts))
 all.equal(xll, xll2) # should be TRUE
 }
-
 \keyword{methods}
-