Skip to content

Commit

Permalink
Updates for additional vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
dlorenz-usgs committed Jul 20, 2015
1 parent 9c23ddb commit 37edc35
Show file tree
Hide file tree
Showing 86 changed files with 4,750 additions and 238 deletions.
14 changes: 9 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
Package: rloadest
Type: Package
Title: River Load Estimation
Version: 0.4.0
Date: 2015-02-27
Version: 0.4.2
Date: 2015-07-20
Author: Dave Lorenz, Rob Runkel, Laura De Cicco
Maintainer: Dave Lorenz <[email protected]>
Description: Collection of functions to make constituent load estimations
based on the LOADEST program.
License: CC0
Depends:
R (>= 3.0.0),
smwrBase,
smwrGraphs,
smwrStats,
smwrQW
Imports:
stats
lubridate,
stats,
segmented
Suggests:
smwrBase,
dataRetrieval
dataRetrieval,
EGRET,
survival
LazyLoad: yes
LazyData: yes
9 changes: 8 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2 (4.0.2): do not edit by hand
# Generated by roxygen2 (4.1.0): do not edit by hand

S3method(censoring,Surv)
S3method(coef,loadReg)
S3method(fitted,loadReg)
S3method(logLik,loadReg)
Expand All @@ -25,8 +26,14 @@ export(nashSutcliffe)
export(predConc)
export(predLoad)
export(resampleUVdata)
export(seg)
export(segLoadReg)
export(segment)
export(selBestModel)
export(selBestSubset)
import(lubridate)
import(segmented)
import(smwrBase)
import(smwrGraphs)
import(smwrQW)
import(smwrStats)
Expand Down
19 changes: 19 additions & 0 deletions R/Models.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' Models
#'
#' A dataset that describes the equivalent formula for each of the
#'predefined model numbers.
#' @name Models
#' @docType data
#' @usage Models
#' @format Data frame with 9 rows and 2 columns\cr
#'\tabular{lll}{
#'Name \tab Type \tab Description\cr
#'Number \tab integer \tab The predefined model number\cr
#'Formula \tab factor \tab The equivalent formula for the predefined model number\cr
#'}
#' @keywords datasets
#' @seealso \code{\link{model}}
#' @examples
#' data(Models)
#' print(Models)
NULL
32 changes: 32 additions & 0 deletions R/censoring.Surv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#' Describe Censoring
#'
#' Gets the type of censoring ("none," "left," "multiple") for an object.
#'
#'
#' @param x the object to get the type of censoring. For an object of class
#'"Surv," the type must be "interval."
#' @return A character string "none," "left," or "multiple" describing the type
#'of censoring present in the object.
#' @note This function is mostly used within other functions to determine the
#''best' technique to use for analysis.
#' @keywords censored attribute
#' @examples
#'\dontrun{
#'library(survival)
#'censoring(Surv(2.3, 2.3, type="interval2"))
#'}
#' @export
#' @method censoring Surv
censoring.Surv <- function(x) {
  # Only the "interval" representation carries the 3-column form
  # (start, end, status) that the checks below rely on.
  if(attr(x, "type") != "interval")
    stop("Only interval type censoring supported")
  # Drop missing observations before inspecting the status column
  x <- x[!is.na(x)]
  # Status mapping used here: all 1s means fully observed ("none");
  # any 0 or 3 means mixed censoring ("multiple"); otherwise left-censored.
  status <- x[, 3L]
  if(all(status == 1)) {
    return("none")
  }
  if(any(status %in% c(0, 3))) {
    return("multiple")
  }
  "left"
}

76 changes: 49 additions & 27 deletions R/loadReg.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
#'For un- or left-censored data, AMLE is used unless weights are specified in
#'the model, then MLE is used, through a call to \code{survreg}. For any other
#'censored data, MLE is used.\cr
#'See \code{\link{loadConvFactor}} for details about valid values for
#'\code{flow.units}, \code{con.units} and \code{load.units}.
#'
#'Typically, \code{loadReg} expects the response variable to have units of
#'concentration, mass per volume. For these models, See \code{\link{loadConvFactor}}
#'for details about valid values for \code{flow.units}, \code{conc.units} and
#'\code{load.units}. For some applications, like bed load estimation, the response
#'variable can have units of flux, mass per time. For these models, \code{conc.units}
#'can be expressed as any valid \code{load.units} per day. The rate must be expressed
#'in terms of "/d," "/dy," or "/day."
#'
#' @param formula a formula describing the regression model. See \bold{Details}.
#' @param data the data to search for the variables in \code{formula}.
Expand Down Expand Up @@ -44,14 +50,22 @@
#' flow = "FLOW", dates = "DATES", conc.units="mg/L",
#' station="Illinois River at Marseilles, Ill.")
#'print(app1.lr)
#'
#' @import smwrBase
#' @import smwrQW
#' @export
loadReg <- function(formula, data, subset, na.action, flow, dates,
flow.units="cfs", conc.units="", load.units="kg",
time.step="day", station="") {
## Coding history:
## 2013May31 DLLorenz Original Coding
## Function to check if conc.units are in terms of loads
## returns logical with attribute of loads if TRUE
ckLoad <- function(units) {
# check if rate is /d, /dy, or /day (actually any /d)
retval <- grepl("/d", units)
if(retval) { # strip off rate
attr(retval, "load") <- sub("/d.*", "", units)
}
return(retval)
}
##
## Trap model number specification
PredMod <- terms(formula, "model", data = data)
Expand Down Expand Up @@ -199,41 +213,49 @@ loadReg <- function(formula, data, subset, na.action, flow, dates,
}
Tdys <- min(Tdifs)
if(Tdys < 7)
warning("The minimum spacing between daily loads is ", Tdys,
warning("The minimum spacing between daily loads is ", signif(Tdys, 2L),
" days. The time between observations should be at least ",
" 7 days to avoid autocorrelation issues.")
} else { # Need unit checks too
Tdys <- difftime(Time, shiftData(Time), units="days")
Tdys <- min(Tdys, na.rm=TRUE)
if(Tdys < 7)
warning("The minimum spacing between daily loads is ", Tdys,
warning("The minimum spacing between observed loads is ", signif(Tdys, 2L),
" days. The time between observations should be at least ",
" 7 days to avoid autocorrelation issues.")
}
## OK, construct the concentration fit
if(class(Y) == "lcens") {
cfit <- censReg_AMLE.fit(Y, X, "lognormal")
fit1 <- censReg_AMLE.fit(Y, X[,1L, drop=FALSE], "lognormal")
} else {
cfit <- censReg_MLE.fit(Y, X, rep(1, length(Y)), "lognormal")
fit1 <- censReg_MLE.fit(Y, X[,1L, drop=FALSE],
rep(1, length(Y)), "lognormal")
}
cfit$LLR1 <- fit1$LLR
cfit$call <- call
cfit$terms <- Terms
cfit$na.action <- saved.na.action
cfit$na.message <- na.message
cfit$xlevels <- xlevels
class(cfit) <- "censReg"
## Not the load model fit
## Check on concentration units
## Checks on concentration units
if(conc.units == "") {
warning("Concentration units assumed to be mg/L")
conc.units <- "mg/L"
}
CF <- loadConvFactor(flow.units, conc.units, load.units)
Y <- Y * CF * Flow
load.only <- ckLoad(conc.units)
if(load.only) {
cfit <- NULL
## Prepare for load model fit
CF <- loadUnitConv(attr(load.only, "load"), load.units)
Y <- Y * CF
} else {
## OK, construct the concentration fit
if(class(Y) == "lcens") {
cfit <- censReg_AMLE.fit(Y, X, "lognormal")
fit1 <- censReg_AMLE.fit(Y, X[,1L, drop=FALSE], "lognormal")
} else {
cfit <- censReg_MLE.fit(Y, X, rep(1, length(Y)), "lognormal")
fit1 <- censReg_MLE.fit(Y, X[,1L, drop=FALSE],
rep(1, length(Y)), "lognormal")
}
cfit$LLR1 <- fit1$LLR
cfit$call <- call
cfit$terms <- Terms
cfit$na.action <- saved.na.action
cfit$na.message <- na.message
cfit$xlevels <- xlevels
class(cfit) <- "censReg"
## Now prepare the load model fit
CF <- loadConvFactor(flow.units, conc.units, load.units)
Y <- Y * CF * Flow
}
if(class(Y) == "lcens") {
lfit <- censReg_AMLE.fit(Y, X, "lognormal")
fit1 <- censReg_AMLE.fit(Y, X[,1L, drop=FALSE], "lognormal")
Expand Down
2 changes: 1 addition & 1 deletion R/plot.loadReg.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ plot.loadReg <- function(x, which='All', set.up=TRUE, span=1.0, ...) {
if(doPlot[4L])
corGram(dectime(x$Time), Resids)
## Add details of call on regression model to next plots
Mod <- format(x$lfit$call$formula)
Mod <- paste0(as.character(x$lfit$call$formula[2L]), " ~ model(", x$model.no, ")")
## 3rd plot, S-L
RSE <- rmse(x$lfit) # now the resid std. error
if(doPlot[3L]) {
Expand Down
11 changes: 5 additions & 6 deletions R/predConc.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@
#' station="Illinois River at Marseilles, Ill.")
#'predConc(app1.lr, app1.calib)
#' @useDynLib rloadest estlday
#' @import lubridate
#' @export
predConc <- function(fit, newdata, by="day",
allow.incomplete=FALSE, conf.int=0.95) {
## Coding history:
## 2013Jul18 DLLorenz Original Coding from predLoad
## 2013Dec05 DLLorenz Bug fix
## 2014Sep23 DLLorenz Missing check on newdata
##
## By options and other preliminary code
if(any(is.na(newdata)))
stop("newdata contains missing values, remove before prediction")
if(is.null(fit$cfit))
stop("model cannot predict concentration values")
if(nrow(newdata) > 176000L)
stop("newdata has too many rows, the size limit is 176000")
ByOpt <- c("unit", "day")
Expand Down Expand Up @@ -97,6 +94,8 @@ predConc <- function(fit, newdata, by="day",
} else {
model.inp <- setXLDat(newdata, flow, dates, Qadj, Tadj, model.no)
}
if(any(is.na(model.inp)))
stop("Prediction data contains missing values, remove before prediction")
## Construct the structure for aggregating the data
## Deal with byn == 0 directly in the last estimation section
checkDays <- list() # Create empty list for day checking
Expand Down
13 changes: 5 additions & 8 deletions R/predLoad.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
## 2014Sep23 DLLorenz Missing check on newdata
##
## By options and other preliminary code
if(any(is.na(newdata)))
stop("newdata contains missing values, remove before prediction")
if(nrow(newdata) > 176000L)
stop("newdata has too many rows, the size limit is 176000")
ByOpt <- c("unit", "day", "month", "water year", "calendar year", "total")
Expand Down Expand Up @@ -106,12 +104,9 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
newdata <- aggregate(newdata, list(time.step=gps), mean)
attr(newdata[[dates]], "tzone") <- ntz
} else { # Must be instantaneous
gps <- format(newdata[[dates]])
difdays <- range(newdata[[dates]])
difdays <- difftime(ceiling_date(difdays[2L], unit="day"),
floor_date(difdays[1L], unit="day"),
units="days")
gps.nday <- round(nrow(newdata)/as.numeric(difdays), 6L) # should get to near integer
gps <- format(newdata[[dates]], format="%Y-%m-%d")
numdays <- length(unique(gps)) # the number of days in the datset
gps.nday <- round(nrow(newdata)/as.numeric(numdays), 6L) # should get to near integer
if((gps.nday %% 1) != 0 && !(by %in% c("unit", "day")))
stop("newdata must have the same number of observations each day")
gps.nday <- as.integer(gps.nday)
Expand All @@ -124,6 +119,8 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
} else {
model.inp <- setXLDat(newdata, flow, dates, Qadj, Tadj, model.no)
}
if(any(is.na(model.inp)))
stop("Prediction data contains missing values, remove before prediction")
## Construct the structure for aggregating the data
## Deal with byn == 0 directly in the last estimation section
checkDays <- list() # Create empty list for day checking
Expand Down
Loading

0 comments on commit 37edc35

Please sign in to comment.