From d9fe0541a5a4e18c8e3667850dfd2802157abbba Mon Sep 17 00:00:00 2001 From: kellijohnson-NOAA Date: Wed, 1 Mar 2023 07:21:00 -0800 Subject: [PATCH] Fix(getLength): Stops if units other than CM or MM Thanks to @brianlangseth-NOAA for mentioning this problem when units were not in CM or MM. Before they were blindly changed. Now, there is a stop message and users must fix all rows before they can move on. Close #83 --- R/convertlength_skate.R | 14 ++- R/getLength.R | 196 +++++++++++++++++++++++++--------------- 2 files changed, 134 insertions(+), 76 deletions(-) diff --git a/R/convertlength_skate.R b/R/convertlength_skate.R index 48a9f21..d240394 100644 --- a/R/convertlength_skate.R +++ b/R/convertlength_skate.R @@ -22,8 +22,18 @@ #' convertlength_skate <- function(Pdata, returntype = c("all", "estimated")) { matchcol <- function(data) { - apply(data[, c("SEX", "FISH_LENGTH_TYPE")], MARGIN = 1, - FUN = paste, collapse = "_") + var_sex <- grep("SEX", colnames(data), value = TRUE)[1] + var_fish_length_type <- grep( + "FISH_LENGTH_TYPE", + colnames(data), + value = TRUE + )[1] + apply( + data[, c(var_sex, var_fish_length_type)], + MARGIN = 1, + FUN = paste, + collapse = "_" + ) } returntype <- match.arg(returntype, several.ok = FALSE) diff --git a/R/getLength.R b/R/getLength.R index b43cb7d..0ae2108 100644 --- a/R/getLength.R +++ b/R/getLength.R @@ -12,55 +12,90 @@ #' @return A vector of lengths in millimeters. Values of `NA` indicate that #' the length should not be used in down-stream calculations. #' -getLength <- function(Pdata, verbose = FALSE, keep) { +getLength <- function(Pdata, verbose = TRUE, keep) { - #### Initial checks - # Can only accommodate good types - goodtypes <- c("", "A", "D", "F", "R", "S", "T", "U", NA) - if (any(!Pdata$FISH_LENGTH_TYPE %in% goodtypes)) { - stop("cleanPacFIN can only accommodate the following FISH_LENGTH_TYPEs:\n", - sprintf("'%s' ", goodtypes), - "\nPlease contact the package maintainer to add additional types.") + # Initial checks + # Early return + if (all(is.na(Pdata[["FISH_LENGTH"]]))) { + if (verbose) { + cli::cli_alert_success("No lengths were found, moving on.") + } + return(rep(NA, NROW(Pdata))) + } + # Stop + data_with_length <- dplyr::filter( + .data = Pdata, + !is.na(FISH_LENGTH), + !(FISH_LENGTH_UNITS %in% c("MM", "CM")) + ) + if (NROW(data_with_length)) { + cli::cli_abort( + glue::glue( + "FISH_LENGTH_UNITS contains units other than 'CM' or 'MM' for fish", + " with lengths, please assign a unit like 'CM' or 'MM' to each row." + ), + wrap = TRUE, + call = NULL + ) } # Find columns - col.spid <- grep("SPID|PACFIN_SPECIES_CODE", colnames(Pdata), value = TRUE)[1] - if (length(col.spid) != 1) { - stop("Species ID column not found in Pdata.") - } - col.state <- grep("SOURCE_AGID|AGENCY_CODE", colnames(Pdata), value = TRUE)[1] - if (length(col.state) != 1) { - stop("State ID column not found in Pdata.") + var_spid <- grep( + pattern = "^SPID$|PACFIN_SPECIES_CODE", + x = colnames(Pdata), + value = TRUE + )[1] + var_state <- grep("SOURCE_AGID|AGENCY_CODE", colnames(Pdata), value = TRUE)[1] + var_fish_length_type <- grep( + pattern = "^FISH_LENGTH_TYPE$|^FISH_LENGTH_TYPE_CODE", + x = colnames(Pdata), + value = TRUE + ) + + # Can only accommodate good types + good_types <- c("", "A", "D", "F", "R", "S", "T", "U", NA) + good_types_string <- glue::glue_collapse(sQuote(good_types), sep = ", ") + if (any(!Pdata[[var_fish_length_type]] %in% good_types)) { + cli::cli_abort( + glue::glue( + "getLength can only accommodate the following FISH_LENGTH_TYPEs, + please contact the package maintainer to add additional types: + {good_types_string} + " + ), + call = NULL + ) } # Check for "F" FISH_LENGTH_TYPE from California for spiny dogfish, a hack that # will eventually be removed (todo). check.calt <- which( - Pdata[[col.spid]] == "DSRK" & - Pdata[[col.state]] == "C" & - Pdata[["FISH_LENGTH_TYPE"]] == "F" - ) + Pdata[[var_spid]] == "DSRK" & + Pdata[[var_state]] == "C" & + Pdata[[var_fish_length_type]] == "F" + ) if (length(check.calt) > 0) { message("Changing ", length(check.calt), " CA FISH_LENGTH_TYPE == 'F' to 'T'.", " Vlada is working on getting these entries fixed in PacFIN.") - Pdata[check.calt, "FISH_LENGTH_TYPE"] <- "T" + Pdata[check.calt, var_fish_length_type] <- "T" } rm(check.calt) # Move FISH_LENGTH to FORK_LENGTH if FORK_LENGTH is NA and type is F # for downstream code to work + Pdata[, "FORK_LENGTH"] <- ifelse( - is.na(Pdata[, "FORK_LENGTH"]) & Pdata[, "FISH_LENGTH_TYPE"] == "F", - Pdata[, "FISH_LENGTH"], - Pdata[, "FORK_LENGTH"] - ) + is.na(Pdata[["FORK_LENGTH"]]) & Pdata[[var_fish_length_type]] == "F", + yes = Pdata[, "FISH_LENGTH"], + no = Pdata[, "FORK_LENGTH"] + ) - #### Species-specific code + # Species-specific code # Convert FISH_LENGTH from disk width to length width2length <- convertlength_skate(Pdata, returntype = "estimated") # Spiny dogfish (Squalus suckleyi; DSRK) - check.dogfish <- Pdata[[col.spid]] == "DSRK" & !is.na(Pdata[["FORK_LENGTH"]]) + check.dogfish <- Pdata[[var_spid]] == "DSRK" & !is.na(Pdata[["FORK_LENGTH"]]) if (sum(check.dogfish) > 0 & verbose) { message(sum(check.dogfish), " fork lengths were converted to total lengths using\n", "Tribuzio and Kruse (2012).") @@ -76,99 +111,112 @@ getLength <- function(Pdata, verbose = FALSE, keep) { tolower(Pdata[, "FISH_LENGTH_UNITS"]) == "cm" & Pdata[, "FISH_LENGTH"] > 90, "MM", Pdata[, "FISH_LENGTH_UNITS"] - ) + ) } - #### Make "length" column in mm + # Make "length" column in mm # Start with fork lengths for those that are available and if "F" in keep - Pdata$length <- ifelse(Pdata$FISH_LENGTH_TYPE %in% c("", "A", "F", NA), - Pdata$FORK_LENGTH, NA) + Pdata$length <- ifelse( + Pdata[[var_fish_length_type]] %in% c("", "A", "F", NA), + Pdata$FORK_LENGTH, + NA + ) # Work with skate data # A is disc width # R is inter-spiracle width for skates (used by WDFW) if (all(Pdata$SPID %in% c("LSKT", "BSKT"))) { Pdata$length <- ifelse( - "A" %in% keep & Pdata$FISH_LENGTH_TYPE == "A", + "A" %in% keep & Pdata[[var_fish_length_type]] == "A", width2length, - Pdata$length) + Pdata$length + ) } Pdata$length <- ifelse( - "R" %in% keep & Pdata$FISH_LENGTH_TYPE == "R", + "R" %in% keep & Pdata[[var_fish_length_type]] == "R", width2length, - Pdata$length) + Pdata$length + ) # Work with dorsal length if ( verbose & "D" %in% keep & - length(grep("D", Pdata[["FISH_LENGTH_TYPE"]]) > 0) + length(grep("D", Pdata[[var_fish_length_type]]) > 0) ) { message("Using dorsal lengths, are you sure you want dorsal lengths?") } Pdata$length <- ifelse( - "D" %in% keep & Pdata$FISH_LENGTH_TYPE == "D" & + "D" %in% keep & Pdata[[var_fish_length_type]] == "D" & Pdata$FORK_LENGTH != Pdata$FISH_LENGTH, - Pdata$FORK_LENGTH, Pdata$length) + Pdata$FORK_LENGTH, Pdata$length + ) # Work with standard length measurements and unknown type Pdata$length <- ifelse( - "S" %in% keep & Pdata$FISH_LENGTH_TYPE == "S", + "S" %in% keep & Pdata[[var_fish_length_type]] == "S", Pdata$FISH_LENGTH, - Pdata$length) + Pdata$length + ) Pdata$length <- ifelse( - "T" %in% keep & Pdata$FISH_LENGTH_TYPE == "T", + "T" %in% keep & Pdata[[var_fish_length_type]] == "T", Pdata$FISH_LENGTH, - Pdata$length) + Pdata$length + ) Pdata$length <- ifelse( - "U" %in% keep & Pdata$FISH_LENGTH_TYPE == "U", + "U" %in% keep & Pdata[[var_fish_length_type]] == "U", ifelse(is.na(Pdata$FORK_LENGTH), Pdata$FISH_LENGTH, Pdata$FORK_LENGTH), - Pdata$length) + Pdata$length + ) Pdata$length <- ifelse( - "" %in% keep & Pdata$FISH_LENGTH_TYPE == "", + "" %in% keep & Pdata[[var_fish_length_type]] == "", Pdata$FISH_LENGTH, - Pdata$length) + Pdata$length + ) Pdata$length <- ifelse( - NA %in% keep & is.na(Pdata$FISH_LENGTH_TYPE), + NA %in% keep & is.na(Pdata[[var_fish_length_type]]), ifelse(is.na(Pdata$FORK_LENGTH), Pdata$FISH_LENGTH, Pdata$FORK_LENGTH), - Pdata$length) + Pdata$length + ) # A double check that lengths for methods not in keep are NA Pdata$length <- ifelse( - Pdata$FISH_LENGTH_TYPE %in% keep, + Pdata[[var_fish_length_type]] %in% keep, Pdata$length, - NA) + NA + ) # Assign all fish of length zero to NA - if (verbose & any(Pdata[["length"]] == 0, na.rm = TRUE)) { - message(sum(Pdata[["length"]] == 0), - " fish had a length equal to 0 (mm) and were assigned a length of NA.") - } - Pdata$length[Pdata$length == 0] <- NA + i_length_0 <- Pdata[["length"]] == 0 + Pdata$length[i_length_0] <- NA # Ensure everything is in mm - if ("FISH_LENGTH_UNITS" %in% colnames(Pdata)) { - Pdata$length <- ifelse( - tolower(Pdata[, "FISH_LENGTH_UNITS"]) == "cm", - Pdata[, "length"] * 10, - Pdata[, "length"]) - } else { - if (verbose) message("Length assumed to be in mm.") - } + # As of 2023-02-28 there are only two valid units in PacFIN for length + # MM and CM, everything else is NULL or UNK + Pdata$length <- ifelse( + test = Pdata[, "FISH_LENGTH_UNITS"] == "CM", + yes = Pdata[, "length"] * 10, + no = Pdata[, "length"] + ) + if (verbose) { - message("\nThe following length types were kept in the data:") - utils::capture.output(type = "message", - table(output = Pdata[ - !is.na(Pdata[["length"]]), - grep("LENGTH_TYPE", colnames(Pdata)) - ]) - ) - message( - "Lengths range from ", - paste(collapse = " to ", range(Pdata[["length"]], na.rm = TRUE)), - " (mm)." - ) + cli::cli_alert_info(paste( + "The following length types were kept in the data:", + sQuote(unique(Pdata[!is.na(Pdata[["length"]]), var_fish_length_type])) + )) + cli::cli_alert_info(glue::glue( + "Lengths ranged from {min(Pdata[['length']], na.rm = TRUE)}--", + "{max(Pdata[['length']], na.rm = TRUE)} (mm)" + )) + cli::cli_alert_info(glue::glue( + sum(Pdata[["length"]] == 0, na.rm = TRUE), + " fish had lengths of 0 (mm) and were changed to NAs" + )) + cli::cli_alert_info(glue::glue( + "{sum(Pdata[['FISH_LENGTH_UNITS']] == 'CM', na.rm = TRUE)}", + " lengths (cm) and were converted to mm" + )) } return(Pdata[["length"]]) }