Skip to content

Commit

Permalink
Fix(getLength): Stops if units other than CM or MM
Browse files Browse the repository at this point in the history
Thanks to @brianlangseth-NOAA for mentioning this problem when units were not
in CM or MM. Before they were blindly changed. Now, there is a stop message and
users must fix all rows before they can move on.

Close #83
  • Loading branch information
kellijohnson-NOAA committed Mar 1, 2023
1 parent b63d8a6 commit d9fe054
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 76 deletions.
14 changes: 12 additions & 2 deletions R/convertlength_skate.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@
#'
convertlength_skate <- function(Pdata, returntype = c("all", "estimated")) {
matchcol <- function(data) {
apply(data[, c("SEX", "FISH_LENGTH_TYPE")], MARGIN = 1,
FUN = paste, collapse = "_")
var_sex <- grep("SEX", colnames(data), value = TRUE)[1]
var_fish_length_type <- grep(
"FISH_LENGTH_TYPE",
colnames(data),
value = TRUE
)[1]
apply(
data[, c(var_sex, var_fish_length_type)],
MARGIN = 1,
FUN = paste,
collapse = "_"
)
}
returntype <- match.arg(returntype, several.ok = FALSE)

Expand Down
196 changes: 122 additions & 74 deletions R/getLength.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,55 +12,90 @@
#' @return A vector of lengths in millimeters. Values of `NA` indicate that
#' the length should not be used in down-stream calculations.
#'
getLength <- function(Pdata, verbose = FALSE, keep) {
getLength <- function(Pdata, verbose = TRUE, keep) {

#### Initial checks
# Can only accommodate good types
goodtypes <- c("", "A", "D", "F", "R", "S", "T", "U", NA)
if (any(!Pdata$FISH_LENGTH_TYPE %in% goodtypes)) {
stop("cleanPacFIN can only accommodate the following FISH_LENGTH_TYPEs:\n",
sprintf("'%s' ", goodtypes),
"\nPlease contact the package maintainer to add additional types.")
# Initial checks
# Early return
if (all(is.na(Pdata[["FISH_LENGTH"]]))) {
if (verbose) {
cli::cli_alert_success("No lengths were found, moving on.")
}
return(rep(NA, NROW(Pdata)))
}
# Stop
data_with_length <- dplyr::filter(
.data = Pdata,
!is.na(FISH_LENGTH),
!(FISH_LENGTH_UNITS %in% c("MM", "CM"))
)
if (NROW(data_with_length)) {
cli::cli_abort(
glue::glue(
"FISH_LENGTH_UNITS contains units other than 'CM' or 'MM' for fish",
" with lengths, please assign a unit like 'CM' or 'MM' to each row."
),
wrap = TRUE,
call = NULL
)
}

# Find columns
col.spid <- grep("SPID|PACFIN_SPECIES_CODE", colnames(Pdata), value = TRUE)[1]
if (length(col.spid) != 1) {
stop("Species ID column not found in Pdata.")
}
col.state <- grep("SOURCE_AGID|AGENCY_CODE", colnames(Pdata), value = TRUE)[1]
if (length(col.state) != 1) {
stop("State ID column not found in Pdata.")
var_spid <- grep(
pattern = "^SPID$|PACFIN_SPECIES_CODE",
x = colnames(Pdata),
value = TRUE
)[1]
var_state <- grep("SOURCE_AGID|AGENCY_CODE", colnames(Pdata), value = TRUE)[1]
var_fish_length_type <- grep(
pattern = "^FISH_LENGTH_TYPE$|^FISH_LENGTH_TYPE_CODE",
x = colnames(Pdata),
value = TRUE
)

# Can only accommodate good types
good_types <- c("", "A", "D", "F", "R", "S", "T", "U", NA)
good_types_string <- glue::glue_collapse(sQuote(good_types), sep = ", ")
if (any(!Pdata[[var_fish_length_type]] %in% good_types)) {
cli::cli_abort(
glue::glue(
"getLength can only accommodate the following FISH_LENGTH_TYPEs,
please contact the package maintainer to add additional types:
{good_types_string}
"
),
call = NULL
)
}

# Check for "F" FISH_LENGTH_TYPE from California for spiny dogfish, a hack that
# will eventually be removed (todo).
check.calt <- which(
Pdata[[col.spid]] == "DSRK" &
Pdata[[col.state]] == "C" &
Pdata[["FISH_LENGTH_TYPE"]] == "F"
)
Pdata[[var_spid]] == "DSRK" &
Pdata[[var_state]] == "C" &
Pdata[[var_fish_length_type]] == "F"
)
if (length(check.calt) > 0) {
message("Changing ", length(check.calt), " CA FISH_LENGTH_TYPE == 'F' to 'T'.",
" Vlada is working on getting these entries fixed in PacFIN.")
Pdata[check.calt, "FISH_LENGTH_TYPE"] <- "T"
Pdata[check.calt, var_fish_length_type] <- "T"
}
rm(check.calt)

# Move FISH_LENGTH to FORK_LENGTH if FORK_LENGTH is NA and type is F
# for downstream code to work

Pdata[, "FORK_LENGTH"] <- ifelse(
is.na(Pdata[, "FORK_LENGTH"]) & Pdata[, "FISH_LENGTH_TYPE"] == "F",
Pdata[, "FISH_LENGTH"],
Pdata[, "FORK_LENGTH"]
)
is.na(Pdata[["FORK_LENGTH"]]) & Pdata[[var_fish_length_type]] == "F",
yes = Pdata[, "FISH_LENGTH"],
no = Pdata[, "FORK_LENGTH"]
)

#### Species-specific code
# Species-specific code
# Convert FISH_LENGTH from disk width to length
width2length <- convertlength_skate(Pdata, returntype = "estimated")

# Spiny dogfish (Squalus suckleyi; DSRK)
check.dogfish <- Pdata[[col.spid]] == "DSRK" & !is.na(Pdata[["FORK_LENGTH"]])
check.dogfish <- Pdata[[var_spid]] == "DSRK" & !is.na(Pdata[["FORK_LENGTH"]])
if (sum(check.dogfish) > 0 & verbose) {
message(sum(check.dogfish), " fork lengths were converted to total lengths using\n",
"Tribuzio and Kruse (2012).")
Expand All @@ -76,99 +111,112 @@ getLength <- function(Pdata, verbose = FALSE, keep) {
tolower(Pdata[, "FISH_LENGTH_UNITS"]) == "cm" & Pdata[, "FISH_LENGTH"] > 90,
"MM",
Pdata[, "FISH_LENGTH_UNITS"]
)
)
}

#### Make "length" column in mm
# Make "length" column in mm
# Start with fork lengths for those that are available and if "F" in keep
Pdata$length <- ifelse(Pdata$FISH_LENGTH_TYPE %in% c("", "A", "F", NA),
Pdata$FORK_LENGTH, NA)
Pdata$length <- ifelse(
Pdata[[var_fish_length_type]] %in% c("", "A", "F", NA),
Pdata$FORK_LENGTH,
NA
)

# Work with skate data
# A is disc width
# R is inter-spiracle width for skates (used by WDFW)
if (all(Pdata$SPID %in% c("LSKT", "BSKT"))) {
Pdata$length <- ifelse(
"A" %in% keep & Pdata$FISH_LENGTH_TYPE == "A",
"A" %in% keep & Pdata[[var_fish_length_type]] == "A",
width2length,
Pdata$length)
Pdata$length
)
}
Pdata$length <- ifelse(
"R" %in% keep & Pdata$FISH_LENGTH_TYPE == "R",
"R" %in% keep & Pdata[[var_fish_length_type]] == "R",
width2length,
Pdata$length)
Pdata$length
)

# Work with dorsal length
if (
verbose &
"D" %in% keep &
length(grep("D", Pdata[["FISH_LENGTH_TYPE"]]) > 0)
length(grep("D", Pdata[[var_fish_length_type]]) > 0)
) {
message("Using dorsal lengths, are you sure you want dorsal lengths?")
}
Pdata$length <- ifelse(
"D" %in% keep & Pdata$FISH_LENGTH_TYPE == "D" &
"D" %in% keep & Pdata[[var_fish_length_type]] == "D" &
Pdata$FORK_LENGTH != Pdata$FISH_LENGTH,
Pdata$FORK_LENGTH, Pdata$length)
Pdata$FORK_LENGTH, Pdata$length
)

# Work with standard length measurements and unknown type
Pdata$length <- ifelse(
"S" %in% keep & Pdata$FISH_LENGTH_TYPE == "S",
"S" %in% keep & Pdata[[var_fish_length_type]] == "S",
Pdata$FISH_LENGTH,
Pdata$length)
Pdata$length
)
Pdata$length <- ifelse(
"T" %in% keep & Pdata$FISH_LENGTH_TYPE == "T",
"T" %in% keep & Pdata[[var_fish_length_type]] == "T",
Pdata$FISH_LENGTH,
Pdata$length)
Pdata$length
)
Pdata$length <- ifelse(
"U" %in% keep & Pdata$FISH_LENGTH_TYPE == "U",
"U" %in% keep & Pdata[[var_fish_length_type]] == "U",
ifelse(is.na(Pdata$FORK_LENGTH), Pdata$FISH_LENGTH, Pdata$FORK_LENGTH),
Pdata$length)
Pdata$length
)
Pdata$length <- ifelse(
"" %in% keep & Pdata$FISH_LENGTH_TYPE == "",
"" %in% keep & Pdata[[var_fish_length_type]] == "",
Pdata$FISH_LENGTH,
Pdata$length)
Pdata$length
)
Pdata$length <- ifelse(
NA %in% keep & is.na(Pdata$FISH_LENGTH_TYPE),
NA %in% keep & is.na(Pdata[[var_fish_length_type]]),
ifelse(is.na(Pdata$FORK_LENGTH), Pdata$FISH_LENGTH, Pdata$FORK_LENGTH),
Pdata$length)
Pdata$length
)

# A double check that lengths for methods not in keep are NA
Pdata$length <- ifelse(
Pdata$FISH_LENGTH_TYPE %in% keep,
Pdata[[var_fish_length_type]] %in% keep,
Pdata$length,
NA)
NA
)

# Assign all fish of length zero to NA
if (verbose & any(Pdata[["length"]] == 0, na.rm = TRUE)) {
message(sum(Pdata[["length"]] == 0),
" fish had a length equal to 0 (mm) and were assigned a length of NA.")
}
Pdata$length[Pdata$length == 0] <- NA
i_length_0 <- Pdata[["length"]] == 0
Pdata$length[i_length_0] <- NA

# Ensure everything is in mm
if ("FISH_LENGTH_UNITS" %in% colnames(Pdata)) {
Pdata$length <- ifelse(
tolower(Pdata[, "FISH_LENGTH_UNITS"]) == "cm",
Pdata[, "length"] * 10,
Pdata[, "length"])
} else {
if (verbose) message("Length assumed to be in mm.")
}
# As of 2023-02-28 there are only two valid units in PacFIN for length
# MM and CM, everything else is NULL or UNK
Pdata$length <- ifelse(
test = Pdata[, "FISH_LENGTH_UNITS"] == "CM",
yes = Pdata[, "length"] * 10,
no = Pdata[, "length"]
)


if (verbose) {
message("\nThe following length types were kept in the data:")
utils::capture.output(type = "message",
table(output = Pdata[
!is.na(Pdata[["length"]]),
grep("LENGTH_TYPE", colnames(Pdata))
])
)
message(
"Lengths range from ",
paste(collapse = " to ", range(Pdata[["length"]], na.rm = TRUE)),
" (mm)."
)
cli::cli_alert_info(paste(
"The following length types were kept in the data:",
sQuote(unique(Pdata[!is.na(Pdata[["length"]]), var_fish_length_type]))
))
cli::cli_alert_info(glue::glue(
"Lengths ranged from {min(Pdata[['length']], na.rm = TRUE)}--",
"{max(Pdata[['length']], na.rm = TRUE)} (mm)"
))
cli::cli_alert_info(glue::glue(
sum(Pdata[["length"]] == 0, na.rm = TRUE),
" fish had lengths of 0 (mm) and were changed to NAs"
))
cli::cli_alert_info(glue::glue(
"{sum(Pdata[['FISH_LENGTH_UNITS']] == 'CM', na.rm = TRUE)}",
" lengths (cm) and were converted to mm"
))
}
return(Pdata[["length"]])
}

0 comments on commit d9fe054

Please sign in to comment.