-
Notifications
You must be signed in to change notification settings - Fork 235
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor convert_input
to Perform tasks via helper function
#3338
base: develop
Are you sure you want to change the base?
Changes from 23 commits
614b8f9
838af61
d5e8d24
f22b962
d884203
68d9516
5208b02
f570646
e479c46
d9e911b
ed581f7
63ac964
0f9ac13
b98617f
4b771d3
63f270f
dbb7a6d
74003d9
95fb810
2bcb7c4
fcae9bd
c8e8a02
766174f
d9074df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,121 @@ | ||||||||||||||||
#' Return new arrangement of database while adding code to deal with ensembles | ||||||||||||||||
#' | ||||||||||||||||
#' @param result list of results from the download function | ||||||||||||||||
#' @param con database connection | ||||||||||||||||
#' @param start_date start date of the data | ||||||||||||||||
#' @param end_date end date of the data | ||||||||||||||||
#' @param write whether to write to the database | ||||||||||||||||
#' @param overwrite Logical: If a file already exists, create a fresh copy? | ||||||||||||||||
#' @param insert.new.file whether to insert a new file | ||||||||||||||||
#' @param input.args input arguments obtained from the convert_input function | ||||||||||||||||
#' @param machine machine information | ||||||||||||||||
#' @param mimetype data product specific file format | ||||||||||||||||
#' @param formatname format name of the data | ||||||||||||||||
#' @param allow.conflicting.dates whether to allow conflicting dates | ||||||||||||||||
#' @param ensemble ensemble id | ||||||||||||||||
#' @param ensemble_name ensemble name | ||||||||||||||||
#' @param existing.input existing input records | ||||||||||||||||
#' @param existing.dbfile existing dbfile records | ||||||||||||||||
#' @param input input records | ||||||||||||||||
#' @return list of input and dbfile ids | ||||||||||||||||
#' | ||||||||||||||||
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko | ||||||||||||||||
|
||||||||||||||||
add.database.entries <- function( | ||||||||||||||||
result, con, start_date, | ||||||||||||||||
end_date, write, overwrite, | ||||||||||||||||
insert.new.file, input.args, | ||||||||||||||||
machine, mimetype, formatname, | ||||||||||||||||
allow.conflicting.dates, ensemble, | ||||||||||||||||
ensemble_name, existing.input, | ||||||||||||||||
existing.dbfile, input) { | ||||||||||||||||
if (write) { | ||||||||||||||||
# Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. | ||||||||||||||||
# This list will be returned. | ||||||||||||||||
newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. | ||||||||||||||||
|
||||||||||||||||
for (i in 1:length(result)) { # Master for loop | ||||||||||||||||
id_not_added <- TRUE | ||||||||||||||||
|
||||||||||||||||
if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 && | ||||||||||||||||
(existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { | ||||||||||||||||
# Updating record with new dates | ||||||||||||||||
db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con) | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just for readability
Suggested change
|
||||||||||||||||
id_not_added <- FALSE | ||||||||||||||||
|
||||||||||||||||
# The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration. | ||||||||||||||||
newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) | ||||||||||||||||
newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id) | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
if (overwrite) { | ||||||||||||||||
# A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record) | ||||||||||||||||
if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) { | ||||||||||||||||
db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con) | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) { | ||||||||||||||||
db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con) | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
} | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
# If there is no ensemble then for each record there should be one parent | ||||||||||||||||
# But when you have ensembles, all of the members have one parent !! | ||||||||||||||||
parent.id <- if (is.numeric(ensemble)) { | ||||||||||||||||
ifelse(is.null(input[[i]]), NA, input[[1]]$id) | ||||||||||||||||
} else { | ||||||||||||||||
ifelse(is.null(input[[i]]), NA, input[[i]]$id) | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { | ||||||||||||||||
site.id <- input.args$newsite | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
if (insert.new.file && id_not_added) { | ||||||||||||||||
dbfile.id <- dbfile.insert( | ||||||||||||||||
in.path = dirname(result[[i]]$file[1]), | ||||||||||||||||
in.prefix = result[[i]]$dbfile.name[1], | ||||||||||||||||
"Input", | ||||||||||||||||
existing.input[[i]]$id, | ||||||||||||||||
con, | ||||||||||||||||
reuse = TRUE, | ||||||||||||||||
hostname = machine$hostname | ||||||||||||||||
) | ||||||||||||||||
|
||||||||||||||||
newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) | ||||||||||||||||
newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) | ||||||||||||||||
} else if (id_not_added) { | ||||||||||||||||
# This is to tell input.insert if we are writing ensembles | ||||||||||||||||
# Why does it need it? Because it checks for inputs with the same time period, site, and machine | ||||||||||||||||
# and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition | ||||||||||||||||
ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
|
||||||||||||||||
new_entry <- dbfile.input.insert( | ||||||||||||||||
in.path = dirname(result[[i]]$file[1]), | ||||||||||||||||
in.prefix = result[[i]]$dbfile.name[1], | ||||||||||||||||
siteid = site.id, | ||||||||||||||||
startdate = start_date, | ||||||||||||||||
enddate = end_date, | ||||||||||||||||
mimetype = mimetype, | ||||||||||||||||
formatname = formatname, | ||||||||||||||||
parentid = parent.id, | ||||||||||||||||
con = con, | ||||||||||||||||
hostname = machine$hostname, | ||||||||||||||||
allow.conflicting.dates = allow.conflicting.dates, | ||||||||||||||||
ens = ens.flag | ||||||||||||||||
) | ||||||||||||||||
|
||||||||||||||||
newinput$input.id <- c(newinput$input.id, new_entry$input.id) | ||||||||||||||||
newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id) | ||||||||||||||||
} | ||||||||||||||||
} # End for loop | ||||||||||||||||
|
||||||||||||||||
successful <- TRUE | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On a quick skim it looks like none of the cases that can set |
||||||||||||||||
return(newinput) | ||||||||||||||||
} else { | ||||||||||||||||
PEcAn.logger::logger.warn("Input was not added to the database") | ||||||||||||||||
successful <- TRUE | ||||||||||||||||
return(NULL) | ||||||||||||||||
} | ||||||||||||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,47 @@ | ||||||
#' Function to check if result has empty or missing files | ||||||
#' | ||||||
#' @param result A list of dataframes with file paths | ||||||
#' @param outname Name of the output file | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not used (right?)
Suggested change
|
||||||
#' @param existing.input Existing input records | ||||||
#' @param existing.dbfile Existing dbfile records | ||||||
#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records | ||||||
#' | ||||||
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko | ||||||
|
||||||
check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (need to update calls too) |
||||||
result_sizes <- purrr::map_dfr( | ||||||
result, | ||||||
~ dplyr::mutate( | ||||||
., | ||||||
file_size = purrr::map_dbl(file, file.size), | ||||||
missing = is.na(file_size), | ||||||
empty = file_size == 0 | ||||||
) | ||||||
) | ||||||
|
||||||
if (any(result_sizes$missing) || any(result_sizes$empty)) { | ||||||
log_format_df <- function(df) { | ||||||
formatted_df <- rbind(colnames(df), format(df)) | ||||||
formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") | ||||||
paste(formatted_text, collapse = "\n") | ||||||
} | ||||||
Comment on lines
+23
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's pull this function definition out to the end of the file -- I think the runtime overhead of nested function definitions is small, but I like to avoid them for clarity anyway. |
||||||
|
||||||
PEcAn.logger::logger.severe( | ||||||
"Requested Processing produced empty files or Nonexistent files:\n", | ||||||
log_format_df(result_sizes[, c(1, 8, 9, 10)]), | ||||||
"\n Table of results printed above.", | ||||||
wrap = FALSE | ||||||
) | ||||||
} | ||||||
|
||||||
|
||||||
# Wrap in a list for consistant processing later | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
if (exists("existing.input") && is.data.frame(existing.input)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unless I'm forgetting something,
Suggested change
|
||||||
existing.input <- list(existing.input) | ||||||
} | ||||||
|
||||||
if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
existing.dbfile <- list(existing.dbfile) | ||||||
} | ||||||
return(list(existing.input, existing.dbfile)) | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it'd be clearer to leave this
if
outside of this function, so that convert.input contains something that amounts toif (write) { add.database.entries } else { logger.warn("Input was not added to the database")}