diff --git a/scripts/release/update_release_version_by_folder.R b/scripts/release/update_release_version_by_folder.R new file mode 100644 index 00000000..c0aa519e --- /dev/null +++ b/scripts/release/update_release_version_by_folder.R @@ -0,0 +1,286 @@ +# Description: Update release version variables in files given a folder. +# Author: Xindi Guo +# Date: 2024-02-05 + +# pre-setup --------------------------- + +tic = as.double(Sys.time()) + +library(optparse) +library(glue) +library(dplyr) +library(synapser) + +# constants +ALL = "all" + +waitifnot <- function(cond, msg) { + if (!cond) { + + for (str in msg) { + message(str) + } + message("Press control-C to exit and try again.") + + while(T) {} + } +} + +# user input ---------------------------- + +option_list <- list( + make_option(c("-i", "--synid_folder_input"), type = "character", + help="Synapse ID of folder with clinical release files (required)"), + make_option(c("-o", "--synid_folder_output"), type = "character", default = NA, + help="Synapse ID of output folder for updated release files (default: write locally)"), + make_option(c("-u", "--updated_version"), type = "character", + help=glue("Updated release version number")), + make_option(c("-v", "--verbose"), action="store_true", default = FALSE, + help="Output script messages to the user (default: FALSE)"), + make_option(c("-a", "--auth"), + type = "character", + default = NA, + help="Synapse personal access token or path to .synapseConfig (default: normal synapse login behavior)") +) +opt <- parse_args(OptionParser(option_list=option_list)) +waitifnot(!is.null(opt$synid_folder_input), + msg = "Rscript update_release_version_by_folder.R -h") + +synid_folder_input <- opt$synid_folder_input +synid_folder_output <- opt$synid_folder_output +updated_version <- opt$updated_version +verbose <- opt$verbose +auth <- opt$auth + +# functions ---------------------------- + +#' Extract personal access token from .synapseConfig +#' located at a custom path. +#' +#' @param path Path to .synapseConfig +#' @return personal acccess token +get_auth_token <- function(path) { + + lines <- scan(path, what = "character", sep = "\t", quiet = T) + line <- grep(pattern = "^authtoken = ", x = lines, value = T) + + token <- strsplit(line, split = ' ')[[1]][3] + return(token) +} + +#' Override of synapser::synLogin() function to accept +#' custom path to .synapseConfig file or personal authentication +#' token. If no arguments are supplied, performs standard synLogin(). +#' +#' @param auth full path to .synapseConfig file or authentication token +#' @param silent verbosity control on login +#' @return TRUE for successful login; F otherwise +synLogin <- function(auth = NA, silent = T) { + + secret <- Sys.getenv("SCHEDULED_JOB_SECRETS") + if (secret != "") { + # Synapse token stored as secret in json string + syn = synapser::synLogin(silent = T, authToken = fromJSON(secret)$SYNAPSE_AUTH_TOKEN) + } else if (auth == "~/.synapseConfig" || is.na(auth)) { + # default Synapse behavior + syn <- synapser::synLogin(silent = silent) + } else { + + # in case pat passed directly + token <- auth + + # extract token from custom path to .synapseConfig + if (grepl(x = auth, pattern = "\\.synapseConfig$")) { + token = get_auth_token(auth) + + if (is.na(token)) { + return(F) + } + } + + # login with token + syn <- tryCatch({ + synapser::synLogin(authToken = token, silent = silent) + }, error = function(cond) { + return(F) + }) + } + + # NULL returned indicates successful login + if (is.null(syn)) { + return(T) + } + return(F) +} + +#' Download and load data stored in csv or other delimited format on Synapse +#' into an R data frame. +#' +#' @param synapse_id Synapse ID +#' @version Version of the Synapse entity to download. NA will load current +#' version +#' @param set Delimiter for file +#' @param na.strings Vector of strings to be read in as NA values +#' @param header TRUE if the file contains a header row; FALSE otherwise. +#' @param check_names TRUE if column names should be modified for compatibility +#' with R upon reading; FALSE otherwise. +#' @param comment.char character designating comment lines to ignore +#' @return data frame +get_synapse_entity_data_in_csv <- function(synapse_id, + version = NA, + sep = ",", + na.strings = c("NA"), + header = T, + check_names = F, + comment.char = "#", + colClasses = "character") { + + if (is.na(version)) { + entity <- synGet(synapse_id) + } else { + entity <- synGet(synapse_id, version = version) + } + + data <- read.csv(entity$path, stringsAsFactors = F, + na.strings = na.strings, sep = sep, check.names = check_names, + header = header, comment.char = comment.char, colClasses = colClasses) + return(data) +} + +#' Store a file on Synapse with options to define provenance. +#' +#' @param path Path to the file on the local machine. +#' @param parent_id Synapse ID of the folder or project to which to load the file. +#' @param file_name Name of the Synapse entity once loaded +#' @param prov_name Provenance short description title +#' @param prov_desc Provenance long description +#' @param prov_used Vector of Synapse IDs of data used to create the current +#' file to be loaded. +#' @param prov_exec String representing URL to script used to create the file. +#' @return Synapse ID of entity representing file +save_to_synapse <- function(path, + parent_id, + file_name = NA, + prov_name = NA, + prov_desc = NA, + prov_used = NA, + prov_exec = NA) { + + if (is.na(file_name)) { + file_name = path + } + file <- File(path = path, parentId = parent_id, name = file_name) + + if (!is.na(prov_name) || !is.na(prov_desc) || !is.na(prov_used) || !is.na(prov_exec)) { + act <- Activity(name = prov_name, + description = prov_desc, + used = prov_used, + executed = prov_exec) + file <- synStore(file, activity = act, forceVersion=FALSE) + } else { + file <- synStore(file, forceVersion=FALSE) + } + + return(file$properties$id) +} + +#' Get all child entities of a synapse folder. +#' +#' @param synapse_id Synapse ID of the folder +#' @param include_types Types of child entities to return +#' @return Vector with values as Synapse IDs and names as entity names. +get_synapse_folder_children <- function(synapse_id, + include_types=list("folder", "file", "table", "link", "entityview", "dockerrepo")) { + + ent <- as.list(synGetChildren(synapse_id, includeTypes = include_types)) + + children <- c() + + if (length(ent) > 0) { + for (i in 1:length(ent)) { + children[ent[[i]]$name] <- ent[[i]]$id + } + } + + return(children) +} + +#' Get the name of a Synapse entity. +#' +#' @param synapse_id Synapse ID string +#' @return String representing entity name +#' @example get_synapse_entity_name("syn12345") +get_synapse_entity_name <- function(synapse_id) { + return(synGet(synapse_id, downloadFile = F)$properties$name) +} + +#' Return current time as a string. +#' +#' @param timeOnly If TRUE, return only time; otherwise return date and time +#' @param tz Time Zone +#' @return Time stamp as string +#' @example +#' now(timeOnly = T) +now <- function(timeOnly = F, tz = "US/Pacific") { + + Sys.setenv(TZ=tz) + + if(timeOnly) { + return(format(Sys.time(), "%H:%M:%S")) + } + + return(format(Sys.time(), "%Y-%m-%d %H:%M:%S")) +} + +# synapse login -------------------- + +status <- synLogin(auth = auth) + +# read ---------------------------- + +synid_file_children <- get_synapse_folder_children(synapse_id = synid_folder_input, + include_types=list("file")) + +# main ---------------------------- + +if (verbose) { + print(glue("{now()}: updating release version to {updated_version} in clinical files..")) +} + +for (i in 1:length(synid_file_children)) { + + file_local <- names(synid_file_children)[i] + synid_file_child <- as.character(synid_file_children[i]) + + if (verbose) { + print(glue("{now()}: updating release version from '{file_local}' ({synid_file_child})...")) + } + + raw <- get_synapse_entity_data_in_csv(synid_file_child, na.strings = c("")) + mod <- raw + mod$release_version = updated_version + write.csv(mod, file = file_local, row.names = F, quote = T, na = "") + + if (verbose) { + print(glue("{now()}: release version is updated for '{file_local}' ({synid_file_child}).")) + } + + if (!is.na(synid_folder_output)) { + synid_file_dest <- save_to_synapse(path = file_local, + parent_id = synid_folder_output, + prov_name = "update clinical files", + prov_desc = "update release version variable in clinical files", + prov_used = c(synid_file_child), + prov_exec = "https://github.com/Sage-Bionetworks/genie-bpc-pipeline/blob/develop/scripts/release/update_release_version_by_folder.R") + file.remove(file_local) + + if (verbose) { + print(glue("{now()}: uploaded {file_local} to {synid_file_dest}")) + } + } +} + +# close out ---------------------------- + +toc = as.double(Sys.time()) +print(glue("Runtime: {round(toc - tic)} s"))