From 6d99551324d5eac1eafacc1527f36f085dd08058 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Fri, 15 Dec 2023 13:08:49 -0600 Subject: [PATCH 01/14] Adds dep to rsconnect and imports a couple functions --- DESCRIPTION | 4 ++-- NAMESPACE | 2 ++ R/package.R | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3e00192..1d2f332 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,8 @@ Imports: vctrs, processx, httr2, - rstudioapi + rstudioapi, + rsconnect URL: https://github.com/mlverse/pysparklyr BugReports: https://github.com/mlverse/pysparklyr/issues Suggests: @@ -40,4 +41,3 @@ Suggests: tibble, withr Config/testthat/edition: 3 - diff --git a/NAMESPACE b/NAMESPACE index 551d89c..7331fea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -127,6 +127,8 @@ importFrom(rlang,quo_is_null) importFrom(rlang,set_names) importFrom(rlang,sym) importFrom(rlang,warn) +importFrom(rsconnect,accounts) +importFrom(rsconnect,deployApp) importFrom(rstudioapi,jobRunScript) importFrom(rstudioapi,showQuestion) importFrom(sparklyr,connection_is_open) diff --git a/R/package.R b/R/package.R index df08954..585eeb2 100644 --- a/R/package.R +++ b/R/package.R @@ -21,6 +21,7 @@ #' @importFrom rlang enquo `!!` `!!!` quo_is_null sym warn abort `%||%` #' @importFrom rlang is_string is_character parse_exprs set_names #' @importFrom rlang exec arg_match as_utf8_character +#' @importFrom rsconnect accounts deployApp #' @importFrom methods new is setOldClass #' @importFrom tidyselect matches #' @importFrom utils head type.convert compareVersion From c8f951b7dc2a6490a0053d79651a4fbf927abb64 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Fri, 15 Dec 2023 16:51:52 -0600 Subject: [PATCH 02/14] Adds deploy --- NAMESPACE | 2 ++ R/deploy.R | 23 +++++++++++++++++++++++ R/package.R | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 R/deploy.R diff --git a/NAMESPACE b/NAMESPACE index 7331fea..3b85e84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,6 
+73,7 @@ S3method(tbl_ptype,tbl_pyspark) S3method(tidyselect_data_has_predicates,tbl_pyspark) export("%>%") export(connection_databricks_shinyapp) +export(deploy) export(install_databricks) export(install_pyspark) export(installed_components) @@ -129,6 +130,7 @@ importFrom(rlang,sym) importFrom(rlang,warn) importFrom(rsconnect,accounts) importFrom(rsconnect,deployApp) +importFrom(rstudioapi,getSourceEditorContext) importFrom(rstudioapi,jobRunScript) importFrom(rstudioapi,showQuestion) importFrom(sparklyr,connection_is_open) diff --git a/R/deploy.R b/R/deploy.R new file mode 100644 index 0000000..bbd8b9e --- /dev/null +++ b/R/deploy.R @@ -0,0 +1,23 @@ +#' @export +deploy <- function(appDir = NULL, python = NULL, ...) { + check_rstudio <- try(RStudio.Version(), silent = TRUE) + in_rstudio <- !inherits(check_rstudio, "try-error") + editor_doc <- NULL + if(is.null(appDir)) { + if(interactive() && in_rstudio) { + editor_doc <- getSourceEditorContext() + appDir <- path_dir(path_expand(editor_doc$path)) + cli_div(theme = cli_colors()) + cli_alert_info("{.header Source:{.emph '{appDir}'}}") + cli_end() + } + } + + + # deployApp( + # appDir = here::here("doc-subfolder"), + # python = "/Users/user/.virtualenvs/r-sparklyr-databricks-14.1/bin/python", + # envVars = c("DATABRICKS_HOST", "DATABRICKS_TOKEN"), + # lint = FALSE + # ) +} diff --git a/R/package.R b/R/package.R index 585eeb2..8526d83 100644 --- a/R/package.R +++ b/R/package.R @@ -28,7 +28,7 @@ #' @importFrom tidyr pivot_longer #' @importFrom vctrs vec_as_names #' @importFrom processx process -#' @importFrom rstudioapi jobRunScript showQuestion +#' @importFrom rstudioapi jobRunScript showQuestion getSourceEditorContext #' @importFrom stats terms #' @importFrom utils capture.output installed.packages #' @importFrom magrittr %>% From 6f9812fe54b5cd77f2a0d9d33fac7e0831a2f584 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Fri, 15 Dec 2023 18:05:57 -0600 Subject: [PATCH 03/14] Initial environment finder for deployment --- 
R/data-write.R | 18 +++++++------- R/databricks-utils.R | 2 +- R/deploy.R | 56 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/R/data-write.R b/R/data-write.R index f37f983..221fb9b 100644 --- a/R/data-write.R +++ b/R/data-write.R @@ -1,10 +1,10 @@ #' @export spark_write_table.tbl_pyspark <- function(x, - name, - mode = NULL, - options = list(), - partition_by = NULL, - ...) { + name, + mode = NULL, + options = list(), + partition_by = NULL, + ...) { args <- list(...) save_action <- ifelse(identical(mode, "append"), "insertInto", "saveAsTable") pyspark_write_generic( @@ -135,9 +135,7 @@ pyspark_write_generic <- function( options, args, save_action = "save", - expand_path = TRUE - ) { - + expand_path = TRUE) { query <- tbl_pyspark_sdf(x) if (is.null(partition_by)) { @@ -150,13 +148,13 @@ pyspark_write_generic <- function( path <- ifelse(expand_path, path_expand(path), path) - if(!is.null(format)) { + if (!is.null(format)) { x <- py_invoke(query_prep, "format", format) } else { x <- query_prep } - if(!is.null(mode)) { + if (!is.null(mode)) { x <- py_invoke(x, "mode", mode) } diff --git a/R/databricks-utils.R b/R/databricks-utils.R index f46992e..01628ed 100644 --- a/R/databricks-utils.R +++ b/R/databricks-utils.R @@ -130,7 +130,7 @@ databricks_dbr_version <- function(cluster_id, cluster_id = cluster_id, host = host, token = token - ) + ) vn$version } diff --git a/R/deploy.R b/R/deploy.R index bbd8b9e..fa3307f 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -1,18 +1,29 @@ #' @export -deploy <- function(appDir = NULL, python = NULL, ...) { +deploy <- function( + appDir = NULL, + python = NULL, + version = NULL, + method = "databricks_connect", + ...) 
{ + cli_div(theme = cli_colors()) check_rstudio <- try(RStudio.Version(), silent = TRUE) in_rstudio <- !inherits(check_rstudio, "try-error") editor_doc <- NULL - if(is.null(appDir)) { - if(interactive() && in_rstudio) { + if (is.null(appDir)) { + if (interactive() && in_rstudio) { editor_doc <- getSourceEditorContext() - appDir <- path_dir(path_expand(editor_doc$path)) - cli_div(theme = cli_colors()) + appDir <- dirname(editor_doc$path) cli_alert_info("{.header Source:{.emph '{appDir}'}}") - cli_end() } } + python <- deploy_find_environment( + python = python, + version = version, + method = method + ) + + print(python) # deployApp( # appDir = here::here("doc-subfolder"), @@ -20,4 +31,37 @@ deploy <- function(appDir = NULL, python = NULL, ...) { # envVars = c("DATABRICKS_HOST", "DATABRICKS_TOKEN"), # lint = FALSE # ) + cli_end() +} + +deploy_find_environment <- function( + version = NULL, + python = NULL, + method) { + ret <- NULL + failed <- NULL + cli_progress_step( + msg = "Searching and validating Python path", + msg_done = "{.header Python:{.emph '{ret}'}}", + msg_failed = "Environment not found: {.emph '{failed}'}" + ) + if (is.null(python)) { + env_name <- use_envname(version = version, method = method) + if (names(env_name) == "exact") { + check_conda <- try(conda_python(env_name), silent = TRUE) + check_virtualenv <- try(virtualenv_python(env_name), silent = TRUE) + if (!inherits(check_conda, "try-error")) ret <- check_conda + if (!inherits(check_virtualenv, "try-error")) ret <- check_virtualenv + } + if (is.null(ret)) failed <- env_name + } else { + validate_python <- file_exists(python) + if (validate_python) { + ret <- python + } else { + failed <- python + } + } + if (is.null(ret)) cli_progress_done(result = "failed") + path_expand(ret) } From 8f093384aa24c25352dfa88e77e7cf7df7197678 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 17 Dec 2023 16:01:42 -0600 Subject: [PATCH 04/14] Adds cluster_id arg --- R/deploy.R | 21 +++++++++++++++++++-- 1 file 
changed, 19 insertions(+), 2 deletions(-) diff --git a/R/deploy.R b/R/deploy.R index fa3307f..7cc15e5 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -3,6 +3,7 @@ deploy <- function( appDir = NULL, python = NULL, version = NULL, + cluster_id = NULL, method = "databricks_connect", ...) { cli_div(theme = cli_colors()) @@ -19,11 +20,16 @@ deploy <- function( python <- deploy_find_environment( python = python, + cluster_id = cluster_id, version = version, method = method ) - print(python) + # CONNECT_DB_HOST + # CONNECT_DB_TOKEN + # Use them if user pases host and token as arguments + + # deployApp( # appDir = here::here("doc-subfolder"), @@ -37,6 +43,7 @@ deploy <- function( deploy_find_environment <- function( version = NULL, python = NULL, + cluster_id = NULL, method) { ret <- NULL failed <- NULL @@ -46,7 +53,17 @@ deploy_find_environment <- function( msg_failed = "Environment not found: {.emph '{failed}'}" ) if (is.null(python)) { - env_name <- use_envname(version = version, method = method) + if (is.null(version) && !is.null(cluster_id)) { + version <- databricks_dbr_version( + cluster_id = cluster_id, + host = databricks_host(), + token = databricks_token() + ) + } + env_name <- use_envname( + version = version, + method = method + ) if (names(env_name) == "exact") { check_conda <- try(conda_python(env_name), silent = TRUE) check_virtualenv <- try(virtualenv_python(env_name), silent = TRUE) From 32ae2c529c8b23a41af68c7e32174d21def78b4c Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 17 Dec 2023 16:26:32 -0600 Subject: [PATCH 05/14] Adds another way to find the env --- R/deploy.R | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/R/deploy.R b/R/deploy.R index 7cc15e5..6d4469f 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -1,6 +1,7 @@ #' @export deploy <- function( appDir = NULL, + lint = FALSE, python = NULL, version = NULL, cluster_id = NULL, @@ -14,10 +15,9 @@ deploy <- function( if (interactive() && in_rstudio) { 
editor_doc <- getSourceEditorContext() appDir <- dirname(editor_doc$path) - cli_alert_info("{.header Source:{.emph '{appDir}'}}") + cli_alert_info("{.header Source: {.emph '{appDir}'}}") } } - python <- deploy_find_environment( python = python, cluster_id = cluster_id, @@ -29,8 +29,6 @@ deploy <- function( # CONNECT_DB_TOKEN # Use them if user pases host and token as arguments - - # deployApp( # appDir = here::here("doc-subfolder"), # python = "/Users/user/.virtualenvs/r-sparklyr-databricks-14.1/bin/python", @@ -44,7 +42,7 @@ deploy_find_environment <- function( version = NULL, python = NULL, cluster_id = NULL, - method) { + method = "databricks_connect") { ret <- NULL failed <- NULL cli_progress_step( @@ -53,6 +51,7 @@ deploy_find_environment <- function( msg_failed = "Environment not found: {.emph '{failed}'}" ) if (is.null(python)) { + # TODO: Move to deploy_databricks() when is created if (is.null(version) && !is.null(cluster_id)) { version <- databricks_dbr_version( cluster_id = cluster_id, @@ -60,15 +59,22 @@ deploy_find_environment <- function( token = databricks_token() ) } - env_name <- use_envname( - version = version, - method = method + if(!is.null(version)) { + env_name <- use_envname( + version = version, + method = method ) - if (names(env_name) == "exact") { - check_conda <- try(conda_python(env_name), silent = TRUE) - check_virtualenv <- try(virtualenv_python(env_name), silent = TRUE) - if (!inherits(check_conda, "try-error")) ret <- check_conda - if (!inherits(check_virtualenv, "try-error")) ret <- check_virtualenv + if (names(env_name) == "exact") { + check_conda <- try(conda_python(env_name), silent = TRUE) + check_virtualenv <- try(virtualenv_python(env_name), silent = TRUE) + if (!inherits(check_conda, "try-error")) ret <- check_conda + if (!inherits(check_virtualenv, "try-error")) ret <- check_virtualenv + } + } else { + py_exe_path <- py_exe() + if(grepl("r-sparklyr-", py_exe_path)) { + ret <- py_exe_path + } } if (is.null(ret)) failed <- 
env_name } else { From 3650f7518f0a33b7159492a70ea06e5c11b86431 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 17 Dec 2023 16:46:21 -0600 Subject: [PATCH 06/14] Adds support for new connect env vars --- R/databricks-utils.R | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/R/databricks-utils.R b/R/databricks-utils.R index 01628ed..47a3fcd 100644 --- a/R/databricks-utils.R +++ b/R/databricks-utils.R @@ -1,6 +1,16 @@ databricks_host <- function(host = NULL, fail = TRUE) { - host <- host %||% Sys.getenv("DATABRICKS_HOST", unset = NA) - if (is.null(host) | is.na(host)) { + if(!is.null(host)) { + return(set_names(host, "argument")) + } + env_host <- Sys.getenv("DATABRICKS_HOST", unset = NA) + connect_host <- Sys.getenv("CONNECT_DATABRICKS_HOST", unset = NA) + if(!is.na(env_host)) { + host <- set_names(env_host, "environment") + } + if(!is.na(connect_host)) { + host <- set_names(connect_host, "environment") + } + if (is.null(host)) { if (fail) { cli_abort(c( paste0( @@ -17,19 +27,24 @@ databricks_host <- function(host = NULL, fail = TRUE) { } databricks_token <- function(token = NULL, fail = FALSE) { - name <- "argument" + if(!is.null(token)) { + return(set_names(token, "argument")) + } # Checks for OAuth Databricks token inside the RStudio API if (is.null(token) && exists(".rs.api.getDatabricksToken")) { getDatabricksToken <- get(".rs.api.getDatabricksToken") - name <- "oauth" - token <- getDatabricksToken(databricks_host()) + token <- set_names(getDatabricksToken(databricks_host()), "oauth") } # Checks the Environment Variable if (is.null(token)) { env_token <- Sys.getenv("DATABRICKS_TOKEN", unset = NA) + connect_token <- Sys.getenv("CONNECT_DATABRICKS_TOKEN", unset = NA) if (!is.na(env_token)) { - name <- "environment" - token <- env_token + token <- set_names(env_token, "environment") + } else { + if(!is.na(connect_token)) { + token <- set_names(connect_token, "environment_connect") + } } } if (is.null(token)) { 
@@ -44,11 +59,10 @@ databricks_token <- function(token = NULL, fail = FALSE) { "Please add your Token to 'DATABRICKS_TOKEN' inside your .Renviron file." )) } else { - name <- NULL token <- "" } } - set_names(token, name) + token } databricks_dbr_version_name <- function(cluster_id, From 9df35e61ed9427b6d39356b5ac5a0786b1b77502 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 17 Dec 2023 18:03:51 -0600 Subject: [PATCH 07/14] Adds deploy_databricks() --- R/deploy.R | 68 +++++++++++++++++++++++++++++++--------- man/deploy_databricks.Rd | 20 ++++++++++++ 2 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 man/deploy_databricks.Rd diff --git a/R/deploy.R b/R/deploy.R index 6d4469f..95cdde5 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -1,12 +1,61 @@ -#' @export -deploy <- function( +#' @@export +deploy_databricks <- function( appDir = NULL, lint = FALSE, python = NULL, version = NULL, cluster_id = NULL, + host = NULL, + token = NULL, + ... + ) { + if (is.null(version) && !is.null(cluster_id)) { + version <- databricks_dbr_version( + cluster_id = cluster_id, + host = databricks_host(), + token = databricks_token() + ) + } + env_vars <- NULL + if(!is.null(host)) { + Sys.setenv("CONNECT_DATABRICKS_HOST" = host) + env_vars <- "CONNECT_DATABRICKS_HOST" + } else { + host <- databricks_host() + if(names(host) == "environment") { + env_vars <- "DATABRICKS_HOST" + } + } + if(!is.null(token)) { + Sys.setenv("CONNECT_DATABRICKS_TOKEN" = token) + env_vars <- c(env_vars, "CONNECT_DATABRICKS_TOKEN") + } else { + token <- databricks_token() + if(names(token) == "environment") { + env_vars <- c(env_vars, "DATABRICKS_TOKEN") + } + } + deploy( + appDir = appDir, lint = lint, + python = python, + version = version, method = "databricks_connect", + envVars = env_vars + ) +} + +#' @export +deploy <- function( + appDir = NULL, + lint = FALSE, + envVars = NULL, + python = NULL, + version = NULL, + method = NULL, ...) 
{ + if(is.null(method)) { + abort("'method' is empty, please provide one") + } cli_div(theme = cli_colors()) check_rstudio <- try(RStudio.Version(), silent = TRUE) in_rstudio <- !inherits(check_rstudio, "try-error") @@ -20,13 +69,13 @@ deploy <- function( } python <- deploy_find_environment( python = python, - cluster_id = cluster_id, version = version, method = method ) - # CONNECT_DB_HOST - # CONNECT_DB_TOKEN + print(envVars) + # CONNECT_DATABRICKS_HOST + # CONNECT_DATABRICKS_TOKEN # Use them if user pases host and token as arguments # deployApp( @@ -41,7 +90,6 @@ deploy <- function( deploy_find_environment <- function( version = NULL, python = NULL, - cluster_id = NULL, method = "databricks_connect") { ret <- NULL failed <- NULL @@ -51,14 +99,6 @@ deploy_find_environment <- function( msg_failed = "Environment not found: {.emph '{failed}'}" ) if (is.null(python)) { - # TODO: Move to deploy_databricks() when is created - if (is.null(version) && !is.null(cluster_id)) { - version <- databricks_dbr_version( - cluster_id = cluster_id, - host = databricks_host(), - token = databricks_token() - ) - } if(!is.null(version)) { env_name <- use_envname( version = version, diff --git a/man/deploy_databricks.Rd b/man/deploy_databricks.Rd new file mode 100644 index 0000000..b0279e1 --- /dev/null +++ b/man/deploy_databricks.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deploy.R +\name{deploy_databricks} +\alias{deploy_databricks} +\title{@export} +\usage{ +deploy_databricks( + appDir = NULL, + lint = FALSE, + python = NULL, + version = NULL, + cluster_id = NULL, + host = NULL, + token = NULL, + ... 
+) +} +\description{ +@export +} From 21486a7c5bf7cf84f42bccb51f812bffa3cdc934 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Sun, 17 Dec 2023 18:18:12 -0600 Subject: [PATCH 08/14] Handles no version or cluster_id better --- R/deploy.R | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/R/deploy.R b/R/deploy.R index 95cdde5..43e6e17 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -73,17 +73,13 @@ deploy <- function( method = method ) - print(envVars) - # CONNECT_DATABRICKS_HOST - # CONNECT_DATABRICKS_TOKEN - # Use them if user pases host and token as arguments - - # deployApp( - # appDir = here::here("doc-subfolder"), - # python = "/Users/user/.virtualenvs/r-sparklyr-databricks-14.1/bin/python", - # envVars = c("DATABRICKS_HOST", "DATABRICKS_TOKEN"), - # lint = FALSE - # ) + x <- list( + appDir = appDir, + python = python, + envVars = envVars, + lint = FALSE, + ... + ) cli_end() } @@ -93,10 +89,11 @@ deploy_find_environment <- function( method = "databricks_connect") { ret <- NULL failed <- NULL + env_name <- "" cli_progress_step( msg = "Searching and validating Python path", msg_done = "{.header Python:{.emph '{ret}'}}", - msg_failed = "Environment not found: {.emph '{failed}'}" + msg_failed = "Environment not found: {.emph {failed}}" ) if (is.null(python)) { if(!is.null(version)) { @@ -110,13 +107,15 @@ deploy_find_environment <- function( if (!inherits(check_conda, "try-error")) ret <- check_conda if (!inherits(check_virtualenv, "try-error")) ret <- check_virtualenv } + if (is.null(ret)) failed <- env_name } else { py_exe_path <- py_exe() if(grepl("r-sparklyr-", py_exe_path)) { ret <- py_exe_path + } else { + failed <- "Please pass a 'version' or a 'cluster_id'" } } - if (is.null(ret)) failed <- env_name } else { validate_python <- file_exists(python) if (validate_python) { @@ -125,6 +124,10 @@ deploy_find_environment <- function( failed <- python } } - if (is.null(ret)) cli_progress_done(result = "failed") - 
path_expand(ret) + if (is.null(ret)) { + cli_progress_done(result = "failed") + } else { + ret <- path_expand(ret) + } + ret } From 032812ea052353a04e49341ada26483b6ba0ce85 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 08:35:27 -0600 Subject: [PATCH 09/14] Adds server checks --- R/deploy.R | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/R/deploy.R b/R/deploy.R index 43e6e17..912fc24 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -1,8 +1,10 @@ #' @@export deploy_databricks <- function( appDir = NULL, - lint = FALSE, + account = NULL, + server = NULL, python = NULL, + lint = FALSE, version = NULL, cluster_id = NULL, host = NULL, @@ -40,13 +42,18 @@ deploy_databricks <- function( python = python, version = version, method = "databricks_connect", - envVars = env_vars + envVars = env_vars, + account = account, + server = server, + ... ) } #' @export deploy <- function( appDir = NULL, + account = NULL, + server = NULL, lint = FALSE, envVars = NULL, python = NULL, @@ -56,10 +63,23 @@ deploy <- function( if(is.null(method)) { abort("'method' is empty, please provide one") } + rs_accounts <- accounts() + if(nrow(rs_accounts) == 0) { + abort("There are no server accounts setup") + } else { + if(is.null(account)) { + account <- rs_accounts$name[1] + } + if(is.null(server)) { + server <- rs_accounts$server[1] + } + } cli_div(theme = cli_colors()) + cli_h1("Starting deployment") check_rstudio <- try(RStudio.Version(), silent = TRUE) in_rstudio <- !inherits(check_rstudio, "try-error") editor_doc <- NULL + cli_alert_info("{.header Server:} {server} | {.header Account:} {account}") if (is.null(appDir)) { if (interactive() && in_rstudio) { editor_doc <- getSourceEditorContext() @@ -72,15 +92,16 @@ deploy <- function( version = version, method = method ) - - x <- list( + cli_end() + deployApp( appDir = appDir, python = python, envVars = envVars, + server = server, + account = account, lint = FALSE, ... 
) - cli_end() } deploy_find_environment <- function( @@ -126,6 +147,7 @@ deploy_find_environment <- function( } if (is.null(ret)) { cli_progress_done(result = "failed") + cli_abort("No Python environment could be found") } else { ret <- path_expand(ret) } From 121c2a5c3041bd37406918206358c0bc08787ee1 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 10:00:07 -0600 Subject: [PATCH 10/14] Adds envar messages, starts choices --- R/databricks-utils.R | 2 +- R/deploy.R | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/R/databricks-utils.R b/R/databricks-utils.R index 47a3fcd..375be92 100644 --- a/R/databricks-utils.R +++ b/R/databricks-utils.R @@ -8,7 +8,7 @@ databricks_host <- function(host = NULL, fail = TRUE) { host <- set_names(env_host, "environment") } if(!is.na(connect_host)) { - host <- set_names(connect_host, "environment") + host <- set_names(connect_host, "environment_connect") } if (is.null(host)) { if (fail) { diff --git a/R/deploy.R b/R/deploy.R index 912fc24..361d4e2 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -14,11 +14,12 @@ deploy_databricks <- function( if (is.null(version) && !is.null(cluster_id)) { version <- databricks_dbr_version( cluster_id = cluster_id, - host = databricks_host(), - token = databricks_token() + host = databricks_host(host), + token = databricks_token(token) ) } env_vars <- NULL + env_var_message <- NULL if(!is.null(host)) { Sys.setenv("CONNECT_DATABRICKS_HOST" = host) env_vars <- "CONNECT_DATABRICKS_HOST" @@ -28,6 +29,9 @@ deploy_databricks <- function( env_vars <- "DATABRICKS_HOST" } } + if(!is.null(host)) { + env_var_message <- c(" " = glue("|- Host: {host}")) + } if(!is.null(token)) { Sys.setenv("CONNECT_DATABRICKS_TOKEN" = token) env_vars <- c(env_vars, "CONNECT_DATABRICKS_TOKEN") @@ -37,12 +41,19 @@ deploy_databricks <- function( env_vars <- c(env_vars, "DATABRICKS_TOKEN") } } + if(!is.null(token)) { + env_var_message <- c( + env_var_message, + " " = glue("|- 
Token: ''") + ) + } deploy( appDir = appDir, lint = lint, python = python, version = version, method = "databricks_connect", envVars = env_vars, + env_var_message = env_var_message, account = account, server = server, ... @@ -59,11 +70,13 @@ deploy <- function( python = NULL, version = NULL, method = NULL, + env_var_message = NULL, ...) { if(is.null(method)) { abort("'method' is empty, please provide one") } rs_accounts <- accounts() + accts_msg <- NULL if(nrow(rs_accounts) == 0) { abort("There are no server accounts setup") } else { @@ -73,26 +86,37 @@ deploy <- function( if(is.null(server)) { server <- rs_accounts$server[1] } + if(nrow(rs_accounts > 1)) { + accts_msg <- "Change Publishing Target (Posit Connect server)" + } } cli_div(theme = cli_colors()) cli_h1("Starting deployment") check_rstudio <- try(RStudio.Version(), silent = TRUE) in_rstudio <- !inherits(check_rstudio, "try-error") editor_doc <- NULL - cli_alert_info("{.header Server:} {server} | {.header Account:} {account}") if (is.null(appDir)) { if (interactive() && in_rstudio) { editor_doc <- getSourceEditorContext() appDir <- dirname(editor_doc$path) - cli_alert_info("{.header Source: {.emph '{appDir}'}}") + } } + cli_inform("{.class - App and Spark -}") + cli_alert_info("{.header Source: {.emph '{appDir}'}}") python <- deploy_find_environment( python = python, version = version, method = method ) + cli_inform("{.class - Publishing target -}") + cli_alert_info("{.header Server:} {server} | {.header Account:} {account}") + if(!is.null(env_var_message)) { + cli_bullets(c("i" = "{.header Environment variables:}", env_var_message)) + } + cli_inform("Proceed?") cli_end() + choice <- utils::menu(title = "Proceed", choices = c("Yes", "No", accts_msg)) deployApp( appDir = appDir, python = python, From 3cf9e4b88d045fe507ec8885fc9d9390a0bc7af3 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 10:12:18 -0600 Subject: [PATCH 11/14] Adds choice to change server target --- NAMESPACE | 1 + R/deploy.R 
| 12 +++++++++++- R/package.R | 2 +- man/deploy_databricks.Rd | 4 +++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3b85e84..cf0a8f2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -113,6 +113,7 @@ importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(purrr,map_lgl) importFrom(purrr,pmap_chr) +importFrom(purrr,transpose) importFrom(rlang,`!!!`) importFrom(rlang,`!!`) importFrom(rlang,`%||%`) diff --git a/R/deploy.R b/R/deploy.R index 361d4e2..756b60b 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -116,7 +116,17 @@ deploy <- function( } cli_inform("Proceed?") cli_end() - choice <- utils::menu(title = "Proceed", choices = c("Yes", "No", accts_msg)) + choice <- utils::menu(choices = c("Yes", "No", accts_msg)) + if(choice == 2) { + return(invisible()) + } + if(choice == 3) { + chr_accounts <- rs_accounts %>% + transpose() %>% + map_chr(~ glue("Server: {.x$server} | Account: {.x$name}")) + choice <- utils::menu(title = "Select publishing target:", chr_accounts) + } + return(print("temp stop")) deployApp( appDir = appDir, python = python, diff --git a/R/package.R b/R/package.R index 8526d83..7eeea90 100644 --- a/R/package.R +++ b/R/package.R @@ -16,8 +16,8 @@ #' @importFrom dplyr tbl collect tibble same_src compute as_tibble group_vars #' @importFrom dplyr sample_n sample_frac slice_sample select tbl_ptype group_by #' @importFrom dplyr filter mutate -#' @importFrom purrr map_lgl map_chr map pmap_chr imap discard #' @importFrom purrr map_lgl map_chr map pmap_chr imap +#' @importFrom purrr map_chr discard transpose #' @importFrom rlang enquo `!!` `!!!` quo_is_null sym warn abort `%||%` #' @importFrom rlang is_string is_character parse_exprs set_names #' @importFrom rlang exec arg_match as_utf8_character diff --git a/man/deploy_databricks.Rd b/man/deploy_databricks.Rd index b0279e1..a8c7c00 100644 --- a/man/deploy_databricks.Rd +++ b/man/deploy_databricks.Rd @@ -6,8 +6,10 @@ \usage{ deploy_databricks( appDir = NULL, - lint = FALSE, 
+ account = NULL, + server = NULL, python = NULL, + lint = FALSE, version = NULL, cluster_id = NULL, host = NULL, From bea55f698b4a2f5d41b0ed551a4eab21d66db3d6 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 10:49:37 -0600 Subject: [PATCH 12/14] Documents deploy_databricks() --- NAMESPACE | 2 +- R/deploy.R | 34 ++++++++++++++++++++++++++++--- man/deploy_databricks.Rd | 43 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index cf0a8f2..1f090a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,7 +73,7 @@ S3method(tbl_ptype,tbl_pyspark) S3method(tidyselect_data_has_predicates,tbl_pyspark) export("%>%") export(connection_databricks_shinyapp) -export(deploy) +export(deploy_databricks) export(install_databricks) export(install_pyspark) export(installed_components) diff --git a/R/deploy.R b/R/deploy.R index 756b60b..7af05e7 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -1,9 +1,38 @@ -#' @@export +#' Deploys Databricks backed content to publishing server +#' +#' @description +#' This is a convenience function that is meant to make it easier for +#' you to publish your Databricks backed content to a publishing server. It is +#' meant to be primarily used with Posit Connect. +#' +#' @param appDir A directory containing an application (e.g. a Shiny app or plumber API) +#' Defaults to NULL. If left NULL, and if called within RStudio, it will attempt +#' to use the folder of the currently opened document within the IDE. +#' @param python Full path to a python binary for use by `reticulate.` It defaults to NULL. +#' If left NULL, this function will attempt to find a viable local Python +#' environment to replicate using the following hierarchy: +#' 1. `version` - Cluster's DBR version +#' 2. `cluster_id` - Query the cluster to obtain its DBR version +#' 3. 
If one is loaded in the current R session, it will verify that the Python +#' environment is suited to be used as the one to use +#' @param account The name of the account to use to publish +#' @param server The name of the target server to publish +#' @param lint Lint the project before initiating the project? Default to FALSE. +#' It has been causing issues for this type of content. +#' @param version The Databricks Runtime (DBR) version. Use if `python` is NULL. +#' @param cluster_id The Databricks cluster ID. Use if `python`, and `version` are +#' NULL +#' @param host The Databricks host URL. Defaults to NULL. If left NULL, it will +#' use the environment variable `DATABRICKS_HOST` +#' @param token The Databricks authentication token. Defaults to NULL. If left NULL, it will +#' use the environment variable `DATABRICKS_TOKEN` +#' @param ... Additional named arguments passed to `rsconnect::deployApp()` function +#' @export deploy_databricks <- function( appDir = NULL, + python = NULL, account = NULL, server = NULL, - python = NULL, lint = FALSE, version = NULL, cluster_id = NULL, @@ -60,7 +89,6 @@ deploy_databricks <- function( ) } -#' @export deploy <- function( appDir = NULL, account = NULL, diff --git a/man/deploy_databricks.Rd b/man/deploy_databricks.Rd index a8c7c00..d901a8d 100644 --- a/man/deploy_databricks.Rd +++ b/man/deploy_databricks.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/deploy.R \name{deploy_databricks} \alias{deploy_databricks} -\title{@export} +\title{Deploys Databricks backed content to publishing server} \usage{ deploy_databricks( appDir = NULL, + python = NULL, account = NULL, server = NULL, - python = NULL, lint = FALSE, version = NULL, cluster_id = NULL, @@ -17,6 +17,43 @@ deploy_databricks( ... ) } +\arguments{ +\item{appDir}{A directory containing an application (e.g. a Shiny app or plumber API) +Defaults to NULL. 
If left NULL, and if called within RStudio, it will attempt +to use the folder of the currently opened document within the IDE.} + +\item{python}{Full path to a python binary for use by \code{reticulate.} It defaults to NULL. +If left NULL, this function will attempt to find a viable local Python +environment to replicate using the following hierarchy: +\enumerate{ +\item \code{version} - Cluster's DBR version +\item \code{cluster_id} - Query the cluster to obtain its DBR version +\item If one is loaded in the current R session, it will verify that the Python +environment is suited to be used as the one to use +}} + +\item{account}{The name of the account to use to publish} + +\item{server}{The name of the target server to publish} + +\item{lint}{Lint the project before initiating the project? Default to FALSE. +It has been causing issues for this type of content.} + +\item{version}{The Databricks Runtime (DBR) version. Use if \code{python} is NULL.} + +\item{cluster_id}{The Databricks cluster ID. Use if \code{python}, and \code{version} are +NULL} + +\item{host}{The Databricks host URL. Defaults to NULL. If left NULL, it will +use the environment variable \code{DATABRICKS_HOST}} + +\item{token}{The Databricks authentication token. Defaults to NULL. If left NULL, it will +use the environment variable \code{DATABRICKS_TOKEN}} + +\item{...}{Additional named arguments passed to \code{rsconnect::deployApp()} function} +} \description{ -@export +This is a convenience function that is meant to make it easier for +you to publish your Databricks backed content to a publishing server. It is +meant to be primarily used with Posit Connect. 
} From a56ee7370f9828e8221e390d6d99bcf367c0b90a Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 10:55:44 -0600 Subject: [PATCH 13/14] Removes the temp stop --- R/deploy.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/deploy.R b/R/deploy.R index 7af05e7..086a5bb 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -142,7 +142,8 @@ deploy <- function( if(!is.null(env_var_message)) { cli_bullets(c("i" = "{.header Environment variables:}", env_var_message)) } - cli_inform("Proceed?") + cli_inform("") + cli_inform("Does everything look correct?") cli_end() choice <- utils::menu(choices = c("Yes", "No", accts_msg)) if(choice == 2) { @@ -154,7 +155,6 @@ deploy <- function( map_chr(~ glue("Server: {.x$server} | Account: {.x$name}")) choice <- utils::menu(title = "Select publishing target:", chr_accounts) } - return(print("temp stop")) deployApp( appDir = appDir, python = python, From b2dbbc8f134f26a2f4b44523ee5ea132ffe87691 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 18 Dec 2023 18:02:52 -0600 Subject: [PATCH 14/14] Reverts appDir to getwd() when not able to establish it, ver bump, adds NEWS item --- DESCRIPTION | 2 +- NEWS.md | 5 ++++- R/deploy.R | 12 +++++++++--- man/deploy_databricks.Rd | 4 +++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d2f332..76fe553 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pysparklyr Title: Provides a 'PySpark' Back-End for the 'sparklyr' Package -Version: 0.1.2.9000 +Version: 0.1.2.9001 Authors@R: c( person("Edgar", "Ruiz", , "edgar@posit.co", role = c("aut", "cre")), person(given = "Posit Software, PBC", role = c("cph", "fnd")) diff --git a/NEWS.md b/NEWS.md index 506e213..b567fee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,13 +2,16 @@ ### Improvements +* Adds `deploy_databricks()` function. It will simplify publishing to Posit +Connect by automating much of the needed setup and triggering the publication.
+ * Adds support for `spark_write_table()` * Simplifies to `spark_connect()` connection output. * Improves how it process host, token and cluster ID. If it doesn't find a token, it no longer fails. It will pass nothing for that argument, letting -databricks.connect find the token. This allows for Databricks configurations +'databricks.connect' find the token. This allows for Databricks configurations files to work. # pysparklyr 0.1.2 diff --git a/R/deploy.R b/R/deploy.R index 086a5bb..92740ee 100644 --- a/R/deploy.R +++ b/R/deploy.R @@ -7,7 +7,9 @@ #' #' @param appDir A directory containing an application (e.g. a Shiny app or plumber API) #' Defaults to NULL. If left NULL, and if called within RStudio, it will attempt -#' to use the folder of the currently opened document within the IDE. +#' to use the folder of the currently opened document within the IDE. If there are +#' no opened documents, or not working in the RStudio IDE, then it will use +#' `getwd()` as the default value. #' @param python Full path to a python binary for use by `reticulate.` It defaults to NULL. #' If left NULL, this function will attempt to find a viable local Python #' environment to replicate using the following hierarchy: @@ -126,10 +128,14 @@ deploy <- function( if (is.null(appDir)) { if (interactive() && in_rstudio) { editor_doc <- getSourceEditorContext() - appDir <- dirname(editor_doc$path) - + if(!is.null(editor_doc)) { + appDir <- dirname(editor_doc$path) + } } } + if(is.null(appDir)) { + appDir <- getwd() + } cli_inform("{.class - App and Spark -}") cli_alert_info("{.header Source: {.emph '{appDir}'}}") python <- deploy_find_environment( diff --git a/man/deploy_databricks.Rd b/man/deploy_databricks.Rd index d901a8d..4e4ff82 100644 --- a/man/deploy_databricks.Rd +++ b/man/deploy_databricks.Rd @@ -20,7 +20,9 @@ deploy_databricks( \arguments{ \item{appDir}{A directory containing an application (e.g. a Shiny app or plumber API) Defaults to NULL. 
If left NULL, and if called within RStudio, it will attempt -to use the folder of the currently opened document within the IDE.} +to use the folder of the currently opened document within the IDE. If there are +no opened documents, or not working in the RStudio IDE, then it will use +\code{getwd()} as the default value.} \item{python}{Full path to a python binary for use by \code{reticulate.} It defaults to NULL. If left NULL, this function will attempt to find a viable local Python