
Commit

Merge pull request #95 from mlverse/deploy
Adds `deploy_databricks()`
edgararuiz authored Dec 19, 2023
2 parents cdf86e6 + b2dbbc8 commit dcc61bd
Showing 8 changed files with 331 additions and 26 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: pysparklyr
Title: Provides a 'PySpark' Back-End for the 'sparklyr' Package
Version: 0.1.2.9000
Version: 0.1.2.9001
Authors@R: c(
person("Edgar", "Ruiz", , "[email protected]", role = c("aut", "cre")),
person(given = "Posit Software, PBC", role = c("cph", "fnd"))
@@ -30,7 +30,8 @@ Imports:
vctrs,
processx,
httr2,
rstudioapi
rstudioapi,
rsconnect
URL: https://github.com/mlverse/pysparklyr
BugReports: https://github.com/mlverse/pysparklyr/issues
Suggests:
@@ -40,4 +41,3 @@ Suggests:
tibble,
withr
Config/testthat/edition: 3

5 changes: 5 additions & 0 deletions NAMESPACE
@@ -73,6 +73,7 @@ S3method(tbl_ptype,tbl_pyspark)
S3method(tidyselect_data_has_predicates,tbl_pyspark)
export("%>%")
export(connection_databricks_shinyapp)
export(deploy_databricks)
export(install_databricks)
export(install_pyspark)
export(installed_components)
@@ -112,6 +113,7 @@ importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(purrr,map_lgl)
importFrom(purrr,pmap_chr)
importFrom(purrr,transpose)
importFrom(rlang,`!!!`)
importFrom(rlang,`!!`)
importFrom(rlang,`%||%`)
@@ -127,6 +129,9 @@ importFrom(rlang,quo_is_null)
importFrom(rlang,set_names)
importFrom(rlang,sym)
importFrom(rlang,warn)
importFrom(rsconnect,accounts)
importFrom(rsconnect,deployApp)
importFrom(rstudioapi,getSourceEditorContext)
importFrom(rstudioapi,jobRunScript)
importFrom(rstudioapi,showQuestion)
importFrom(sparklyr,connection_is_open)
5 changes: 4 additions & 1 deletion NEWS.md
@@ -2,13 +2,16 @@

### Improvements

* Adds the `deploy_databricks()` function. It simplifies publishing to Posit
Connect by automating much of the needed setup and then triggering the publication.

* Adds support for `spark_write_table()`

* Simplifies the `spark_connect()` connection output.

* Improves how it processes the host, token, and cluster ID. If it doesn't find a
token, it no longer fails. It will pass nothing for that argument, letting
databricks.connect find the token. This allows Databricks configuration
'databricks.connect' find the token. This allows Databricks configuration
files to work.

# pysparklyr 0.1.2
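To make the NEWS entry above concrete, here is a minimal usage sketch of the new function. The argument names (`appDir`, `version`) are assumptions inferred from this commit, not a documented signature; host and token are expected to resolve the same way `spark_connect()` does.

```r
library(pysparklyr)

# Hypothetical call: `appDir` and `version` are assumed argument names.
# Host and token should resolve like spark_connect() does: explicit
# argument first, then DATABRICKS_* environment variables, then the
# CONNECT_DATABRICKS_* variables that Posit Connect sets.
deploy_databricks(
  appDir = "my-quarto-project", # folder with the content to publish
  version = "14.1"              # Databricks Runtime of the target cluster
)
```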
18 changes: 8 additions & 10 deletions R/data-write.R
@@ -1,10 +1,10 @@
#' @export
spark_write_table.tbl_pyspark <- function(x,
name,
mode = NULL,
options = list(),
partition_by = NULL,
...) {
name,
mode = NULL,
options = list(),
partition_by = NULL,
...) {
args <- list(...)
save_action <- ifelse(identical(mode, "append"), "insertInto", "saveAsTable")
pyspark_write_generic(
@@ -135,9 +135,7 @@ pyspark_write_generic <- function(
options,
args,
save_action = "save",
expand_path = TRUE
) {

expand_path = TRUE) {
query <- tbl_pyspark_sdf(x)

if (is.null(partition_by)) {
@@ -150,13 +148,13 @@

path <- ifelse(expand_path, path_expand(path), path)

if(!is.null(format)) {
if (!is.null(format)) {
x <- py_invoke(query_prep, "format", format)
} else {
x <- query_prep
}

if(!is.null(mode)) {
if (!is.null(mode)) {
x <- py_invoke(x, "mode", mode)
}

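A hedged usage sketch of the `spark_write_table()` method reformatted above: per the `save_action` line in the diff, `mode = "append"` routes the write through PySpark's `insertInto()`, while any other mode falls back to `saveAsTable()`. The connection and table name are illustrative.

```r
library(sparklyr)

# Assumes host, token, and cluster ID resolve from the environment,
# as described in this release's NEWS entries.
sc <- spark_connect(method = "databricks_connect")

cars <- copy_to(sc, mtcars, "cars_tmp")

# mode = "append" -> insertInto(); any other mode -> saveAsTable()
spark_write_table(cars, name = "my_catalog.my_schema.cars", mode = "append")

spark_disconnect(sc)
```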
34 changes: 24 additions & 10 deletions R/databricks-utils.R
@@ -1,6 +1,16 @@
databricks_host <- function(host = NULL, fail = TRUE) {
host <- host %||% Sys.getenv("DATABRICKS_HOST", unset = NA)
if (is.null(host) | is.na(host)) {
if(!is.null(host)) {
return(set_names(host, "argument"))
}
env_host <- Sys.getenv("DATABRICKS_HOST", unset = NA)
connect_host <- Sys.getenv("CONNECT_DATABRICKS_HOST", unset = NA)
if(!is.na(env_host)) {
host <- set_names(env_host, "environment")
}
if(!is.na(connect_host)) {
host <- set_names(connect_host, "environment_connect")
}
if (is.null(host)) {
if (fail) {
cli_abort(c(
paste0(
@@ -17,19 +27,24 @@ databricks_host <- function(host = NULL, fail = TRUE) {
}

databricks_token <- function(token = NULL, fail = FALSE) {
name <- "argument"
if(!is.null(token)) {
return(set_names(token, "argument"))
}
# Checks for OAuth Databricks token inside the RStudio API
if (is.null(token) && exists(".rs.api.getDatabricksToken")) {
getDatabricksToken <- get(".rs.api.getDatabricksToken")
name <- "oauth"
token <- getDatabricksToken(databricks_host())
token <- set_names(getDatabricksToken(databricks_host()), "oauth")
}
# Checks the Environment Variable
if (is.null(token)) {
env_token <- Sys.getenv("DATABRICKS_TOKEN", unset = NA)
connect_token <- Sys.getenv("CONNECT_DATABRICKS_TOKEN", unset = NA)
if (!is.na(env_token)) {
name <- "environment"
token <- env_token
token <- set_names(env_token, "environment")
} else {
if(!is.na(connect_token)) {
token <- set_names(connect_token, "environment_connect")
}
}
}
if (is.null(token)) {
@@ -44,11 +59,10 @@ databricks_token <- function(token = NULL, fail = FALSE) {
"Please add your Token to 'DATABRICKS_TOKEN' inside your .Renviron file."
))
} else {
name <- NULL
token <- ""
}
}
set_names(token, name)
token
}

databricks_dbr_version_name <- function(cluster_id,
@@ -130,7 +144,7 @@ databricks_dbr_version <- function(cluster_id,
cluster_id = cluster_id,
host = host,
token = token
)
)
vn$version
}

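The refactor above replaces a single `%||%` fallback with an explicit resolution order. Here is a standalone sketch of that order (not the package's internal API): an explicit argument wins, otherwise `DATABRICKS_HOST` is read, and a `CONNECT_DATABRICKS_HOST` value set by Posit Connect overrides it; the vector's name records where the value came from.

```r
# Standalone illustration of the precedence implemented above;
# resolve_databricks_host() is a hypothetical name, not exported
# by pysparklyr.
resolve_databricks_host <- function(host = NULL) {
  if (!is.null(host)) {
    return(setNames(host, "argument"))
  }
  env_host <- Sys.getenv("DATABRICKS_HOST", unset = NA)
  connect_host <- Sys.getenv("CONNECT_DATABRICKS_HOST", unset = NA)
  out <- NULL
  if (!is.na(env_host)) out <- setNames(env_host, "environment")
  if (!is.na(connect_host)) out <- setNames(connect_host, "environment_connect")
  out
}

Sys.setenv(DATABRICKS_HOST = "https://example.cloud.databricks.com")
resolve_databricks_host()
#> environment
#> "https://example.cloud.databricks.com"
```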
The remaining 3 changed files are not shown here.
