
Commit

Merge pull request #95 from mlverse/deploy
Adds `deploy_databricks()`
edgararuiz authored Dec 19, 2023
2 parents cdf86e6 + b2dbbc8 commit dcc61bd
Showing 8 changed files with 331 additions and 26 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: pysparklyr
Title: Provides a 'PySpark' Back-End for the 'sparklyr' Package
Version: 0.1.2.9000
Version: 0.1.2.9001
Authors@R: c(
person("Edgar", "Ruiz", , "[email protected]", role = c("aut", "cre")),
person(given = "Posit Software, PBC", role = c("cph", "fnd"))
@@ -30,7 +30,8 @@ Imports:
vctrs,
processx,
httr2,
rstudioapi
rstudioapi,
rsconnect
URL: https://github.com/mlverse/pysparklyr
BugReports: https://github.com/mlverse/pysparklyr/issues
Suggests:
@@ -40,4 +41,3 @@ Suggests:
tibble,
withr
Config/testthat/edition: 3

5 changes: 5 additions & 0 deletions NAMESPACE
@@ -73,6 +73,7 @@ S3method(tbl_ptype,tbl_pyspark)
S3method(tidyselect_data_has_predicates,tbl_pyspark)
export("%>%")
export(connection_databricks_shinyapp)
export(deploy_databricks)
export(install_databricks)
export(install_pyspark)
export(installed_components)
@@ -112,6 +113,7 @@ importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(purrr,map_lgl)
importFrom(purrr,pmap_chr)
importFrom(purrr,transpose)
importFrom(rlang,`!!!`)
importFrom(rlang,`!!`)
importFrom(rlang,`%||%`)
@@ -127,6 +129,9 @@ importFrom(rlang,quo_is_null)
importFrom(rlang,set_names)
importFrom(rlang,sym)
importFrom(rlang,warn)
importFrom(rsconnect,accounts)
importFrom(rsconnect,deployApp)
importFrom(rstudioapi,getSourceEditorContext)
importFrom(rstudioapi,jobRunScript)
importFrom(rstudioapi,showQuestion)
importFrom(sparklyr,connection_is_open)
5 changes: 4 additions & 1 deletion NEWS.md
@@ -2,13 +2,16 @@

### Improvements

* Adds the `deploy_databricks()` function. It simplifies publishing to Posit
Connect by automating much of the needed setup and then triggering the publication.

* Adds support for `spark_write_table()`

* Simplifies the `spark_connect()` connection output.

* Improves how it processes the host, token, and cluster ID. If it doesn't find a
token, it no longer fails. It will pass nothing for that argument, letting
databricks.connect find the token. This allows Databricks configuration
'databricks.connect' find the token. This allows Databricks configuration
files to work.

# pysparklyr 0.1.2
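To make the NEWS entry above concrete, here is a minimal usage sketch of the new function. The argument names (`appDir`, `version`) are assumptions inferred from this commit, not a documented signature; host and token are expected to resolve the same way `spark_connect()` does.

```r
library(pysparklyr)

# Hypothetical call: `appDir` and `version` are assumed argument names.
# Host and token should resolve like spark_connect() does: explicit
# argument first, then DATABRICKS_* environment variables, then the
# CONNECT_DATABRICKS_* variables that Posit Connect sets.
deploy_databricks(
  appDir = "my-quarto-project", # folder with the content to publish
  version = "14.1"              # Databricks Runtime of the target cluster
)
```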
18 changes: 8 additions & 10 deletions R/data-write.R
@@ -1,10 +1,10 @@
#' @export
spark_write_table.tbl_pyspark <- function(x,
name,
mode = NULL,
options = list(),
partition_by = NULL,
...) {
name,
mode = NULL,
options = list(),
partition_by = NULL,
...) {
args <- list(...)
save_action <- ifelse(identical(mode, "append"), "insertInto", "saveAsTable")
pyspark_write_generic(
@@ -135,9 +135,7 @@ pyspark_write_generic <- function(
options,
args,
save_action = "save",
expand_path = TRUE
) {

expand_path = TRUE) {
query <- tbl_pyspark_sdf(x)

if (is.null(partition_by)) {
@@ -150,13 +148,13 @@

path <- ifelse(expand_path, path_expand(path), path)

if(!is.null(format)) {
if (!is.null(format)) {
x <- py_invoke(query_prep, "format", format)
} else {
x <- query_prep
}

if(!is.null(mode)) {
if (!is.null(mode)) {
x <- py_invoke(x, "mode", mode)
}

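A hedged usage sketch of the `spark_write_table()` method reformatted above: per the `save_action` line in the diff, `mode = "append"` routes the write through PySpark's `insertInto()`, while any other mode falls back to `saveAsTable()`. The connection and table name are illustrative.

```r
library(sparklyr)

# Assumes host, token, and cluster ID resolve from the environment,
# as described in this release's NEWS entries.
sc <- spark_connect(method = "databricks_connect")

cars <- copy_to(sc, mtcars, "cars_tmp")

# mode = "append" -> insertInto(); any other mode -> saveAsTable()
spark_write_table(cars, name = "my_catalog.my_schema.cars", mode = "append")

spark_disconnect(sc)
```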
34 changes: 24 additions & 10 deletions R/databricks-utils.R
@@ -1,6 +1,16 @@
databricks_host <- function(host = NULL, fail = TRUE) {
host <- host %||% Sys.getenv("DATABRICKS_HOST", unset = NA)
if (is.null(host) | is.na(host)) {
if(!is.null(host)) {
return(set_names(host, "argument"))
}
env_host <- Sys.getenv("DATABRICKS_HOST", unset = NA)
connect_host <- Sys.getenv("CONNECT_DATABRICKS_HOST", unset = NA)
if(!is.na(env_host)) {
host <- set_names(env_host, "environment")
}
if(!is.na(connect_host)) {
host <- set_names(connect_host, "environment_connect")
}
if (is.null(host)) {
if (fail) {
cli_abort(c(
paste0(
@@ -17,19 +27,24 @@ databricks_host <- function(host = NULL, fail = TRUE) {
}

databricks_token <- function(token = NULL, fail = FALSE) {
name <- "argument"
if(!is.null(token)) {
return(set_names(token, "argument"))
}
# Checks for OAuth Databricks token inside the RStudio API
if (is.null(token) && exists(".rs.api.getDatabricksToken")) {
getDatabricksToken <- get(".rs.api.getDatabricksToken")
name <- "oauth"
token <- getDatabricksToken(databricks_host())
token <- set_names(getDatabricksToken(databricks_host()), "oauth")
}
# Checks the Environment Variable
if (is.null(token)) {
env_token <- Sys.getenv("DATABRICKS_TOKEN", unset = NA)
connect_token <- Sys.getenv("CONNECT_DATABRICKS_TOKEN", unset = NA)
if (!is.na(env_token)) {
name <- "environment"
token <- env_token
token <- set_names(env_token, "environment")
} else {
if(!is.na(connect_token)) {
token <- set_names(connect_token, "environment_connect")
}
}
}
if (is.null(token)) {
@@ -44,11 +59,10 @@ databricks_token <- function(token = NULL, fail = FALSE) {
"Please add your Token to 'DATABRICKS_TOKEN' inside your .Renviron file."
))
} else {
name <- NULL
token <- ""
}
}
set_names(token, name)
token
}

databricks_dbr_version_name <- function(cluster_id,
@@ -130,7 +144,7 @@ databricks_dbr_version <- function(cluster_id,
cluster_id = cluster_id,
host = host,
token = token
)
)
vn$version
}

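The refactor above replaces a single `%||%` fallback with an explicit resolution order. Here is a standalone sketch of that order (not the package's internal API): an explicit argument wins, otherwise `DATABRICKS_HOST` is read, and a `CONNECT_DATABRICKS_HOST` value set by Posit Connect overrides it; the vector's name records where the value came from.

```r
# Standalone illustration of the precedence implemented above;
# resolve_databricks_host() is a hypothetical name, not exported
# by pysparklyr.
resolve_databricks_host <- function(host = NULL) {
  if (!is.null(host)) {
    return(setNames(host, "argument"))
  }
  env_host <- Sys.getenv("DATABRICKS_HOST", unset = NA)
  connect_host <- Sys.getenv("CONNECT_DATABRICKS_HOST", unset = NA)
  out <- NULL
  if (!is.na(env_host)) out <- setNames(env_host, "environment")
  if (!is.na(connect_host)) out <- setNames(connect_host, "environment_connect")
  out
}

Sys.setenv(DATABRICKS_HOST = "https://example.cloud.databricks.com")
resolve_databricks_host()
#> environment
#> "https://example.cloud.databricks.com"
```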
The remaining 3 changed files are not shown here.
