diff --git a/DESCRIPTION b/DESCRIPTION index 0ef6bb5..f6b55b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: noctua Type: Package Title: Connect to 'AWS Athena' using R 'AWS SDK' 'paws' ('DBI' Interface) -Version: 2.6.0.9000 +Version: 2.6.1 Authors@R: person("Dyfan", "Jones", email="dyfan.r.jones@gmail.com", role= c("aut", "cre")) Description: Designed to be compatible with the 'R' package 'DBI' (Database Interface) diff --git a/NEWS.md b/NEWS.md index 41defc6..0562340 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# noctua 2.6.1 +## Bug Fix: +* Prevent assuming role from `AWS_ROLE_ARN`. This caused confusion when connecting through web identity ([RAthena # 177](https://github.com/DyfanJones/RAthena/issues/177)) +* Support `dbplyr::in_catalog` when working with `dplyr::tbl` ([RAthena # 178](https://github.com/DyfanJones/RAthena/issues/178)) + # noctua 2.6.0 ## Feature: * Add clear_s3_resource parameter to RAthena_options to prevent AWS Athena output AWS S3 resource being cleared up by `dbClearResult` ([RAthena # 168](https://github.com/DyfanJones/RAthena/issues/168)). Thanks to @juhoautio for the request. diff --git a/R/dplyr_integration.R b/R/dplyr_integration.R index 469b0a3..b628489 100644 --- a/R/dplyr_integration.R +++ b/R/dplyr_integration.R @@ -24,7 +24,7 @@ #' \dontrun{ #' # Note: #' # - Require AWS Account to run below example. -#' # - Different connection methods can be used please see `noctua::dbConnect` documnentation +#' # - Different connection methods can be used please see `noctua::dbConnect` documentation #' #' library(DBI) #' library(dplyr) diff --git a/README.md b/README.md index 930c3fd..66b53a9 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ tbl(con, sql("SELECT * FROM iris")) dplyr provides lazy querying with allows to short hand `tbl(con, sql("SELECT * FROM iris"))` to `tbl(con, "iris")`. For more information -please look at . 
+please look at ``` r tbl(con, "iris") diff --git a/cran-comments.md b/cran-comments.md index ab2b089..61219b5 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,9 @@ ## Submission -This release contains a couple of new features. +This release contains a couple of bug fixes. -## Feature: -* Add `clear_s3_resource` parameter to `RAthena_options` to prevent `Amazon Web Service Athena` output `Amazon Web Service S3` -* Support extra paws parameters for -* Support `endpoint_override` parameter allow default endpoints for each service to be overridden +## Bug Fix: +* Prevent assuming role from `AWS_ROLE_ARN`. This caused confusion when connecting through web identity +* Support `dbplyr::in_catalog` when working with `dplyr::tbl` ## Test environments * local OS X install, 4.2.0 diff --git a/docs/404.html b/docs/404.html index c910664..ddfb19d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ noctua - 2.6.0 + 2.6.1 @@ -125,7 +125,7 @@

Page not found (404)

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 40c6764..ae7d30e 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -93,7 +93,7 @@

License

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/aws_athena_query_caching.html b/docs/articles/aws_athena_query_caching.html index 19e62f8..5ad2828 100644 --- a/docs/articles/aws_athena_query_caching.html +++ b/docs/articles/aws_athena_query_caching.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -135,30 +135,30 @@

Caching benefitsAWS S3.

-library(DBI)
-library(noctua)
-
-con = dbConnect(athena())
-
-# Start caching queries
-noctua_options(cache_size = 10)
-
-# Upload Data to AWS Athena
-dbWriteTable(con, "iris", iris, partition = c("Partition" = "01"))
-
-# initial query to AWS Athena
-system.time(df1 = dbGetQuery(con, "select * from iris"))
-
-# Info: (Data scanned: 3.63 KB)
-#   user  system elapsed 
-#  0.105   0.004   3.397 
-
-# repeat query to AWS Athena
-system.time(df2 = dbGetQuery(con, "select * from iris"))
-
-# Info: (Data scanned: 3.63 KB)
-#   user  system elapsed 
-#  0.072   0.000   0.348 
+library(DBI) +library(noctua) + +con = dbConnect(athena()) + +# Start caching queries +noctua_options(cache_size = 10) + +# Upload Data to AWS Athena +dbWriteTable(con, "iris", iris, partition = c("Partition" = "01")) + +# initial query to AWS Athena +system.time(df1 = dbGetQuery(con, "select * from iris")) + +# Info: (Data scanned: 3.63 KB) +# user system elapsed +# 0.105 0.004 3.397 + +# repeat query to AWS Athena +system.time(df2 = dbGetQuery(con, "select * from iris")) + +# Info: (Data scanned: 3.63 KB) +# user system elapsed +# 0.072 0.000 0.348

Here we can see a performance increase of x10 with repeat query execution.

@@ -170,21 +170,21 @@

Caching weakness
-# Updating iris table
-dbWriteTable(con, "iris", iris, append = T, partition = c("Partition" = "02"))
-
-dt5 = dbGetQuery(con, "select * from iris")
-
-# Stop using cache data
-noctua_options()
-
-dt6 = dbGetQuery(con, "select * from iris")
-
-nrow(dt5)
-# 150
-
-nrow(dt6)
-# 300
+# Updating iris table +dbWriteTable(con, "iris", iris, append = T, partition = c("Partition" = "02")) + +dt5 = dbGetQuery(con, "select * from iris") + +# Stop using cache data +noctua_options() + +dt6 = dbGetQuery(con, "select * from iris") + +nrow(dt5) +# 150 + +nrow(dt6) +# 300

Sadly the cached query didn’t pick up the new data from iris.

@@ -195,27 +195,27 @@

Cache memory
-# Start caching
-noctua_options(cache_size = 10)
-res1 = dbExecute(con, "select * from iris")
-
-# Stop caching
-noctua_options()
-res2 = dbExecute(con, "select * from iris")
-
-# Start caching
-noctua_options(cache_size = 10)
-res3 = dbExecute(con, "select * from iris")
-
-# Compare Query ID's
-res1@info$QueryExecutionId
-# 9a9272f5-0632-4774-9aa9-d07f151dabc5
-
-res2@info$QueryExecutionId
-# be12fe0-3ec0-4595-b3e6-b3bf67efa266
-
-res3@info$QueryExecutionId
-# 9a9272f5-0632-4774-9aa9-d07f151dabc5
+# Start caching +noctua_options(cache_size = 10) +res1 = dbExecute(con, "select * from iris") + +# Stop caching +noctua_options() +res2 = dbExecute(con, "select * from iris") + +# Start caching +noctua_options(cache_size = 10) +res3 = dbExecute(con, "select * from iris") + +# Compare Query ID's +res1@info$QueryExecutionId +# 9a9272f5-0632-4774-9aa9-d07f151dabc5 + +res2@info$QueryExecutionId +# be12fe0-3ec0-4595-b3e6-b3bf67efa266 + +res3@info$QueryExecutionId +# 9a9272f5-0632-4774-9aa9-d07f151dabc5

We can see that res1 and res3 utilise the same QueryID, even tho caching was stopped and started.

@@ -226,7 +226,7 @@

Clear down cacheclear_cache within noctua_options to TRUE

-noctua_options(clear_cache = T)
+noctua_options(clear_cache = T) @@ -248,7 +248,7 @@

Clear down cache

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/aws_athena_unload.html b/docs/articles/aws_athena_unload.html index 5c90e58..5118acb 100644 --- a/docs/articles/aws_athena_unload.html +++ b/docs/articles/aws_athena_unload.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -217,50 +217,50 @@

Performance comparison:wr.catalog.table(database="awswrangler_test", table="noaa")

Benchmark unload method using noctua.

-# R
-library(DBI)
-
-con <- dbConnect(noctua::athena())
-
-dbGetQuery(con, "select count(*) as n from awswrangler_test.noaa")
-# Info: (Data scanned: 0 Bytes)
-#           n
-# 1: 29554197
-
-# Query ran using CSV output
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa")
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed
-#  57.004   8.430 160.567 
-
-dim(df)
-# [1] 29554197        8
-
-noctua::noctua_options(cache_size = 1)
-
-# Query ran using UNLOAD Parquet output
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T)
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed 
-#  21.622   2.350  39.232 
-
-dim(df)
-# [1] 29554197        8
-
-# Query ran using cached UNLOAD Parquet output
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T)
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed 
-#  13.738   1.886  11.029 
-
-dim(df)
-# [1] 29554197        8
+# R +library(DBI) + +con <- dbConnect(noctua::athena()) + +dbGetQuery(con, "select count(*) as n from awswrangler_test.noaa") +# Info: (Data scanned: 0 Bytes) +# n +# 1: 29554197 + +# Query ran using CSV output +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa") +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 57.004 8.430 160.567 + +dim(df) +# [1] 29554197 8 + +noctua::noctua_options(cache_size = 1) + +# Query ran using UNLOAD Parquet output +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T) +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 21.622 2.350 39.232 + +dim(df) +# [1] 29554197 8 + +# Query ran using cached UNLOAD Parquet output +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T) +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 13.738 1.886 11.029 + +dim(df) +# [1] 29554197 8 @@ -297,53 +297,53 @@

Set unload = TRUE on pack TRUE package level and all DBI functionality will use it when applicable.

-library(DBI)
-library(noctua)
-
-con <- dbConnect(athena())
-
-noctua_options(unload = TRUE)
-
-dbi_noaa = dbGetQuery(con, "select * from awswrangler_test.noaa")
+library(DBI) +library(noctua) + +con <- dbConnect(athena()) + +noctua_options(unload = TRUE) + +dbi_noaa = dbGetQuery(con, "select * from awswrangler_test.noaa")

This also give benefits for when using dplyr functionality. When setting noctua_options(unload=TRUE) all dplyr lazy evaluation will start using AWS Athena unload.

-tbl_noaa = tbl(con, dbplyr::in_schema("awswrangler_test", "noaa"))
-
-tbl_noaa %>% collect()
-
-#> # A tibble: 29,554,197 × 8
-#>    id          dt                  element value m_flag q_flag s_flag obs_time
-#>    <chr>       <dttm>              <chr>   <int> <chr>  <chr>  <chr>  <chr>   
-#>  1 ASN00074198 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  2 ASN00074222 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  3 ASN00074227 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  4 ASN00075001 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  5 ASN00075005 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  6 ASN00075006 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  7 ASN00075011 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  8 ASN00075013 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#>  9 ASN00075014 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#> 10 ASN00075018 1890-01-05 00:00:00 PRCP        0 NA     NA     a      NA      
-#> # … with 29,554,187 more rows
-
-noaa %>% filter(element == "PRCP") %>% collect()
-#> # A tibble: 15,081,580 × 8
-#>    id          dt                  element value m_flag q_flag s_flag obs_time
-#>    <chr>       <dttm>              <chr>   <int> <chr>  <chr>  <chr>  <chr>   
-#>  1 SWE00140492 1890-01-06 00:00:00 PRCP        0 NA     NA     E      NA      
-#>  2 SWE00140594 1890-01-06 00:00:00 PRCP        4 NA     NA     E      NA      
-#>  3 SWE00140746 1890-01-06 00:00:00 PRCP        0 NA     NA     E      NA      
-#>  4 SWE00140828 1890-01-06 00:00:00 PRCP        0 NA     NA     E      NA      
-#>  5 SWM00002080 1890-01-06 00:00:00 PRCP        0 NA     NA     E      NA      
-#>  6 SWM00002485 1890-01-06 00:00:00 PRCP        1 NA     NA     E      NA      
-#>  7 SWM00002584 1890-01-06 00:00:00 PRCP        0 NA     NA     E      NA      
-#>  8 TSE00147769 1890-01-06 00:00:00 PRCP       33 NA     NA     E      NA      
-#>  9 TSE00147775 1890-01-06 00:00:00 PRCP      150 NA     NA     E      NA      
-#> 10 UK000047811 1890-01-06 00:00:00 PRCP       49 NA     NA     E      NA      
-# … with 15,081,570 more rows
+tbl_noaa = tbl(con, dbplyr::in_schema("awswrangler_test", "noaa")) + +tbl_noaa %>% collect() + +#> # A tibble: 29,554,197 × 8 +#> id dt element value m_flag q_flag s_flag obs_time +#> <chr> <dttm> <chr> <int> <chr> <chr> <chr> <chr> +#> 1 ASN00074198 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 2 ASN00074222 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 3 ASN00074227 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 4 ASN00075001 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 5 ASN00075005 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 6 ASN00075006 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 7 ASN00075011 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 8 ASN00075013 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 9 ASN00075014 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> 10 ASN00075018 1890-01-05 00:00:00 PRCP 0 NA NA a NA +#> # … with 29,554,187 more rows + +noaa %>% filter(element == "PRCP") %>% collect() +#> # A tibble: 15,081,580 × 8 +#> id dt element value m_flag q_flag s_flag obs_time +#> <chr> <dttm> <chr> <int> <chr> <chr> <chr> <chr> +#> 1 SWE00140492 1890-01-06 00:00:00 PRCP 0 NA NA E NA +#> 2 SWE00140594 1890-01-06 00:00:00 PRCP 4 NA NA E NA +#> 3 SWE00140746 1890-01-06 00:00:00 PRCP 0 NA NA E NA +#> 4 SWE00140828 1890-01-06 00:00:00 PRCP 0 NA NA E NA +#> 5 SWM00002080 1890-01-06 00:00:00 PRCP 0 NA NA E NA +#> 6 SWM00002485 1890-01-06 00:00:00 PRCP 1 NA NA E NA +#> 7 SWM00002584 1890-01-06 00:00:00 PRCP 0 NA NA E NA +#> 8 TSE00147769 1890-01-06 00:00:00 PRCP 33 NA NA E NA +#> 9 TSE00147775 1890-01-06 00:00:00 PRCP 150 NA NA E NA +#> 10 UK000047811 1890-01-06 00:00:00 PRCP 49 NA NA E NA +# … with 15,081,570 more rows @@ -364,7 +364,7 @@

Set unload = TRUE on pack

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/aws_s3_backend.html b/docs/articles/aws_s3_backend.html index 4d0b4d4..4b0140e 100644 --- a/docs/articles/aws_s3_backend.html +++ b/docs/articles/aws_s3_backend.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -140,12 +140,12 @@

AWS S3 Structures
-dbWriteTable(con, "myschema.table", table, 
-             s3.location = "s3://mybucket/myschema/table",
-             partition = c("year" = "2020"))
-
-# AWS S3 location
-"s3://mybucket/myschema/table/year=2020/table.tsv"
+dbWriteTable(con, "myschema.table", table, + s3.location = "s3://mybucket/myschema/table", + partition = c("year" = "2020")) + +# AWS S3 location +"s3://mybucket/myschema/table/year=2020/table.tsv"

File Types @@ -191,7 +191,7 @@

diff --git a/docs/articles/changing_backend_file_parser.html b/docs/articles/changing_backend_file_parser.html index e074384..8a7920b 100644 --- a/docs/articles/changing_backend_file_parser.html +++ b/docs/articles/changing_backend_file_parser.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -129,12 +129,12 @@

Intro use vroom as the file parser, noctua_options function has been created to enable this:

-library(DBI)
-library(noctua)
-
-con = dbConnect(athena())
-
-noctua_options(file_parser = c("data.table", "vroom"))
+library(DBI) +library(noctua) + +con = dbConnect(athena()) + +noctua_options(file_parser = c("data.table", "vroom"))

By setting the file_parser to "vroom" then the backend will change to allow vroom’s file parser to be used instead of data.table.

@@ -145,8 +145,8 @@

Change back to data.tableTo go back to using data.table as the file parser it is a simple as calling the noctua_options function:

+# return to using data.table as file parser +noctua_options()

Swapping on the fly @@ -154,24 +154,24 @@

Swapping on the fly
-library(DBI)
-library(noctua)
-
-con = dbConnect(athena())
-
-# upload data
-dbWriteTable(con, "iris", iris)
-
-# use default data.table file parser
-df1 = dbGetQuery(con, "select * from iris")
-
-# use vroom as file parser
-noctua_options("vroom")
-df2 = dbGetQuery(con, "select * from iris")
-
-# return back to data.table file parser
-noctua_options()
-df3 = dbGetQuery(con, "select * from iris")

+library(DBI) +library(noctua) + +con = dbConnect(athena()) + +# upload data +dbWriteTable(con, "iris", iris) + +# use default data.table file parser +df1 = dbGetQuery(con, "select * from iris") + +# use vroom as file parser +noctua_options("vroom") +df2 = dbGetQuery(con, "select * from iris") + +# return back to data.table file parser +noctua_options() +df3 = dbGetQuery(con, "select * from iris")

Why should you consider vroom? @@ -241,7 +241,7 @@

Why should you consider vroom

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/convert_and_save_cost.html b/docs/articles/convert_and_save_cost.html index ee1596b..6d93ccc 100644 --- a/docs/articles/convert_and_save_cost.html +++ b/docs/articles/convert_and_save_cost.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -166,19 +166,19 @@

Upload Data in delimited format

Uploading Data in delimited format is the easiest method.

-library(DBI)
-library(noctua)
-
-con <- dbConnect(athena())
-
-# create a temporary database to upload data into
-res <- dbExecute(con, "CREATE IF NOT EXISTS DATABASE temp")
-dbClearResult(res)
-
-iris2 <- iris
-iris2$time_stamp <- format(Sys.Date(), "%Y%m%d")
-
-dbWriteTable(con, "temp.iris_delim", iris2)
+library(DBI) +library(noctua) + +con <- dbConnect(athena()) + +# create a temporary database to upload data into +res <- dbExecute(con, "CREATE IF NOT EXISTS DATABASE temp") +dbClearResult(res) + +iris2 <- iris +iris2$time_stamp <- format(Sys.Date(), "%Y%m%d") + +dbWriteTable(con, "temp.iris_delim", iris2)

However delimited file format isn’t the most cost effective when it comes to using AWS Athena. To overcome this we can convert this by using AWS Athena.

@@ -188,55 +188,55 @@

Convert Data into Parquet or ORC

Converting table to a non-partitioned Parquet or ORC format.

-# convert to parquet
-dbConvertTable(con,
-               obj = "temp.iris_delim",
-               name = "iris_parquet",
-               file.type = "parquet")
-               
-# convert to orc
-dbConvertTable(con,
-               obj = "temp.iris_delim",
-               name = "iris_orc",
-               file.type = "orc")
+# convert to parquet +dbConvertTable(con, + obj = "temp.iris_delim", + name = "iris_parquet", + file.type = "parquet") + +# convert to orc +dbConvertTable(con, + obj = "temp.iris_delim", + name = "iris_orc", + file.type = "orc")

NOTE: By default dbConvertTable compresses Parquet/ ORC format using snappy compression.

noctua goes a step further by allowing tables to be converted with partitions.

-# convert to parquet with partition time_stamp
-dbConvertTable(con,
-               obj = "temp.iris_delim",
-               name = "iris_parquet_partition",
-               partition = "time_stamp",
-               file.type = "parquet")
+# convert to parquet with partition time_stamp +dbConvertTable(con, + obj = "temp.iris_delim", + name = "iris_parquet_partition", + partition = "time_stamp", + file.type = "parquet")

noctua even allows SQL queries to be converted into desired file format:

-dbConvertTable(con,
-              obj = SQL("select 
-                          Sepal_Length,
-                          Sepal_Width,
-                          date_format(current_date, '%Y%m%d') as time_stamp 
-                        from temp.iris_delim"),
-              name = "iris_orc_partition",
-              partition = "time_stamp",
-              file.type = "orc")
+dbConvertTable(con, + obj = SQL("select + Sepal_Length, + Sepal_Width, + date_format(current_date, '%Y%m%d') as time_stamp + from temp.iris_delim"), + name = "iris_orc_partition", + partition = "time_stamp", + file.type = "orc")

Insert into table for ETL processes

As we have created partitioned data, we can easily insert into:

-res <- 
-  dbExecute(con, "insert into iris_orc_partition
-                  select 
-                    Sepal_Length,
-                    Sepal_Width, 
-                    date_format(date_add('date', 1, current_date) , '%Y%m%d') time_stamp 
-                  from temp.iris_delim")
-dbClearResult(res)
+res <- + dbExecute(con, "insert into iris_orc_partition + select + Sepal_Length, + Sepal_Width, + date_format(date_add('date', 1, current_date) , '%Y%m%d') time_stamp + from temp.iris_delim") +dbClearResult(res)

What this all means is that you can create ETL processes by uploading data in basic file format (delimited), and then converting / inserting into the prefer file format.

@@ -249,17 +249,17 @@

dplyr to allow converting to be done through dplyr.

-library(dplyr)
-
-iris_tbl <- tbl(con, dbplyr::in_schema("temp", "iris_delim"))
-
-r_date <- format(Sys.Date(), "%Y%m%d")
-
-iris_tbl %>% 
-  select(petal_length,
-         petal_width) %>% 
-  mutate(time_stamp = r_date) %>%
-  compute("iris_dplyr_parquet", partition = "time_stamp", file_type = "parquet")
+library(dplyr) + +iris_tbl <- tbl(con, dbplyr::in_schema("temp", "iris_delim")) + +r_date <- format(Sys.Date(), "%Y%m%d") + +iris_tbl %>% + select(petal_length, + petal_width) %>% + mutate(time_stamp = r_date) %>% + compute("iris_dplyr_parquet", partition = "time_stamp", file_type = "parquet")
@@ -293,7 +293,7 @@

Reading Material

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/getting_started.html b/docs/articles/getting_started.html index d026954..f0fe251 100644 --- a/docs/articles/getting_started.html +++ b/docs/articles/getting_started.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -143,11 +143,11 @@

Installing noctua:As noctua utilising the R AWS SDK paws the installation of noctua is pretty straight forward:

-# cran version
-install.packages("noctua")
-
-# Dev version
-remotes::install_github("dyfanjones/noctua")
+# cran version +install.packages("noctua") + +# Dev version +remotes::install_github("dyfanjones/noctua")

Docker Example:

@@ -178,106 +178,106 @@

Usage:

Low - Level API:

-library(DBI)
-library(noctua)
-
-con <- dbConnect(athena())
-
-# list all current work groups in AWS Athena
-list_work_groups(con)
-
-# Create a new work group
-create_work_group(con, "demo_work_group", description = "This is a demo work group",
-                  tags = tag_options(key= "demo_work_group", value = "demo_01"))
+library(DBI) +library(noctua) + +con <- dbConnect(athena()) + +# list all current work groups in AWS Athena +list_work_groups(con) + +# Create a new work group +create_work_group(con, "demo_work_group", description = "This is a demo work group", + tags = tag_options(key= "demo_work_group", value = "demo_01"))

DBI:

-library(DBI)
-
-con <- dbConnect(noctua::athena())
-
-# Get metadata 
-dbGetInfo(con)
-
-# $profile_name
-# [1] "default"
-# 
-# $s3_staging
-# [1] ######## NOTE: Please don't share your S3 bucket to the public
-# 
-# $dbms.name
-# [1] "default"
-# 
-# $work_group
-# [1] "primary"
-# 
-# $poll_interval
-# NULL
-# 
-# $encryption_option
-# NULL
-# 
-# $kms_key
-# NULL
-# 
-# $expiration
-# NULL
-# 
-# $region_name
-# [1] "eu-west-1"
-# 
-# $paws
-# [1] "0.1.6"
-# 
-# $noctua
-# [1] "1.5.1"
-
-# create table to AWS Athena
-dbWriteTable(con, "iris", iris)
-
-dbGetQuery(con, "select * from iris limit 10")
-# Info: (Data scanned: 860 Bytes)
-#  sepal_length sepal_width petal_length petal_width species
-# 1:           5.1         3.5          1.4         0.2  setosa
-# 2:           4.9         3.0          1.4         0.2  setosa
-# 3:           4.7         3.2          1.3         0.2  setosa
-# 4:           4.6         3.1          1.5         0.2  setosa
-# 5:           5.0         3.6          1.4         0.2  setosa
-# 6:           5.4         3.9          1.7         0.4  setosa
-# 7:           4.6         3.4          1.4         0.3  setosa
-# 8:           5.0         3.4          1.5         0.2  setosa
-# 9:           4.4         2.9          1.4         0.2  setosa
-# 10:          4.9         3.1          1.5         0.1  setosa
+library(DBI) + +con <- dbConnect(noctua::athena()) + +# Get metadata +dbGetInfo(con) + +# $profile_name +# [1] "default" +# +# $s3_staging +# [1] ######## NOTE: Please don't share your S3 bucket to the public +# +# $dbms.name +# [1] "default" +# +# $work_group +# [1] "primary" +# +# $poll_interval +# NULL +# +# $encryption_option +# NULL +# +# $kms_key +# NULL +# +# $expiration +# NULL +# +# $region_name +# [1] "eu-west-1" +# +# $paws +# [1] "0.1.6" +# +# $noctua +# [1] "1.5.1" + +# create table to AWS Athena +dbWriteTable(con, "iris", iris) + +dbGetQuery(con, "select * from iris limit 10") +# Info: (Data scanned: 860 Bytes) +# sepal_length sepal_width petal_length petal_width species +# 1: 5.1 3.5 1.4 0.2 setosa +# 2: 4.9 3.0 1.4 0.2 setosa +# 3: 4.7 3.2 1.3 0.2 setosa +# 4: 4.6 3.1 1.5 0.2 setosa +# 5: 5.0 3.6 1.4 0.2 setosa +# 6: 5.4 3.9 1.7 0.4 setosa +# 7: 4.6 3.4 1.4 0.3 setosa +# 8: 5.0 3.4 1.5 0.2 setosa +# 9: 4.4 2.9 1.4 0.2 setosa +# 10: 4.9 3.1 1.5 0.1 setosa

dplyr:

-library(dplyr)
-
-athena_iris <- tbl(con, "iris")
-
-athena_iris %>%
-  select(species, sepal_length, sepal_width) %>% 
-  head(10) %>%
-  collect()
-
-# Info: (Data scanned: 860 Bytes)
-# # A tibble: 10 x 3
-# species  sepal_length sepal_width
-# <chr>           <dbl>       <dbl>
-# 1 setosa            5.1         3.5
-# 2 setosa            4.9         3  
-# 3 setosa            4.7         3.2
-# 4 setosa            4.6         3.1
-# 5 setosa            5           3.6
-# 6 setosa            5.4         3.9
-# 7 setosa            4.6         3.4
-# 8 setosa            5           3.4
-# 9 setosa            4.4         2.9
-# 10 setosa           4.9         3.1
+library(dplyr) + +athena_iris <- tbl(con, "iris") + +athena_iris %>% + select(species, sepal_length, sepal_width) %>% + head(10) %>% + collect() + +# Info: (Data scanned: 860 Bytes) +# # A tibble: 10 x 3 +# species sepal_length sepal_width +# <chr> <dbl> <dbl> +# 1 setosa 5.1 3.5 +# 2 setosa 4.9 3 +# 3 setosa 4.7 3.2 +# 4 setosa 4.6 3.1 +# 5 setosa 5 3.6 +# 6 setosa 5.4 3.9 +# 7 setosa 4.6 3.4 +# 8 setosa 5 3.4 +# 9 setosa 4.4 2.9 +# 10 setosa 4.9 3.1
@@ -310,7 +310,7 @@
diff --git a/docs/articles/how_to_retry.html b/docs/articles/how_to_retry.html index fe8cdc1..85d2cc1 100644 --- a/docs/articles/how_to_retry.html +++ b/docs/articles/how_to_retry.html @@ -40,7 +40,7 @@ noctua - 2.6.0 + 2.6.1 @@ -143,26 +143,26 @@

Configure
-noctua_options(retry = 10, retry_quiet = TRUE)
+noctua_options(retry = 10, retry_quiet = TRUE)

If you wish to create your own custom retry function just set the retry to 0:

-library(DBI)
-library(noctua)
-
-# connection to AWS Athena
-con = dbConnect(athena())
-
-# Stop noctua retrying
-noctua_options(retry = 0)
-
-# build your own custom retry function
-custom_retry = function(x){
-  # your custom retry method
-}
-
-# apply your own retry function
-custom_retry(dbGetQuery(con, "select ..."))
+library(DBI) +library(noctua) + +# connection to AWS Athena +con = dbConnect(athena()) + +# Stop noctua retrying +noctua_options(retry = 0) + +# build your own custom retry function +custom_retry = function(x){ + # your custom retry method +} + +# apply your own retry function +custom_retry(dbGetQuery(con, "select ..."))
@@ -202,7 +202,7 @@

Reading material

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/index.html b/docs/articles/index.html index 259915f..aef44ab 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -102,7 +102,7 @@

All vignettes

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/authors.html b/docs/authors.html index 4186859..4fa158c 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -91,13 +91,13 @@

Citation

Jones D (2022). noctua: Connect to 'AWS Athena' using R 'AWS SDK' 'paws' ('DBI' Interface). -R package version 2.6.0, https://github.com/DyfanJones/noctua. +R package version 2.6.1, https://github.com/DyfanJones/noctua.

@Manual{,
   title = {noctua: Connect to 'AWS Athena' using R 'AWS SDK' 'paws' ('DBI' Interface)},
   author = {Dyfan Jones},
   year = {2022},
-  note = {R package version 2.6.0},
+  note = {R package version 2.6.1},
   url = {https://github.com/DyfanJones/noctua},
 }
@@ -112,7 +112,7 @@

Citation

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/index.html b/docs/index.html index f5d60a3..6359bde 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ noctua - 2.6.0 + 2.6.1 @@ -109,7 +109,7 @@
- +

Project Status: Active – The project has reached a stable, usable state and is being actively developed. CRAN_Status_Badge downloadsCodecov test coverage R build status noctua status badge

The goal of the noctua package is to provide a DBI-compliant interface to Amazon’s Athena (https://aws.amazon.com/athena/) using paws SDK. This allows for an efficient, easy setup connection to Athena using the paws SDK as a driver.

NOTE: Before using noctua you must have an aws account or have access to aws account with permissions allowing you to use Athena.

@@ -122,10 +122,10 @@

Installation:
-install.packages("noctua")

+install.packages("noctua")

Or to get the development version from Github with:

-remotes::install_github("dyfanjones/noctua")
+remotes::install_github("dyfanjones/noctua")

Connection Methods @@ -135,13 +135,13 @@

Hard Coding
-library(DBI)
-
-con <- dbConnect(noctua::athena(),
-                aws_access_key_id='YOUR_ACCESS_KEY_ID',
-                aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
-                s3_staging_dir='s3://path/to/query/bucket/',
-                region_name='eu-west-1')

+library(DBI) + +con <- dbConnect(noctua::athena(), + aws_access_key_id='YOUR_ACCESS_KEY_ID', + aws_secret_access_key='YOUR_SECRET_ACCESS_KEY', + s3_staging_dir='s3://path/to/query/bucket/', + region_name='eu-west-1')

AWS Profile Name @@ -155,15 +155,15 @@

Setting up AWS CLI
-library(DBI)
-con <- dbConnect(noctua::athena(),
-                 s3_staging_dir = 's3://path/to/query/bucket/')
+library(DBI) +con <- dbConnect(noctua::athena(), + s3_staging_dir = 's3://path/to/query/bucket/')

Connecting to Athena using profile name other than default.

-library(DBI)
-con <- dbConnect(noctua::athena(),
-                 profile_name = "your_profile",
-                 s3_staging_dir = 's3://path/to/query/bucket/')
+library(DBI) +con <- dbConnect(noctua::athena(), + profile_name = "your_profile", + s3_staging_dir = 's3://path/to/query/bucket/') @@ -173,21 +173,21 @@

Assuming ARN Role for connectionAnother method in connecting to Athena is to use Amazon Resource Name (ARN) role.

Setting credentials in environmental variables:

-library(noctua)
-assume_role(profile_name = "YOUR_PROFILE_NAME",
-            role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name",
-            set_env = TRUE)
-
-# Connect to Athena using temporary credentials
-con <- dbConnect(athena(),
-                s3_staging_dir = 's3://path/to/query/bucket/')
+library(noctua) +assume_role(profile_name = "YOUR_PROFILE_NAME", + role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name", + set_env = TRUE) + +# Connect to Athena using temporary credentials +con <- dbConnect(athena(), + s3_staging_dir = 's3://path/to/query/bucket/')

Connecting to Athena directly using ARN role:

-library(DBI)
- con <- dbConnect(athena(),
-                  profile_name = "YOUR_PROFILE_NAME",
-                  role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name",
-                  s3_staging_dir = 's3://path/to/query/bucket/')
+library(DBI) + con <- dbConnect(athena(), + profile_name = "YOUR_PROFILE_NAME", + role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name", + s3_staging_dir = 's3://path/to/query/bucket/')

To change the duration of ARN role session please change the parameter duration_seconds. By default duration_seconds is set to 3600 seconds (1 hour).

@@ -198,81 +198,81 @@

Basic Usage
-library(DBI)
-
-# using default profile to connect
-con <- dbConnect(noctua::athena(),
-                 s3_staging_dir = 's3://path/to/query/bucket/')
-
-res <- dbExecute(con, "SELECT * FROM one_row")
-dbFetch(res)
-dbClearResult(res)

+library(DBI) + +# using default profile to connect +con <- dbConnect(noctua::athena(), + s3_staging_dir = 's3://path/to/query/bucket/') + +res <- dbExecute(con, "SELECT * FROM one_row") +dbFetch(res) +dbClearResult(res)

To retrieve query in 1 step.

-dbGetQuery(con, "SELECT * FROM one_row")
+dbGetQuery(con, "SELECT * FROM one_row")

Intermediate Usage

To create tables in Athena, dbExecute will send the query to Athena and wait until the query has been executed. This makes it an ideal method to create tables within Athena.

-query <- 
-  "CREATE EXTERNAL TABLE impressions (
-      requestBeginTime string,
-      adId string,
-      impressionId string,
-      referrer string,
-      userAgent string,
-      userCookie string,
-      ip string,
-      number string,
-      processId string,
-      browserCookie string,
-      requestEndTime string,
-      timers struct<modelLookup:string, requestTime:string>,
-      threadId string,
-      hostname string,
-      sessionId string)
-  PARTITIONED BY (dt string)
-  ROW FORMAT  serde 'org.apache.hive.hcatalog.data.JsonSerDe'
-      with serdeproperties ( 'paths'='requestBeginTime, adId, impressionId, referrer, userAgent, userCookie, ip' )
-  LOCATION 's3://elasticmapreduce/samples/hive-ads/tables/impressions/' ;"
-  
-dbExecute(con, query)
+query <- + "CREATE EXTERNAL TABLE impressions ( + requestBeginTime string, + adId string, + impressionId string, + referrer string, + userAgent string, + userCookie string, + ip string, + number string, + processId string, + browserCookie string, + requestEndTime string, + timers struct<modelLookup:string, requestTime:string>, + threadId string, + hostname string, + sessionId string) + PARTITIONED BY (dt string) + ROW FORMAT serde 'org.apache.hive.hcatalog.data.JsonSerDe' + with serdeproperties ( 'paths'='requestBeginTime, adId, impressionId, referrer, userAgent, userCookie, ip' ) + LOCATION 's3://elasticmapreduce/samples/hive-ads/tables/impressions/' ;" + +dbExecute(con, query)

noctua has 2 extra functions to return extra information around Athena tables: dbGetPartition and dbShow

dbGetPartition will return all the partitions (returns data.frame):

-noctua::dbGetPartition(con, "impressions")
+noctua::dbGetPartition(con, "impressions")

dbShow will return the table’s ddl, so you will be able to see how the table was constructed in Athena (returns SQL character):

-noctua::dbShow(con, "impressions")
+noctua::dbShow(con, "impressions")

Advanced Usage

-library(DBI)
-con <- dbConnect(noctua::athena(),
-                 s3_staging_dir = 's3://path/to/query/bucket/')
+library(DBI) +con <- dbConnect(noctua::athena(), + s3_staging_dir = 's3://path/to/query/bucket/')

Sending data to Athena

noctua has created a method to send data.frame from R to Athena.

-# Check existing tables
-dbListTables(con)
-# Upload iris to Athena
-dbWriteTable(con, "iris", iris, 
-             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")))
-
-# Read in iris from Athena
-dbReadTable(con, "iris")
-
-# Check new existing tables in Athena
-dbListTables(con)
-
-# Check if iris exists in Athena
-dbExistsTable(con, "iris")
+# Check existing tables +dbListTables(con) +# Upload iris to Athena +dbWriteTable(con, "iris", iris, + partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d"))) + +# Read in iris from Athena +dbReadTable(con, "iris") + +# Check new existing tables in Athena +dbListTables(con) + +# Check if iris exists in Athena +dbExistsTable(con, "iris")

Please check out the noctua method for dbWriteTable for more information on how to upload data to AWS Athena and AWS S3.

For more information around how to get the most out of AWS Athena when uploading data please check out: Top 10 Performance Tuning Tips for Amazon Athena

@@ -282,15 +282,15 @@

Tidyverse Usage

Creating a connection to Athena and querying an already existing table iris that was created in the previous example.

-library(DBI)
-library(dplyr)
-
-con <- dbConnect(noctua::athena(),
-                aws_access_key_id='YOUR_ACCESS_KEY_ID',
-                aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
-                s3_staging_dir='s3://path/to/query/bucket/',
-                region_name='eu-west-1')
-tbl(con, sql("SELECT * FROM iris"))
+library(DBI) +library(dplyr) + +con <- dbConnect(noctua::athena(), + aws_access_key_id='YOUR_ACCESS_KEY_ID', + aws_secret_access_key='YOUR_SECRET_ACCESS_KEY', + s3_staging_dir='s3://path/to/query/bucket/', + region_name='eu-west-1') +tbl(con, sql("SELECT * FROM iris"))
# Source:   SQL [?? x 5]
 # Database: Athena 0.1.4 [eu-west-1/default]
    sepal_length sepal_width petal_length petal_width species
@@ -306,9 +306,9 @@ 

Tidyverse Usage 9 4.4 2.9 1.4 0.2 setosa 10 4.9 3.1 1.5 0.1 setosa # … with more rows

-

dplyr provides lazy querying, which allows tbl(con, sql("SELECT * FROM iris")) to be shortened to tbl(con, "iris"). For more information please look at https://db.rstudio.com/dplyr/.

+

dplyr provides lazy querying, which allows tbl(con, sql("SELECT * FROM iris")) to be shortened to tbl(con, "iris"). For more information please look at https://solutions.posit.co/connections/db/r-packages/dplyr/.

-tbl(con, "iris")
+tbl(con, "iris")
# Source:   table<iris> [?? x 5]
 # Database: Athena 0.1.4 [eu-west-1/default]
    sepal_length sepal_width petal_length petal_width species
@@ -346,8 +346,8 @@ 

Tidyverse Usage10 4.5 2.3 1.3 0.3 setosa # … with more rows

-tbl(con, "iris") %>% 
-  select(contains("sepal"), contains("petal"))
+tbl(con, "iris") %>% + select(contains("sepal"), contains("petal"))
# Source:   lazy query [?? x 4]
 # Database: Athena 0.1.4 [your_profile@eu-west-1/default]
    sepal_length sepal_width petal_length petal_width
@@ -365,31 +365,31 @@ 

Tidyverse Usage# … with more rows

Upload data using dplyr function copy_to and compute.

-library(DBI)
-library(dplyr)
-
-con <- dbConnect(noctua::athena(),
-                profile_name = "your_profile",
-                s3_staging_dir='s3://path/to/query/bucket/')
+library(DBI) +library(dplyr) + +con <- dbConnect(noctua::athena(), + profile_name = "your_profile", + s3_staging_dir='s3://path/to/query/bucket/')

Write data.frame to Athena table

-copy_to(con, mtcars,
-        s3_location = "s3://mybucket/data/")
+copy_to(con, mtcars, + s3_location = "s3://mybucket/data/")

Write Athena table from tbl_sql

-athena_mtcars <- tbl(con, "mtcars")
-mtcars_filter <- athena_mtcars %>% filter(gear >=4)
+athena_mtcars <- tbl(con, "mtcars") +mtcars_filter <- athena_mtcars %>% filter(gear >=4)

Create athena with unique table name

-mtcars_filer %>% compute()
+mtcars_filer %>% compute()

Create athena with specified name and s3 location

-mtcars_filer %>% 
-  compute("mtcars_filer",
-          s3_location = "s3://mybucket/mtcars_filer/")
-
-# Disconnect from Athena
-dbDisconnect(con)
+mtcars_filer %>% + compute("mtcars_filer", + s3_location = "s3://mybucket/mtcars_filer/") + +# Disconnect from Athena +dbDisconnect(con)
@@ -397,19 +397,19 @@

Work Groups
-library(noctua)
-library(DBI)
-
-con <- dbConnect(noctua::athena(),
-                profile_name = "your_profile",
-                encryption_option = "SSE_S3",
-                s3_staging_dir='s3://path/to/query/bucket/')
-
-create_work_group(con, "demo_work_group", description = "This is a demo work group",
-                  tags = tag_options(key= "demo_work_group", value = "demo_01"))

+library(noctua) +library(DBI) + +con <- dbConnect(noctua::athena(), + profile_name = "your_profile", + encryption_option = "SSE_S3", + s3_staging_dir='s3://path/to/query/bucket/') + +create_work_group(con, "demo_work_group", description = "This is a demo work group", + tags = tag_options(key= "demo_work_group", value = "demo_01"))

List work groups:

+list_work_groups(con)
[[1]]
 [[1]]$Name
 [1] "demo_work_group"
@@ -438,10 +438,10 @@ 

Work Groups2019-08-22 16:14:47.902000+01:00

Update work group:

-update_work_group(con, "demo_work_group", description = "This is a demo work group update")
+update_work_group(con, "demo_work_group", description = "This is a demo work group update")

Return work group meta data:

-get_work_group(con, "demo_work_group")
+get_work_group(con, "demo_work_group")
$Name
 [1] "demo_work_group"
 
@@ -479,11 +479,11 @@ 

Work Groups2019-09-06 18:51:28.902000+01:00

Connect to Athena using work group:

-con <- dbConnect(noctua::athena(),
-                work_group = "demo_work_group")
+con <- dbConnect(noctua::athena(), + work_group = "demo_work_group")

Delete work group:

-delete_work_group(con, "demo_work_group")
+delete_work_group(con, "demo_work_group")
@@ -565,16 +565,7 @@

Developers

-
-

Dev status

-
    -
  • Project Status: Active – The project has reached a stable, usable state and is being actively developed.
  • -
  • CRAN_Status_Badge
  • -
  • Codecov test coverage
  • -
  • R build status
  • -
  • noctua status badge
  • -
-
+ @@ -587,7 +578,7 @@

Dev status

-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/issue_template.html b/docs/issue_template.html index d944f48..5cb6670 100644 --- a/docs/issue_template.html +++ b/docs/issue_template.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -102,8 +102,8 @@

NA

Session Info
-devtools::session_info()
-#> output
+devtools::session_info() +#> output
-

Site built with pkgdown 2.0.3.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/news/index.html b/docs/news/index.html index 0a686b5..10cad72 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -76,7 +76,15 @@

Changelog

- + +
+

Bug Fix:

+
+
+
+

Feature:

  • Add clear_s3_resource parameter to RAthena_options to prevent AWS Athena output AWS S3 resource being cleared up by dbClearResult (RAthena # 168). Thanks to @juhoautio for the request.
  • @@ -146,36 +154,36 @@

    Feature: wr.catalog.table(database="awswrangler_test", table="noaa")

-
-library(DBI)
-
-con <- dbConnect(noctua::athena())
-
-# Query ran using CSV output
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa")
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed
-#  57.004   8.430 160.567 
-
-noctua::noctua_options(cache_size = 1)
-
-# Query ran using UNLOAD Parquet output
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T)
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed 
-#  21.622   2.350  39.232 
-
-# Query ran using cache
-system.time({
-  df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T)
-})
-# Info: (Data scanned: 80.88 MB)
-#    user  system elapsed 
-#  13.738   1.886  11.029 
+ +library(DBI) + +con <- dbConnect(noctua::athena()) + +# Query ran using CSV output +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa") +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 57.004 8.430 160.567 + +noctua::noctua_options(cache_size = 1) + +# Query ran using UNLOAD Parquet output +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T) +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 21.622 2.350 39.232 + +# Query ran using cache +system.time({ + df = dbGetQuery(con, "SELECT * FROM awswrangler_test.noaa", unload = T) +}) +# Info: (Data scanned: 80.88 MB) +# user system elapsed +# 13.738 1.886 11.029
@@ -190,45 +198,45 @@

Feature:
-
-library(data.table)
-library(DBI)
-
-x = 5
-
-dt = data.table(
-  var1 = sample(LETTERS, size = x, T),
-  var2 = rep(list(list("var3"= 1:3, "var4" = list("var5"= letters[1:5]))), x)
-)
-
-con <- dbConnect(noctua::athena())
-
-#> Version: 2.2.0
-
-sqlData(con, dt)
-
-# Registered S3 method overwritten by 'jsonify':
-#   method     from    
-#   print.json jsonlite
-# Info: Special characters "\t" has been converted to " " to help with Athena reading file format tsv
-#    var1                                                   var2
-# 1:    1 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}}
-# 2:    2 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}}
-# 3:    3 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}}
-# 4:    4 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}}
-# 5:    5 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}}
-
-#> Version: 2.1.0
-
-sqlData(con, dt)
-
-# Info: Special characters "\t" has been converted to " " to help with Athena reading file format tsv
-#    var1                                        var2
-# 1:    1 1:3|list(var5 = c("a", "b", "c", "d", "e"))
-# 2:    2 1:3|list(var5 = c("a", "b", "c", "d", "e"))
-# 3:    3 1:3|list(var5 = c("a", "b", "c", "d", "e"))
-# 4:    4 1:3|list(var5 = c("a", "b", "c", "d", "e"))
-# 5:    5 1:3|list(var5 = c("a", "b", "c", "d", "e"))

+ +library(data.table) +library(DBI) + +x = 5 + +dt = data.table( + var1 = sample(LETTERS, size = x, T), + var2 = rep(list(list("var3"= 1:3, "var4" = list("var5"= letters[1:5]))), x) +) + +con <- dbConnect(noctua::athena()) + +#> Version: 2.2.0 + +sqlData(con, dt) + +# Registered S3 method overwritten by 'jsonify': +# method from +# print.json jsonlite +# Info: Special characters "\t" has been converted to " " to help with Athena reading file format tsv +# var1 var2 +# 1: 1 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}} +# 2: 2 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}} +# 3: 3 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}} +# 4: 4 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}} +# 5: 5 {"var3":[1,2,3],"var4":{"var5":["a","b","c","d","e"]}} + +#> Version: 2.1.0 + +sqlData(con, dt) + +# Info: Special characters "\t" has been converted to " " to help with Athena reading file format tsv +# var1 var2 +# 1: 1 1:3|list(var5 = c("a", "b", "c", "d", "e")) +# 2: 2 1:3|list(var5 = c("a", "b", "c", "d", "e")) +# 3: 3 1:3|list(var5 = c("a", "b", "c", "d", "e")) +# 4: 4 1:3|list(var5 = c("a", "b", "c", "d", "e")) +# 5: 5 1:3|list(var5 = c("a", "b", "c", "d", "e"))

v-2.2.0 now converts lists into json lines format so that AWS Athena can parse with sql array/mapping/json functions. Small down side a s3 method conflict occurs when jsonify is called to convert lists into json lines. jsonify was choose in favor to jsonlite due to the performance improvements (#156).

@@ -268,109 +276,109 @@

API Change
-
-# Old Method
-library(DBI)
-con <- dbConnect(noctua::athena(),
-                 rstudio_conn_tab = F)
-
-res <- dbExecute(con, "select 'helloworld'")
-
-# modifying parent class to influence child
-con@info$made_up <- "helloworld"
-
-# nothing happened
-res@connection@info$made_up
-# > NULL
-
-# modifying child class to influence parent
-res@connection@info$made_up <- "oh no!"
-
-# nothing happened
-con@info$made_up
-# > "helloworld"
-
-# New Method
-library(DBI)
-con <- dbConnect(noctua::athena(),
-                 rstudio_conn_tab = F)
-
-res <- dbExecute(con, "select 'helloworld'")
-
-# modifying parent class to influence child
-con@info$made_up <- "helloworld"
-
-# picked up change
-res@connection@info$made_up
-# > "helloworld"
-
-# modifying child class to influence parent
-res@connection@info$made_up <- "oh no!"
-
-# picked up change
-con@info$made_up
-# > "oh no!"
+ +# Old Method +library(DBI) +con <- dbConnect(noctua::athena(), + rstudio_conn_tab = F) + +res <- dbExecute(con, "select 'helloworld'") + +# modifying parent class to influence child +con@info$made_up <- "helloworld" + +# nothing happened +res@connection@info$made_up +# > NULL + +# modifying child class to influence parent +res@connection@info$made_up <- "oh no!" + +# nothing happened +con@info$made_up +# > "helloworld" + +# New Method +library(DBI) +con <- dbConnect(noctua::athena(), + rstudio_conn_tab = F) + +res <- dbExecute(con, "select 'helloworld'") + +# modifying parent class to influence child +con@info$made_up <- "helloworld" + +# picked up change +res@connection@info$made_up +# > "helloworld" + +# modifying child class to influence parent +res@connection@info$made_up <- "oh no!" + +# picked up change +con@info$made_up +# > "oh no!"

New Feature

  • Added support to AWS Athena data types [array, row, map, json, binary, ipaddress] (#135). Conversion types can be changed through dbConnect and noctua_options.
-
-library(DBI)
-library(noctua)
-
-# default conversion methods
-con <- dbConnect(noctua::athena())
-
-# change json conversion method
-noctua_options(json = "character")
-noctua:::athena_option_env$json
-# [1] "character"
-
-# change json conversion to custom method
-noctua_options(json = jsonify::from_json)
-noctua:::athena_option_env$json
-# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) 
-# {
-#   json_to_r(json, simplify, fill_na, buffer_size)
-# }
-# <bytecode: 0x7f823b9f6830>
-#   <environment: namespace:jsonify>
-
-# change bigint conversion without affecting custom json conversion methods
-noctua_options(bigint = "numeric")
-noctua:::athena_option_env$json
-# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) 
-# {
-#   json_to_r(json, simplify, fill_na, buffer_size)
-# }
-# <bytecode: 0x7f823b9f6830>
-#   <environment: namespace:jsonify>
-
-noctua:::athena_option_env$bigint
-# [1] "numeric"
-
-# change binary conversion without affect, bigint or json methods
-noctua_options(binary = "character")
-noctua:::athena_option_env$json
-# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) 
-# {
-#   json_to_r(json, simplify, fill_na, buffer_size)
-# }
-# <bytecode: 0x7f823b9f6830>
-#   <environment: namespace:jsonify>
-
-noctua:::athena_option_env$bigint
-# [1] "numeric"
-
-noctua:::athena_option_env$binary
-# [1] "character"
-
-# no conversion for json objects
-con2 <- dbConnect(noctua::athena(), json = "character")
-
-# use custom json parser
-con <- dbConnect(noctua::athena(), json = jsonify::from_json)
+ +library(DBI) +library(noctua) + +# default conversion methods +con <- dbConnect(noctua::athena()) + +# change json conversion method +noctua_options(json = "character") +noctua:::athena_option_env$json +# [1] "character" + +# change json conversion to custom method +noctua_options(json = jsonify::from_json) +noctua:::athena_option_env$json +# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) +# { +# json_to_r(json, simplify, fill_na, buffer_size) +# } +# <bytecode: 0x7f823b9f6830> +# <environment: namespace:jsonify> + +# change bigint conversion without affecting custom json conversion methods +noctua_options(bigint = "numeric") +noctua:::athena_option_env$json +# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) +# { +# json_to_r(json, simplify, fill_na, buffer_size) +# } +# <bytecode: 0x7f823b9f6830> +# <environment: namespace:jsonify> + +noctua:::athena_option_env$bigint +# [1] "numeric" + +# change binary conversion without affect, bigint or json methods +noctua_options(binary = "character") +noctua:::athena_option_env$json +# function (json, simplify = TRUE, fill_na = FALSE, buffer_size = 1024) +# { +# json_to_r(json, simplify, fill_na, buffer_size) +# } +# <bytecode: 0x7f823b9f6830> +# <environment: namespace:jsonify> + +noctua:::athena_option_env$bigint +# [1] "numeric" + +noctua:::athena_option_env$binary +# [1] "character" + +# no conversion for json objects +con2 <- dbConnect(noctua::athena(), json = "character") + +# use custom json parser +con <- dbConnect(noctua::athena(), json = jsonify::from_json)
  • Allow users to turn off RStudio Connection Tab when working in RStudio (#136). This can be done through parameter rstudio_conn_tab within dbConnect.
@@ -386,21 +394,21 @@
  • Added optional formatting to dbGetPartition. This simply tidies up the default AWS Athena partition format.
-
-library(DBI)
-library(noctua)
-con <- dbConnect(athena())
-dbGetPartition(con, "test_df2", .format = T)
-# Info: (Data scanned: 0 Bytes)
-#    year month day
-# 1: 2020    11  17
-dbGetPartition(con, "test_df2")
-# Info: (Data scanned: 0 Bytes)
-#                    partition
-# 1: year=2020/month=11/day=17
+ +library(DBI) +library(noctua) +con <- dbConnect(athena()) +dbGetPartition(con, "test_df2", .format = T) +# Info: (Data scanned: 0 Bytes) +# year month day +# 1: 2020 11 17 +dbGetPartition(con, "test_df2") +# Info: (Data scanned: 0 Bytes) +# partition +# 1: year=2020/month=11/day=17
  • Support different formats for returning bigint, this is to align with other DBI interfaces i.e. RPostgres. Now bigint can be return in the possible formats: [“integer64”, “integer”, “numeric”, “character”]
  • -
library(DBI)
-con <- dbConnect(noctua::athena(), bigint = "numeric")
+
library(DBI)
+con <- dbConnect(noctua::athena(), bigint = "numeric")

When switching between the different file parsers the bigint to be represented according to the file parser i.e. data.table: “integer64” -> vroom: “I”.

@@ -438,29 +446,29 @@

Minor Change
  • dbRemoveTable now removes AWS S3 objects using delete_objects instead of delete_object. This allows noctua to delete AWS S3 files in batches. This will reduce the number of api calls to AWS and comes with a performance improvement.
  • -
    -library(DBI)
    -library(data.table)
    -
    -X <- 1010
    -value <- data.table(x = 1:X,
    -                    y = sample(letters, X, replace = T), 
    -                    z = sample(c(TRUE, FALSE), X, replace = T))
    -
    -con <- dbConnect(noctua::athena())
    -
    -# create a removable table with 1010 parquet files in AWS S3.
    -dbWriteTable(con, "rm_tbl", value, file.type = "parquet", overwrite = T, max.batch = 1)
    -
    -# old method: delete_object
    -system.time({dbRemoveTable(con, "rm_tbl", confirm = T)})
    -# user  system elapsed 
    -# 31.004   8.152 115.906 
    -
    -# new method: delete_objects
    -system.time({dbRemoveTable(con, "rm_tbl", confirm = T)})
    -# user  system elapsed 
    -# 17.319   0.370  22.709 
    + +library(DBI) +library(data.table) + +X <- 1010 +value <- data.table(x = 1:X, + y = sample(letters, X, replace = T), + z = sample(c(TRUE, FALSE), X, replace = T)) + +con <- dbConnect(noctua::athena()) + +# create a removable table with 1010 parquet files in AWS S3. +dbWriteTable(con, "rm_tbl", value, file.type = "parquet", overwrite = T, max.batch = 1) + +# old method: delete_object +system.time({dbRemoveTable(con, "rm_tbl", confirm = T)}) +# user system elapsed +# 31.004 8.152 115.906 + +# new method: delete_objects +system.time({dbRemoveTable(con, "rm_tbl", confirm = T)}) +# user system elapsed +# 17.319 0.370 22.709

    New Feature

    @@ -493,15 +501,15 @@
    • noctua now supports Keyboard Interrupt and will stop AWS Athena running the query when the query has been interrupted. To keep the functionality of AWS Athena running when R has been interrupt a new parameter has been added to dbConnect, keyboard_interrupt. Example:
    -
    -# Stop AWS Athena when R has been interrupted:
    -
    -con <- dbConnect(noctua::athena())
    -
    -# Let AWS Athena keep running when R has been interrupted:
    -
    -con <- dbConnect(noctua::athena(),
    -                 keyboard_interrupt = F)
    + +# Stop AWS Athena when R has been interrupted: + +con <- dbConnect(noctua::athena()) + +# Let AWS Athena keep running when R has been interrupted: + +con <- dbConnect(noctua::athena(), + keyboard_interrupt = F)
    @@ -527,11 +535,11 @@

    New Feature dbFetch is able to return data from AWS Athena in chunk. This has been achieved by passing NextToken to AthenaResult s4 class. This method won’t be as fast n = -1 as each chunk will have to be process into data frame format.
    -
    -library(DBI)
    -con <- dbConnect(noctua::athena())
    -res <- dbExecute(con, "select * from some_big_table limit 10000")
    -dbFetch(res, 5000)
    + +library(DBI) +con <- dbConnect(noctua::athena()) +res <- dbExecute(con, "select * from some_big_table limit 10000") +dbFetch(res, 5000)
    -
    -library(DBI)
    -
    -con = dbConnect(noctua::athena())
    -
    -# upload iris dataframe for removal test
    -dbWriteTable(con, "iris2", iris)
    -
    -# Athena method
    -system.time(dbRemoveTable(con, "iris2", confirm = T))
    -# user  system elapsed 
    -# 0.247   0.091   2.243 
    -
    -# upload iris dataframe for removal test
    -dbWriteTable(con, "iris2", iris)
    -
    -# Glue method
    -system.time(dbRemoveTable(con, "iris2", confirm = T))
    -# user  system elapsed 
    -# 0.110   0.045   1.094 
    + +library(DBI) + +con = dbConnect(noctua::athena()) + +# upload iris dataframe for removal test +dbWriteTable(con, "iris2", iris) + +# Athena method +system.time(dbRemoveTable(con, "iris2", confirm = T)) +# user system elapsed +# 0.247 0.091 2.243 + +# upload iris dataframe for removal test +dbWriteTable(con, "iris2", iris) + +# Glue method +system.time(dbRemoveTable(con, "iris2", confirm = T)) +# user system elapsed +# 0.110 0.045 1.094
    -
    -library(DBI)
    -con = dbConnect(noctua::athena())
    -dbWriteTable(con, "iris2", iris, file.type = "json")
    -dbGetQuery(con, "select * from iris2")
    + +library(DBI) +con = dbConnect(noctua::athena()) +dbWriteTable(con, "iris2", iris, file.type = "json") +dbGetQuery(con, "select * from iris2")

    Bug Fix

    @@ -630,83 +638,83 @@

    Bug Fixreadr::write_file is used for extra speed.
    -
    -library(readr)
    -library(microbenchmark)
    -
    -# creating some dummy data for testing
    -X <- 1e8
    -df <- 
    -data.frame(
    -    w = runif(X),
    -    x = 1:X,
    -    y = sample(letters, X, replace = T), 
    -    z = sample(c(TRUE, FALSE), X, replace = T))
    -write_csv(df, "test.csv")
    -
    -# read in text file into raw format
    -obj <- readBin("test.csv", what = "raw", n = file.size("test.csv"))
    -
    -format(object.size(obj), units = "auto")
    -# 3.3 Gb
    -
    -# writeBin in a loop
    -write_bin <- function(
    -  value,
    -  filename,
    -  chunk_size = 2L ^ 20L) {
    -  
    -  total_size <- length(value)
    -  split_vec <- seq(1, total_size, chunk_size)
    -  
    -  con <- file(filename, "a+b")
    -  on.exit(close(con))
    -  
    -  sapply(split_vec, function(x){writeBin(value[x:min(total_size,(x+chunk_size-1))],con)})
    -  invisible(TRUE)
    -}
    -
    -
    -microbenchmark(writeBin_loop = write_bin(obj, tempfile()),
    -               readr = write_file(obj, tempfile()),
    -               times = 5)
    -
    -# Unit: seconds
    -# expr       min       lq      mean    median        uq       max neval
    -# R_loop 41.463273 41.62077 42.265778 41.908908 42.022042 44.313893     5
    -# readr  2.291571  2.40495  2.496871  2.542544  2.558367  2.686921     5
    + +library(readr) +library(microbenchmark) + +# creating some dummy data for testing +X <- 1e8 +df <- +data.frame( + w = runif(X), + x = 1:X, + y = sample(letters, X, replace = T), + z = sample(c(TRUE, FALSE), X, replace = T)) +write_csv(df, "test.csv") + +# read in text file into raw format +obj <- readBin("test.csv", what = "raw", n = file.size("test.csv")) + +format(object.size(obj), units = "auto") +# 3.3 Gb + +# writeBin in a loop +write_bin <- function( + value, + filename, + chunk_size = 2L ^ 20L) { + + total_size <- length(value) + split_vec <- seq(1, total_size, chunk_size) + + con <- file(filename, "a+b") + on.exit(close(con)) + + sapply(split_vec, function(x){writeBin(value[x:min(total_size,(x+chunk_size-1))],con)}) + invisible(TRUE) +} + + +microbenchmark(writeBin_loop = write_bin(obj, tempfile()), + readr = write_file(obj, tempfile()), + times = 5) + +# Unit: seconds +# expr min lq mean median uq max neval +# R_loop 41.463273 41.62077 42.265778 41.908908 42.022042 44.313893 5 +# readr 2.291571 2.40495 2.496871 2.542544 2.558367 2.686921 5

    -
    -# Before
    -translate_sql("2019-01-01", con = con)
    -# '2019-01-01'
    -
    -# Now
    -translate_sql("2019-01-01", con = con)
    -# DATE '2019-01-01'
    + +# Before +translate_sql("2019-01-01", con = con) +# '2019-01-01' + +# Now +translate_sql("2019-01-01", con = con) +# DATE '2019-01-01'
    • Dependency data.table now restricted to (>=1.12.4) due to file compression being added to fwrite (>=1.12.4) https://github.com/Rdatatable/data.table/blob/master/NEWS.md
    • R functions paste/paste0 would use default dplyr:sql-translate-env (concat_ws). paste0 now uses Presto’s concat function and paste now uses pipes to get extra flexibility for custom separating values.
    -
    -# R code:
    -paste("hi", "bye", sep = "-")
    -
    -# SQL translation:
    -('hi'||'-'||'bye')
    + +# R code: +paste("hi", "bye", sep = "-") + +# SQL translation: +('hi'||'-'||'bye')
    • If table exists and parameter append set to TRUE then existing s3.location will be utilised (RAthena: # 73)
    • -db_compute returned table name, however when a user wished to write table to another location (RAthena: # 74). An error would be raised: Error: SYNTAX_ERROR: line 2:6: Table awsdatacatalog.default.temp.iris does not exist This has now been fixed with db_compute returning dbplyr::in_schema.
    • +db_compute returned table name, however when a user wished to write table to another location (RAthena: # 74). An error would be raised: Error: SYNTAX_ERROR: line 2:6: Table awsdatacatalog.default.temp.iris does not exist This has now been fixed with db_compute returning dbplyr::in_schema.
    -
    -library(DBI)
    -library(dplyr)
    -
    -con <- dbConnect(noctua::athena())
    -
    -tbl(con, "iris") %>%
    -  compute(name = "temp.iris")
    + +library(DBI) +library(dplyr) + +con <- dbConnect(noctua::athena()) + +tbl(con, "iris") %>% + compute(name = "temp.iris")
    • dbListFields didn’t display partitioned columns. This has now been fixed with the call to AWS Glue being altered to include more metadata allowing for column names and partitions to be returned.
    • RStudio connections tab didn’t display any partitioned columns, this has been fixed in the same manner as dbListFields @@ -725,25 +733,25 @@

      New Feature

    • Thanks to @OssiLehtinen for improving the speed of dplyr::tbl when calling Athena when using the ident method (#64):
    -
    -library(DBI)
    -library(dplyr)
    -
    -con <- dbConnect(noctua::athena())
    -
    -# ident method:
    -t1 <- system.time(tbl(con, "iris"))
    -
    -# sub query method:
    -t2 <- system.time(tbl(con, sql("select * from iris")))
    -
    -# ident method
    -# user  system elapsed 
    -# 0.082   0.012   0.288 
    -
    -# sub query method
    -# user  system elapsed 
    -# 0.993   0.138   3.660 
    + +library(DBI) +library(dplyr) + +con <- dbConnect(noctua::athena()) + +# ident method: +t1 <- system.time(tbl(con, "iris")) + +# sub query method: +t2 <- system.time(tbl(con, sql("select * from iris"))) + +# ident method +# user system elapsed +# 0.082 0.012 0.288 + +# sub query method +# user system elapsed +# 0.993 0.138 3.660
    • new function dbGetTables that returns Athena hierarchy as a data.frame
    @@ -782,8 +790,8 @@
    • Default delimited file uploaded to AWS Athena changed from “csv” to “tsv” this is due to separating value “,” in character variables. By using “tsv” file type JSON/Array objects can be passed to Athena through character types. To prevent this becoming a breaking change dbWriteTable append parameter checks and uses existing AWS Athena DDL file type. If file.type doesn’t match Athena DDL file type then user will receive a warning message:
    -
    -warning('Appended `file.type` is not compatible with the existing Athena DDL file type and has been converted to "', File.Type,'".', call. = FALSE)
    + +warning('Appended `file.type` is not compatible with the existing Athena DDL file type and has been converted to "', File.Type,'".', call. = FALSE)

    Bug fix

    @@ -820,32 +828,32 @@

    Major ChangedbWriteTable now will split gzip compressed files to improve AWS Athena performance. By default gzip compressed files will be split into 20.

    Performance results

    -
    -library(DBI)
    -X <- 1e8
    -df <- data.frame(w =runif(X),
    -                 x = 1:X,
    -                 y = sample(letters, X, replace = T), 
    -                 z = sample(c(TRUE, FALSE), X, replace = T))
    -con <- dbConnect(noctua::athena())
    -# upload dataframe with different splits
    -dbWriteTable(con, "test_split1", df, compress = T, max.batch = nrow(df), overwrite = T) # no splits
    -dbWriteTable(con, "test_split2", df, compress = T, max.batch = 0.05 * nrow(df), overwrite = T) # 20 splits
    -dbWriteTable(con, "test_split3", df, compress = T, max.batch = 0.1 * nrow(df), overwrite = T) # 10 splits
    + +library(DBI) +X <- 1e8 +df <- data.frame(w =runif(X), + x = 1:X, + y = sample(letters, X, replace = T), + z = sample(c(TRUE, FALSE), X, replace = T)) +con <- dbConnect(noctua::athena()) +# upload dataframe with different splits +dbWriteTable(con, "test_split1", df, compress = T, max.batch = nrow(df), overwrite = T) # no splits +dbWriteTable(con, "test_split2", df, compress = T, max.batch = 0.05 * nrow(df), overwrite = T) # 20 splits +dbWriteTable(con, "test_split3", df, compress = T, max.batch = 0.1 * nrow(df), overwrite = T) # 10 splits

    AWS Athena performance results from AWS console (query executed: select count(*) from .... ):

    • test_split1: (Run time: 38.4 seconds, Data scanned: 1.16 GB)
    • test_split2: (Run time: 3.73 seconds, Data scanned: 1.16 GB)
    • test_split3: (Run time: 5.47 seconds, Data scanned: 1.16 GB)
    -
    -library(DBI)
    -X <- 1e8
    -df <- data.frame(w =runif(X),
    -                 x = 1:X,
    -                 y = sample(letters, X, replace = T), 
    -                 z = sample(c(TRUE, FALSE), X, replace = T))
    -con <- dbConnect(noctua::athena())
    -dbWriteTable(con, "test_split1", df, compress = T, overwrite = T) # default will now split compressed file into 20 equal size files.
    + +library(DBI) +X <- 1e8 +df <- data.frame(w =runif(X), + x = 1:X, + y = sample(letters, X, replace = T), + z = sample(c(TRUE, FALSE), X, replace = T)) +con <- dbConnect(noctua::athena()) +dbWriteTable(con, "test_split1", df, compress = T, overwrite = T) # default will now split compressed file into 20 equal size files.

    Added information message to inform user about what files have been added to S3 location if user is overwriting an Athena table.

    @@ -895,12 +903,12 @@

    Minor ChangeBackend Change

    • helper function upload_data has been rebuilt and removed the old “horrible” if statement with paste now the function relies on sprintf to construct the s3 location path. This method now is a lot clearer in how the s3 location is created plus it enables a dbWriteTable to be simplified. dbWriteTable can now upload data to the default s3_staging directory created in dbConnect this simplifies dbWriteTable to :
    -
    -library(DBI)
    -
    -con <- dbConnect(noctua::athena())
    -
    -dbWriteTable(con, "iris", iris)
    + +library(DBI) + +con <- dbConnect(noctua::athena()) + +dbWriteTable(con, "iris", iris)

    Bug Fix

    @@ -990,7 +998,7 @@

    Athena lower level api
    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index c11ed67..484398c 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,5 @@ -pandoc: 2.17.1.1 -pkgdown: 2.0.3 +pandoc: 2.19.2 +pkgdown: 2.0.7 pkgdown_sha: ~ articles: aws_athena_query_caching: aws_athena_query_caching.html @@ -9,5 +9,5 @@ articles: convert_and_save_cost: convert_and_save_cost.html getting_started: getting_started.html how_to_retry: how_to_retry.html -last_built: 2022-05-20T08:29Z +last_built: 2022-12-19T16:26Z diff --git a/docs/reference/AthenaConnection.html b/docs/reference/AthenaConnection.html index 5ac328d..c2528ad 100644 --- a/docs/reference/AthenaConnection.html +++ b/docs/reference/AthenaConnection.html @@ -18,7 +18,7 @@ noctua - 2.6.0 + 2.6.1 @@ -83,14 +83,15 @@

    Athena Connection Methods

    -
    # S4 method for AthenaConnection
    -show(object)
    +
    # S4 method for AthenaConnection
    +show(object)

    Arguments

    object

    Any R object

    +

    Slots

    @@ -122,7 +123,7 @@

    Slots

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/AthenaDriver.html b/docs/reference/AthenaDriver.html index dc1eeab..57f6580 100644 --- a/docs/reference/AthenaDriver.html +++ b/docs/reference/AthenaDriver.html @@ -18,7 +18,7 @@ noctua - 2.6.0 + 2.6.1 @@ -83,14 +83,15 @@

    Athena Driver Methods

    -
    # S4 method for AthenaDriver
    -show(object)
    +
    # S4 method for AthenaDriver
    +show(object)

    Arguments

    object

    Any R object

    +
    @@ -105,7 +106,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/AthenaWriteTables.html b/docs/reference/AthenaWriteTables.html index 543478c..cec8154 100644 --- a/docs/reference/AthenaWriteTables.html +++ b/docs/reference/AthenaWriteTables.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,70 +81,78 @@

    Convenience functions for reading/writing DBMS tables

    -
    # S4 method for AthenaConnection,character,data.frame
    -dbWriteTable(
    -  conn,
    -  name,
    -  value,
    -  overwrite = FALSE,
    -  append = FALSE,
    -  row.names = NA,
    -  field.types = NULL,
    -  partition = NULL,
    -  s3.location = NULL,
    -  file.type = c("tsv", "csv", "parquet", "json"),
    -  compress = FALSE,
    -  max.batch = Inf,
    -  ...
    -)
    -
    -# S4 method for AthenaConnection,Id,data.frame
    -dbWriteTable(
    -  conn,
    -  name,
    -  value,
    -  overwrite = FALSE,
    -  append = FALSE,
    -  row.names = NA,
    -  field.types = NULL,
    -  partition = NULL,
    -  s3.location = NULL,
    -  file.type = c("tsv", "csv", "parquet", "json"),
    -  compress = FALSE,
    -  max.batch = Inf,
    -  ...
    -)
    -
    -# S4 method for AthenaConnection,SQL,data.frame
    -dbWriteTable(
    -  conn,
    -  name,
    -  value,
    -  overwrite = FALSE,
    -  append = FALSE,
    -  row.names = NA,
    -  field.types = NULL,
    -  partition = NULL,
    -  s3.location = NULL,
    -  file.type = c("tsv", "csv", "parquet", "json"),
    -  compress = FALSE,
    -  max.batch = Inf,
    -  ...
    -)
    +
    # S4 method for AthenaConnection,character,data.frame
    +dbWriteTable(
    +  conn,
    +  name,
    +  value,
    +  overwrite = FALSE,
    +  append = FALSE,
    +  row.names = NA,
    +  field.types = NULL,
    +  partition = NULL,
    +  s3.location = NULL,
    +  file.type = c("tsv", "csv", "parquet", "json"),
    +  compress = FALSE,
    +  max.batch = Inf,
    +  ...
    +)
    +
    +# S4 method for AthenaConnection,Id,data.frame
    +dbWriteTable(
    +  conn,
    +  name,
    +  value,
    +  overwrite = FALSE,
    +  append = FALSE,
    +  row.names = NA,
    +  field.types = NULL,
    +  partition = NULL,
    +  s3.location = NULL,
    +  file.type = c("tsv", "csv", "parquet", "json"),
    +  compress = FALSE,
    +  max.batch = Inf,
    +  ...
    +)
    +
    +# S4 method for AthenaConnection,SQL,data.frame
    +dbWriteTable(
    +  conn,
    +  name,
    +  value,
    +  overwrite = FALSE,
    +  append = FALSE,
    +  row.names = NA,
    +  field.types = NULL,
    +  partition = NULL,
    +  s3.location = NULL,
    +  file.type = c("tsv", "csv", "parquet", "json"),
    +  compress = FALSE,
    +  max.batch = Inf,
    +  ...
    +)

    Arguments

    conn

    An AthenaConnection object, produced by [DBI::dbConnect()]

    + +
    name

    A character string specifying a table name. Names will be automatically quoted so you can use any sequence of characters, not just any valid bare table name.

    + +
    value

    A data.frame to write to the database.

    + +
    overwrite

    Allows overwriting the destination table. Cannot be TRUE if append is also TRUE.

    + +
    append

    Allow appending to the destination table. Cannot be TRUE if overwrite is also TRUE. Existing Athena DDL file type will be retained and used when uploading data to AWS Athena. If parameter file.type doesn't match AWS Athena DDL file type a warning message will be created @@ -153,6 +161,8 @@

    Arguments

  • parquet: Parquet SerDe

  • json: JSON SerDe Libraries

  • + +
    row.names

    Either TRUE, FALSE, NA or a string.

    If TRUE, always translate row names to a column called "row_names". @@ -161,36 +171,53 @@

    Arguments

    A string is equivalent to TRUE, but allows you to override the default name.

    For backward compatibility, NULL is equivalent to FALSE.

    + +
    field.types

    Additional field types used to override derived types.

    + +
    partition

    Partition Athena table (needs to be a named list or vector) for example: c(var1 = "2019-20-13")

    + +
    s3.location

    s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/"). By default, the s3.location is set to s3 staging directory from AthenaConnection object. Note: When creating a table for the first time s3.location will be formatted from "s3://mybucket/data/" to the following syntax "s3://{mybucket/data}/{schema}/{table}/{parition}/" this is to support tables with the same name but existing in different schemas. If schema isn't specified in name parameter then the schema from dbConnect is used instead.

    + +
    file.type

    What file type to store data.frame on s3, noctua currently supports ["tsv", "csv", "parquet", "json"]. Default delimited file type is "tsv", in previous versions of noctua (=< 1.4.0) file type "csv" was used as default. The reason for the change is that columns containing Array/JSON format cannot be written to Athena due to the separating value ",". This would cause issues with AWS Athena. Note: "parquet" format is supported by the arrow package and it will need to be installed to utilise the "parquet" format. "json" format is supported by jsonlite package and it will need to be installed to utilise the "json" format.

    + +
    compress

    FALSE | TRUE To determine if to compress file.type. If file type is ["csv", "tsv"] then "gzip" compression is used, for file type "parquet" "snappy" compression is used. Currently noctua doesn't support compression for "json" file type.

    + +
    max.batch

    Split the data frame by max number of rows i.e. 100,000 so that multiple files can be uploaded into AWS S3. By default when compression is set to TRUE and file.type is "csv" or "tsv" max.batch will split data.frame into 20 batches. This is to help the performance of AWS Athena when working with files compressed in "gzip" format. max.batch will not split the data.frame when loading file in parquet format. For more information please go to link

    + +
    ...

    Other arguments used by individual methods.

    +

    Value

    -

    dbWriteTable() returns TRUE, invisibly. If the table exists, and both append and overwrite + + +

    dbWriteTable() returns TRUE, invisibly. If the table exists, and both append and overwrite arguments are unset, or append = TRUE and the data frame with the new data has different column names, an error is raised; the remote table remains unchanged.

    @@ -201,48 +228,48 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# List existing tables in Athena
    -dbListTables(con)
    -
    -# Write data.frame to Athena table
    -dbWriteTable(con, "mtcars", mtcars,
    -             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://mybucket/data/")
    -             
    -# Read entire table from Athena
    -dbReadTable(con, "mtcars")
    -
    -# List all tables in Athena after uploading new table to Athena
    -dbListTables(con)
    -
    -# Checking if uploaded table exists in Athena
    -dbExistsTable(con, "mtcars")
    -
    -# using default s3.location
    -dbWriteTable(con, "iris", iris)
    -
    -# Read entire table from Athena
    -dbReadTable(con, "iris")
    -
    -# List all tables in Athena after uploading new table to Athena
    -dbListTables(con)
    -
    -# Checking if uploaded table exists in Athena
    -dbExistsTable(con, "iris")
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name 
    +con <- dbConnect(noctua::athena())
    +
    +# List existing tables in Athena
    +dbListTables(con)
    +
    +# Write data.frame to Athena table
    +dbWriteTable(con, "mtcars", mtcars,
    +             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    +             s3.location = "s3://mybucket/data/")
    +             
    +# Read entire table from Athena
    +dbReadTable(con, "mtcars")
    +
    +# List all tables in Athena after uploading new table to Athena
    +dbListTables(con)
    +
    +# Checking if uploaded table exists in Athena
    +dbExistsTable(con, "mtcars")
    +
    +# using default s3.location
    +dbWriteTable(con, "iris", iris)
    +
    +# Read entire table from Athena
    +dbReadTable(con, "iris")
    +
    +# List all tables in Athena after uploading new table to Athena
    +dbListTables(con)
    +
    +# Checking if uploaded table exists in Athena
    +dbExistsTable(con, "iris")
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -257,7 +284,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/Query.html b/docs/reference/Query.html index 86ae7d6..86627d0 100644 --- a/docs/reference/Query.html +++ b/docs/reference/Query.html @@ -1,5 +1,5 @@ -Execute a query on Athena — Query • noctuaExecute a query on Athena — Query • noctuaConnect to Athena using R's sdk paws — dbConnect,AthenaDriver-method • noctua noctua - 2.6.0 + 2.6.1 @@ -92,12 +91,11 @@

    Connect to Athena using R's sdk paws

    It is never advised to hard-code credentials when making a connection to Athena (even though the option is there). Instead it is advised to use -profile_name (set up by AWS Command Line Interface), +profile_name (set up by AWS Command Line Interface), Amazon Resource Name roles or environmental variables. Here is a list of supported environment variables:

    • AWS_ACCESS_KEY_ID: is equivalent to the dbConnect parameter - aws_access_key_id

    • AWS_SECRET_ACCESS_KEY: is equivalent to the dbConnect parameter - aws_secret_access_key

    • AWS_SESSION_TOKEN: is equivalent to the dbConnect parameter - aws_session_token

    • -
    • AWS_ROLE_ARN: is equivalent to the dbConnect parameter - role_arn

    • AWS_EXPIRATION: is equivalent to the dbConnect parameter - duration_seconds

    • AWS_ATHENA_S3_STAGING_DIR: is equivalent to the dbConnect parameter - s3_staging_dir

    • AWS_ATHENA_WORK_GROUP: is equivalent to dbConnect parameter - work_group

    • @@ -106,32 +104,32 @@

      Connect to Athena using R's sdk paws

    -
    # S4 method for AthenaDriver
    -dbConnect(
    -  drv,
    -  aws_access_key_id = NULL,
    -  aws_secret_access_key = NULL,
    -  aws_session_token = NULL,
    -  schema_name = "default",
    -  work_group = NULL,
    -  poll_interval = NULL,
    -  encryption_option = c("NULL", "SSE_S3", "SSE_KMS", "CSE_KMS"),
    -  kms_key = NULL,
    -  profile_name = NULL,
    -  role_arn = NULL,
    -  role_session_name = sprintf("noctua-session-%s", as.integer(Sys.time())),
    -  duration_seconds = 3600L,
    -  s3_staging_dir = NULL,
    -  region_name = NULL,
    -  bigint = c("integer64", "integer", "numeric", "character"),
    -  binary = c("raw", "character"),
    -  json = c("auto", "character"),
    -  timezone = "UTC",
    -  keyboard_interrupt = TRUE,
    -  rstudio_conn_tab = TRUE,
    -  endpoint_override = NULL,
    -  ...
    -)
    +
    # S4 method for AthenaDriver
    +dbConnect(
    +  drv,
    +  aws_access_key_id = NULL,
    +  aws_secret_access_key = NULL,
    +  aws_session_token = NULL,
    +  schema_name = "default",
    +  work_group = NULL,
    +  poll_interval = NULL,
    +  encryption_option = c("NULL", "SSE_S3", "SSE_KMS", "CSE_KMS"),
    +  kms_key = NULL,
    +  profile_name = NULL,
    +  role_arn = NULL,
    +  role_session_name = sprintf("noctua-session-%s", as.integer(Sys.time())),
    +  duration_seconds = 3600L,
    +  s3_staging_dir = NULL,
    +  region_name = NULL,
    +  bigint = c("integer64", "integer", "numeric", "character"),
    +  binary = c("raw", "character"),
    +  json = c("auto", "character"),
    +  timezone = "UTC",
    +  keyboard_interrupt = TRUE,
    +  rstudio_conn_tab = TRUE,
    +  endpoint_override = NULL,
    +  ...
    +)
    @@ -140,62 +138,104 @@

    Arguments

    an object that inherits from DBIDriver, or an existing DBIConnection object (in order to clone an existing connection).

    + +
    aws_access_key_id

    AWS access key ID

    + +
    aws_secret_access_key

    AWS secret access key

    + +
    aws_session_token

    AWS temporary session token

    + +
    schema_name

    The schema_name to which the connection belongs

    + +
    work_group

    The name of the work group to run Athena queries , Currently defaulted to NULL.

    + +
    poll_interval

    Amount of time took when checking query execution status. Default set to a random interval between 0.5 - 1 seconds.

    + +
    encryption_option
    -

    Athena encryption at rest link. +

    Athena encryption at rest link. Supported Amazon S3 Encryption Options ["NULL", "SSE_S3", "SSE_KMS", "CSE_KMS"]. Connection will default to NULL, usually changing this option is not required.

    + +
    kms_key
    -

    AWS Key Management Service, +

    AWS Key Management Service, please refer to link for more information around the concept.

    + +
    profile_name

    The name of a profile to use. If not given, then the default profile is used. To set profile name, the AWS Command Line Interface (AWS CLI) will need to be configured. To configure AWS CLI please refer to: Configuring the AWS CLI.

    + +
    role_arn

    The Amazon Resource Name (ARN) of the role to assume (such as arn:aws:sts::123456789012:assumed-role/role_name/role_session_name)

    + +
    role_session_name

    An identifier for the assumed role session. By default `noctua` creates a session name sprintf("noctua-session-%s", as.integer(Sys.time()))

    + +
    duration_seconds
    -

    The duration, in seconds, of the role session. The value can range from 900 seconds (15 minutes) up to the maximum session duration setting for the role. +

    The duration, in seconds, of the role session. The value can range from 900 seconds (15 minutes) up to the maximum session duration setting for the role. This setting can have a value from 1 hour to 12 hours. By default duration is set to 3600 seconds (1 hour).

    + +
    s3_staging_dir

    The location in Amazon S3 where your query results are stored, such as s3://path/to/query/bucket/

    + +
    region_name
    -

    Default region when creating new connections. Please refer to link for +

    Default region when creating new connections. Please refer to link for AWS region codes (region code example: Region = EU (Ireland) region_name = "eu-west-1")

    + +
    bigint

    The R type that 64-bit integer types should be mapped to, default is [bit64::integer64], which allows the full range of 64 bit integers.

    + +
    binary

    The R type that [binary/varbinary] types should be mapped to, default is [raw]. If the mapping fails R will resort to [character] type. To ignore data type conversion set to ["character"].

    + +
    json

    Attempt to converts AWS Athena data types [arrays, json] using jsonlite:parse_json. If the mapping fails R will resort to [character] type. Custom Json parsers can be provide by using a function with data frame parameter. To ignore data type conversion set to ["character"].

    + +
    timezone

    Sets the timezone for the connection. The default is `UTC`. If `NULL` then no timezone is set, which defaults to the server's time zone. `AWS Athena` accepted time zones: https://docs.aws.amazon.com/athena/latest/ug/athena-supported-time-zones.html.

    + +
    keyboard_interrupt

    Stops AWS Athena process when R gets a keyboard interrupt, currently defaults to TRUE

    + +
    rstudio_conn_tab

    Optional to get AWS Athena Schema from AWS Glue Catalogue and display it in RStudio's Connections Tab. Default set to TRUE. For large `AWS Glue Catalogue` it is recommended to set `rstudio_conn_tab=FALSE` to ensure a fast connection.

    + +
    endpoint_override

    (character/list) The complete URL to use for the constructed client. Normally, paws will automatically construct the appropriate URL to use when @@ -204,6 +244,8 @@

    Arguments

    If endpoint_override is a character then AWS Athena endpoint is overridden. To override AWS S3 or AWS Glue endpoints a named list needs to be provided. The list can only have the following names ['athena', 's3', glue'] for example list(glue = "https://glue.eu-west-1.amazonaws.com")

    + +
    ...

    other parameters for paws session.

    • disable_ssl (boolean) Whether or not to use SSL. By default, SSL is used. Note that not all services support non-ssl connections.

    • @@ -219,10 +261,13 @@

      Arguments

      If the S3 Accelerate endpoint is being used then the addressing style will always be virtual.

    • use_dual_stack Setting to TRUE enables dual stack endpoint resolution.

    +

    Value

    -

    dbConnect() returns a s4 class. This object is used to communicate with AWS Athena.

    + + +

    dbConnect() returns a s4 class. This object is used to communicate with AWS Athena.

    See also

    @@ -231,31 +276,34 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Connect to Athena using your aws access keys
    - library(DBI)
    - con <- dbConnect(noctua::athena(),
    -                  aws_access_key_id='YOUR_ACCESS_KEY_ID', # 
    -                  aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
    -                  s3_staging_dir='s3://path/to/query/bucket/',
    -                  region_name='us-west-2')
    - dbDisconnect(con)
    - 
    -# Connect to Athena using your profile name
    -# Profile name can be created by using AWS CLI
    - con <- dbConnect(noctua::athena(),
    -                  profile_name = "YOUR_PROFILE_NAME",
    -                  s3_staging_dir = 's3://path/to/query/bucket/')
    - dbDisconnect(con)
    - 
    -# Connect to Athena using ARN role
    - con <- dbConnect(noctua::athena(),
    -                  profile_name = "YOUR_PROFILE_NAME",
    -                  role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name",
    -                  s3_staging_dir = 's3://path/to/query/bucket/')
    -                 
    - dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Connect to Athena using your aws access keys
    +library(DBI)
    +con <- dbConnect(noctua::athena(),
    +  aws_access_key_id = "YOUR_ACCESS_KEY_ID", #
    +  aws_secret_access_key = "YOUR_SECRET_ACCESS_KEY",
    +  s3_staging_dir = "s3://path/to/query/bucket/",
    +  region_name = "us-west-2"
    +)
    +dbDisconnect(con)
    +
    +# Connect to Athena using your profile name
    +# Profile name can be created by using AWS CLI
    +con <- dbConnect(noctua::athena(),
    +  profile_name = "YOUR_PROFILE_NAME",
    +  s3_staging_dir = "s3://path/to/query/bucket/"
    +)
    +dbDisconnect(con)
    +
    +# Connect to Athena using ARN role
    +con <- dbConnect(noctua::athena(),
    +  profile_name = "YOUR_PROFILE_NAME",
    +  role_arn = "arn:aws:sts::123456789012:assumed-role/role_name/role_session_name",
    +  s3_staging_dir = "s3://path/to/query/bucket/"
    +)
    +
    +dbDisconnect(con)
    +}
     
    @@ -270,7 +318,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbConvertTable.html b/docs/reference/dbConvertTable.html index 0d0e701..5ed5589 100644 --- a/docs/reference/dbConvertTable.html +++ b/docs/reference/dbConvertTable.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,86 +81,107 @@

    Simple wrapper to convert Athena backend file types

    -
    dbConvertTable(conn, obj, name, ...)
    -
    -# S4 method for AthenaConnection
    -dbConvertTable(
    -  conn,
    -  obj,
    -  name,
    -  partition = NULL,
    -  s3.location = NULL,
    -  file.type = c("NULL", "csv", "tsv", "parquet", "json", "orc"),
    -  compress = TRUE,
    -  data = TRUE,
    -  ...
    -)
    +
    dbConvertTable(conn, obj, name, ...)
    +
    +# S4 method for AthenaConnection
    +dbConvertTable(
    +  conn,
    +  obj,
    +  name,
    +  partition = NULL,
    +  s3.location = NULL,
    +  file.type = c("NULL", "csv", "tsv", "parquet", "json", "orc"),
    +  compress = TRUE,
    +  data = TRUE,
    +  ...
    +)

    Arguments

    conn

    An AthenaConnection object, produced by [DBI::dbConnect()]

    + +
    obj
    -

    Athena table or SQL DML query to be converted. For SQL, the query need to be wrapped with DBI::SQL() and +

    Athena table or SQL DML query to be converted. For SQL, the query need to be wrapped with DBI::SQL() and follow AWS Athena DML format link

    + +
    name

    Name of destination table

    + +
    ...

    Extra parameters, currently not used

    + +
    partition

    Partition Athena table

    + +
    s3.location

    location to store output file, must be in s3 uri format for example ("s3://mybucket/data/").

    + +
    file.type
    -

    File type for name, currently support ["NULL","csv", "tsv", "parquet", "json", "orc"]. +

    File type for name, currently support ["NULL","csv", "tsv", "parquet", "json", "orc"]. "NULL" will let Athena set the file type for you.

    + +
    compress

    Compress name, currently can only compress ["parquet", "orc"] (AWS Athena CTAS)

    + +
    data

    If name should be created with data or not.

    +

    Value

    -

    dbConvertTable() returns TRUE but invisible.

    + + +

    dbConvertTable() returns TRUE but invisible.

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `RAthena::dbConnect` documnentation
    -
    -library(DBI)
    -library(noctua)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(athena())
    -                 
    -# write iris table to Athena in defualt delimited format                 
    -dbWriteTable(con, "iris", iris)
    -
    -# convert delimited table to parquet
    -dbConvertTable(con, 
    -              obj = "iris",
    -              name = "iris_parquet",
    -              file.type = "parquet")
    -
    -# Create partitioned table from non-partitioned 
    -# iris table using SQL DML query
    -dbConvertTable(con,
    -               obj = SQL("select 
    -                            iris.*, 
    -                            date_format(current_date, '%Y%m%d') as time_stamp 
    -                          from iris"),
    -               name = "iris_orc_partitioned",
    -               file.type = "orc",
    -               partition = "time_stamp")
    -
    -# disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `RAthena::dbConnect` documnentation
    +
    +library(DBI)
    +library(noctua)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(athena())
    +
    +# write iris table to Athena in defualt delimited format
    +dbWriteTable(con, "iris", iris)
    +
    +# convert delimited table to parquet
    +dbConvertTable(con,
    +  obj = "iris",
    +  name = "iris_parquet",
    +  file.type = "parquet"
    +)
    +
    +# Create partitioned table from non-partitioned
    +# iris table using SQL DML query
    +dbConvertTable(con,
    +  obj = SQL("select
    +                            iris.*,
    +                            date_format(current_date, '%Y%m%d') as time_stamp
    +                          from iris"),
    +  name = "iris_orc_partitioned",
    +  file.type = "orc",
    +  partition = "time_stamp"
    +)
    +
    +# disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -175,7 +196,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbDataType.html b/docs/reference/dbDataType.html index 62a9cd4..d7e4930 100644 --- a/docs/reference/dbDataType.html +++ b/docs/reference/dbDataType.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,17 +81,17 @@

    Determine SQL data type of object

    -
    # S4 method for AthenaDriver,ANY
    -dbDataType(dbObj, obj, ...)
    -
    -# S4 method for AthenaDriver,list
    -dbDataType(dbObj, obj, ...)
    -
    -# S4 method for AthenaConnection,ANY
    -dbDataType(dbObj, obj, ...)
    -
    -# S4 method for AthenaConnection,data.frame
    -dbDataType(dbObj, obj, ...)
    +
    # S4 method for AthenaDriver,ANY
    +dbDataType(dbObj, obj, ...)
    +
    +# S4 method for AthenaDriver,list
    +dbDataType(dbObj, obj, ...)
    +
    +# S4 method for AthenaConnection,ANY
    +dbDataType(dbObj, obj, ...)
    +
    +# S4 method for AthenaConnection,data.frame
    +dbDataType(dbObj, obj, ...)
    @@ -99,14 +99,21 @@

    Arguments

    dbObj

    A object inheriting from DBIDriver or DBIConnection

    + +
    obj

    An R object whose SQL type we want to determine.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    dbDataType returns the Athena type that correspond to the obj argument as an non-empty character string.

    + + +

    dbDataType returns the Athena type that correspond to the obj argument as an non-empty character string.

    See also

    @@ -115,43 +122,44 @@

    See also

    Examples

    -
    library(noctua)
    -dbDataType(athena(), 1:5)
    +    
    library(noctua)
    +dbDataType(athena(), 1:5)
     #> [1] "INT"
    -dbDataType(athena(), 1)
    +dbDataType(athena(), 1)
     #> [1] "DOUBLE"
    -dbDataType(athena(), TRUE)
    +dbDataType(athena(), TRUE)
     #> [1] "BOOLEAN"
    -dbDataType(athena(), Sys.Date())
    +dbDataType(athena(), Sys.Date())
     #> [1] "DATE"
    -dbDataType(athena(), Sys.time())
    +dbDataType(athena(), Sys.time())
     #> [1] "TIMESTAMP"
    -dbDataType(athena(), c("x", "abc"))
    +dbDataType(athena(), c("x", "abc"))
     #> [1] "STRING"
    -dbDataType(athena(), list(raw(10), raw(20)))
    +dbDataType(athena(), list(raw(10), raw(20)))
     #> [1] "STRING"
    -
    -vapply(iris, function(x) dbDataType(noctua::athena(), x),
    -       FUN.VALUE = character(1), USE.NAMES = TRUE)
    +
    +vapply(iris, function(x) dbDataType(noctua::athena(), x),
    +  FUN.VALUE = character(1), USE.NAMES = TRUE
    +)
     #> Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
     #>     "DOUBLE"     "DOUBLE"     "DOUBLE"     "DOUBLE"     "STRING" 
    -
    -if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Sending Queries to Athena
    -dbDataType(con, iris)
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +
    +if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Sending Queries to Athena
    +dbDataType(con, iris)
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -166,7 +174,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbDisconnect.html b/docs/reference/dbDisconnect.html index ca496d1..4bd2fa0 100644 --- a/docs/reference/dbDisconnect.html +++ b/docs/reference/dbDisconnect.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    Disconnect (close) an Athena connection

    -
    # S4 method for AthenaConnection
    -dbDisconnect(conn, ...)
    +
    # S4 method for AthenaConnection
    +dbDisconnect(conn, ...)
    @@ -90,12 +90,17 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbDisconnect() returns TRUE, invisibly.

    + + +

    dbDisconnect() returns TRUE, invisibly.

    See also

    @@ -104,19 +109,19 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -131,7 +136,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbExistsTable.html b/docs/reference/dbExistsTable.html index fc3e194..4dee0a4 100644 --- a/docs/reference/dbExistsTable.html +++ b/docs/reference/dbExistsTable.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    Does Athena table exist?

    -
    # S4 method for AthenaConnection,character
    -dbExistsTable(conn, name, ...)
    +
    # S4 method for AthenaConnection,character
    +dbExistsTable(conn, name, ...)
    @@ -90,6 +90,8 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    name

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -98,12 +100,17 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbExistsTable() returns logical scalar. TRUE if the table exists, FALSE otherwise.

    + + +

    dbExistsTable() returns logical scalar. TRUE if the table exists, FALSE otherwise.

    See also

    @@ -112,27 +119,28 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Write data.frame to Athena table
    -dbWriteTable(con, "mtcars", mtcars,
    -             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://mybucket/data/")
    -             
    -# Check if table exists from Athena
    -dbExistsTable(con, "mtcars")
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Write data.frame to Athena table
    +dbWriteTable(con, "mtcars", mtcars,
    +  partition = c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    +  s3.location = "s3://mybucket/data/"
    +)
    +
    +# Check if table exists from Athena
    +dbExistsTable(con, "mtcars")
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -147,7 +155,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbFetch.html b/docs/reference/dbFetch.html index c6bde31..c00da73 100644 --- a/docs/reference/dbFetch.html +++ b/docs/reference/dbFetch.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    Fetch records from previously executed query

    -
    # S4 method for AthenaResult
    -dbFetch(res, n = -1, ...)
    +
    # S4 method for AthenaResult
    +dbFetch(res, n = -1, ...)
    @@ -90,15 +90,22 @@

    Arguments

    res

    An object inheriting from DBIResult, created by dbSendQuery().

    + +
    n

    maximum number of records to retrieve per fetch. Use n = -1 or n = Inf to retrieve all pending records. Some implementations may recognize other special values. If entire dataframe is required use n = -1 or n = Inf.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    dbFetch() returns a data frame.

    + + +

    dbFetch() returns a data frame.

    See also

    @@ -107,23 +114,23 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -res <- dbSendQuery(con, "show databases")
    -dbFetch(res)
    -dbClearResult(res)
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +res <- dbSendQuery(con, "show databases")
    +dbFetch(res)
    +dbClearResult(res)
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -138,7 +145,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbGetInfo.html b/docs/reference/dbGetInfo.html index 4d6ef13..b384036 100644 --- a/docs/reference/dbGetInfo.html +++ b/docs/reference/dbGetInfo.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,11 +81,11 @@

    Get DBMS metadata

    -
    # S4 method for AthenaConnection
    -dbGetInfo(dbObj, ...)
    -
    -# S4 method for AthenaResult
    -dbGetInfo(dbObj, ...)
    +
    # S4 method for AthenaConnection
    +dbGetInfo(dbObj, ...)
    +
    +# S4 method for AthenaResult
    +dbGetInfo(dbObj, ...)
    @@ -94,12 +94,17 @@

    Arguments

    An object inheriting from DBIObject, i.e. DBIDriver, DBIConnection, or a DBIResult

    + +
    ...

    Other arguments to methods.

    +

    Value

    -

    a named list

    + + +

    a named list

    See also

    @@ -108,29 +113,29 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -                 
    -# Returns metadata from connnection object
    -metadata <- dbGetInfo(con)
    -
    -# Return metadata from Athena query object
    -res <- dbSendQuery(con, "show databases")
    -dbGetInfo(res)
    -
    -# Clear result
    -dbClearResult(res)
    -
    -# disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Returns metadata from connnection object
    +metadata <- dbGetInfo(con)
    +
    +# Return metadata from Athena query object
    +res <- dbSendQuery(con, "show databases")
    +dbGetInfo(res)
    +
    +# Clear result
    +dbClearResult(res)
    +
    +# disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -145,7 +150,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbGetPartition.html b/docs/reference/dbGetPartition.html index efd5132..48f2eb9 100644 --- a/docs/reference/dbGetPartition.html +++ b/docs/reference/dbGetPartition.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,10 +81,10 @@

    Athena table partitions

    -
    dbGetPartition(conn, name, ..., .format = FALSE)
    -
    -# S4 method for AthenaConnection
    -dbGetPartition(conn, name, ..., .format = FALSE)
    +
    dbGetPartition(conn, name, ..., .format = FALSE)
    +
    +# S4 method for AthenaConnection
    +dbGetPartition(conn, name, ..., .format = FALSE)
    @@ -92,6 +92,8 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    name

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -100,42 +102,50 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    ...

    Other parameters passed on to methods.

    + +
    .format

    re-formats AWS Athena partitions format. So that each column represents a partition from the AWS Athena table. Default set to FALSE to prevent breaking previous package behaviour.

    +

    Value

    -

    data.frame that returns all partitions in table, if no partitions in Athena table then + + +

    data.frame that returns all partitions in table, if no partitions in Athena table then function will return error from Athena.

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -                 
    -# write iris table to Athena                  
    -dbWriteTable(con, "iris",
    -             iris,
    -             partition = c("timestamp" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://path/to/store/athena/table/")
    -
    -# return table partitions
    -noctua::dbGetPartition(con, "iris")
    -
    -# disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# write iris table to Athena
    +dbWriteTable(con, "iris",
    +  iris,
    +  partition = c("timestamp" = format(Sys.Date(), "%Y%m%d")),
    +  s3.location = "s3://path/to/store/athena/table/"
    +)
    +
    +# return table partitions
    +noctua::dbGetPartition(con, "iris")
    +
    +# disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -150,7 +160,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbGetQuery.html b/docs/reference/dbGetQuery.html index b984108..6734e14 100644 --- a/docs/reference/dbGetQuery.html +++ b/docs/reference/dbGetQuery.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    Send query, retrieve results and then clear result set

    -
    # S4 method for AthenaConnection,character
    -dbGetQuery(conn, statement, statistics = FALSE, unload = athena_unload(), ...)
    +
    # S4 method for AthenaConnection,character
    +dbGetQuery(conn, statement, statistics = FALSE, unload = athena_unload(), ...)
    @@ -90,19 +90,30 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    statement

    a character string containing SQL.

    + +
    statistics

    If set to TRUE will print out AWS Athena statistics of query.

    + +
    unload

    boolean input to modify `statement` to align with AWS Athena UNLOAD, default is set to FALSE.

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbGetQuery() returns a dataframe.

    + + +

    dbGetQuery() returns a dataframe.

    Note

    @@ -118,22 +129,22 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Sending Queries to Athena
    -dbGetQuery(con, "show databases")
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Sending Queries to Athena
    +dbGetQuery(con, "show databases")
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -148,7 +159,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbGetStatement.html b/docs/reference/dbGetStatement.html index f25de7f..9753b74 100644 --- a/docs/reference/dbGetStatement.html +++ b/docs/reference/dbGetStatement.html @@ -18,7 +18,7 @@ noctua - 2.6.0 + 2.6.1 @@ -83,20 +83,25 @@

    Get the statement associated with a result set

    -
    # S4 method for AthenaResult
    -dbGetStatement(res, ...)
    +
    # S4 method for AthenaResult
    +dbGetStatement(res, ...)

    Arguments

    res

    An object inheriting from DBIResult.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    dbGetStatement() returns a character.

    + + +

    dbGetStatement() returns a character.

    See also

    @@ -105,19 +110,19 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -rs <- dbSendQuery(con, "SHOW TABLES in default")
    -dbGetStatement(rs)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +rs <- dbSendQuery(con, "SHOW TABLES in default")
    +dbGetStatement(rs)
    +}
     
    @@ -132,7 +137,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbGetTables.html b/docs/reference/dbGetTables.html index 7041866..a9682da 100644 --- a/docs/reference/dbGetTables.html +++ b/docs/reference/dbGetTables.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,10 +81,10 @@

    List Athena Schema, Tables and Table Types

    -
    dbGetTables(conn, ...)
    -
    -# S4 method for AthenaConnection
    -dbGetTables(conn, schema = NULL, ...)
    +
    dbGetTables(conn, ...)
    +
    +# S4 method for AthenaConnection
    +dbGetTables(conn, schema = NULL, ...)
    @@ -92,36 +92,43 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    ...

    Other parameters passed on to methods.

    + +
    schema

    Athena schema, default set to NULL to return all tables from all Athena schemas. Note: The use of DATABASE and SCHEMA is interchangeable within Athena.

    +

    Value

    -

    dbGetTables() returns a data.frame.

    + + +

    dbGetTables() returns a data.frame.

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -library(noctua)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -             
    -# Return hierarchy of tables in Athena
    -dbGetTables(con)
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +library(noctua)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Return hierarchy of tables in Athena
    +dbGetTables(con)
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -136,7 +143,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbHasCompleted.html b/docs/reference/dbHasCompleted.html index 49ddf96..409a041 100644 --- a/docs/reference/dbHasCompleted.html +++ b/docs/reference/dbHasCompleted.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,20 +81,25 @@

    Completion status

    -
    # S4 method for AthenaResult
    -dbHasCompleted(res, ...)
    +
    # S4 method for AthenaResult
    +dbHasCompleted(res, ...)

    Arguments

    res

    An object inheriting from DBIResult.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    dbHasCompleted() returns a logical scalar. TRUE if the query has completed, FALSE otherwise.

    + + +

    dbHasCompleted() returns a logical scalar. TRUE if the query has completed, FALSE otherwise.

    See also

    @@ -103,25 +108,25 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Check if query has completed
    -res <- dbSendQuery(con, "show databases")
    -dbHasCompleted(res)
    -
    -dbClearResult(res)
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Check if query has completed
    +res <- dbSendQuery(con, "show databases")
    +dbHasCompleted(res)
    +
    +dbClearResult(res)
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -136,7 +141,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbIsValid.html b/docs/reference/dbIsValid.html index 3db2701..d0bb645 100644 --- a/docs/reference/dbIsValid.html +++ b/docs/reference/dbIsValid.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,11 +81,11 @@

    Is this DBMS object still valid?

    -
    # S4 method for AthenaConnection
    -dbIsValid(dbObj, ...)
    -
    -# S4 method for AthenaResult
    -dbIsValid(dbObj, ...)
    +
    # S4 method for AthenaConnection
    +dbIsValid(dbObj, ...)
    +
    +# S4 method for AthenaResult
    +dbIsValid(dbObj, ...)
    @@ -94,12 +94,17 @@

    Arguments

    An object inheriting from DBIObject, i.e. DBIDriver, DBIConnection, or a DBIResult

    + +
    ...

    Other arguments to methods.

    +

    Value

    -

    dbIsValid() returns logical scalar, TRUE if the object (dbObj) is valid, FALSE otherwise.

    + + +

    dbIsValid() returns logical scalar, TRUE if the object (dbObj) is valid, FALSE otherwise.

    See also

    @@ -108,31 +113,31 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Check is connection is valid
    -dbIsValid(con)
    -
    -# Check is query is valid
    -res <- dbSendQuery(con, "show databases")
    -dbIsValid(res)
    -
    -# Check if query is valid after clearing result
    -dbClearResult(res)
    -dbIsValid(res)
    -
    -# Check if connection if valid after closing connection
    -dbDisconnect(con)
    -dbIsValid(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Check is connection is valid
    +dbIsValid(con)
    +
    +# Check is query is valid
    +res <- dbSendQuery(con, "show databases")
    +dbIsValid(res)
    +
    +# Check if query is valid after clearing result
    +dbClearResult(res)
    +dbIsValid(res)
    +
    +# Check if connection if valid after closing connection
    +dbDisconnect(con)
    +dbIsValid(con)
    +}
     
    @@ -147,7 +152,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbListFields.html b/docs/reference/dbListFields.html index df03e02..9d2881c 100644 --- a/docs/reference/dbListFields.html +++ b/docs/reference/dbListFields.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    List Field names of Athena table

    -
    # S4 method for AthenaConnection,character
    -dbListFields(conn, name, ...)
    +
    # S4 method for AthenaConnection,character
    +dbListFields(conn, name, ...)
    @@ -90,6 +90,8 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    name

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -98,12 +100,17 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbListFields() returns a character vector with all the fields from an Athena table.

    + + +

    dbListFields() returns a character vector with all the fields from an Athena table.

    See also

    @@ -112,27 +119,28 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Write data.frame to Athena table
    -dbWriteTable(con, "mtcars", mtcars,
    -             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://mybucket/data/")
    -             
    -# Return list of fields in table
    -dbListFields(con, "mtcars")
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Write data.frame to Athena table
    +dbWriteTable(con, "mtcars", mtcars,
    +  partition = c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    +  s3.location = "s3://mybucket/data/"
    +)
    +
    +# Return list of fields in table
    +dbListFields(con, "mtcars")
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -147,7 +155,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbListTables.html b/docs/reference/dbListTables.html index 1757c1e..17d09e7 100644 --- a/docs/reference/dbListTables.html +++ b/docs/reference/dbListTables.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    List Athena Tables

    -
    # S4 method for AthenaConnection
    -dbListTables(conn, schema = NULL, ...)
    +
    # S4 method for AthenaConnection
    +dbListTables(conn, schema = NULL, ...)
    @@ -90,15 +90,22 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    schema

    Athena schema, default set to NULL to return all tables from all Athena schemas. Note: The use of DATABASE and SCHEMA is interchangeable within Athena.

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbListTables() returns a character vector with all the tables from Athena.

    + + +

    dbListTables() returns a character vector with all the tables from Athena.

    See also

    @@ -107,22 +114,22 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -             
    -# Return list of tables in Athena
    -dbListTables(con)
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Return list of tables in Athena
    +dbListTables(con)
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -137,7 +144,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbQuote.html b/docs/reference/dbQuote.html index bf5e5ca..f71443b 100644 --- a/docs/reference/dbQuote.html +++ b/docs/reference/dbQuote.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,17 +81,17 @@

    Quote Identifiers

    -
    # S4 method for AthenaConnection,character
    -dbQuoteString(conn, x, ...)
    -
    -# S4 method for AthenaConnection,POSIXct
    -dbQuoteString(conn, x, ...)
    -
    -# S4 method for AthenaConnection,Date
    -dbQuoteString(conn, x, ...)
    -
    -# S4 method for AthenaConnection,SQL
    -dbQuoteIdentifier(conn, x, ...)
    +
    # S4 method for AthenaConnection,character
    +dbQuoteString(conn, x, ...)
    +
    +# S4 method for AthenaConnection,POSIXct
    +dbQuoteString(conn, x, ...)
    +
    +# S4 method for AthenaConnection,Date
    +dbQuoteString(conn, x, ...)
    +
    +# S4 method for AthenaConnection,SQL
    +dbQuoteIdentifier(conn, x, ...)
    @@ -99,14 +99,23 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    x

    A character vector to quote as string.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    Returns a character object, for more information please check out: dbQuoteString, dbQuoteIdentifier

    + + +

    Returns a character object, for more information please check out: dbQuoteString, dbQuoteIdentifier

    + +

    See also

    @@ -125,7 +134,7 @@

    See also

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbRemoveTable.html b/docs/reference/dbRemoveTable.html index 99d331b..81441c3 100644 --- a/docs/reference/dbRemoveTable.html +++ b/docs/reference/dbRemoveTable.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,8 +81,8 @@

    Remove table from Athena

    -
    # S4 method for AthenaConnection,character
    -dbRemoveTable(conn, name, delete_data = TRUE, confirm = FALSE, ...)
    +
    # S4 method for AthenaConnection,character
    +dbRemoveTable(conn, name, delete_data = TRUE, confirm = FALSE, ...)
    @@ -90,6 +90,8 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    name

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -98,17 +100,26 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    delete_data

    Deletes S3 files linking to AWS Athena table

    + +
    confirm

    Allows for S3 files to be deleted without the prompt check. It is recommend to leave this set to FALSE to avoid deleting other S3 files when the table's definition points to the root of S3 bucket.

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbRemoveTable() returns TRUE, invisibly.

    + + +

    dbRemoveTable() returns TRUE, invisibly.

    Note

    @@ -121,27 +132,28 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Write data.frame to Athena table
    -dbWriteTable(con, "mtcars", mtcars,
    -             partition=c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://mybucket/data/")
    -             
    -# Remove Table from Athena
    -dbRemoveTable(con, "mtcars")
    -
    -# Disconnect conenction
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# Write data.frame to Athena table
    +dbWriteTable(con, "mtcars", mtcars,
    +  partition = c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
    +  s3.location = "s3://mybucket/data/"
    +)
    +
    +# Remove Table from Athena
    +dbRemoveTable(con, "mtcars")
    +
    +# Disconnect conenction
    +dbDisconnect(con)
    +}
     
    @@ -156,7 +168,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbShow.html b/docs/reference/dbShow.html index cc521fa..0da2f87 100644 --- a/docs/reference/dbShow.html +++ b/docs/reference/dbShow.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,10 +81,10 @@

    Show Athena table's DDL

    -
    dbShow(conn, name, ...)
    -
    -# S4 method for AthenaConnection
    -dbShow(conn, name, ...)
    +
    dbShow(conn, name, ...)
    +
    +# S4 method for AthenaConnection
    +dbShow(conn, name, ...)
    @@ -92,6 +92,8 @@

    Arguments

    conn

    A DBIConnection object, as returned by dbConnect().

    + +
    name

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -100,38 +102,44 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    ...

    Other parameters passed on to methods.

    +

    Value

    -

    dbShow() returns SQL characters of the Athena table DDL.

    + + +

    dbShow() returns SQL characters of the Athena table DDL.

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -                 
    -# write iris table to Athena                  
    -dbWriteTable(con, "iris",
    -             iris,
    -             partition = c("timestamp" = format(Sys.Date(), "%Y%m%d")),
    -             s3.location = "s3://path/to/store/athena/table/")
    -
    -# return table ddl
    -noctua::dbShow(con, "iris")
    -
    -# disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +# write iris table to Athena
    +dbWriteTable(con, "iris",
    +  iris,
    +  partition = c("timestamp" = format(Sys.Date(), "%Y%m%d")),
    +  s3.location = "s3://path/to/store/athena/table/"
    +)
    +
    +# return table ddl
    +noctua::dbShow(con, "iris")
    +
    +# disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -146,7 +154,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbStatistics.html b/docs/reference/dbStatistics.html index edb675f..5c6e0ea 100644 --- a/docs/reference/dbStatistics.html +++ b/docs/reference/dbStatistics.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,44 +81,48 @@

    Show AWS Athena Statistics

    -
    dbStatistics(res, ...)
    -
    -# S4 method for AthenaResult
    -dbStatistics(res, ...)
    +
    dbStatistics(res, ...)
    +
    +# S4 method for AthenaResult
    +dbStatistics(res, ...)

    Arguments

    res

    An object inheriting from DBIResult.

    + +
    ...

    Other arguments passed on to methods.

    +

    Value

    -

    dbStatistics() returns list containing Athena Statistics return from paws.

    + + +

    dbStatistics() returns list containing Athena Statistics return from paws.

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `RAthena::dbConnect` documnentation
    -
    -library(DBI)
    -library(noctua)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -res <- dbSendQuery(con, "show databases")
    -dbStatistics(res)
    -
    -# Clean up
    -dbClearResult(res)
    -
    -}
    +    
    if (FALSE) {
    +# Note:
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `RAthena::dbConnect` documnentation
    +
    +library(DBI)
    +library(noctua)
    +
    +# Demo connection to Athena using profile name
    +con <- dbConnect(noctua::athena())
    +
    +res <- dbSendQuery(con, "show databases")
    +dbStatistics(res)
    +
    +# Clean up
    +dbClearResult(res)
    +}
     
    @@ -133,7 +137,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/db_compute.html b/docs/reference/db_compute.html index 9f48d77..648cc9c 100644 --- a/docs/reference/db_compute.html +++ b/docs/reference/db_compute.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,27 +81,36 @@

    S3 implementation of db_compute for Athena

    -
    db_compute.AthenaConnection(con, table, sql, ...)
    +
    db_compute.AthenaConnection(con, table, sql, ...)

    Arguments

    con

    A dbConnect object, as returned by dbConnect()

    + +
    table

    Table name, if left default noctua will use the default from dplyr's compute function.

    + +
    sql

    SQL code to be sent to the data

    + +
    ...

    passes noctua table creation parameters: [file_type,s3_location,partition]

    • file_type: What file type to store data.frame on s3, noctua currently supports ["NULL","csv", "parquet", "json"]. "NULL" will let Athena set the file_type for you.

    • s3_location: s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/")

    • partition: Partition Athena table, requires to be a partitioned variable from previous table.

    +

    Value

    -

    db_compute returns table name

    + + +

    db_compute returns table name

    See also

    @@ -110,37 +119,37 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -library(dplyr)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# Write data.frame to Athena table
    -copy_to(con, mtcars,
    -        s3_location = "s3://mybucket/data/")
    -             
    -# Write Athena table from tbl_sql
    -athena_mtcars <- tbl(con, "mtcars")
    -mtcars_filter <- athena_mtcars %>% filter(gear >=4)
    -
    -# create athena with unique table name
    -mtcars_filer %>% 
    -  compute()
    -
    -# create athena with specified name and s3 location
    -mtcars_filer %>% 
    -    compute("mtcars_filer",
    -            s3_location = "s3://mybucket/mtcars_filer/")
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documentation
    +
    +library(DBI)
    +library(dplyr)
    +
    +# Demo connection to Athena using profile name 
    +con <- dbConnect(noctua::athena())
    +
    +# Write data.frame to Athena table
    +copy_to(con, mtcars,
    +        s3_location = "s3://mybucket/data/")
    +             
    +# Write Athena table from tbl_sql
    +athena_mtcars <- tbl(con, "mtcars")
    +mtcars_filter <- athena_mtcars %>% filter(gear >=4)
    +
    +# create athena with unique table name
    +mtcars_filer %>% 
    +  compute()
    +
    +# create athena with specified name and s3 location
    +mtcars_filer %>% 
    +    compute("mtcars_filer",
    +            s3_location = "s3://mybucket/mtcars_filer/")
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -155,7 +164,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/db_connection_describe.html b/docs/reference/db_connection_describe.html index 7ce27a7..a0748ee 100644 --- a/docs/reference/db_connection_describe.html +++ b/docs/reference/db_connection_describe.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,18 +81,25 @@

    S3 implementation of db_connection_describe for Athena (api ver
    -
    db_connection_describe.AthenaConnection(con)
    +
    db_connection_describe.AthenaConnection(con)

    Arguments

    con

    A dbConnect object, as returned by dbConnect()

    +

    Value

    -

    Character variable containing Meta Data about query sent to Athena. The Meta Data is returned in the following format: -"Athena <paws version> [<profile_name>@region/database]"

    + + +

    Character variable containing Meta Data about query sent to Athena. The Meta Data is returned in the following format:

    + + +

    "Athena <paws version> [<profile_name>@region/database]"

    + +
    @@ -107,7 +114,7 @@

    Value

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/db_copy_to.html b/docs/reference/db_copy_to.html index 697344b..b7e8102 100644 --- a/docs/reference/db_copy_to.html +++ b/docs/reference/db_copy_to.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,63 +81,88 @@

    S3 implementation of db_copy_to for Athena

    -
    db_copy_to.AthenaConnection(
    -  con,
    -  table,
    -  values,
    -  overwrite = FALSE,
    -  append = FALSE,
    -  types = NULL,
    -  partition = NULL,
    -  s3_location = NULL,
    -  file_type = c("csv", "tsv", "parquet"),
    -  compress = FALSE,
    -  max_batch = Inf,
    -  ...
    -)
    +
    db_copy_to.AthenaConnection(
    +  con,
    +  table,
    +  values,
    +  overwrite = FALSE,
    +  append = FALSE,
    +  types = NULL,
    +  partition = NULL,
    +  s3_location = NULL,
    +  file_type = c("csv", "tsv", "parquet"),
    +  compress = FALSE,
    +  max_batch = Inf,
    +  ...
    +)

    Arguments

    con

    A dbConnect object, as returned by dbConnect()

    + +
    table

    A character string specifying a table name. Names will be automatically quoted so you can use any sequence of characters, not just any valid bare table name.

    + +
    values

    A data.frame to write to the database.

    + +
    overwrite

    Allows overwriting the destination table. Cannot be TRUE if append is also TRUE.

    + +
    append

    Allow appending to the destination table. Cannot be TRUE if overwrite is also TRUE. Existing Athena DDL file type will be retained and used when uploading data to AWS Athena. If parameter file.type doesn't match AWS Athena DDL file type a warning message will be created notifying user and noctua will use the file type for the Athena DDL.

    + +
    types

    Additional field types used to override derived types.

    + +
    partition

    Partition Athena table (needs to be a named list or vector) for example: c(var1 = "2019-20-13")

    + +
    s3_location

    s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/")

    + +
    file_type

    What file type to store data.frame on s3, noctua currently supports ["tsv", "csv", "parquet"]. Default delimited file type is "tsv", in previous versions of noctua (=< 1.4.0) file type "csv" was used as default. The reason for the change is that columns containing Array/JSON format cannot be written to Athena due to the separating value ",". This would cause issues with AWS Athena. Note: "parquet" format is supported by the arrow package and it will need to be installed to utilise the "parquet" format.

    + +
    compress

    FALSE | TRUE To determine if to compress file.type. If file type is ["csv", "tsv"] then "gzip" compression is used, for file type "parquet" "snappy" compression is used.

    + +
    max_batch

    Split the data frame by max number of rows i.e. 100,000 so that multiple files can be uploaded into AWS S3. By default when compression is set to TRUE and file.type is "csv" or "tsv" max.batch will split data.frame into 20 batches. This is to help the performance of AWS Athena when working with files compressed in "gzip" format. max.batch will not split the data.frame when loading file in parquet format. For more information please go to link

    + +
    ...

    other parameters currently not supported in noctua

    +

    Value

    -

    db_copy_to returns table name

    + + +

    db_copy_to returns table name

    See also

    @@ -146,39 +171,39 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -library(dplyr)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# List existing tables in Athena
    -dbListTables(con)
    -
    -# Write data.frame to Athena table
    -copy_to(con, mtcars,
    -        s3_location = "s3://mybucket/data/")
    -             
    -# Checking if uploaded table exists in Athena
    -dbExistsTable(con, "mtcars")
    -
    -# Write Athena table from tbl_sql
    -athena_mtcars <- tbl(con, "mtcars")
    -mtcars_filter <- athena_mtcars %>% filter(gear >=4)
    -
    -copy_to(con, mtcars_filter)
    -
    -# Checking if uploaded table exists in Athena
    -dbExistsTable(con, "mtcars_filter") 
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    +
    +library(DBI)
    +library(dplyr)
    +
    +# Demo connection to Athena using profile name 
    +con <- dbConnect(noctua::athena())
    +
    +# List existing tables in Athena
    +dbListTables(con)
    +
    +# Write data.frame to Athena table
    +copy_to(con, mtcars,
    +        s3_location = "s3://mybucket/data/")
    +             
    +# Checking if uploaded table exists in Athena
    +dbExistsTable(con, "mtcars")
    +
    +# Write Athena table from tbl_sql
    +athena_mtcars <- tbl(con, "mtcars")
    +mtcars_filter <- athena_mtcars %>% filter(gear >=4)
    +
    +copy_to(con, mtcars_filter)
    +
    +# Checking if uploaded table exists in Athena
    +dbExistsTable(con, "mtcars_filter") 
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -193,7 +218,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/db_desc.html b/docs/reference/db_desc.html index eb220e1..34d05e6 100644 --- a/docs/reference/db_desc.html +++ b/docs/reference/db_desc.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,18 +81,25 @@

    S3 implementation of db_desc for Athena (api version 1).

    -
    db_desc.AthenaConnection(x)
    +
    db_desc.AthenaConnection(x)

    Arguments

    x

    A dbConnect object, as returned by dbConnect()

    +

    Value

    -

    Character variable containing Meta Data about query sent to Athena. The Meta Data is returned in the following format: -"Athena <paws version> [<profile_name>@region/database]"

    + + +

    Character variable containing Meta Data about query sent to Athena. The Meta Data is returned in the following format:

    + + +

    "Athena <paws version> [<profile_name>@region/database]"

    + +
    @@ -107,7 +114,7 @@

    Value

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/dbplyr_edition.html b/docs/reference/dbplyr_edition.html index 8db8077..5e16208 100644 --- a/docs/reference/dbplyr_edition.html +++ b/docs/reference/dbplyr_edition.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,17 +81,20 @@

    Declare which version of dbplyr API is being called.

    -
    dbplyr_edition.AthenaConnection(con)
    +
    dbplyr_edition.AthenaConnection(con)

    Arguments

    con

    A dbConnect object, as returned by dbConnect()

    +

    Value

    -

    Integer for which version of `dbplyr` is going to be used.

    + + +

    Integer for which version of `dbplyr` is going to be used.

    @@ -106,7 +109,7 @@

    Value

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/index.html b/docs/reference/index.html index aa716c0..0de0876 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -207,7 +207,7 @@

    All functions

    Method

    Declare which version of dbplyr API is being called.

    -

    noctua-package

    +

    noctua noctua-package

    noctua: a DBI interface into Athena using paws SDK

    @@ -247,7 +247,7 @@

    All functions
    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/noctua-package.html b/docs/reference/noctua-package.html index ef5eeae..20ccc74 100644 --- a/docs/reference/noctua-package.html +++ b/docs/reference/noctua-package.html @@ -18,7 +18,7 @@ noctua - 2.6.0 + 2.6.1 @@ -122,7 +122,7 @@

    Author

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/noctua_options.html b/docs/reference/noctua_options.html index 3aeb7e8..19eef6c 100644 --- a/docs/reference/noctua_options.html +++ b/docs/reference/noctua_options.html @@ -18,7 +18,7 @@ noctua - 2.6.0 + 2.6.1 @@ -83,19 +83,19 @@

    A method to configure noctua backend options.

    -
    noctua_options(
    -  file_parser,
    -  bigint,
    -  binary,
    -  json,
    -  cache_size,
    -  clear_cache,
    -  retry,
    -  retry_quiet,
    -  unload,
    -  clear_s3_resource,
    -  verbose
    -)
    +
    noctua_options(
    +  file_parser,
    +  bigint,
    +  binary,
    +  json,
    +  cache_size,
    +  clear_cache,
    +  retry,
    +  retry_quiet,
    +  unload,
    +  clear_s3_resource,
    +  verbose
    +)
    @@ -103,47 +103,70 @@

    Arguments

    file_parser

    Method to read and write tables to Athena, currently default to "data.table". The file_parser also determines the data format returned for example "data.table" will return data.table and "vroom" will return tibble.

    + +
    bigint

    The R type that 64-bit integer types should be mapped to (default: "integer64"). Inbuilt bigint conversion types ["integer64", "integer", "numeric", "character"].

    + +
    binary

    The R type that [binary/varbinary] types should be mapped to (default "raw"). Inbuilt binary conversion types ["raw", "character"].

    + +
    json

    Attempt to converts AWS Athena data types [arrays, json] using jsonlite:parse_json (default: "auto"). Inbuilt json conversion types ["auto", "character"]. Custom Json parsers can be provide by using a function with data frame parameter.

    + +
    cache_size

    Number of queries to be cached. Currently only support caching up to 100 distinct queries (default: 0).

    + +
    clear_cache

    Clears all previous cached query metadata

    + +
    retry

    Maximum number of requests to attempt (default: 5).

    + +
    retry_quiet

    This method is deprecated please use verbose instead.

    + +
    unload

    set AWS Athena unload functionality globally (default: FALSE)

    + +
    clear_s3_resource

    Clear down `AWS Athena` `AWS S3` resource (s3_staging_dir location). This is useful for users that don't have the `AWS IAM role` permissions delete from `s3_staging_dir` (default: TRUE)

    + +
    verbose

    print package info messages (default: TRUE)

    +

    Value

    -

    noctua_options() returns NULL, invisibly.

    + + +

    noctua_options() returns NULL, invisibly.

    Examples

    -
    library(noctua)
    -
    -# change file parser from default data.table to vroom
    -noctua_options("vroom")
    -
    -# cache queries locally
    -noctua_options(cache_size = 5)
    +    
    library(noctua)
    +
    +# change file parser from default data.table to vroom
    +noctua_options("vroom")
    +
    +# cache queries locally
    +noctua_options(cache_size = 5)
     
    @@ -158,7 +181,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/session_token.html b/docs/reference/session_token.html index c3d18c8..83ed010 100644 --- a/docs/reference/session_token.html +++ b/docs/reference/session_token.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,14 +81,14 @@

    Get Session Tokens for PAWS Connection

    -
    get_session_token(
    -  profile_name = NULL,
    -  region_name = NULL,
    -  serial_number = NULL,
    -  token_code = NULL,
    -  duration_seconds = 3600L,
    -  set_env = FALSE
    -)
    +
    get_session_token(
    +  profile_name = NULL,
    +  region_name = NULL,
    +  serial_number = NULL,
    +  token_code = NULL,
    +  duration_seconds = 3600L,
    +  set_env = FALSE
    +)
    @@ -97,47 +97,62 @@

    Arguments

    The name of a profile to use. If not given, then the default profile is used. To set profile name, the AWS Command Line Interface (AWS CLI) will need to be configured. To configure AWS CLI please refer to: Configuring the AWS CLI.

    + +
    region_name

    Default region when creating new connections. Please refer to link for AWS region codes (region code example: Region = EU (Ireland) region_name = "eu-west-1")

    + +
    serial_number

    The identification number of the MFA device that is associated with the IAM user who is making the GetSessionToken call. Specify this value if the IAM user has a policy that requires MFA authentication. The value is either the serial number for a hardware device (such as `GAHT12345678`) or an Amazon Resource Name (ARN) for a virtual device (such as arn:aws:iam::123456789012:mfa/user).

    + +
    token_code

    The value provided by the MFA device, if MFA is required. If any policy requires the IAM user to submit an MFA code, specify this value. If MFA authentication is required, the user must provide a code when requesting a set of temporary security credentials. A user who fails to provide the code receives an "access denied" response when requesting resources that require MFA authentication.

    + +
    duration_seconds

    The duration, in seconds, that the credentials should remain valid. Acceptable duration for IAM user sessions range from 900 seconds (15 minutes) to 129,600 seconds (36 hours), with 3,600 seconds (1 hour) as the default.

    + +
    set_env

    If set to TRUE environmental variables AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN will be set.

    +

    Value

    -

    get_session_token() returns a list containing: "AccessKeyId", "SecretAccessKey", "SessionToken" and "Expiration"

    + + +

    get_session_token() returns a list containing: "AccessKeyId", "SecretAccessKey", "SessionToken" and "Expiration"

    + +

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -
    -library(noctua)
    -library(DBI)
    -
    -# Create Temporary Credentials duration 1 hour
    -get_session_token("YOUR_PROFILE_NAME",
    -                  serial_number='arn:aws:iam::123456789012:mfa/user',
    -                  token_code = "531602",
    -                  set_env = TRUE)
    -
    -# Connect to Athena using temporary credentials
    -con <- dbConnect(athena())
    -}
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +
    +library(noctua)
    +library(DBI)
    +
    +# Create Temporary Credentials duration 1 hour
    +get_session_token("YOUR_PROFILE_NAME",
    +                  serial_number='arn:aws:iam::123456789012:mfa/user',
    +                  token_code = "531602",
    +                  set_env = TRUE)
    +
    +# Connect to Athena using temporary credentials
    +con <- dbConnect(athena())
    +}
     
    @@ -152,7 +167,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/sqlCreateTable.html b/docs/reference/sqlCreateTable.html index a1e547d..fb74dda 100644 --- a/docs/reference/sqlCreateTable.html +++ b/docs/reference/sqlCreateTable.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,24 +81,26 @@

    Creates query to create a simple Athena table

    -
    # S4 method for AthenaConnection
    -sqlCreateTable(
    -  con,
    -  table,
    -  fields,
    -  field.types = NULL,
    -  partition = NULL,
    -  s3.location = NULL,
    -  file.type = c("tsv", "csv", "parquet", "json"),
    -  compress = FALSE,
    -  ...
    -)
    +
    # S4 method for AthenaConnection
    +sqlCreateTable(
    +  con,
    +  table,
    +  fields,
    +  field.types = NULL,
    +  partition = NULL,
    +  s3.location = NULL,
    +  file.type = c("tsv", "csv", "parquet", "json"),
    +  compress = FALSE,
    +  ...
    +)

    Arguments

    con

    A database connection.

    + +
    table

    The table name, passed on to dbQuoteIdentifier(). Options are:

    • a character string with the unquoted DBMS table name, e.g. "table_name",

    • @@ -107,6 +109,8 @@

      Arguments

    • a call to SQL() with the quoted and fully qualified table name given verbatim, e.g. SQL('"my_schema"."table_name"')

    + +
    fields

    Either a character vector or a data frame.

    A named character vector: Names are column names, values are types. @@ -114,28 +118,43 @@

    Arguments

    Field types are unescaped.

    A data frame: field types are generated using dbDataType().

    + +
    field.types

    Additional field types used to override derived types.

    + +
    partition

    Partition Athena table (needs to be a named list or vector) for example: c(var1 = "2019-20-13")

    + +
    s3.location

    s3 bucket to store Athena table, must be set as a s3 uri for example ("s3://mybucket/data/"). By default s3.location is set s3 staging directory from AthenaConnection object.

    + +
    file.type

    What file type to store data.frame on s3, noctua currently supports ["tsv", "csv", "parquet", "json"]. Default delimited file type is "tsv", in previous versions of noctua (=< 1.4.0) file type "csv" was used as default. The reason for the change is that columns containing Array/JSON format cannot be written to Athena due to the separating value ",". This would cause issues with AWS Athena. Note: "parquet" format is supported by the arrow package and it will need to be installed to utilise the "parquet" format. "json" format is supported by jsonlite package and it will need to be installed to utilise the "json" format.

    + +
    compress

    FALSE | TRUE To determine if to compress file.type. If file type is ["csv", "tsv"] then "gzip" compression is used, for file type "parquet" "snappy" compression is used. Currently noctua doesn't support compression for "json" file type.

    + +
    ...

    Other arguments used by individual methods.

    +

    Value

    -

    sqlCreateTable returns data.frame's DDL in the SQL format.

    + + +

    sqlCreateTable returns data.frame's DDL in the SQL format.

    See also

    @@ -144,33 +163,33 @@

    See also

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(DBI)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -                 
    -# Create DDL for iris data.frame
    -sqlCreateTable(con, "iris", iris, s3.location = "s3://path/to/athena/table")
    -
    -# Create DDL for iris data.frame with partition
    -sqlCreateTable(con, "iris", iris, 
    -               partition = "timestamp",
    -               s3.location = "s3://path/to/athena/table")
    -               
    -# Create DDL for iris data.frame with partition and file.type parquet
    -sqlCreateTable(con, "iris", iris, 
    -               partition = "timestamp",
    -               s3.location = "s3://path/to/athena/table",
    -               file.type = "parquet")
    -
    -# Disconnect from Athena
    -dbDisconnect(con)
    -}
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documentation
    +
    +library(DBI)
    +
    +# Demo connection to Athena using profile name 
    +con <- dbConnect(noctua::athena())
    +                 
    +# Create DDL for iris data.frame
    +sqlCreateTable(con, "iris", iris, s3.location = "s3://path/to/athena/table")
    +
    +# Create DDL for iris data.frame with partition
    +sqlCreateTable(con, "iris", iris, 
    +               partition = "timestamp",
    +               s3.location = "s3://path/to/athena/table")
    +               
    +# Create DDL for iris data.frame with partition and file.type parquet
    +sqlCreateTable(con, "iris", iris, 
    +               partition = "timestamp",
    +               s3.location = "s3://path/to/athena/table",
    +               file.type = "parquet")
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
     
    @@ -185,7 +204,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/sqlData.html b/docs/reference/sqlData.html index 5f0bc6a..cc4a26e 100644 --- a/docs/reference/sqlData.html +++ b/docs/reference/sqlData.html @@ -17,7 +17,7 @@ noctua - 2.6.0 + 2.6.1 @@ -81,22 +81,26 @@

    Converts data frame into suitable format to be uploaded to Athena

    -
    # S4 method for AthenaConnection
    -sqlData(
    -  con,
    -  value,
    -  row.names = NA,
    -  file.type = c("tsv", "csv", "parquet", "json"),
    -  ...
    -)
    +
    # S4 method for AthenaConnection
    +sqlData(
    +  con,
    +  value,
    +  row.names = NA,
    +  file.type = c("tsv", "csv", "parquet", "json"),
    +  ...
    +)

    Arguments

    con

    A database connection.

    + +
    value

    A data frame

    + +
    row.names

    Either TRUE, FALSE, NA or a string.

    If TRUE, always translate row names to a column called "row_names". @@ -105,15 +109,25 @@

    Arguments

    A string is equivalent to TRUE, but allows you to override the default name.

    For backward compatibility, NULL is equivalent to FALSE.

    + +
    file.type

    What file type to store data.frame on s3, noctua currently supports ["csv", "tsv", "parquet", "json"]. Note: This parameter is used for format any special characters that clash with file type separator.

    + +
    ...

    Other arguments used by individual methods.

    +

    Value

    -

    sqlData returns a dataframe formatted for Athena. Currently converts list variable types into character split by '|', similar to how data.table writes out to files.

    + + +

    sqlData returns a dataframe formatted for Athena. Currently converts list variable types into character

    + + +

    split by '|', similar to how data.table writes out to files.

    See also

    @@ -132,7 +146,7 @@

    See also

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/sql_translate_env.html b/docs/reference/sql_translate_env.html index d7185be..2f28dce 100644 --- a/docs/reference/sql_translate_env.html +++ b/docs/reference/sql_translate_env.html @@ -19,7 +19,7 @@ noctua - 2.6.0 + 2.6.1 @@ -85,11 +85,11 @@

    AWS Athena backend dbplyr version 1 and 2

    -
    sql_translation.AthenaConnection(con)
    -
    -sql_translate_env.AthenaConnection(con)
    -
    -sql_escape_string.AthenaConnection(con, x)
    +
    sql_translation.AthenaConnection(con)
    +
    +sql_translate_env.AthenaConnection(con)
    +
    +sql_escape_string.AthenaConnection(con, x)
    @@ -97,11 +97,14 @@

    Arguments

    con

    An AthenaConnection object, produced by [DBI::dbConnect()]

    + +
    x

    An object to escape. Existing sql vectors will be left as is, character vectors are escaped with single quotes, numeric vectors have trailing `.0` added if they're whole numbers, identifiers are escaped with double quotes.

    +
    @@ -116,7 +119,7 @@

    Arguments

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/work_group.html b/docs/reference/work_group.html index b1a4465..4774c74 100644 --- a/docs/reference/work_group.html +++ b/docs/reference/work_group.html @@ -43,7 +43,7 @@ noctua - 2.6.0 + 2.6.1 @@ -130,52 +130,64 @@

    Athena Work Groups

    -
    create_work_group(
    -  conn,
    -  work_group = NULL,
    -  enforce_work_group_config = FALSE,
    -  publish_cloud_watch_metrics = FALSE,
    -  bytes_scanned_cut_off = 10000000L,
    -  description = NULL,
    -  tags = tag_options(key = NULL, value = NULL)
    -)
    -
    -tag_options(key = NULL, value = NULL)
    -
    -delete_work_group(conn, work_group = NULL, recursive_delete_option = FALSE)
    -
    -list_work_groups(conn)
    -
    -get_work_group(conn, work_group = NULL)
    -
    -update_work_group(
    -  conn,
    -  work_group = NULL,
    -  remove_output_location = FALSE,
    -  enforce_work_group_config = FALSE,
    -  publish_cloud_watch_metrics = FALSE,
    -  bytes_scanned_cut_off = 10000000L,
    -  description = NULL,
    -  state = c("ENABLED", "DISABLED")
    -)
    +
    create_work_group(
    +  conn,
    +  work_group = NULL,
    +  enforce_work_group_config = FALSE,
    +  publish_cloud_watch_metrics = FALSE,
    +  bytes_scanned_cut_off = 10000000L,
    +  description = NULL,
    +  tags = tag_options(key = NULL, value = NULL)
    +)
    +
    +tag_options(key = NULL, value = NULL)
    +
    +delete_work_group(conn, work_group = NULL, recursive_delete_option = FALSE)
    +
    +list_work_groups(conn)
    +
    +get_work_group(conn, work_group = NULL)
    +
    +update_work_group(
    +  conn,
    +  work_group = NULL,
    +  remove_output_location = FALSE,
    +  enforce_work_group_config = FALSE,
    +  publish_cloud_watch_metrics = FALSE,
    +  bytes_scanned_cut_off = 10000000L,
    +  description = NULL,
    +  state = c("ENABLED", "DISABLED")
    +)

    Arguments

    conn

    A dbConnect object, as returned by dbConnect()

    + +
    work_group

    The Athena workgroup name.

    + +
    enforce_work_group_config

    If set to TRUE, the settings for the workgroup override client-side settings. If set to FALSE, client-side settings are used. For more information, see Workgroup Settings Override Client-Side Settings.

    + +
    publish_cloud_watch_metrics

    Indicates that the Amazon CloudWatch metrics are enabled for the workgroup.

    + +
    bytes_scanned_cut_off

    The upper data usage limit (cutoff) for the amount of bytes a single query in a workgroup is allowed to scan.

    + +
    description

    The workgroup description.

    + +
    tags

    A tag that you can add to a resource. A tag is a label that you assign to an AWS Athena resource (a workgroup). Each tag consists of a key and an optional value, both of which you define. Tags enable you to categorize workgroups in Athena, for example, @@ -183,24 +195,37 @@

    Arguments

    The maximum tag key length is 128 Unicode characters in UTF-8. The maximum tag value length is 256 Unicode characters in UTF-8. You can use letters and numbers representable in UTF-8, and the following characters: "+ - = . _ : / @". Tag keys and values are case-sensitive. Tag keys must be unique per resource. Please use the helper function tag_options() to create tags for work group, if no tags are required please put NULL for this parameter.

    + +
    key

    A tag key. The tag key length is from 1 to 128 Unicode characters in UTF-8. You can use letters and numbers representable in UTF-8, and the following characters: "+ - = . _ : / @". Tag keys are case-sensitive and must be unique per resource.

    + +
    value

    A tag value. The tag value length is from 0 to 256 Unicode characters in UTF-8. You can use letters and numbers representable in UTF-8, and the following characters: "+ - = . _ : / @". Tag values are case-sensitive.

    + +
    recursive_delete_option

    The option to delete the workgroup and its contents even if the workgroup contains any named queries

    + +
    remove_output_location

    If set to TRUE, indicates that the previously-specified query results location (also known as a client-side setting) for queries in this workgroup should be ignored and set to null. If set to FALSE the out put location in the workgroup's result configuration will be updated with the new value. For more information, see Workgroup Settings Override Client-Side Settings.

    + +
    state

    The workgroup state that will be updated for the given workgroup.

    +

    Value

    -
    create_work_group
    + + +
    create_work_group

    Returns NULL but invisible

    tag_options
    @@ -223,46 +248,46 @@

    Value

    Examples

    -
    if (FALSE) {
    -# Note: 
    -# - Require AWS Account to run below example.
    -# - Different connection methods can be used please see `noctua::dbConnect` documnentation
    -
    -library(noctua)
    -
    -# Demo connection to Athena using profile name 
    -con <- dbConnect(noctua::athena())
    -
    -# List current work group available
    -list_work_groups(con)
    -
    -# Create a new work group
    -wg <- create_work_group(con,
    -                  "demo_work_group",
    -                   description = "This is a demo work group",
    -                   tags = tag_options(key= "demo_work_group", value = "demo_01"))
    - 
    -# List work groups to see new work group
    -list_work_groups(con)
    -
    -# get meta data from work group
    -wg <- get_work_group(con, "demo_work_group")
    -
    -# Update work group
    -wg <- update_work_group(con, "demo_work_group",
    -                  description = "This is a demo work group update")
    -
    -
    -# get updated meta data from work group
    -wg <- get_work_group(con, "demo_work_group") 
    -
    -# Delete work group
    -delete_work_group(con, "demo_work_group")
    -
    -# Disconect from Athena
    -dbDisconnect(con)
    -}
    -
    +    
    if (FALSE) {
    +# Note: 
    +# - Require AWS Account to run below example.
    +# - Different connection methods can be used please see `noctua::dbConnect` documentation
    +
    +library(noctua)
    +
    +# Demo connection to Athena using profile name 
    +con <- dbConnect(noctua::athena())
    +
    +# List current work group available
    +list_work_groups(con)
    +
    +# Create a new work group
    +wg <- create_work_group(con,
    +                  "demo_work_group",
    +                   description = "This is a demo work group",
    +                   tags = tag_options(key= "demo_work_group", value = "demo_01"))
    + 
    +# List work groups to see new work group
    +list_work_groups(con)
    +
    +# get meta data from work group
    +wg <- get_work_group(con, "demo_work_group")
    +
    +# Update work group
    +wg <- update_work_group(con, "demo_work_group",
    +                  description = "This is a demo work group update")
    +
    +
    +# get updated meta data from work group
    +wg <- get_work_group(con, "demo_work_group") 
    +
    +# Delete work group
    +delete_work_group(con, "demo_work_group")
    +
    +# Disconnect from Athena
    +dbDisconnect(con)
    +}
    +
     
    @@ -277,7 +302,7 @@

    Examples

    -

    Site built with pkgdown 2.0.3.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/man/db_compute.Rd b/man/db_compute.Rd index 59aa316..9b70b5e 100644 --- a/man/db_compute.Rd +++ b/man/db_compute.Rd @@ -31,7 +31,7 @@ This is a backend function for dplyr's \code{compute} function. Users won't be r \dontrun{ # Note: # - Require AWS Account to run below example. -# - Different connection methods can be used please see `noctua::dbConnect` documnentation +# - Different connection methods can be used please see `noctua::dbConnect` documentation library(DBI) library(dplyr)