diff --git a/R/AzureHDI.R b/R/AzureHDI.R index 8ebaf0d..bd5bc61 100644 --- a/R/AzureHDI.R +++ b/R/AzureHDI.R @@ -258,7 +258,7 @@ azureCreateHDI <- function(azureActiveContext, resourceGroup, location, #' @family HDInsight functions #' @export azureResizeHDI <- function(azureActiveContext, clustername, - role = c("worker", "head", "edge"), + role = c("workernode", "headnode", "edgenode"), size = 2, mode = c("Sync", "Async"), subscriptionID, resourceGroup, verbose = FALSE) { @@ -267,7 +267,7 @@ azureResizeHDI <- function(azureActiveContext, clustername, assert_that(is_resource_group(resourceGroup)) assert_that(is_clustername(clustername)) - assert_that(is.integer(size)) + assert_that(is.integer(as.integer(size))) role <- match.arg(role) mode <- match.arg(mode) diff --git a/R/AzureHive.R b/R/AzureHive.R index fdae837..3aca7ef 100644 --- a/R/AzureHive.R +++ b/R/AzureHive.R @@ -30,14 +30,18 @@ azureHiveStatus <- function(azureActiveContext, clustername, hdiAdmin, if (!length(HP)) { stop("Error: No Valid hdiPassword provided") } - + verbosity <- set_verbosity(verbose) + azureActiveContext$hdiAdmin <- HA azureActiveContext$hdiPassword <- HP azureActiveContext$clustername <- CN - + cat(HA) + cat(HP) + uri <- paste0("https://", CN, ".azurehdinsight.net/templeton/v1/status") - r <- GET(URL, add_headers(.headers = c(`Content-type` = "application/json")), + cat(uri) + r <- GET(uri, add_headers(.headers = c(`Content-type` = "application/json")), authenticate(HA, HP), verbosity) if (status_code(r) != 200 && status_code(r) != 201) { stop(paste0("Error: Return code(", status_code(r), ")")) @@ -130,9 +134,9 @@ azureHiveSQL <- function(azureActiveContext, CMD, clustername, hdiAdmin, if (DUR < 5) DUR <- DUR + 1 if (df$status$state == "PREP") - message("P") + message("P",appendLF = FALSE) if (df$status$state == "RUNNING") - message("R") + message("R",appendLF = FALSE) # print(df$status$state) r <- GET(URL, add_headers(.headers = c(`Content-type` = "application/json")), @@ -142,9 
+146,9 @@ azureHiveSQL <- function(azureActiveContext, CMD, clustername, hdiAdmin, df <- fromJSON(rl) } if (df$status$state == "SUCCEEDED") - message("S") + message("S",appendLF = FALSE) if (df$status$state == "FAILED") - message("F") + message("F",appendLF = FALSE) STATE <- df$status$state message("Finished Running statement: ", Sys.time()) diff --git a/R/AzureSpark.R b/R/AzureSpark.R index 8b4483e..4799552 100644 --- a/R/AzureSpark.R +++ b/R/AzureSpark.R @@ -276,13 +276,17 @@ azureSparkCMD <- function(azureActiveContext, CMD, clustername, hdiAdmin, sep = "") # print(URL) message(paste("CMD Running: ", Sys.time())) - message("Running(R), Completed(C)") + message("Running(R) Waiting(W) Completed(C)") - while (df$state == "running") { + while (df$state == "running" || df$state == "waiting") { Sys.sleep(DUR) if (DUR < 5) DUR <- DUR + 1 - message("R") + if (df$state == "running") + message("R",appendLF = FALSE) + if (df$state == "waiting") + message("W",appendLF = FALSE) + r <- GET(URL, add_headers(.headers = c(`Content-type` = "application/json")), authenticate(HA, HP)) rl <- content(r, "text", encoding = "UTF-8") @@ -290,7 +294,7 @@ azureSparkCMD <- function(azureActiveContext, CMD, clustername, hdiAdmin, df <- fromJSON(rl) } - message("C") + message("C",appendLF = FALSE) message("Finished Running statement: ", Sys.time()) RET <- df$output$data[1] # rownames(RET) <- 'Return Value' diff --git a/README.md b/README.md index 9528362..1bc585b 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ To get started with this package, see the vignettes: - * [Tutorial](http://htmlpreview.github.io/?https://github.com/Microsoft/AzureSMR/blob/master/vignettes/tutorial.html) - * [Getting Authenticated](http://htmlpreview.github.io/?https://github.com/Microsoft/AzureSMR/blob/master/vignettes/Authentication.html) + * [Tutorial](http://htmlpreview.github.io/?https://github.com/Microsoft/AzureSMR/blob/master/inst/doc/tutorial.html) + * [Getting 
Authenticated](http://htmlpreview.github.io/?https://github.com/Microsoft/AzureSMR/blob/master/inst/doc/Authentication.html) To access the package help, just type `?AzureSMR` into your code editor. diff --git a/inst/examples/example_azureDeleteHDI.R b/inst/examples/example_azureDeleteHDI.R new file mode 100644 index 0000000..300ba55 --- /dev/null +++ b/inst/examples/example_azureDeleteHDI.R @@ -0,0 +1,5 @@ +\dontrun{ +library(AzureSMR) + +azureDeleteHDI(asc, clustername = "azuresmrclustername") +} diff --git a/vignettes/Authentication.R b/vignettes/Authentication.R deleted file mode 100644 index d33720d..0000000 --- a/vignettes/Authentication.R +++ /dev/null @@ -1,4 +0,0 @@ -## ---- eval = FALSE------------------------------------------------------- -# sc <- createAzureContext(tenantID = "{TID}", clientID = "{CID}", authKey= "{KEY}") -# rgs <- azureListRG(sc) -# rgs diff --git a/vignettes/Authentication.Rmd b/vignettes/Authentication.Rmd index a85150f..1deae5f 100644 --- a/vignettes/Authentication.Rmd +++ b/vignettes/Authentication.Rmd @@ -47,7 +47,7 @@ To apply access control azToken Resource Group 16. Identify the resource group you will associate with this application. -17. Choose the Users menu item from the Resource scope. +17. Choose the Access Control(IAM) menu item from the Resource scope. 18. In the resulting scope click the `+ Add` button. @@ -62,7 +62,7 @@ Alternatively you can access control azToken Subscription Level 16. Identify the Subscription you will associate with this application. -17. Choose the Users(access) menu item. +17. Choose the Access Control(IAM) menu item. 18. In the resulting scope click the + Add button. @@ -71,7 +71,6 @@ Alternatively you can access control azToken Subscription Level 20. Select the resulting list item for that App then click Select in that scope then OK in the "Add access" scope. The user will be added to the list. - That is all. 
You can test this by trying: ```{r, eval = FALSE} diff --git a/vignettes/Authentication.html b/vignettes/Authentication.html deleted file mode 100644 index 87aad75..0000000 --- a/vignettes/Authentication.html +++ /dev/null @@ -1,136 +0,0 @@ - - - - - - - - - - - - - - - - -Getting Authorised for the AzureSMR Package - - - - - - - - - - - - - - - - - -

Getting Authorised for the AzureSMR Package

-

Alan Weaver and Andrie de Vries

-

2016-12-22

- - - -
-

Configuration instructions

-

Follow these instructions to create an active directory. You will need to collect the tenant ID (tenantID), client ID (clientID) and authentication key (authKey) in order to authenticate with the createAzureContect() function.

-
-

Create an Active Directory App.

-
    -
  1. Login to the Classic (i.e., the old) Portal https://manage.windowsazure.com/.

  2. -
  3. On the left hand menu you should see amongst all the items one called ACTIVE DIRECTORY. Click the item and an active directory DIRECTORY will be listed.

  4. -
  5. Click on an entry under the Name column (if there is only one entry the choice is easy!) to take you to a page of options to get started with some text like I WANT TO.

  6. -
  7. Along the top menu click APPLICATIONS.

  8. -
  9. You probably want to create a new application so type a name for it in the Search box (I called mine AzureSMR). The search result will come back with no results and a button that says ADD AN APPLICATION -> which you should click.

  10. -
  11. Give the application a name and choose WEB APPLICATION AND/OR WEB API. Then go to the next page ->.

  12. -
  13. Provide some dummy URLs. They are not used but they must be valid URLs. Click on the tick to continue to create the application.

  14. -
  15. Under the Configure menu button take note of the client ID.9. Under the Keys section choose a 1 year duration (or 2) and click the Save button azToken the bottom of the screen. A key is generated which you should copy now and save it somewhere.

  16. -
  17. We also need the tenant ID. Click the VIEW ENDPOINTS button on the bottom of the screen and find a list of endpoints all including the tenant ID as a sequence of hexadecimals.

  18. -
  19. Now we need to set up the applications permissions. Click the Add application button. In the resulting window scroll to Windows Azure Service Management API and select it. Then click the Tick icon.

  20. -
  21. Under the resulting “permissions to other applications” section, for the Windows Azure Service Management API entry, from the “Delegated Permissions:0” drop down tick the Access Azure Service Management as organization.

  22. -
  23. Click on the Save icon azToken the bottom of the window again.

  24. -
  25. Now we need to assign the application a role and to do so we go to the (new) Azure portal. https://portal.azure.com/

  26. -
-

To apply access control azToken Resource Group

-
    -
  1. Click on Resource Groups menu item on the left in the portal.

  2. -
  3. Identify the resource group you will associate with this application.

  4. -
  5. Choose the Users menu item from the Resource scope.

  6. -
  7. In the resulting scope click the + Add button.

  8. -
  9. Choose the role as Owner and under the user search box enter the name of the App, e.g., AzureSMR.

  10. -
  11. Select the resulting list item for that App then click Select in that scope then OK in the “Add access” scope. The user will be added to the list.

  12. -
-

Alternatively you can access control azToken Subscription Level

-
    -
  1. Click on Subscriptions on the left menu item in the portal.

  2. -
  3. Identify the Subscription you will associate with this application.

  4. -
  5. Choose the Users(access) menu item.

  6. -
  7. In the resulting scope click the + Add button.

  8. -
  9. Choose the role as Owner and under the user search box enter the name of the App, e.g., AzureSMR.

  10. -
  11. Select the resulting list item for that App then click Select in that scope then OK in the “Add access” scope. The user will be added to the list.

  12. -
-

That is all. You can test this by trying:

-
sc <- createAzureContext(tenantID = "{TID}", clientID = "{CID}", authKey= "{KEY}")
-rgs <- AzureListRG(sc)
-rgs
-

For more information see the tutorial Use portal to create Active Directory application and service principal that can access resources

-
-
- - - - - - - - diff --git a/vignettes/tutorial.R b/vignettes/tutorial.R deleted file mode 100644 index eb0bf4d..0000000 --- a/vignettes/tutorial.R +++ /dev/null @@ -1,140 +0,0 @@ -## ---- eval=FALSE--------------------------------------------------------- -# # Install devtools -# if(!require("devtools")) install.packages("devtools") -# devtools::install_github("Microsoft/AzureSMR") -# library(AzureSMR) - -## ---- eval=FALSE--------------------------------------------------------- -# sc <- createAzureContext(tenantID = "{TID}", clientID = "{CID}", authKey= "{KEY}") -# sc - -## ---- eval=FALSE--------------------------------------------------------- -# azureListSubscriptions(sc) -# - -## ---- eval=FALSE--------------------------------------------------------- -# # list resource groups -# azureListRG(sc) -# -# # list all resources -# azureListAllResources(sc) -# -# azureListAllResources(sc, location = "northeurope") -# -# azureListAllResources(sc, type = "Microsoft.Sql/servers", location = "northeurope") -# -# azureListAllResources(sc, resourceGroup = "Analytics") -# -# azureCreateResourceGroup(sc, resourceGroup = "testme", location = "northeurope") -# -# azureDeleteResourceGroup(sc, resourceGroup = "testme") -# -# azureListRG(sc)$name -# - -## ---- eval=FALSE--------------------------------------------------------- -# azureListVM(sc, resourceGroup = "AWHDIRG") -# -# ## Name Location Type OS State Admin -# ## 1 DSVM1 northeurope Microsoft.Compute/virtualMachines Linux Succeeded alanwe -# -# azureStartVM(sc, vmName = "DSVM1") -# azureStopVM(sc, vmName = "DSVM1") - -## ---- eval=FALSE--------------------------------------------------------- -# sKey <- AzureSAGetKey(sc, resourceGroup = "Analytics", storageAccount = "analyticsfiles") - -## ---- eval=FALSE--------------------------------------------------------- -# azListContainers(sc, storageAccount = "analyticsfiles", containers = "Test") - -## ---- eval=FALSE--------------------------------------------------------- -# 
azureListStorageBlobs(sc, storageAccount = "analyticsfiles", container = "test") - -## ---- eval=FALSE--------------------------------------------------------- -# azurePutBlob(sc, StorageAccount = "analyticsfiles", container = "test", -# contents = "Hello World", -# blob = "HELLO") - -## ---- eval=FALSE--------------------------------------------------------- -# azureGetBlob(sc, storageAccount = "analyticsfiles", container = "test", -# blob="HELLO", -# type="text") - -## ---- eval=FALSE--------------------------------------------------------- -# azureListHDI(sc) -# azureListHDI(sc, resourceGroup ="Analytics") -# - -## ---- eval=FALSE--------------------------------------------------------- -# azureResizeHDI(sc, resourceGroup = "Analytics", clusterName = "{HDIClusterName}", -# Role="workernode",Size=2) -# -# ## AzureResizeHDI: Request Submitted: 2016-06-23 18:50:57 -# ## Resizing(R), Succeeded(S) -# ## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR -# ## RRRRRRRRRRRRRRRRRRS -# ## Finished Resizing Sucessfully: 2016-06-23 19:04:43 -# ## Finished: 2016-06-23 19:04:43 -# ## ## Information -# ## " headnode ( 2 * Standard_D3_v2 ) workernode ( 5 * Standard_D3_v2 ) zookeepernode ( 3 * Medium ) edgenode0 ( 1 * Standard_D4_v2 )" - -## ---- eval=FALSE--------------------------------------------------------- -# azureDeployTemplate(sc, resourceGroup = "Analytics", deplName = "Deploy1", -# templateURL = "{TEMPLATEURL}", paramURL = "{PARAMURL}") -# -# ## AzureDeployTemplate: Request Submitted: 2016-06-23 18:50:57 -# ## Resizing(R), Succeeded(S) -# ## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR -# ## RRRRRRRRRRRRRRRRRRS -# ## Finished Deployed Sucessfully: 2016-06-23 19:04:43 -# ## Finished: 2016-06-23 19:04:43 - -## ---- eval=FALSE--------------------------------------------------------- -# azureHiveStatus(sc, clusterName = "{hdicluster}", -# hdiAdmin = "admin", -# hdiPassword = "********") -# 
AzureHiveSQL(sc, -# CMD = "select * from airports", -# Path = "wasb://{container}@{hdicluster}.blob.core.windows.net/") -# -# stdout <- AzureGetBlob(sc, Container = "test", Blob = "stdout") -# -# read.delim(text=stdout, header=TRUE, fill=TRUE) -# - -## ---- eval=FALSE--------------------------------------------------------- -# azureSparkNewSession(sc, clusterName = "{hdicluster}", -# hdiAdmin = "admin", -# hdiPassword = "********", -# kind = "pyspark") - -## ---- eval=FALSE--------------------------------------------------------- -# azureSparkListSessions(sc, clusterName = "{hdicluster}") - -## ---- eval=FALSE--------------------------------------------------------- -# # SAMPLE PYSPARK SCRIPT TO CALCULATE PI -# pythonCmd <- ' -# from pyspark import SparkContext -# from operator import add -# import sys -# from random import random -# partitions = 1 -# n = 20000000 * partitions -# def f(_): -# x = random() * 2 - 1 -# y = random() * 2 - 1 -# return 1 if x ** 2 + y ** 2 < 1 else 0 -# -# count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add) -# Pi = (4.0 * count / n) -# print("Pi is roughly %f" % Pi)' -# -# azureSparkCMD(sc, cmd = pythonCmd, sessionID = "5") -# -# ## [1] "Pi is roughly 3.140285" - -## ---- eval=FALSE--------------------------------------------------------- -# azureSparkCMD(sc, clusterName = "{hdicluster}", cmd = "print Pi", sessionID="5") -# -# #[1] "3.1422" - diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 7358d33..2ff56ed 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -53,8 +53,9 @@ sc <- createAzureContext(tenantID = "{TID}", clientID = "{CID}", authKey= "{KEY} sc ``` - -To get an authorisation token use `azureAuthenticate()`. Note this token will time our after a period and therefore you need to run it again occasionally. TIP: Use AzureAuthenticate before a long running task. +If you provide autentication paramters to createAzureContext() the function will automatically authenticate. 
+To manually get an authorisation token use `azureAuthenticate()`. +Note this token will time out after a period and therefore you need to run it again occasionally. TIP: Use AzureAuthenticate before a long running task. The `azureListSubscriptions()` function lists all the available subscriptions. If you only have one it sets the default Subscription in the `azureActiveContext` to that subscription ID. @@ -76,28 +77,31 @@ azureListAllResources(sc, location = "northeurope") azureListAllResources(sc, type = "Microsoft.Sql/servers", location = "northeurope") -azureListAllResources(sc, resourceGroup = "Analytics") azureCreateResourceGroup(sc, resourceGroup = "testme", location = "northeurope") -azureDeleteResourceGroup(sc, resourceGroup = "testme") +azureCreateStorageAccount(sc,storageAccount="testmystorage1",resourceGroup = "testme") + +azureListAllResources(sc, resourceGroup = "testme") -azureListRG(sc)$name +# When finished, to delete a Resource Group use azureDeleteResourceGroup +azureDeleteResourceGroup(sc, resourceGroup = "testme") ``` ## Manage Virtual Machines -Use these functions to list, start and stop Virtual Machines (see templates for Creation). +Use these functions to list, start and stop existing Virtual Machines (see templates for Creation). To Create VMs please refer to Resource Templates below. ```{r, eval=FALSE} -azureListVM(sc, resourceGroup = "AWHDIRG") +## List VMs in a ResourceGroup +azureListVM(sc, resourceGroup = "testme") ## Name Location Type OS State Admin -## 1 DSVM1 northeurope Microsoft.Compute/virtualMachines Linux Succeeded alanwe +## 1 DSVM1 northeurope Microsoft.Compute/virtualMachines Linux Succeeded azureStartVM(sc, vmName = "DSVM1") azureStopVM(sc, vmName = "DSVM1") @@ -109,34 +113,41 @@ In order to access Storage Blobs you need to have a key. 
Use `azureSAGetKey()` t ```{r, eval=FALSE} -sKey <- azureSAGetKey(sc, resourceGroup = "Analytics", storageAccount = "analyticsfiles") +sKey <- azureSAGetKey(sc, resourceGroup = "testme", storageAccount = "testmystorage1") ``` -To list containers in a storage account use `azureListContainers()` +To create containers in a storage account use `azureCreateStorageContainer()` ```{r, eval=FALSE} -azureListContainers(sc, storageAccount = "analyticsfiles", containers = "Test") +azureCreateStorageContainer(sc,"opendata",storageAccount = "testmystorage1", resourceGroup = "testme") ``` -To list blobs in a container use `azureListStorageBlobs()` +To list containers in a storage account use `azureListStorageContainers()` ```{r, eval=FALSE} -azureListStorageBlobs(sc, storageAccount = "analyticsfiles", container = "test") +azureListStorageContainers(sc, storageAccount = "testmystorage1", resourceGroup = "testme") ``` To Write a Blobs use `azurePutBlob()` ```{r, eval=FALSE} -azurePutBlob(sc, StorageAccount = "analyticsfiles", container = "test", +azurePutBlob(sc, storageAccount = "testmystorage1", container = "opendata", contents = "Hello World", blob = "HELLO") ``` +To list blobs in a container use `azureListStorageBlobs()` + +```{r, eval=FALSE} +azureListStorageBlobs(sc, storageAccount = "testmystorage1", container = "opendata") +``` + + To read a blob in a container use `azureGetBlob()` ```{r, eval=FALSE} -azureGetBlob(sc, storageAccount = "analyticsfiles", container = "test", +azureGetBlob(sc, storageAccount = "testmystorage1", container = "opendata", blob="HELLO", type="text") ``` @@ -144,25 +155,35 @@ azureGetBlob(sc, storageAccount = "analyticsfiles", container = "test", ## Manage HDInsight Clusters -You can use `AzureSMR` to manage Azure HDInsight clusters. To create clusters use Resource Templates (See below). +You can use `AzureSMR` to manage Azure HDInsight clusters. To create clusters use azureCreateHDI or for advanced configurations use Resource Templates (See below). 
Also see functions for submitting Hive and Spark jobs. +```{r, eval=FALSE} +azureCreateHDI(sc, + resourceGroup = "testme", + clustername = "smrhdi", # only low case letters, digit, and dash. + storageAccount = "testmystorage1", + adminUser = "hdiadmin", + adminPassword = "AzureSMR_password123", + sshUser = "hdisshuser", + sshPassword = "AzureSMR_password123", + kind = "rserver") +``` + Use `azureListHDI()` to list available Clusters. ```{r, eval=FALSE} -azureListHDI(sc) -azureListHDI(sc, resourceGroup ="Analytics") +azureListHDI(sc, resourceGroup ="testme") ``` Use `azureResizeHDI()` to resize a cluster ```{r, eval=FALSE} -azureResizeHDI(sc, resourceGroup = "Analytics", clusterName = "{HDIClusterName}", - Role="workernode",Size=2) +azureResizeHDI(sc, resourceGroup = "testme", clustername = "smrhdi", role="workernode",size=3) -## AzureResizeHDI: Request Submitted: 2016-06-23 18:50:57 +## azureResizeHDI: Request Submitted: 2016-06-23 18:50:57 ## Resizing(R), Succeeded(S) ## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR ## RRRRRRRRRRRRRRRRRRS @@ -182,7 +203,7 @@ To create a resource using a template in AzureSM use AzureDeployTemplate. The Te azureDeployTemplate(sc, resourceGroup = "Analytics", deplName = "Deploy1", templateURL = "{TEMPLATEURL}", paramURL = "{PARAMURL}") -## AzureDeployTemplate: Request Submitted: 2016-06-23 18:50:57 +## azureDeployTemplate: Request Submitted: 2016-06-23 18:50:57 ## Resizing(R), Succeeded(S) ## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR ## RRRRRRRRRRRRRRRRRRS @@ -196,12 +217,14 @@ ADMIN TIP: If a deployment fails. 
Go to the Azure Portal and look azToken Actvit These functions facilitate the use of hive jobs on a HDInsight Cluster ```{r, eval=FALSE} -azureHiveStatus(sc, clusterName = "{hdicluster}", - hdiAdmin = "admin", - hdiPassword = "********") +azureHiveStatus(sc, clusterName = "smrhdi", + hdiAdmin = "hdiadmin", + hdiPassword = "AzureSMR_password123") azureHiveSQL(sc, - CMD = "select * from airports", - Path = "wasb://{container}@{hdicluster}.blob.core.windows.net/") + CMD = "select * from hivesampletable", + path = "wasb://opendata@testmystorage1.blob.core.windows.net/") + +azureListStorageBlobs(sc, storageAccount = "testmystorage1", container = "opendata") stdout <- azureGetBlob(sc, Container = "test", Blob = "stdout") @@ -217,19 +240,20 @@ read.delim(text=stdout, header=TRUE, fill=TRUE) To Create a new Spark Session (Via Livy) use `azureSparkNewSession()` ```{r, eval=FALSE} -azureSparkNewSession(sc, clusterName = "{hdicluster}", - hdiAdmin = "admin", - hdiPassword = "********", +azureSparkNewSession(sc, clustername = "smrhdi", + hdiAdmin = "hdiadmin", + hdiPassword = "AzureSMR_password123", kind = "pyspark") ``` To view the status of sessions use AzureSparkListSessions +Wait for status to be Idle ```{r, eval=FALSE} -azureSparkListSessions(sc, clusterName = "{hdicluster}") +azureSparkListSessions(sc, clustername = "smrhdi") ``` -To send a command to the Spark Session use `azureSparkCMD()`. In this case it submits a Python routine +To send a command to the Spark Session use `azureSparkCMD()`. In this case it submits a Python routine. Ensure you preserve indents for Python. 
```{r, eval=FALSE} # SAMPLE PYSPARK SCRIPT TO CALCULATE PI @@ -249,7 +273,7 @@ count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add) Pi = (4.0 * count / n) print("Pi is roughly %f" % Pi)' -azureSparkCMD(sc, cmd = pythonCmd, sessionID = "5") +azureSparkCMD(sc, CMD = pythonCmd, sessionID = "0") ## [1] "Pi is roughly 3.140285" ``` @@ -257,8 +281,20 @@ azureSparkCMD(sc, cmd = pythonCmd, sessionID = "5") Check Session variables are retained ```{r, eval=FALSE} -azureSparkCMD(sc, clusterName = "{hdicluster}", cmd = "print Pi", sessionID="5") +azureSparkCMD(sc, clustername = "smrhdi", CMD = "print Pi", sessionID="0") #[1] "3.1422" ``` +You can also run SparkR sessions + +```{r, eval=FALSE} +azureSparkNewSession(sc, clustername = "smrhdi", + hdiAdmin = "hdiadmin", + hdiPassword = "AzureSMR_password123", + kind = "sparkr") +azureSparkCMD(sc, clustername = "smrhdi", CMD = "HW<-'hello R'", sessionID="2") +azureSparkCMD(sc, clustername = "smrhdi", CMD = "cat(HW)", sessionID="2") + +``` + diff --git a/vignettes/tutorial.html b/vignettes/tutorial.html deleted file mode 100644 index 8e82814..0000000 --- a/vignettes/tutorial.html +++ /dev/null @@ -1,261 +0,0 @@ - - - - - - - - - - - - - - - - -AzureSMR tutorial - - - - - - - - - - - - - - - - - -

AzureSMR tutorial

-

Alan Weaver and Andrie de Vries

-

2016-12-22

- - - -

Use this package to manage Azure Resources from within an R Session. This is not a full SDK just a collection of functions that should prove useful for a Data Scientist who needs to access and manage Azure Resources.

-
-

Installation instructions

-

Install the development version of the package directly from GitHub with:

-
# Install devtools
-if(!require("devtools")) install.packages("devtools")
-devtools::install_github("Microsoft/AzureSMR")
-library(AzureSMR)
-
-
-

Overview

-

AzureSMR provides an interface to manage resources on Microsoft Azure. The main functions address the following Azure Services:

- -

For a detailed list of AzureSM functions and their syntax please refer to the Help pages.

-
-
-

Getting Authorisation configured

-

To get started, please refer to the Authorisation tutorial. https://github.com/Microsoft/AzureSMR/blob/master/vignettes/Authentication.Rmd

-
-
-

Authenticating against the service

-

The AzureAPIs require lots of parameters to be managed. Rather than supplying all the paramters for each function call AzureSMR implements an AzureContext Variable which caches the last time a paramters is used so that it doesnt need to be repeatedly supplied.

-

To create an AzureContext object and attempt to authenticate against the Azure service, use:

-
sc <- createAzureContext(tenantID = "{TID}", clientID = "{CID}", authKey= "{KEY}")
-sc
-

To get an authorisation token use azureAuthenticate(). Note this token will time our after a period and therefore you need to run it again occasionally. TIP: Use AzureAuthenticate before a long running task.

-

The azureListSubscriptions() funtion lists all the available subscriptions. If you only have one it sets the default Subscription in the azureActiveContext to that subscription ID.

-
azureListSubscriptions(sc)
-
-
-

Manage resource Groups

-
# list resource groups
-AzureListRG(sc)
-
-# list all resources
-azureListAllResources(sc)
-
-azureListAllResources(sc, location = "northeurope")
-
-azureListAllResources(sc, type = "Microsoft.Sql/servers", location = "northeurope")
-
-azureListAllResources(sc, resourceGroup = "Analytics")
-
-azureCreateResourceGroup(sc, resourceGroup = "testme", location = "northeurope")
-
-azureDeleteResourceGroup(sc, resourceGroup = "testme")
-
-azureListRG(sc)$name
-
-
-

Manage Virtual Machines

-

Use these functions to list, start and stop Virtual Machines (see templates for Creation).

-

To Create VMs please refer to Resource Templates below.

-
azureListVM(sc, resourceGroup = "AWHDIRG")
-
-##            Name    Location                             Type    OS     State  Admin
-## 1         DSVM1 northeurope Microsoft.Compute/virtualMachines Linux Succeeded alanwe
-
-azureStartVM(sc, vmName = "DSVM1")
-azureStopVM(sc, vmName = "DSVM1")
-
-
-

Access Storage Blobs

-

In order to access Storage Blobs you need to have a key. Use azureSAGetKey() to get a Key or alternatively supply your own key. When you provide your own key you no longer need to use azureAuthenticate() since the API uses a diferent authentication approach.

-
sKey <- AzureSAGetKey(sc, resourceGroup = "Analytics", storageAccount = "analyticsfiles")
-

To list containers in a storage account use azureListContainers()

-
azListContainers(sc, storageAccount = "analyticsfiles", containers = "Test")
-

To list blobs in a container use azureListStorageBlobs()

-
azureListStorageBlobs(sc, storageAccount = "analyticsfiles", container = "test")
-

To Write a Blobs use azurePutBlob()

-
AzurePutBlob(sc, StorageAccount = "analyticsfiles", container = "test", 
-             contents = "Hello World",
-             blob = "HELLO") 
-

To read a blob in a container use azureGetBlob()

-
azureGetBlob(sc, storageAccount = "analyticsfiles", container = "test",
-             blob="HELLO",
-             type="text") 
-
-
-

Manage HDInsight Clusters

-

You can use AzureSMR to manage Azure HDInsight clusters. To create clusters use Resource Templates (See below).

-

Also see functions for submitting Hive and Spark jobs.

-

Use azureListHDI() to list available Clusters.

-
azureListHDI(sc)
-azureListHDI(sc, resourceGroup ="Analytics")
-

Use azureResizeHDI() to resize a cluster

-
azureResizeHDI(sc, resourceGroup = "Analytics", clusterName = "{HDIClusterName}", 
-               Role="workernode",Size=2)
-
-## AzureResizeHDI: Request Submitted:  2016-06-23 18:50:57
-## Resizing(R), Succeeded(S)
-## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR
-## RRRRRRRRRRRRRRRRRRS
-## Finished Resizing Sucessfully:  2016-06-23 19:04:43
-## Finished:  2016-06-23 19:04:43
-##                                                                                                                        ## Information 
-## " headnode ( 2 * Standard_D3_v2 ) workernode ( 5 * Standard_D3_v2 ) zookeepernode ( 3 * Medium ) edgenode0 ( 1 * Standard_D4_v2 )" 
-
-
-

Resource Templates - Create Azure Resources

-

The easiest way to create resources on Azure is to use Azure Templates. To create Azure Resources such as HDInsight clusters there can a large quantity of parameters. Resource templates can be built be creating a resource in the Azure Portal and then going into Settings > Automation scripts. Example templates can be found azToken this URL https://github.com/Azure/AzureStack-QuickStart-Templates.

-

To create a resource using a template in AzureSM use AzureDeployTemplate. The Template and Paramters must be available in a public URL (Azure Blob). It may be worth getting the Azure Administrator to build a working template.

-
azureDeployTemplate(sc, resourceGroup = "Analytics", deplName = "Deploy1", 
-                    templateURL = "{TEMPLATEURL}", paramURL = "{PARAMURL}")
-
-## AzureDeployTemplate: Request Submitted:  2016-06-23 18:50:57
-## Resizing(R), Succeeded(S)
-## RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR
-## RRRRRRRRRRRRRRRRRRS
-## Finished Deployed Sucessfully:  2016-06-23 19:04:43
-## Finished:  2016-06-23 19:04:43
-

ADMIN TIP: If a deployment fails. Go to the Azure Portal and look azToken Actvity logs and look for failed deployments which should explain why the deployment failed.

-
-
-

Hive Functions

-

These functions facilitate the use of hive jobs on a HDInsight Cluster

-
azureHiveStatus(sc, clusterName = "{hdicluster}", 
-                hdiAdmin = "admin", 
-                hdiPassword = "********")
-AzureHiveSQL(sc, 
-             CMD = "select * from airports", 
-             Path = "wasb://{container}@{hdicluster}.blob.core.windows.net/")
-
-stdout <- AzureGetBlob(sc, Container = "test", Blob = "stdout")
- 
-read.delim(text=stdout,  header=TRUE, fill=TRUE)
-
-
-

Spark functions (experimental)

-

AzureSMR provides some functions that allow HDInsight Spark Sessions and jobs to be managed within an R Session

-

To Create a new Spark Session (Via Livy) use azureSparkNewSession()

-
azureSparkNewSession(sc, clusterName = "{hdicluster}", 
-                     hdiAdmin = "admin", 
-                     hdiPassword = "********",
-                     kind = "pyspark")
-

To view the status of sessions use AzureSparkListSessions

-
azureSparkListSessions(sc, clusterName = "{hdicluster}")
-

To send a command to the Spark Session use azureSparkCMD(). In this case it submits a Python routine

-
# SAMPLE PYSPARK SCRIPT TO CALCULATE PI
-pythonCmd <- '
-from pyspark import SparkContext
-from operator import add
-import sys
-from random import random
-partitions = 1
-n = 20000000 * partitions
-def f(_):
-  x = random() * 2 - 1
-  y = random() * 2 - 1
-  return 1 if x ** 2 + y ** 2 < 1 else 0
- 
-count = sc.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
-Pi = (4.0 * count / n)
-print("Pi is roughly %f" % Pi)'                   
- 
-azureSparkCMD(sc, cmd = pythonCmd, sessionID = "5")
-
-## [1] "Pi is roughly 3.140285"
-

Check Session variables are retained

-
azureSparkCMD(sc, clusterName = "{hdicluster}", cmd = "print Pi", sessionID="5")
-
-#[1] "3.1422"
-
- - - - - - - -