diff --git a/DESCRIPTION b/DESCRIPTION index ef0c2d2..98a5432 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: AzureSMR Title: Manage and Interact with Azure Resources Description: Helps users to manage Azure Services and objects from within an R Session. This includes Azure Storage (e.g. containers and blobs), Virtual - Machines and HDInsight (Spark, Hive). To use the package, you must configure - an Azure Active Directory application and service principal in the Azure portal. + Machines and HDInsight (Spark, Hive). To use the package, you must configure an + Azure Active Directory application and service principal in the Azure portal. Type: Package Version: 0.2.5 Date: 2017-06-06 @@ -18,16 +18,17 @@ URL: https://github.com/Microsoft/AzureSMR BugReports: https://github.com/Microsoft/AzureSMR/issues NeedsCompilation: no Imports: - assertthat, + assertthat, httr, jsonlite, XML, base64enc, digest, - shiny (>= 0.13), - miniUI (>= 0.1.1), - rstudioapi (>= 0.5), - DT + shiny (>= 0.13), + miniUI (>= 0.1.1), + rstudioapi (>= 0.5), + DT, + lubridate, Depends: R(>= 3.0.0) Suggests: @@ -36,5 +37,5 @@ Suggests: testthat VignetteBuilder: knitr LazyData: TRUE -RoxygenNote: 6.0.1 +RoxygenNote: 5.0.1 Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index c553922..011517b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(azureCreateHDI) export(azureCreateResourceGroup) export(azureCreateStorageAccount) export(azureCreateStorageContainer) +export(azureDataConsumption) export(azureDeleteBlob) export(azureDeleteDeploy) export(azureDeleteHDI) @@ -24,6 +25,7 @@ export(azureDeleteVM) export(azureDeletestorageAccount) export(azureDeployStatus) export(azureDeployTemplate) +export(azureExpenseCalculator) export(azureGetAllVMstatus) export(azureGetBlob) export(azureHDIConf) @@ -40,6 +42,7 @@ export(azureListStorageBlobs) export(azureListStorageContainers) export(azureListSubscriptions) export(azureListVM) +export(azurePricingRates) 
export(azurePutBlob) export(azureResizeHDI) export(azureRunScriptAction) @@ -60,6 +63,8 @@ export(dumpAzureContext) export(is.azureActiveContext) export(read.AzureSMR.config) export(setAzureContext) +import(dplyr) +import(magrittr) importFrom(DT,dataTableOutput) importFrom(DT,renderDataTable) importFrom(XML,htmlParse) @@ -82,6 +87,9 @@ importFrom(httr,headers) importFrom(httr,http_status) importFrom(httr,status_code) importFrom(jsonlite,fromJSON) +importFrom(lubridate,hour) +importFrom(lubridate,minute) +importFrom(lubridate,second) importFrom(miniUI,gadgetTitleBar) importFrom(miniUI,miniContentPanel) importFrom(miniUI,miniPage) diff --git a/R/AzureCost.R b/R/AzureCost.R new file mode 100644 index 0000000..c5e7582 --- /dev/null +++ b/R/AzureCost.R @@ -0,0 +1,403 @@ +#' Get data consumption of an Azure subscription for a time period. Aggregation +#' method can be either daily based or hourly based. +#' +#' @note Formats of start time point and end time point follow ISO 8601 standard +#' Say if one would like to calculate data consumption between Feb 21, 2017 to +#' Feb 25, 2017, with an aggregation granularity of "daily based", the inputs +#' should be "2017-02-21 00:00:00" and "2017-02-25 00:00:00", for start time +#' point and end time point, respectively. If the aggregation granularity is +#' hourly based, the inputs can be "2017-02-21 01:00:00" and +#' "2017-02-21 02:00:00", for start and end time point, respectively. +#' NOTE by default the Azure data +#' consumption API does not allow an aggregation granularity that is finer +#' than an hour. In the case of "hourly based" granularity, if the time +#' difference between start and end time point is less than an hour, data +#' consumption will still be calculated hourly based with end time postponed. 
+#' For example, if the start time point and end time point are "2017-02-21 +#' 00:00:00" and "2017-02-21 00:45:00", the actual returned results are +#' data consumption in the interval of "2017-02-21 00:00:00" and +#' "2017-02-21 01:00:00". However this calculation is merely for retrieving +#' the information of an existing instance instance (e.g., meterId) with +#' which the pricing rate is multiplied by to obtain the overall expense. +#' Time zone of all time inputs are synchronized to UTC. +#' +#' @inheritParams setAzureContext +#' +#' @param instance Instance name that one would like to check expe +#' nse. It is by default empty, which returns data consumption for +#' all instances under subscription. +#' +#' @param timeStart Start time. +#' +#' @param timeEnd End time. +#' +#' @param granularity Aggregation granularity. Can be either "Daily" or +#' "Hourly". +#' @export +azureDataConsumption <- function(azureActiveContext, + instance="", + timeStart, + timeEnd, + granularity="Hourly", + verbose=FALSE) { + + # check the validity of credentials. + + assert_that(is.azureActiveContext(azureActiveContext)) + + # renew token if it expires. + + azureCheckToken(azureActiveContext) + + # preconditions here... 
+ + if(missing(timeStart)) + stop("Please specify a starting time point in YYYY-MM-DD HH:MM:SS format.") + + if(missing(timeEnd)) + stop("Please specify an ending time point in YYYY-MM-DD HH:MM:SS format.") + + ds <- try(as.POSIXlt(timeStart, format= "%Y-%m-%d %H:%M:%S", tz="UTC")) + de <- try(as.POSIXlt(timeEnd, format= "%Y-%m-%d %H:%M:%S", tz="UTC")) + + if (class(ds) == "try-error" || + is.na(ds) || + class(de) == "try-error" || + is.na(de)) + stop("Input date format should be YYYY-MM-DD HH:MM:SS.") + + timeStart <- ds + timeEnd <- de + + if (timeStart >= timeEnd) + stop("End time is no later than start time!") + + lubridate::minute(timeStart) <- 0 + lubridate::second(timeStart) <- 0 + lubridate::minute(timeEnd) <- 0 + lubridate::second(timeEnd) <- 0 + + if (granularity == "Daily") { + + # timeStart and timeEnd should be some day at midnight. + + lubridate::hour(timeStart) <- 0 + lubridate::hour(timeEnd) <- 0 + + } + + # If the computation time is less than a hour, timeEnd will be incremented by + # an hour to get the total cost within an hour aggregated from timeStart. + # However, only the consumption on computation is considered in the returned + # data, and the computation consumption will then be replaced with the actual + # timeEnd - timeStart. + + # NOTE: estimation of cost in this case is rough though, it captures the major + # component of total cost, which originates from running an Azure instance. + # Other than computation cost, there are also cost on activities such as data + # transfer, software library license, etc. This is not included in the + # approximation here until a solid method for capturing those consumption data + # is found. Data ingress does not generate cost, but data egress does. Usually + # the occurrence of data transfer is not that frequent as computation, and + # pricing rates for data transfer is also less than computation (e.g., price + # rate of "data transfer in" is ~ 40% of that of computation on an A3 virtual + # machine). 
+ + # TODO: inlude other types of cost for jobs that take less than an hour. + + if (as.numeric(timeEnd - timeStart) == 0) { + writeLines("Difference between timeStart and timeEnd is less than the + aggregation granularity. Cost is estimated solely on computation + running time.") + + # increment timeEnd by one hour. + + timeEnd <- timeEnd + 3600 + } + + # reformat time variables to make them compatible with API call. + + start <- URLencode(paste(as.Date(timeStart), + "T", + sprintf("%02d", lubridate::hour(timeStart)), + ":", + sprintf("%02d", lubridate::minute(timeStart)), + ":", + sprintf("%02d", lubridate::second(timeStart)), + "+", + "00:00", + sep=""), + reserved=TRUE) + + end <- URLencode(paste(as.Date(timeEnd), + "T", + sprintf("%02d", lubridate::hour(timeEnd)), + ":", + sprintf("%02d", lubridate::minute(timeEnd)), + ":", + sprintf("%02d", lubridate::second(timeEnd)), + "+", + "00:00", + sep=""), + reserved=TRUE) + + url <- + sprintf("https://management.azure.com/subscriptions/%s/providers/ + Microsoft.Commerce/UsageAggregates?api-version=%s + &reportedStartTime=%s&reportedEndTime=%s + &aggregationgranularity=%s&showDetails=%s", + azureActiveContext$subscriptionID, + "2015-06-01-preview", + start, + end, + granularity, + "false" + ) + + r <- call_azure_sm(azureActiveContext, + uri=url, + verb="GET", + verbose=verbose) + + stopWithAzureError(r) + + rl <- content(r, "text", encoding="UTF-8") + + df <- fromJSON(rl) + + df_use <- df$value$properties + + inst_data <- lapply(df$value$properties$instanceData, fromJSON) + + # retrieve results that match instance name. 
+ + if (instance != "") { + instance_detect <- function(inst_data) { + return(basename(inst_data$Microsoft.Resources$resourceUri) == instance) + } + + index_instance <- which(unlist(lapply(inst_data, instance_detect))) + + if(!missing(instance)) { + if(length(index_instance) == 0) + stop("No data consumption records found for the instance during the + given period.") + df_use <- df_use[index_instance, ] + } else if(missing(instance)) { + if(length(index_resource) == 0) + stop("No data consumption records found for the resource group during + the given period.") + df_use <- df_use[index_resource, ] + } + } + + # if time difference is less than one hour. Only return one row of computation + # consumption whose value is the time difference. + + # timeEnd <- timeEnd - 3600 + + if(as.numeric(timeEnd - timeStart) == 0) { + + time_diff <- as.numeric(de - ds) / 3600 + + df_use <- df_use[which(df_use$meterName == "Compute Hours"), ] + df_use <- df_use[1, ] + + df_use$quantity <- df_use$time_diff + + } else { + + # NOTE the maximum number of records returned from API is limited to 1000. + + if (nrow(df_use) == 1000 && + max(as.POSIXct(df_use$usageEndTime)) < as.POSIXct(end)) { + warning(sprintf("The number of records in the specified time period %s + to %s exceeds the limit that can be returned from API call. + Consumption information is truncated. Please use a small + period instead.", timeStart, timeEnd)) + } + } + + df_use <- df_use[, c("usageStartTime", + "usageEndTime", + "meterName", + "meterCategory", + "meterSubCategory", + "unit", + "meterId", + "quantity", + "meterRegion")] + + df_use$usageStartTime <- as.POSIXct(df_use$usageStartTime) + df_use$usageEndTime <- as.POSIXct(df_use$usageEndTime) + + writeLines(sprintf("The data consumption for %s between %s and %s is", + instance, + as.character(timeStart), + as.character(timeEnd))) + + return(df_use) +} + +#' Get pricing details of resources under a subscription. 
+#' +#' @inheritParams setAzureContext +#' +#' @param currency Currency in which price rating is measured. +#' +#' @param locale Locality information of subscription. +#' +#' @param offerId Offer ID of the subscription. Detailed information can be +#' found at https://azure.microsoft.com/en-us/support/legal/offer-details/ +#' +#' @param region region information about the subscription. +#' +#' @note The pricing rates function wraps API calls to Azure RateCard and +#' current only the API supports only for Pay-As-You-Go offer scheme. +#' +#' @export +azurePricingRates <- function(azureActiveContext, + currency, + locale, + offerId, + region, + verbose=FALSE +) { + # renew token if it expires. + + azureCheckToken(azureActiveContext) + + # preconditions. + + if(missing(currency)) + stop("Error: please provide currency information.") + + if(missing(locale)) + stop("Error: please provide locale information.") + + if(missing(offerId)) + stop("Error: please provide offer ID.") + + if(missing(region)) + stop("Error: please provide region information.") + + url <- paste( + "https://management.azure.com/subscriptions/", + azureActiveContext$subscriptionID, + "/providers/Microsoft.Commerce/RateCard?api-version=2016-08-31-preview& + $filter=", + "OfferDurableId eq '", offerId, "'", + " and Currency eq '", currency, "'", + " and Locale eq '", locale, "'", + " and RegionInfo eq '", region, "'", + sep="") + + url <- URLencode(url) + + r <- call_azure_sm(azureActiveContext, + uri=url, + verb="GET", + verbose=verbose) + + stopWithAzureError(r) + + rl <- fromJSON(content(r, "text", encoding="UTF-8"), simplifyDataFrame=TRUE) + + df_meter <- rl$Meters + df_meter$MeterRate <- rl$Meters$MeterRates$`0` + + # NOTE: an irresponsible drop of MeterRates and MeterTags. Will add them back + # after having a better handle of them. 
+ + df_meter <- subset(df_meter, select=-MeterRates) + df_meter <- subset(df_meter, select=-MeterTags) + + names(df_meter) <- paste0(tolower(substring(names(df_meter), + 1, + 1)), + substring(names(df_meter), 2)) + + df_meter +} + +#' Calculate cost of using a specific instance of Azure for certain period. +#' +#' @inheritParams setAzureContext +#' +#' @inheritParams azureDataConsumption +#' +#' @inheritParams azurePricingRates +#' +#' @return Total cost measured in the given currency of the specified Azure +#' instance in the period. +#' +#' @note Note if difference between \code{timeStart} and \code{timeEnd} is +#' less than the finest granularity, e.g., "Hourly" (we notice this is a +#' usual case when one needs to be aware of the charges of a job that takes +#' less than an hour), the expense will be estimated based solely on computation +#' hour. That is, the total expense is the multiplication of computation hour +#' and pricing rate of the requested instance. +#' +#' @export +azureExpenseCalculator <- function(azureActiveContext, + instance="", + timeStart, + timeEnd, + granularity, + currency, + locale, + offerId, + region, + verbose=FALSE) { + df_use <- azureDataConsumption(azureActiveContext, + instance=instance, + timeStart=timeStart, + timeEnd=timeEnd, + granularity=granularity, + verbose=verbose) + + df_used_data <- df_use[, c("meterId", + "meterSubCategory", + "usageStartTime", + "usageEndTime", + "quantity")] + + # use meterId to find pricing rates and then calculate total cost. + + df_rates <- azurePricingRates(azureActiveContext, + currency=currency, + locale=locale, + region=region, + offerId=offerId, + verbose=verbose) + + meter_list <- unique(df_used_data$meterId) + + df_used_rates <- df_rates[which(df_rates$meterId %in% meter_list), ] + df_used_rates$meterId <- df_used_rates$meterId + + # join data consumption and meter pricing rate. 
+ + df_merged <- merge(x=df_used_data, + y=df_used_rates, + by="meterId", + all.x=TRUE) + + df_merged$meterSubCategory <- df_merged$meterSubCategory.y + df_merged$cost <- df_merged$quantity * df_merged$meterRate + + df_cost <- df_merged[, c("meterName", + "meterCategory", + "meterSubCategory", + "quantity", + "unit", + "meterRate", + "cost")] + + names(df_cost) <- paste0(tolower(substring(names(df_cost), + 1, + 1)), + substring(names(df_cost), 2)) + + df_cost +} diff --git a/R/AzureSMR-package.R b/R/AzureSMR-package.R index 81111a0..a26592b 100644 --- a/R/AzureSMR-package.R +++ b/R/AzureSMR-package.R @@ -42,5 +42,6 @@ #' @importFrom httr add_headers headers content status_code http_status authenticate #' @importFrom httr GET PUT DELETE POST #' @importFrom XML htmlParse xpathApply xpathSApply xmlValue +#' @importFrom lubridate hour minute second #' NULL diff --git a/R/zzz.R b/R/zzz.R index 6c8ef31..3c604e3 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,4 +1,6 @@ -AzureSMR.config.default <- "~/.azuresmr/config.json" +AzureSMR.config.default <- ifelse(Sys.info()["sysname"] == "Windows", + paste0("C:/Users/", Sys.getenv("USERNAME"), "/.azuresmr/config.json"), + "~/.azuresmr/config.json") .onAttach <- function(libname, pkgname) { if (is.null(getOption("AzureSMR.config"))) diff --git a/man/azureDataConsumption.Rd b/man/azureDataConsumption.Rd new file mode 100644 index 0000000..1c34175 --- /dev/null +++ b/man/azureDataConsumption.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AzureCost.R +\name{azureDataConsumption} +\alias{azureDataConsumption} +\title{Get data consumption of an Azure subscription for a time period. 
Aggregation method can be either daily based or hourly based.} +\usage{ +azureDataConsumption(azureActiveContext, instance, timeStart, timeEnd, + granularity = "Hourly") +} +\arguments{ +\item{azureActiveContext}{AzureSMR context object.} + +\item{instance}{Instance of Azure DSVM name that one would like to check expense.} + +\item{timeStart}{Start time.} + +\item{timeEnd}{End time.} + +\item{granularity}{Aggregation granularity. Can be either "Daily" or "Hourly".} +} +\note{ +Formats of start time point and end time point follow ISO 8601 standard. Say if one would like to calculate data consumption between Feb 21, 2017 to Feb 25, 2017, with an aggregation granularity of "daily based", the inputs should be "2017-02-21 00:00:00" and "2017-02-25 00:00:00", for start time point and end time point, respectively. If the aggregation granularity is hourly based, the inputs can be "2017-02-21 01:00:00" and "2017-02-21 02:00:00", for start and end time point, respectively. NOTE by default the Azure data consumption API does not allow an aggregation granularity that is finer than an hour. In the case of "hourly based" granularity, if the time difference between start and end time point is less than an hour, data consumption will still be calculated hourly based with end time postponed. For example, if the start time point and end time point are "2017-02-21 00:00:00" and "2017-02-21 00:45:00", the actual returned results are are data consumption in the interval of "2017-02-21 00:00:00" and "2017-02-21 01:00:00". However this calculation is merely for retrieving the information of an existing DSVM instance (e.g., meterId) with which the pricing rate is multiplied by to obtain the overall expense. Time zone of all time inputs are synchronized to UTC. 
+}
+
diff --git a/man/azureExpenseCalculator.Rd b/man/azureExpenseCalculator.Rd
new file mode 100644
index 0000000..109f737
--- /dev/null
+++ b/man/azureExpenseCalculator.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AzureCost.R
+\name{azureExpenseCalculator}
+\alias{azureExpenseCalculator}
+\title{Calculate cost of using a specific instance of Azure for certain period.}
+\usage{
+azureExpenseCalculator(azureActiveContext, instance, timeStart, timeEnd,
+  granularity, currency, locale, offerId, region)
+}
+\arguments{
+\item{azureActiveContext}{AzureSMR context.}
+
+\item{instance}{Name of the Azure instance whose expense one would like to check. No matter whether resource group is given or not, if an instance name is given, data consumption of that instance is returned.}
+
+\item{timeStart}{Start time.}
+
+\item{timeEnd}{End time.}
+
+\item{granularity}{Aggregation granularity. Can be either "Daily" or "Hourly".}
+
+\item{currency}{Currency in which price rating is measured.}
+
+\item{locale}{Locality information of subscription.}
+
+\item{offerId}{Offer ID of the subscription. Detailed information can be found at https://azure.microsoft.com/en-us/support/legal/offer-details/}
+
+\item{region}{region information about the subscription.}
+}
+\value{
+Total cost measured in the given currency of the specified Azure instance in the period.
+}
+\note{
+Note if difference between \code{timeStart} and \code{timeEnd} is less than the finest granularity, e.g., "Hourly" (we notice this is a usual case when one needs to be aware of the charges of a job that takes less than an hour), the expense will be estimated based solely on computation hour. That is, the total expense is the multiplication of computation hour and pricing rate of the DSVM instance.
+}
+
diff --git a/man/azurePricingRates.Rd b/man/azurePricingRates.Rd
new file mode 100644
index 0000000..b8acd59
--- /dev/null
+++ b/man/azurePricingRates.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AzureCost.R
+\name{azurePricingRates}
+\alias{azurePricingRates}
+\title{Get pricing details of resources under a subscription.}
+\usage{
+azurePricingRates(azureActiveContext, currency, locale, offerId, region)
+}
+\arguments{
+\item{azureActiveContext}{- Azure Context Object.}
+
+\item{currency}{Currency in which price rating is measured.}
+
+\item{locale}{Locality information of subscription.}
+
+\item{offerId}{Offer ID of the subscription. Detailed information can be found at https://azure.microsoft.com/en-us/support/legal/offer-details/}
+
+\item{region}{region information about the subscription.}
+}
+
diff --git a/tests/testthat/test-cost.R b/tests/testthat/test-cost.R
new file mode 100644
index 0000000..643e7b5
--- /dev/null
+++ b/tests/testthat/test-cost.R
@@ -0,0 +1,103 @@
+# -----------------------------------------------------------------
+# Test for cost functions.
+# -----------------------------------------------------------------
+
+# preambles.
+
+if (interactive()) library("testthat")
+
+settingsfile <- getOption("AzureSMR.config")
+config <- read.AzureSMR.config()
+
+# setup.
+
+context("Data consumption and cost")
+
+asc <- createAzureContext()
+with(config,
+     setAzureContext(asc, tenantID=tenantID, clientID=clientID, authKey=authKey)
+)
+azureAuthenticate(asc)
+
+timestamp <- format(Sys.time(), format="%y%m%d%H%M")
+resourceGroup_name <- paste0("AzureSMtest_", timestamp)
+sa_name <- paste0("azuresmr", timestamp)
+
+# run test.
+
+# get data consumption by day.
+
+test_that("Get data consumption by day", {
+  skip_if_missing_config(settingsfile)
+
+  # paste() puts the blank between date and time that "%Y-%m-%d %H:%M:%S" needs.
+  time_end <- paste(Sys.Date(), "00:00:00")
+  time_start <- paste(Sys.Date() - 365, "00:00:00")
+
+  res <- azureDataConsumption(azureActiveContext=asc,
+                              timeStart=time_start,
+                              timeEnd=time_end,
+                              granularity="Daily")
+  expect_is(res, class="data.frame")
+  expect_identical(object=names(res), expected=c("usageStartTime",
+                                                 "usageEndTime",
+                                                 "meterName",
+                                                 "meterCategory",
+                                                 "meterSubCategory",
+                                                 "unit",
+                                                 "meterId",
+                                                 "quantity",
+                                                 "meterRegion"))
+})
+
+# get pricing rates for meters under subscription.
+
+test_that("Get pricing rates", {
+  skip_if_missing_config(settingsfile)
+
+  res <- azurePricingRates(azureActiveContext=asc,
+                           currency=config$CURRENCY,
+                           locale=config$LOCALE,
+                           offerId=config$OFFER,
+                           region=config$REGION)
+
+  expect_is(res, class="data.frame")
+  expect_identical(object=names(res), expected=c("effectiveDate",
+                                                 "includedQuantity",
+                                                 "meterCategory",
+                                                 "meterId",
+                                                 "meterName",
+                                                 "meterRegion",
+                                                 "meterStatus",
+                                                 "meterSubCategory",
+                                                 "unit",
+                                                 "meterRate"))
+})
+
+
+# total expense by day.
+
+test_that("Get cost by day", {
+  skip_if_missing_config(settingsfile)
+
+  # paste() puts the blank between date and time that "%Y-%m-%d %H:%M:%S" needs.
+  time_end <- paste(Sys.Date(), "00:00:00")
+  time_start <- paste(Sys.Date() - 365, "00:00:00")
+
+  res <- azureExpenseCalculator(azureActiveContext=asc,
+                                timeStart=time_start,
+                                timeEnd=time_end,
+                                granularity="Daily",
+                                currency=config$CURRENCY,
+                                locale=config$LOCALE,
+                                offerId=config$OFFER,
+                                region=config$REGION)
+  expect_is(res, class="data.frame")
+  expect_identical(object=names(res), expected=c("meterName",
+                                                 "meterCategory",
+                                                 "meterSubCategory",
+                                                 "quantity",
+                                                 "unit",
+                                                 "meterRate",
+                                                 "cost"))
+})