From fd89b6fdd06faa5c2ebb9bcf28c6bd6b80d1da42 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 28 Jul 2023 10:53:29 -0700 Subject: [PATCH 01/50] Scala client for certified events --- .../Usage/FabricTokenServiceClient.scala | 67 ++++++++ .../ml/logging/Usage/FabricUtils.scala | 61 ++++++++ .../logging/Usage/FeatureUsagePayload.scala | 7 + .../synapse/ml/logging/Usage/TokenUtils.scala | 124 +++++++++++++++ .../ml/logging/Usage/UsageConstants.scala | 36 +++++ .../synapse/ml/logging/Usage/UsageUtils.scala | 147 ++++++++++++++++++ .../synapse/ml/logging/common/WebUtils.scala | 26 ++++ 7 files changed, 468 insertions(+) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala new file mode 100644 index 0000000000..a9758e10d7 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -0,0 +1,67 @@ +package com.microsoft.azure.synapse.ml.logging.Usage + +import java.util.UUID +import java.net.URL +import java.net.InetAddress +import java.lang.management.ManagementFactory +import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ 
+import spray.json.DefaultJsonProtocol.StringJsonFormat +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import spray.json.{JsArray, JsObject, JsValue, _} +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet} +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging + +class FabricTokenServiceClient { + val resourceMapping = Map( + "https://storage.azure.com" -> "storage", + "storage" -> "storage", + "https://analysis.windows.net/powerbi/api" -> "pbi", + "pbi" -> "pbi", + "https://vault.azure.net" -> "keyvault", + "keyvault" -> "keyvault", + "https://kusto.kusto.windows.net" -> "kusto", + "kusto" -> "kusto" + ) + + val hostname = InetAddress.getLocalHost.getHostName + val processDetail = ManagementFactory.getRuntimeMXBean().getName() + val processName = processDetail.substring(processDetail.indexOf('@') + 1) + + val fabricConbtext = FabricUtils.getFabricContext() + val synapseTokenserviceEndpoint = fabricConbtext(SYNAPSE_TOKEN_SERVICE_ENDPOINT) + val workloadEndpoint = fabricConbtext(TRIDENT_LAKEHOUSE_TOKEN_SERVICE_ENDPOINT) + val sessionToken = fabricConbtext(TRIDENT_SESSION_TOKEN) + val clusterIdentifier = fabricConbtext(SYNAPSE_CLUSTER_IDENTIFIER) + + def getAccessToken(resourceParam: String): String = { + if (!resourceMapping.contains(resourceParam)) { + throw new Exception(s"$resourceParam not supported") + } + val resource = resourceMapping.getOrElse(resourceParam, "") + val rid = UUID.randomUUID().toString() + //to do workloadEndpoint + val targetUrl = new URL(workloadEndpoint) + var headers = Map( + "x-ms-cluster-identifier" -> clusterIdentifier, + "x-ms-workload-resource-moniker" -> clusterIdentifier, + "Content-Type" -> "application/json;charset=utf-8", + "x-ms-proxy-host" -> s"${targetUrl.getProtocol}://${targetUrl.getHost}", + "x-ms-partner-token" -> sessionToken, + "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", + "x-ms-client-request-id" -> rid + ) + var url = 
s"$synapseTokenserviceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" + var response: JsValue = JsonParser("") + try { + response = usageGet(url, headers) + if (response.asJsObject.fields("status_code").convertTo[String] != 200 + || response.asJsObject.fields("content").convertTo[String].isEmpty) { + throw new Exception("Fetch access token error") + } + } catch { + case e: Exception => + SynapseMLLogging.logMessage(s"Failed to fetch cluster details $e") + } + response.asJsObject.fields("content").toString().getBytes("UTF-8").toString() + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala new file mode 100644 index 0000000000..46d76fa8a0 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -0,0 +1,61 @@ +package com.microsoft.azure.synapse.ml.logging.Usage +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import spray.json._ +import spray.json.DefaultJsonProtocol._ +import scala.util.matching.Regex +import scala.io.Source + +case class TokenServiceConfig(tokenServiceEndpoint: String, + clusterType: String, + clusterName: String, + sessionToken: String) + +object TokenServiceConfigProtocol extends DefaultJsonProtocol { + implicit val tokenServiceConfigFormat: RootJsonFormat[TokenServiceConfig] = jsonFormat4(TokenServiceConfig) +} + +import TokenServiceConfigProtocol._ + +object FabricUtils { + var trident_context = Map[String, String]() + + def getFabricContext(): Map[String, String] = { + if (trident_context.nonEmpty) { + trident_context + } else { + try { + val lines = scala.io.Source.fromFile(FabricConstants.CONTEXT_FILE_PATH).getLines().toList + for (line <- lines) { + if (line.split('=').length == 2) { + val Array(k, v) = line.split('=') + trident_context += (k.trim -> 
v.trim) + } + } + + var file_content: String = Source.fromFile(FabricConstants.TOKEN_SERVICE_FILE_PATH).mkString + file_content = cleanJson(file_content) + val tokenServiceConfigJson = file_content.parseJson + + // Extract the values from the JSON using Spray JSON's automatic JSON-to-case-class conversion + val tokenServiceConfig = tokenServiceConfigJson.convertTo[TokenServiceConfig] + // Populate the trident_context map + trident_context += (FabricConstants.SYNAPSE_TOKEN_SERVICE_ENDPOINT -> tokenServiceConfig.tokenServiceEndpoint) + trident_context += (FabricConstants.SYNAPSE_CLUSTER_TYPE -> tokenServiceConfig.clusterType) + trident_context += (FabricConstants.SYNAPSE_CLUSTER_IDENTIFIER -> tokenServiceConfig.clusterName) + trident_context += (FabricConstants.TRIDENT_SESSION_TOKEN -> tokenServiceConfig.sessionToken) + } catch { + case e: Exception => + SynapseMLLogging.logMessage(s"Error reading Fabric context file: $e") + throw e + } + } + trident_context + } + + def cleanJson(s: String): String = { + val pattern: Regex = ",[ \t\r\n]+}".r + val cleanedJson = pattern.replaceAllIn(s, "}") + cleanedJson + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala new file mode 100644 index 0000000000..2cab00382f --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -0,0 +1,7 @@ +package com.microsoft.azure.synapse.ml.logging.Usage + +import scala.collection.mutable.Map + +case class FeatureUsagePayload(feature_name: UsageFeatureNames.Value, + activity_name: FeatureActivityName.Value, + attributes: Map[String, String] = Map.empty[String, String] ) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala new file mode 100644 index 
0000000000..b4225b42a0 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -0,0 +1,124 @@ +package com.microsoft.azure.synapse.ml.logging.Usage + +import scala.reflect.runtime.{ universe, currentMirror } +import scala.reflect.runtime.universe._ +import java.time.Instant +import org.apache.spark.SparkContext +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import spray.json.{JsArray, JsObject, JsValue, _} +import spray.json.DefaultJsonProtocol.{IntJsonFormat, StringJsonFormat, jsonFormat3} +import java.util.UUID +import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ + +case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) +object TokenUtils { + var AADToken: String = "" + val MwcWorkloadTypeMl = "ML" + + def getAccessToken(): String = { + val token = "" + if (checkTokenValid(this.AADToken)) + this.AADToken + else { + refreshAccessToken() + this.AADToken + } + } + + def getAccessToken(tokenType: String): String = { + + val objectName = "com.microsoft.azure.trident.tokenlibrary.TokenLibrary" + val mirror = currentMirror + val module = mirror.staticModule(objectName) + val obj = mirror.reflectModule(module).instance + val objType = mirror.reflect(obj).symbol.toType + val methodName = "getAccessToken" + val methodSymbols = objType.decl(TermName(methodName)).asTerm.alternatives + val argType = typeOf[String] + + val selectedMethodSymbol = methodSymbols.find { m => + + m.asMethod.paramLists match { + + case List(List(param)) => param.typeSignature =:= argType + + case _ => false + + } + + }.getOrElse(throw new NoSuchMethodException(s"Method $methodName with argument type $argType not found")) + + val methodMirror = mirror.reflect(obj).reflectMethod(selectedMethodSymbol.asMethod) + methodMirror(tokenType).asInstanceOf[String] + + } + + def checkTokenValid(token: String): Boolean = { + if (token == null || token.isEmpty()) { + false + } + try { + val parsedToken = 
token.parseJson.asJsObject() + val expTime = parsedToken.fields("exp").convertTo[Int] + val now = Instant.now().getEpochSecond() + now < expTime - 60 + } catch { + case e: Exception => { + false + } + } + } + + def refreshAccessToken(): Unit = { + try { + if (SparkContext.getOrCreate() != null) { + val token = getAccessToken("pbi") + AADToken = token + SynapseMLLogging.logMessage("SynapseML Utils: refreshed pbi token via token library") + } else { + val token = new FabricTokenServiceClient().getAccessToken("pbi") + AADToken = token + SynapseMLLogging.logMessage("SynapseML Utils: refreshed pbi token via direct API call") + } + } catch { + case e: Exception => { + SynapseMLLogging.logMessage(s"failed to refresh pbi token: {e}") + } + } + } + + def getMWCToken(shared_host: String, workspace_id: String, capacity_id: String, + workload_type: String): MwcToken = { + val url: String = shared_host + "/metadata/v201606/generatemwctokenv2" + + val payLoad = s"""{ + |"capacityObjectId": "$capacity_id", + |"workspaceObjectId": "$workspace_id", + |"workloadType": "$workload_type" + }""".stripMargin + + val driverAADToken = getAccessToken() + val headers = Map( + "Content-Type" -> "application/json", + "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, + "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString + ) + try{ + var response = usagePost(url, payLoad, headers) + if (response.asJsObject.fields("status_code").convertTo[String] != 200 + || response.asJsObject.fields("content").convertTo[String].isEmpty) { + throw new Exception("Fetch access token error") + } + var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] + targetUriHost = s"https://$targetUriHost" + response.asJsObject.fields.updated("TargetUriHost", targetUriHost) + implicit val mwcTokenFormat = jsonFormat3(MwcToken) + response.convertTo[MwcToken] + } + catch { + case e: Exception => + SynapseMLLogging.logMessage(s"Failed to fetch cluster details: $e") + throw e + } + 
} +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala new file mode 100644 index 0000000000..a213a10bb0 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -0,0 +1,36 @@ +package com.microsoft.azure.synapse.ml.logging.Usage +object UsageFeatureNames extends Enumeration { + type UsageFeatureNames = Value + val Predict = Value(1) +} + +object FeatureActivityName extends Enumeration { + type FeatureActivityName = Value + val API_Transform = Value(1) + val API_SQL = Value(2) + val API_UDF = Value(3) +} + +object FabricConstants { + val ML_KUSTO_TdABLE_NAME = "SynapseMLLogs" + val EMIT_USAGE = "emit_usage" + + val CONTEXT_FILE_PATH = "/home/trusted-service-user/.trident-context" + val TOKEN_SERVICE_FILE_PATH = "/opt/token-service/tokenservice.config.json" + + val SYNAPSE_TOKEN_SERVICE_ENDPOINT = "synapse.tokenServiceEndpoint" + val SYNAPSE_CLUSTER_IDENTIFIER = "synapse.clusterIdentifier" + val SYNAPSE_CLUSTER_TYPE = "synapse.clusterType" + val TRIDENT_LAKEHOUSE_TOKEN_SERVICE_ENDPOINT = "trident.lakehouse.tokenservice.endpoint" + val TRIDENT_SESSION_TOKEN = "trident.session.token" + val WEB_API = "webapi" + val CAPACITIES = "capacities" + val WORKLOADS = "workloads" + val WORKSPACE_ID = "workspaceid" + + val WORKLOAD_ENDPOINT_ML = "ML" + val WORKLOAD_ENDPOINT_LLM_PLUGIN = "LlmPlugin" + val WORKLOAD_ENDPOINT_AUTOMATIC = "Automatic" + val WORKLOAD_ENDPOINT_REGISTRY = "Registry" + val WORKLOAD_ENDPOINT_ADMIN = "MLAdmin" +} \ No newline at end of file diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala new file mode 100644 index 0000000000..dfef03827f --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -0,0 
+1,147 @@ +package com.microsoft.azure.synapse.ml.logging.Usage +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import java.util.UUID +import org.apache.spark.sql.SparkSession +import spray.json._ +import spray.json.DefaultJsonProtocol.StringJsonFormat +import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken +import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils +import com.microsoft.azure.synapse.ml.logging.Usage.MwcToken +import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload + +object UsageTelemetry { + // val sc = SparkSession.builder().getOrCreate().sparkContext + val CapacityId = getHadoopConfig("trident.capacity.id") + val WorkspaceId = getHadoopConfig("trident.artifact.workspace.id") + val ArtifactId = getHadoopConfig("trident.artifact.id") + val OnelakeEndpoint = getHadoopConfig("trident.onelake.endpoint") + val Region = getHadoopConfig("spark.cluster.region") + val PbiEnv = getHadoopConfig("spark.trident.pbienv").toLowerCase() + + val SharedHost = getMlflowSharedHost(PbiEnv) + val shared_endpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" + val wlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) + + val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" + def reportUsage(payload: FeatureUsagePayload): Unit = { + if (sys.env.getOrElse(EMIT_USAGE, "True") != "True") { + return + } + try { + reportUsageTelemetry(payload.feature_name.toString, payload.activity_name.toString.replace('_', '/'), payload.attributes.toMap) + } catch { + case runtimeError: Exception => + SynapseMLLogging.logMessage(s"_report_usage_telemetry: usage telemetry error = $runtimeError") + } + } + + def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { + 
SynapseMLLogging.logMessage(s"usage telemetry feature_name: $featureName, activity_name: $activityName, attributes: $attributes") + if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false") == "false") { + val data = + s"""{ + |"timestamp":${System.currentTimeMillis()}, + |"feature_name":"$featureName", + |"activity_name":"${activityName.replace('.', '-')}", + |"attributes":${attributes.map { case (k, v) => s""""$k":"$v"""" }.mkString("{", ",", "}")}, + |"session_id":"${UUID.randomUUID().toString}" + |}""".stripMargin + + val mlAdminEndpoint = getMLWorkloadEndpoint(WORKLOAD_ENDPOINT_ADMIN) + val url = """{$mlAdminEndpoint}telemetry""".stripMargin + + val driverAADToken = getAccessToken() + + val headers = Map( + "Content-Type" -> "application/json", + "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, + "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString + ) + + var response: JsValue = JsonParser("") + try { + response = usagePost(url, "", headers) + if (response.asJsObject.fields("status_code").convertTo[String] != 200 + || response.asJsObject.fields("content").toString().isEmpty) { + throw new Exception("Fetch access token error") + } + } catch { + case e: Exception => + SynapseMLLogging.logMessage(s"sending $e") + } + response.asJsObject.fields("content").toString().getBytes("UTF-8") + } + } + + def getHadoopConfig(key: String): String = { + if (sc == null) { + "" + } else { + val value = sc.hadoopConfiguration.get(key, "") + if (value.isEmpty) { + throw new Exception(s"missing $key in hadoop config, mlflow failed to init") + } + value + } + } + + val PbiGlobalServiceEndpoints = Map ( + "public" -> "https://api.powerbi.com/", + "fairfax" -> "https://api.powerbigov.us", + "mooncake" -> "https://api.powerbi.cn", + "blackforest" -> "https://app.powerbi.de", + "msit" -> "https://api.powerbi.com/", + "prod"-> "https://api.powerbi.com/", + "int3"-> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dxt" -> 
"https://powerbistagingapi.analysis.windows.net/", + "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dev" -> "https://onebox-redirect.analysis.windows-int.net/", + "console" -> "http://localhost:5001/", + "daily"-> "https://dailyapi.powerbi.com/") + + + val DefaultGlobalServiceEndpoint: String = "https://api.powerbi.com/" + val FetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" + def getMlflowSharedHost(pbienv: String): String = { + val url = PbiGlobalServiceEndpoints.getOrElse(pbienv, DefaultGlobalServiceEndpoint) + FetchClusterDetailUri + //val sessionToken = FabricUtils.getFabricContext()(TRIDENT_SESSION_TOKEN) + //todo: check if we need pbi token + + val headers = Map( + "Authorization" -> s"Bearer ${TokenUtils.getAccessToken()}", + "RequestId" -> java.util.UUID.randomUUID().toString + ) + var response: JsValue = JsonParser("") + try{ + response = usageGet(url, headers) + } + catch + { + case e: Exception => + SynapseMLLogging.logMessage(s"sending $e") + } + response.asJsObject.fields("clusterUrl").convertTo[String] + } + + def getMlflowWorkloadHost(pbienv: String, capacityId: String, workspaceId: String, sharedHost: String = ""): String = { + val clusterUrl = if (sharedHost.isEmpty) { + getMlflowSharedHost(pbienv) + } else { + sharedHost + } + val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MWC_WORKLOAD_TYPE_ML) + if (mwcToken != null && mwcToken.TargetUriHost != null) { + mwcToken.TargetUriHost + } else { + "" + } + } + + def getMLWorkloadEndpoint(endpoint: String): String = { + val ml_workload_endpoint = s"${this.wlHost}/$WEB_API/$CAPACITIES/${this.CapacityId}/$WORKLOADS/" + + s"$WORKLOAD_ENDPOINT_ML/$endpoint/$WORKLOAD_ENDPOINT_AUTOMATIC/${WORKSPACE_ID}/${this.WorkspaceId}/" + ml_workload_endpoint + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala new file mode 100644 index 0000000000..bdfd09ffa4 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -0,0 +1,26 @@ +package com.microsoft.azure.synapse.ml.logging.common + +import org.apache.http.client.methods.{HttpGet, HttpPost} +import org.apache.http.entity.StringEntity +import spray.json.{JsArray, JsObject, JsValue, _} +import com.microsoft.azure.synapse.ml.io.http.RESTHelpers +object WebUtils { + + val Region: String = "eastus" + val BaseURL: String = s"https://$Region.azuredatabricks.net/api/2.0/" + + def usagePost(url: String, body: String, headerPayload: Map[String, String]): JsValue = { + val request = new HttpPost(url) + for ((k,v) <- headerPayload) + request.addHeader(k, v) + request.setEntity(new StringEntity(body)) + RESTHelpers.sendAndParseJson(request) + } + + def usageGet(url: String, headerPayload: Map[String, String]): JsValue = { + val request = new HttpGet(url) + for ((k, v) <- headerPayload) + request.addHeader(k, v) + RESTHelpers.sendAndParseJson(request) + } +} From 0bbac30a9701f5fdc10df018f896d8632213284b Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 28 Jul 2023 11:04:19 -0700 Subject: [PATCH 02/50] Fixing bug, where I was extracting region and pbi environment details from hadoop configuration. 
Changed it to retrieve rather from spark configuration --- .../azure/synapse/ml/logging/Usage/UsageUtils.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index dfef03827f..c8e6fcdbf1 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -1,4 +1,5 @@ package com.microsoft.azure.synapse.ml.logging.Usage + import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ @@ -12,13 +13,14 @@ import com.microsoft.azure.synapse.ml.logging.Usage.MwcToken import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload object UsageTelemetry { - // val sc = SparkSession.builder().getOrCreate().sparkContext + val sc = SparkSession.builder().getOrCreate().sparkContext val CapacityId = getHadoopConfig("trident.capacity.id") val WorkspaceId = getHadoopConfig("trident.artifact.workspace.id") val ArtifactId = getHadoopConfig("trident.artifact.id") val OnelakeEndpoint = getHadoopConfig("trident.onelake.endpoint") - val Region = getHadoopConfig("spark.cluster.region") - val PbiEnv = getHadoopConfig("spark.trident.pbienv").toLowerCase() + val Region = sc.getConf.get("spark.cluster.region", "") + val PbiEnv = sc.getConf.get("spark.trident.pbienv", "").toLowerCase() + val SharedHost = getMlflowSharedHost(PbiEnv) val shared_endpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" From 9d85993d7b411ca86ed5a013cdb1abe9cf990a34 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 4 Aug 2023 15:11:01 -0700 Subject: [PATCH 03/50] This state represents code that works in notebook after porting 
section of the codes separately in Edog. --- .../synapse/ml/io/http/RESTHelpers.scala | 21 ++- .../Usage/FabricTokenServiceClient.scala | 14 +- .../ml/logging/Usage/FabricUtils.scala | 33 +++-- .../logging/Usage/FeatureUsagePayload.scala | 5 +- .../synapse/ml/logging/Usage/TokenUtils.scala | 36 +++-- .../ml/logging/Usage/UsageConstants.scala | 45 +++--- .../synapse/ml/logging/Usage/UsageUtils.scala | 130 ++++++++++-------- .../synapse/ml/logging/common/WebUtils.scala | 27 +++- 8 files changed, 183 insertions(+), 128 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala index eb6ac62fea..9cbe91895f 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala @@ -14,6 +14,8 @@ import scala.annotation.tailrec import scala.concurrent.blocking import scala.util.Try +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging + object RESTHelpers { lazy val RequestTimeout = 60000 @@ -53,7 +55,6 @@ object RESTHelpers { backoffs: List[Int] = List(100, 500, 1000), //scalastyle:ignore magic.number expectedCodes: Set[Int] = Set(), close: Boolean = true): CloseableHttpResponse = { - retry(backoffs, { () => val response = Client.execute(request) try { @@ -78,6 +79,10 @@ object RESTHelpers { } } catch { case e: Exception => + println(s"RESTHelpers::safeSend: getting error response parsing." + + s". Exception = $e") + SynapseMLLogging.logMessage(s"RESTHelpers::safeSend: getting error response parsing." + + s". 
Exception = $e") response.close() throw e } finally { @@ -89,7 +94,18 @@ object RESTHelpers { } def parseResult(result: CloseableHttpResponse): String = { - IOUtils.toString(result.getEntity.getContent, "utf-8") + var res: String = "" + try { + res = IOUtils.toString(result.getEntity.getContent, "utf-8") + } + catch{ + case e: Exception => + println(s"RestHelpers::parseResult: getting exception parsing response." + + s"Exception = $e") + SynapseMLLogging.logMessage(s"RestHelpers::parseResult: getting exception parsing response." + + s"Exception = $e") + } + res } def sendAndParseJson(request: HttpRequestBase, expectedCodes: Set[Int]=Set()): JsValue = { @@ -98,5 +114,4 @@ object RESTHelpers { response.close() output } - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index a9758e10d7..43091a72ed 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -1,3 +1,6 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ package com.microsoft.azure.synapse.ml.logging.Usage import java.util.UUID @@ -28,10 +31,10 @@ class FabricTokenServiceClient { val processName = processDetail.substring(processDetail.indexOf('@') + 1) val fabricConbtext = FabricUtils.getFabricContext() - val synapseTokenserviceEndpoint = fabricConbtext(SYNAPSE_TOKEN_SERVICE_ENDPOINT) - val workloadEndpoint = fabricConbtext(TRIDENT_LAKEHOUSE_TOKEN_SERVICE_ENDPOINT) - val sessionToken = fabricConbtext(TRIDENT_SESSION_TOKEN) - val clusterIdentifier = fabricConbtext(SYNAPSE_CLUSTER_IDENTIFIER) + val synapseTokenserviceEndpoint = fabricConbtext(SynapseTokenServiceEndpoint) + val workloadEndpoint = fabricConbtext(TridentLakehouseTokenServiceEndpoint) + val sessionToken = fabricConbtext(TridentSessionToken) + val clusterIdentifier = fabricConbtext(SynapseClusterIdentifier) def getAccessToken(resourceParam: String): String = { if (!resourceMapping.contains(resourceParam)) { @@ -60,7 +63,8 @@ class FabricTokenServiceClient { } } catch { case e: Exception => - SynapseMLLogging.logMessage(s"Failed to fetch cluster details $e") + println(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") + SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") } response.asJsObject.fields("content").toString().getBytes("UTF-8").toString() } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index 46d76fa8a0..d29251175b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -1,3 +1,6 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging @@ -12,45 +15,45 @@ case class TokenServiceConfig(tokenServiceEndpoint: String, sessionToken: String) object TokenServiceConfigProtocol extends DefaultJsonProtocol { - implicit val tokenServiceConfigFormat: RootJsonFormat[TokenServiceConfig] = jsonFormat4(TokenServiceConfig) + implicit val TokenServiceConfigFormat: RootJsonFormat[TokenServiceConfig] = jsonFormat4(TokenServiceConfig) } import TokenServiceConfigProtocol._ object FabricUtils { - var trident_context = Map[String, String]() + var TridentContext = Map[String, String]() def getFabricContext(): Map[String, String] = { - if (trident_context.nonEmpty) { - trident_context + if (TridentContext.nonEmpty) { + TridentContext } else { try { - val lines = scala.io.Source.fromFile(FabricConstants.CONTEXT_FILE_PATH).getLines().toList + val lines = scala.io.Source.fromFile(FabricConstants.ContextFilePath).getLines().toList for (line <- lines) { if (line.split('=').length == 2) { val Array(k, v) = line.split('=') - trident_context += (k.trim -> v.trim) + TridentContext += (k.trim -> v.trim) } } - var file_content: String = Source.fromFile(FabricConstants.TOKEN_SERVICE_FILE_PATH).mkString - file_content = cleanJson(file_content) - val tokenServiceConfigJson = file_content.parseJson + var fileContent: String = Source.fromFile(FabricConstants.TokenServiceFilePath).mkString + fileContent = cleanJson(fileContent) + val tokenServiceConfigJson = fileContent.parseJson // Extract the values from the JSON using Spray JSON's automatic JSON-to-case-class conversion val tokenServiceConfig = tokenServiceConfigJson.convertTo[TokenServiceConfig] - // Populate the trident_context map - trident_context += (FabricConstants.SYNAPSE_TOKEN_SERVICE_ENDPOINT -> tokenServiceConfig.tokenServiceEndpoint) - trident_context += 
(FabricConstants.SYNAPSE_CLUSTER_TYPE -> tokenServiceConfig.clusterType) - trident_context += (FabricConstants.SYNAPSE_CLUSTER_IDENTIFIER -> tokenServiceConfig.clusterName) - trident_context += (FabricConstants.TRIDENT_SESSION_TOKEN -> tokenServiceConfig.sessionToken) + // Populate the TridentContext map + TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> tokenServiceConfig.tokenServiceEndpoint) + TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) + TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) + TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) } catch { case e: Exception => SynapseMLLogging.logMessage(s"Error reading Fabric context file: $e") throw e } } - trident_context + TridentContext } def cleanJson(s: String): String = { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala index 2cab00382f..74d8806fac 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -1,6 +1,9 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ package com.microsoft.azure.synapse.ml.logging.Usage -import scala.collection.mutable.Map +//import scala.collection.mutable.Map case class FeatureUsagePayload(feature_name: UsageFeatureNames.Value, activity_name: FeatureActivityName.Value, diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index b4225b42a0..09f0a8a2ab 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -1,3 +1,6 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + package com.microsoft.azure.synapse.ml.logging.Usage import scala.reflect.runtime.{ universe, currentMirror } @@ -17,12 +20,12 @@ object TokenUtils { def getAccessToken(): String = { val token = "" - if (checkTokenValid(this.AADToken)) - this.AADToken - else { - refreshAccessToken() + /*if (checkTokenValid(this.AADToken)) this.AADToken - } + else {*/ + refreshAccessToken() + this.AADToken + //} } def getAccessToken(tokenType: String): String = { @@ -37,20 +40,14 @@ object TokenUtils { val argType = typeOf[String] val selectedMethodSymbol = methodSymbols.find { m => - m.asMethod.paramLists match { - case List(List(param)) => param.typeSignature =:= argType - case _ => false - } - }.getOrElse(throw new NoSuchMethodException(s"Method $methodName with argument type $argType not found")) val methodMirror = mirror.reflect(obj).reflectMethod(selectedMethodSymbol.asMethod) methodMirror(tokenType).asInstanceOf[String] - } def checkTokenValid(token: String): Boolean = { @@ -64,6 +61,7 @@ object TokenUtils { now < expTime - 60 } catch { case e: Exception => { + SynapseMLLogging.logMessage(s"TokenUtils::checkTokValid: Token {$token} parsing went wrong (usage test).") false } } @@ -74,41 +72,41 @@ 
object TokenUtils { if (SparkContext.getOrCreate() != null) { val token = getAccessToken("pbi") AADToken = token - SynapseMLLogging.logMessage("SynapseML Utils: refreshed pbi token via token library") } else { val token = new FabricTokenServiceClient().getAccessToken("pbi") AADToken = token - SynapseMLLogging.logMessage("SynapseML Utils: refreshed pbi token via direct API call") } } catch { case e: Exception => { - SynapseMLLogging.logMessage(s"failed to refresh pbi token: {e}") + SynapseMLLogging.logMessage(s"refreshAccessTok: failed to refresh pbi tok. Exception: {e}. (usage test)") } } } - def getMWCToken(shared_host: String, workspace_id: String, capacity_id: String, + def getMWCToken(shared_host: String, WorkspaceId: String, capacity_id: String, workload_type: String): MwcToken = { val url: String = shared_host + "/metadata/v201606/generatemwctokenv2" val payLoad = s"""{ |"capacityObjectId": "$capacity_id", - |"workspaceObjectId": "$workspace_id", + |"workspaceObjectId": "$WorkspaceId", |"workloadType": "$workload_type" }""".stripMargin val driverAADToken = getAccessToken() + val headers = Map( "Content-Type" -> "application/json", "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) + try{ var response = usagePost(url, payLoad, headers) - if (response.asJsObject.fields("status_code").convertTo[String] != 200 + /*if (response.asJsObject.fields("status_code").convertTo[String] != 200 || response.asJsObject.fields("content").convertTo[String].isEmpty) { throw new Exception("Fetch access token error") - } + }*/ var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] targetUriHost = s"https://$targetUriHost" response.asJsObject.fields.updated("TargetUriHost", targetUriHost) @@ -117,7 +115,7 @@ object TokenUtils { } catch { case e: Exception => - SynapseMLLogging.logMessage(s"Failed to fetch cluster details: $e") + SynapseMLLogging.logMessage(s"getMWCTok: Failed to 
fetch cluster details: $e. (usage test)") throw e } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index a213a10bb0..421ecb0cae 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -1,3 +1,6 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + package com.microsoft.azure.synapse.ml.logging.Usage object UsageFeatureNames extends Enumeration { type UsageFeatureNames = Value @@ -6,31 +9,31 @@ object UsageFeatureNames extends Enumeration { object FeatureActivityName extends Enumeration { type FeatureActivityName = Value - val API_Transform = Value(1) - val API_SQL = Value(2) - val API_UDF = Value(3) + val API0Transform = Value(1) + val API0SQL = Value(2) + val API0UDF = Value(3) } object FabricConstants { - val ML_KUSTO_TdABLE_NAME = "SynapseMLLogs" - val EMIT_USAGE = "emit_usage" + val MlKustoTableName = "SynapseMLLogs" + val EmitUsage = "EmitUsage" - val CONTEXT_FILE_PATH = "/home/trusted-service-user/.trident-context" - val TOKEN_SERVICE_FILE_PATH = "/opt/token-service/tokenservice.config.json" + val ContextFilePath = "/home/trusted-service-user/.trident-context" + val TokenServiceFilePath = "/opt/token-service/tokenservice.config.json" - val SYNAPSE_TOKEN_SERVICE_ENDPOINT = "synapse.tokenServiceEndpoint" - val SYNAPSE_CLUSTER_IDENTIFIER = "synapse.clusterIdentifier" - val SYNAPSE_CLUSTER_TYPE = "synapse.clusterType" - val TRIDENT_LAKEHOUSE_TOKEN_SERVICE_ENDPOINT = "trident.lakehouse.tokenservice.endpoint" - val TRIDENT_SESSION_TOKEN = "trident.session.token" - val WEB_API = "webapi" - val CAPACITIES = "capacities" + val SynapseTokenServiceEndpoint = "synapse.tokenServiceEndpoint" + val 
SynapseClusterIdentifier = "synapse.clusterIdentifier" + val SynapseClusterType = "synapse.clusterType" + val TridentLakehouseTokenServiceEndpoint = "trident.lakehouse.tokenservice.endpoint" + val TridentSessionToken = "trident.session.token" + val WebApi = "webapi" + val Capacities = "Capacities" val WORKLOADS = "workloads" - val WORKSPACE_ID = "workspaceid" + val WorkspaceID = "workspaceid" - val WORKLOAD_ENDPOINT_ML = "ML" - val WORKLOAD_ENDPOINT_LLM_PLUGIN = "LlmPlugin" - val WORKLOAD_ENDPOINT_AUTOMATIC = "Automatic" - val WORKLOAD_ENDPOINT_REGISTRY = "Registry" - val WORKLOAD_ENDPOINT_ADMIN = "MLAdmin" -} \ No newline at end of file + val WorkloadEndpointMl = "ML" + val WorkloadEndpointLlmPlugin = "LlmPlugin" + val WorkloadEndpointAutomatic = "Automatic" + val WorkloadEndpointRegistry = "Registry" + val WorkloadEndpointAdmin = "MLAdmin" +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index c8e6fcdbf1..d8bd597756 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -1,59 +1,67 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import java.util.UUID +import java.time.Instant import org.apache.spark.sql.SparkSession import spray.json._ import spray.json.DefaultJsonProtocol.StringJsonFormat +import spray.json.DefaultJsonProtocol._ import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken -import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils -import com.microsoft.azure.synapse.ml.logging.Usage.MwcToken -import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload + + +import scala.util.parsing.json.JSON object UsageTelemetry { - val sc = SparkSession.builder().getOrCreate().sparkContext + val SC = SparkSession.builder().getOrCreate().sparkContext val CapacityId = getHadoopConfig("trident.capacity.id") val WorkspaceId = getHadoopConfig("trident.artifact.workspace.id") val ArtifactId = getHadoopConfig("trident.artifact.id") val OnelakeEndpoint = getHadoopConfig("trident.onelake.endpoint") - val Region = sc.getConf.get("spark.cluster.region", "") - val PbiEnv = sc.getConf.get("spark.trident.pbienv", "").toLowerCase() + val Region = SC.getConf.get("spark.cluster.region", "") + val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() val SharedHost = getMlflowSharedHost(PbiEnv) - val shared_endpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" - val wlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) + val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" + val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" def reportUsage(payload: FeatureUsagePayload): Unit = { - if (sys.env.getOrElse(EMIT_USAGE, "True") != "True") 
{ - return - } - try { - reportUsageTelemetry(payload.feature_name.toString, payload.activity_name.toString.replace('_', '/'), payload.attributes.toMap) - } catch { - case runtimeError: Exception => - SynapseMLLogging.logMessage(s"_report_usage_telemetry: usage telemetry error = $runtimeError") + if (sys.env.getOrElse(EmitUsage, "True") == "True") { + try { + reportUsageTelemetry(payload.feature_name.toString, + payload.activity_name.toString.replace('_', '/'), + payload.attributes) + } catch { + case runtimeError: Exception => + SynapseMLLogging.logMessage(s"UsageTelemetry::reportUsage: Hit issue emitting usage telemetry." + + s" Exception = $runtimeError. (usage test)") + } } } def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { - SynapseMLLogging.logMessage(s"usage telemetry feature_name: $featureName, activity_name: $activityName, attributes: $attributes") + SynapseMLLogging.logMessage(s"reportUsageTelemetry: feature_name: $featureName, " + + s"activity_name: $activityName, attributes: $attributes") if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false") == "false") { + val attributesJson = attributes.toJson.compactPrint + SynapseMLLogging.logMessage(s"reportUsageTelemetry: attributesJson = $attributesJson") val data = s"""{ - |"timestamp":${System.currentTimeMillis()}, + |"timestamp":${Instant.now().getEpochSecond}, |"feature_name":"$featureName", - |"activity_name":"${activityName.replace('.', '-')}", - |"attributes":${attributes.map { case (k, v) => s""""$k":"$v"""" }.mkString("{", ",", "}")}, - |"session_id":"${UUID.randomUUID().toString}" + |"activity_name":"${activityName.replace('0', '/')}", + |"attributes":$attributesJson |}""".stripMargin - val mlAdminEndpoint = getMLWorkloadEndpoint(WORKLOAD_ENDPOINT_ADMIN) - val url = """{$mlAdminEndpoint}telemetry""".stripMargin - + val mlAdminEndpoint = getMLWorkloadEndpoint(WorkloadEndpointAdmin) + val url = "https://" + mlAdminEndpoint + 
"telemetry" val driverAADToken = getAccessToken() val headers = Map( @@ -62,26 +70,27 @@ object UsageTelemetry { "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) - var response: JsValue = JsonParser("") + var response: JsValue = JsonParser("{}") try { - response = usagePost(url, "", headers) - if (response.asJsObject.fields("status_code").convertTo[String] != 200 + response = usagePost(url, data, headers) + /*if (response.asJsObject.fields("status_code").convertTo[String] != 200 || response.asJsObject.fields("content").toString().isEmpty) { throw new Exception("Fetch access token error") - } + }*/ } catch { case e: Exception => - SynapseMLLogging.logMessage(s"sending $e") + SynapseMLLogging.logMessage(s"reportUsageTelemetry: Hit an emitting usage data. " + + s"Exception = $e. (usage test)") } - response.asJsObject.fields("content").toString().getBytes("UTF-8") + //response.asJsObject.fields("content").toString().getBytes("UTF-8") } } def getHadoopConfig(key: String): String = { - if (sc == null) { + if (SC == null) { "" } else { - val value = sc.hadoopConfiguration.get(key, "") + val value = SC.hadoopConfiguration.get(key, "") if (value.isEmpty) { throw new Exception(s"missing $key in hadoop config, mlflow failed to init") } @@ -89,51 +98,52 @@ object UsageTelemetry { } } - val PbiGlobalServiceEndpoints = Map ( - "public" -> "https://api.powerbi.com/", - "fairfax" -> "https://api.powerbigov.us", - "mooncake" -> "https://api.powerbi.cn", - "blackforest" -> "https://app.powerbi.de", - "msit" -> "https://api.powerbi.com/", - "prod"-> "https://api.powerbi.com/", - "int3"-> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dxt" -> "https://powerbistagingapi.analysis.windows.net/", - "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dev" -> "https://onebox-redirect.analysis.windows-int.net/", - "console" -> "http://localhost:5001/", - "daily"-> "https://dailyapi.powerbi.com/") - - - val DefaultGlobalServiceEndpoint: 
String = "https://api.powerbi.com/" - val FetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" def getMlflowSharedHost(pbienv: String): String = { - val url = PbiGlobalServiceEndpoints.getOrElse(pbienv, DefaultGlobalServiceEndpoint) + FetchClusterDetailUri - //val sessionToken = FabricUtils.getFabricContext()(TRIDENT_SESSION_TOKEN) - //todo: check if we need pbi token - + val pbiGlobalServiceEndpoints = Map( + "public" -> "https://api.powerbi.com/", + "fairfax" -> "https://api.powerbigov.us", + "mooncake" -> "https://api.powerbi.cn", + "blackforest" -> "https://app.powerbi.de", + "msit" -> "https://api.powerbi.com/", + "prod" -> "https://api.powerbi.com/", + "int3" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dxt" -> "https://powerbistagingapi.analysis.windows.net/", + "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dev" -> "https://onebox-redirect.analysis.windows-int.net/", + "console" -> "http://localhost:5001/", + "daily" -> "https://dailyapi.powerbi.com/") + + + val defaultGlobalServiceEndpoint: String = "https://api.powerbi.com/" + val fetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" + + val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + fetchClusterDetailUri + //val sessionToken = FabricUtils.getFabricContext()(TridentSessionToken) val headers = Map( "Authorization" -> s"Bearer ${TokenUtils.getAccessToken()}", "RequestId" -> java.util.UUID.randomUUID().toString ) - var response: JsValue = JsonParser("") + var response: JsValue = JsonParser("{}") try{ response = usageGet(url, headers) } catch { case e: Exception => - SynapseMLLogging.logMessage(s"sending $e") + SynapseMLLogging.logMessage(s"getMlflowSharedHost: Can't get ml flow shared host. Exception = $e. 
(usage test)") } response.asJsObject.fields("clusterUrl").convertTo[String] } - def getMlflowWorkloadHost(pbienv: String, capacityId: String, workspaceId: String, sharedHost: String = ""): String = { + def getMlflowWorkloadHost(pbienv: String, capacityId: String, + workspaceId: String, + sharedHost: String = ""): String = { val clusterUrl = if (sharedHost.isEmpty) { getMlflowSharedHost(pbienv) } else { sharedHost } - val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MWC_WORKLOAD_TYPE_ML) + val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) if (mwcToken != null && mwcToken.TargetUriHost != null) { mwcToken.TargetUriHost } else { @@ -142,8 +152,8 @@ object UsageTelemetry { } def getMLWorkloadEndpoint(endpoint: String): String = { - val ml_workload_endpoint = s"${this.wlHost}/$WEB_API/$CAPACITIES/${this.CapacityId}/$WORKLOADS/" + - s"$WORKLOAD_ENDPOINT_ML/$endpoint/$WORKLOAD_ENDPOINT_AUTOMATIC/${WORKSPACE_ID}/${this.WorkspaceId}/" - ml_workload_endpoint + val mlWorkloadEndpoint = s"${this.WlHost}/$WebApi/$Capacities/${this.CapacityId}/$WORKLOADS/" + + s"$WorkloadEndpointMl/$endpoint/$WorkloadEndpointAutomatic/${WorkspaceID}/${this.WorkspaceId}/" + mlWorkloadEndpoint } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index bdfd09ffa4..05d0d78558 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -1,9 +1,15 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ package com.microsoft.azure.synapse.ml.logging.common import org.apache.http.client.methods.{HttpGet, HttpPost} import org.apache.http.entity.StringEntity import spray.json.{JsArray, JsObject, JsValue, _} import com.microsoft.azure.synapse.ml.io.http.RESTHelpers + + +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging object WebUtils { val Region: String = "eastus" @@ -11,16 +17,29 @@ object WebUtils { def usagePost(url: String, body: String, headerPayload: Map[String, String]): JsValue = { val request = new HttpPost(url) - for ((k,v) <- headerPayload) - request.addHeader(k, v) + try { + for ((k, v) <- headerPayload) + request.addHeader(k, v) + } + catch { + case e: IllegalArgumentException => + SynapseMLLogging.logMessage(s"WebUtils::usagePost: Getting error setting in the request header. Exception = $e") + } request.setEntity(new StringEntity(body)) RESTHelpers.sendAndParseJson(request) } def usageGet(url: String, headerPayload: Map[String, String]): JsValue = { val request = new HttpGet(url) - for ((k, v) <- headerPayload) - request.addHeader(k, v) + try { + for ((k, v) <- headerPayload) + request.addHeader(k, v) + } + catch + { + case e: IllegalArgumentException => + SynapseMLLogging.logMessage(s"WebUtils::usageGet: Getting error setting in the request header. 
Exception = $e") + } RESTHelpers.sendAndParseJson(request) } } From 922774704c66abd986c5cc551b8937c1f6145759 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Tue, 8 Aug 2023 09:34:28 -0700 Subject: [PATCH 04/50] Token expiry check, removing unused imports --- build.sbt | 5 +- .../synapse/ml/logging/Usage/TokenUtils.scala | 70 ++++++++++++++----- .../synapse/ml/logging/Usage/UsageUtils.scala | 7 +- 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/build.sbt b/build.sbt index 1c4b70c5d8..2fa5b5819e 100644 --- a/build.sbt +++ b/build.sbt @@ -36,7 +36,10 @@ val extraDependencies = Seq( // Although breeze 1.2 is already provided by Spark, this is needed for Azure Synapse Spark 3.2 pools. // Otherwise a NoSuchMethodError will be thrown by interpretability code. This problem only happens // to Azure Synapse Spark 3.2 pools. - "org.scalanlp" %% "breeze" % "1.2" + "org.scalanlp" %% "breeze" % "1.2", + "com.typesafe.play" %% "play" % "2.8.8", + "com.pauldijou" %% "jwt-core" % "3.0.0", + "org.json" % "json" % "20210307" ).map(d => d excludeAll (excludes: _*)) val dependencies = coreDependencies ++ extraDependencies diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 09f0a8a2ab..9c3d2b24c0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -3,29 +3,35 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import scala.reflect.runtime.{ universe, currentMirror } +import scala.reflect.runtime.currentMirror import scala.reflect.runtime.universe._ import java.time.Instant import org.apache.spark.SparkContext import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import spray.json.{JsArray, JsObject, JsValue, _} -import spray.json.DefaultJsonProtocol.{IntJsonFormat, StringJsonFormat, 
jsonFormat3} +import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} + import java.util.UUID import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ +import java.util.Date +import pdi.jwt._ +import org.json.JSONObject +import spray.json.RootJsonFormat + +import scala.util.{Failure, Success, Try} + case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { var AADToken: String = "" val MwcWorkloadTypeMl = "ML" def getAccessToken(): String = { - val token = "" - /*if (checkTokenValid(this.AADToken)) + if (checkTokenValid(this.AADToken)) this.AADToken - else {*/ - refreshAccessToken() - this.AADToken - //} + else { + refreshAccessToken() + this.AADToken + } } def getAccessToken(tokenType: String): String = { @@ -50,8 +56,21 @@ object TokenUtils { methodMirror(tokenType).asInstanceOf[String] } - def checkTokenValid(token: String): Boolean = { - if (token == null || token.isEmpty()) { + private def checkTokenValid(token: String): Boolean = { + try{ + val expiryDate: Date = getExpiry(token) + val expiryEpoch = expiryDate.toInstant.getEpochSecond + val now = Instant.now().getEpochSecond + now < expiryEpoch - 60 + } + catch + { + case e: Exception => + SynapseMLLogging.logMessage(s"TokenUtils::checkTokValid: Token {$token} parsing went wrong (usage test). " + + s"Exception = $e") + false + } + /*if (token == null || token.isEmpty()) { false } try { @@ -64,10 +83,10 @@ object TokenUtils { SynapseMLLogging.logMessage(s"TokenUtils::checkTokValid: Token {$token} parsing went wrong (usage test).") false } - } + }*/ } - def refreshAccessToken(): Unit = { + private def refreshAccessToken(): Unit = { try { if (SparkContext.getOrCreate() != null) { val token = getAccessToken("pbi") @@ -77,9 +96,8 @@ object TokenUtils { AADToken = token } } catch { - case e: Exception => { - SynapseMLLogging.logMessage(s"refreshAccessTok: failed to refresh pbi tok. Exception: {e}. 
(usage test)") - } + case e: Exception => + SynapseMLLogging.logMessage(s"refreshAccessTok: failed to refresh pbi tok. Exception: {$e}. (usage test)") } } @@ -102,7 +120,7 @@ object TokenUtils { ) try{ - var response = usagePost(url, payLoad, headers) + val response = usagePost(url, payLoad, headers) /*if (response.asJsObject.fields("status_code").convertTo[String] != 200 || response.asJsObject.fields("content").convertTo[String].isEmpty) { throw new Exception("Fetch access token error") @@ -110,7 +128,9 @@ object TokenUtils { var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] targetUriHost = s"https://$targetUriHost" response.asJsObject.fields.updated("TargetUriHost", targetUriHost) - implicit val mwcTokenFormat = jsonFormat3(MwcToken) + + implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) + //implicit val mwcTokenFormat = jsonFormat3(MwcToken) response.convertTo[MwcToken] } catch { @@ -119,4 +139,18 @@ object TokenUtils { throw e } } + + private def getExpiry(accessToken: String): Date = { + val jwtOptions = new JwtOptions(false, false, false, 0) + val jwtTokenDecoded: Try[(String, String, String)] = Jwt.decodeRawAll(accessToken, jwtOptions) + jwtTokenDecoded match { + case Success((_, payload, _)) => + val jsonPayload: JSONObject = new JSONObject(payload) + val expiry = jsonPayload.get("exp").toString + new Date(expiry.toLong * 1000) + case Failure(t) => + SynapseMLLogging.logMessage(t.getMessage) + throw t + } + } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index d8bd597756..e59e08a59b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -6,16 +6,13 @@ package com.microsoft.azure.synapse.ml.logging.Usage import 
com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken import java.util.UUID import java.time.Instant import org.apache.spark.sql.SparkSession import spray.json._ -import spray.json.DefaultJsonProtocol.StringJsonFormat import spray.json.DefaultJsonProtocol._ -import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken - - -import scala.util.parsing.json.JSON +import spray.json.DefaultJsonProtocol.StringJsonFormat object UsageTelemetry { val SC = SparkSession.builder().getOrCreate().sparkContext From bf4c30188e2c9beaf1757b8ffe871f87898b0c89 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Tue, 8 Aug 2023 21:55:37 -0700 Subject: [PATCH 05/50] Creating JWT Token Parser. Removing dependencies that are no longer needed for token parsing. --- build.sbt | 5 +- .../ml/logging/Usage/FabricTokenParser.scala | 36 +++++++++++++ .../synapse/ml/logging/Usage/TokenUtils.scala | 50 ++++++------------- .../synapse/ml/logging/Usage/UsageUtils.scala | 1 - 4 files changed, 52 insertions(+), 40 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala diff --git a/build.sbt b/build.sbt index 2fa5b5819e..1c4b70c5d8 100644 --- a/build.sbt +++ b/build.sbt @@ -36,10 +36,7 @@ val extraDependencies = Seq( // Although breeze 1.2 is already provided by Spark, this is needed for Azure Synapse Spark 3.2 pools. // Otherwise a NoSuchMethodError will be thrown by interpretability code. This problem only happens // to Azure Synapse Spark 3.2 pools. 
- "org.scalanlp" %% "breeze" % "1.2", - "com.typesafe.play" %% "play" % "2.8.8", - "com.pauldijou" %% "jwt-core" % "3.0.0", - "org.json" % "json" % "20210307" + "org.scalanlp" %% "breeze" % "1.2" ).map(d => d excludeAll (excludes: _*)) val dependencies = coreDependencies ++ extraDependencies diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala new file mode 100644 index 0000000000..a6b9c388a2 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -0,0 +1,36 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.logging.Usage + +import spray.json._ + +class InvalidJwtTokenException(message: String) extends Exception(message) +class JwtTokenExpiryMissingException(message: String) extends Exception(message) +class FabricTokenParser(JWToken: String) { + val tokens = JWToken.split("\\.") + var parsedToken: JsValue = JsObject.empty + + if (tokens.length == 3) { + val payload = tokens(1) + val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") + val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) + val decodedJson = new String(decodedPayload) + parsedToken = decodedJson.parseJson + } + else { + throw new InvalidJwtTokenException(s"Invalid JWT token. Here is the token = {$JWToken}") + println("Invalid JWT token input.") + } + + def getExpiry(): Long ={ + val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } + exp match { + case Some(expValue) => + expValue + case None => + throw new JwtTokenExpiryMissingException(s"JWT token does not have expiration set. 
" + + s"Here is the token = {$JWToken}") + } + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 9c3d2b24c0..f0f4fcb229 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -14,12 +14,13 @@ import java.util.UUID import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ import java.util.Date -import pdi.jwt._ -import org.json.JSONObject import spray.json.RootJsonFormat import scala.util.{Failure, Success, Try} +import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} +import com.microsoft.azure.synapse.ml.logging.Usage.JwtTokenExpiryMissingException + case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { var AADToken: String = "" @@ -56,34 +57,27 @@ object TokenUtils { methodMirror(tokenType).asInstanceOf[String] } - private def checkTokenValid(token: String): Boolean = { + def checkTokenValid(token: String): Boolean = { + if (token == null || token.isEmpty()) { + false + } try{ - val expiryDate: Date = getExpiry(token) - val expiryEpoch = expiryDate.toInstant.getEpochSecond + val tokenParser = new FabricTokenParser(token) + val expiryEpoch = tokenParser.getExpiry() val now = Instant.now().getEpochSecond now < expiryEpoch - 60 } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"TokenUtils::checkTokValid: Token {$token} parsing went wrong (usage test). " + + case e: InvalidJwtTokenException => + SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token used to trigger telemetry " + + s"endpoint is invalid. Exception = $e") + false + case e: JwtTokenExpiryMissingException => + SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token misses expiry. 
" + s"Exception = $e") false } - /*if (token == null || token.isEmpty()) { - false - } - try { - val parsedToken = token.parseJson.asJsObject() - val expTime = parsedToken.fields("exp").convertTo[Int] - val now = Instant.now().getEpochSecond() - now < expTime - 60 - } catch { - case e: Exception => { - SynapseMLLogging.logMessage(s"TokenUtils::checkTokValid: Token {$token} parsing went wrong (usage test).") - false - } - }*/ } private def refreshAccessToken(): Unit = { @@ -139,18 +133,4 @@ object TokenUtils { throw e } } - - private def getExpiry(accessToken: String): Date = { - val jwtOptions = new JwtOptions(false, false, false, 0) - val jwtTokenDecoded: Try[(String, String, String)] = Jwt.decodeRawAll(accessToken, jwtOptions) - jwtTokenDecoded match { - case Success((_, payload, _)) => - val jsonPayload: JSONObject = new JSONObject(payload) - val expiry = jsonPayload.get("exp").toString - new Date(expiry.toLong * 1000) - case Failure(t) => - SynapseMLLogging.logMessage(t.getMessage) - throw t - } - } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index e59e08a59b..cb3c27ede9 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -23,7 +23,6 @@ object UsageTelemetry { val Region = SC.getConf.get("spark.cluster.region", "") val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() - val SharedHost = getMlflowSharedHost(PbiEnv) val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) From 0fb16e6a9f48ba723c4ebc8b9f9f88c19bd253e9 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Wed, 9 Aug 2023 16:50:20 -0700 Subject: [PATCH 06/50] Restoring resthelpers.scala to prior state. 
Adding exception handling and few PR comments. --- .../synapse/ml/io/http/RESTHelpers.scala | 21 ++---------- .../synapse/ml/logging/Usage/TokenUtils.scala | 21 ++++++++---- .../synapse/ml/logging/Usage/UsageUtils.scala | 32 +++++++++++-------- .../synapse/ml/logging/common/WebUtils.scala | 18 +++-------- 4 files changed, 39 insertions(+), 53 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala index 9cbe91895f..eb6ac62fea 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/RESTHelpers.scala @@ -14,8 +14,6 @@ import scala.annotation.tailrec import scala.concurrent.blocking import scala.util.Try -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging - object RESTHelpers { lazy val RequestTimeout = 60000 @@ -55,6 +53,7 @@ object RESTHelpers { backoffs: List[Int] = List(100, 500, 1000), //scalastyle:ignore magic.number expectedCodes: Set[Int] = Set(), close: Boolean = true): CloseableHttpResponse = { + retry(backoffs, { () => val response = Client.execute(request) try { @@ -79,10 +78,6 @@ object RESTHelpers { } } catch { case e: Exception => - println(s"RESTHelpers::safeSend: getting error response parsing." + - s". Exception = $e") - SynapseMLLogging.logMessage(s"RESTHelpers::safeSend: getting error response parsing." + - s". Exception = $e") response.close() throw e } finally { @@ -94,18 +89,7 @@ object RESTHelpers { } def parseResult(result: CloseableHttpResponse): String = { - var res: String = "" - try { - res = IOUtils.toString(result.getEntity.getContent, "utf-8") - } - catch{ - case e: Exception => - println(s"RestHelpers::parseResult: getting exception parsing response." + - s"Exception = $e") - SynapseMLLogging.logMessage(s"RestHelpers::parseResult: getting exception parsing response." 
+ - s"Exception = $e") - } - res + IOUtils.toString(result.getEntity.getContent, "utf-8") } def sendAndParseJson(request: HttpRequestBase, expectedCodes: Set[Int]=Set()): JsValue = { @@ -114,4 +98,5 @@ object RESTHelpers { response.close() output } + } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index f0f4fcb229..1e7d8d6f51 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -14,12 +14,12 @@ import java.util.UUID import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ import java.util.Date -import spray.json.RootJsonFormat +import spray.json.{DeserializationException, RootJsonFormat} import scala.util.{Failure, Success, Try} - import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} import com.microsoft.azure.synapse.ml.logging.Usage.JwtTokenExpiryMissingException +import spray.json.JsonParser.ParsingException case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { @@ -66,9 +66,7 @@ object TokenUtils { val expiryEpoch = tokenParser.getExpiry() val now = Instant.now().getEpochSecond now < expiryEpoch - 60 - } - catch - { + } catch { case e: InvalidJwtTokenException => SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token used to trigger telemetry " + s"endpoint is invalid. 
Exception = $e") @@ -124,12 +122,21 @@ object TokenUtils { response.asJsObject.fields.updated("TargetUriHost", targetUriHost) implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) - //implicit val mwcTokenFormat = jsonFormat3(MwcToken) response.convertTo[MwcToken] } catch { + case e: NoSuchElementException => + SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: Cannot retrieve targetUriHost from MWC Token.") + throw e + case e: DeserializationException => + SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of response is not of type MwcToken.") + throw e + case e: ParsingException => + SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of json response is formed correctly.") + throw e case e: Exception => - SynapseMLLogging.logMessage(s"getMWCTok: Failed to fetch cluster details: $e. (usage test)") + SynapseMLLogging.logMessage(s"getMWCTok: Failed to fetch MWC token that is required to " + + s"get cluster details: $e.") throw e } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index cb3c27ede9..fade818916 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -57,6 +57,8 @@ object UsageTelemetry { |}""".stripMargin val mlAdminEndpoint = getMLWorkloadEndpoint(WorkloadEndpointAdmin) + + // Add the protocol and the route for the certified event telemetry endpoint val url = "https://" + mlAdminEndpoint + "telemetry" val driverAADToken = getAccessToken() @@ -75,10 +77,9 @@ object UsageTelemetry { }*/ } catch { case e: Exception => - SynapseMLLogging.logMessage(s"reportUsageTelemetry: Hit an emitting usage data. " + + SynapseMLLogging.logMessage(s"UsageUtils.reportUsageTelemetry: Error occurred while emitting usage data. " + s"Exception = $e. 
(usage test)") } - //response.asJsObject.fields("content").toString().getBytes("UTF-8") } } @@ -88,7 +89,7 @@ object UsageTelemetry { } else { val value = SC.hadoopConfiguration.get(key, "") if (value.isEmpty) { - throw new Exception(s"missing $key in hadoop config, mlflow failed to init") + SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") } value } @@ -119,16 +120,14 @@ object UsageTelemetry { "Authorization" -> s"Bearer ${TokenUtils.getAccessToken()}", "RequestId" -> java.util.UUID.randomUUID().toString ) - var response: JsValue = JsonParser("{}") try{ - response = usageGet(url, headers) - } - catch - { + val response: JsValue = usageGet(url, headers) + response.asJsObject.fields("clusterUrl").convertTo[String] + } catch { case e: Exception => SynapseMLLogging.logMessage(s"getMlflowSharedHost: Can't get ml flow shared host. Exception = $e. (usage test)") + "" } - response.asJsObject.fields("clusterUrl").convertTo[String] } def getMlflowWorkloadHost(pbienv: String, capacityId: String, @@ -139,11 +138,16 @@ object UsageTelemetry { } else { sharedHost } - val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) - if (mwcToken != null && mwcToken.TargetUriHost != null) { - mwcToken.TargetUriHost - } else { - "" + try { + val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) + if (mwcToken != null && mwcToken.TargetUriHost != null) { + mwcToken.TargetUriHost + } else { + "" + } + } catch { + case ex: Exception => + "" } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 05d0d78558..60048958a5 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ 
-11,20 +11,12 @@ import com.microsoft.azure.synapse.ml.io.http.RESTHelpers import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging object WebUtils { - - val Region: String = "eastus" - val BaseURL: String = s"https://$Region.azuredatabricks.net/api/2.0/" - def usagePost(url: String, body: String, headerPayload: Map[String, String]): JsValue = { val request = new HttpPost(url) - try { - for ((k, v) <- headerPayload) + + for ((k, v) <- headerPayload) request.addHeader(k, v) - } - catch { - case e: IllegalArgumentException => - SynapseMLLogging.logMessage(s"WebUtils::usagePost: Getting error setting in the request header. Exception = $e") - } + request.setEntity(new StringEntity(body)) RESTHelpers.sendAndParseJson(request) } @@ -34,9 +26,7 @@ object WebUtils { try { for ((k, v) <- headerPayload) request.addHeader(k, v) - } - catch - { + } catch { case e: IllegalArgumentException => SynapseMLLogging.logMessage(s"WebUtils::usageGet: Getting error setting in the request header. Exception = $e") } From b1e6ba3afc0559d684beab75fc4f6de68c47edf0 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Thu, 10 Aug 2023 12:18:05 -0700 Subject: [PATCH 07/50] 1) Restricting access level to class properties, and functions. 2) Cleaning unused imports. 3) Closing unused resources like file handler, etc. 4) And fixing few scala style checks like calling convention for 0 parameter func, etc. 
--- .../ml/logging/Usage/FabricTokenParser.scala | 8 +-- .../Usage/FabricTokenServiceClient.scala | 23 ++++----- .../ml/logging/Usage/FabricUtils.scala | 51 +++++++++++-------- .../logging/Usage/FeatureUsagePayload.scala | 2 - .../synapse/ml/logging/Usage/TokenUtils.scala | 15 ++---- .../synapse/ml/logging/Usage/UsageUtils.scala | 24 ++++----- 6 files changed, 60 insertions(+), 63 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala index a6b9c388a2..2912949ca0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -8,11 +8,13 @@ import spray.json._ class InvalidJwtTokenException(message: String) extends Exception(message) class JwtTokenExpiryMissingException(message: String) extends Exception(message) class FabricTokenParser(JWToken: String) { - val tokens = JWToken.split("\\.") - var parsedToken: JsValue = JsObject.empty + val tokens: Array[String] = JWToken.split("\\.") + private var parsedToken: JsValue = JsObject.empty if (tokens.length == 3) { + // Getting the JWT payload which is second member of [header].[payload].[signature] val payload = tokens(1) + // Removing whitespace and url safe characters encoded that might have been added to token val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) val decodedJson = new String(decodedPayload) @@ -23,7 +25,7 @@ class FabricTokenParser(JWToken: String) { println("Invalid JWT token input.") } - def getExpiry(): Long ={ + def getExpiry: Long ={ val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } exp match { case Some(expValue) => diff 
--git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index 43091a72ed..fee223ac9d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -7,11 +7,10 @@ import java.util.UUID import java.net.URL import java.net.InetAddress import java.lang.management.ManagementFactory -import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ import spray.json.DefaultJsonProtocol.StringJsonFormat import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import spray.json.{JsArray, JsObject, JsValue, _} -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet} +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging class FabricTokenServiceClient { @@ -26,15 +25,15 @@ class FabricTokenServiceClient { "kusto" -> "kusto" ) - val hostname = InetAddress.getLocalHost.getHostName - val processDetail = ManagementFactory.getRuntimeMXBean().getName() - val processName = processDetail.substring(processDetail.indexOf('@') + 1) + private val hostname = InetAddress.getLocalHost.getHostName + private val processDetail = ManagementFactory.getRuntimeMXBean.getName + private val processName = processDetail.substring(processDetail.indexOf('@') + 1) - val fabricConbtext = FabricUtils.getFabricContext() - val synapseTokenserviceEndpoint = fabricConbtext(SynapseTokenServiceEndpoint) - val workloadEndpoint = fabricConbtext(TridentLakehouseTokenServiceEndpoint) - val sessionToken = fabricConbtext(TridentSessionToken) - val clusterIdentifier = fabricConbtext(SynapseClusterIdentifier) + private val fabricContext = FabricUtils.getFabricContext + private val synapseTokenServiceEndpoint: String = 
fabricContext(synapseTokenServiceEndpoint) + private val workloadEndpoint = fabricContext(TridentLakehouseTokenServiceEndpoint) + private val sessionToken = fabricContext(TridentSessionToken) + private val clusterIdentifier = fabricContext(SynapseClusterIdentifier) def getAccessToken(resourceParam: String): String = { if (!resourceMapping.contains(resourceParam)) { @@ -53,7 +52,7 @@ class FabricTokenServiceClient { "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", "x-ms-client-request-id" -> rid ) - var url = s"$synapseTokenserviceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" + var url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" var response: JsValue = JsonParser("") try { response = usageGet(url, headers) @@ -66,6 +65,6 @@ class FabricTokenServiceClient { println(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") } - response.asJsObject.fields("content").toString().getBytes("UTF-8").toString() + response.asJsObject.fields("content").toString().getBytes("UTF-8").toString } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index d29251175b..72b8b1c54f 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -2,10 +2,9 @@ // Licensed under the MIT License. See LICENSE in project root for information. 
package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ + import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import spray.json._ -import spray.json.DefaultJsonProtocol._ import scala.util.matching.Regex import scala.io.Source @@ -21,42 +20,52 @@ object TokenServiceConfigProtocol extends DefaultJsonProtocol { import TokenServiceConfigProtocol._ object FabricUtils { - var TridentContext = Map[String, String]() + private var TridentContext: Map[String, String] = Map[String, String]() - def getFabricContext(): Map[String, String] = { + def getFabricContext: Map[String, String] = { if (TridentContext.nonEmpty) { TridentContext } else { try { - val lines = scala.io.Source.fromFile(FabricConstants.ContextFilePath).getLines().toList - for (line <- lines) { - if (line.split('=').length == 2) { - val Array(k, v) = line.split('=') - TridentContext += (k.trim -> v.trim) + val contextFile = scala.io.Source.fromFile(FabricConstants.ContextFilePath) + try { + val lines = contextFile.getLines().toList + for (line <- lines) { + if (line.split('=').length == 2) { + val Array(k, v) = line.split('=') + TridentContext += (k.trim -> v.trim) + } } } + finally { + contextFile.close() + } - var fileContent: String = Source.fromFile(FabricConstants.TokenServiceFilePath).mkString - fileContent = cleanJson(fileContent) - val tokenServiceConfigJson = fileContent.parseJson + val tokenServiceFile = Source.fromFile(FabricConstants.TokenServiceFilePath) + try { + var fileContent: String = tokenServiceFile.mkString + fileContent = cleanJson(fileContent) + val tokenServiceConfigJson = fileContent.parseJson - // Extract the values from the JSON using Spray JSON's automatic JSON-to-case-class conversion - val tokenServiceConfig = tokenServiceConfigJson.convertTo[TokenServiceConfig] - // Populate the TridentContext map - TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> 
tokenServiceConfig.tokenServiceEndpoint) - TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) - TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) - TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) + val tokenServiceConfig = tokenServiceConfigJson.convertTo[TokenServiceConfig] + TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> tokenServiceConfig.tokenServiceEndpoint) + TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) + TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) + TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) + } + finally { + tokenServiceFile.close() + } + TridentContext } catch { case e: Exception => SynapseMLLogging.logMessage(s"Error reading Fabric context file: $e") throw e } } - TridentContext } - def cleanJson(s: String): String = { + private def cleanJson(s: String): String = { val pattern: Regex = ",[ \t\r\n]+}".r val cleanedJson = pattern.replaceAllIn(s, "}") cleanedJson diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala index 74d8806fac..d992356d4d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -3,8 +3,6 @@ package com.microsoft.azure.synapse.ml.logging.Usage -//import scala.collection.mutable.Map - case class FeatureUsagePayload(feature_name: UsageFeatureNames.Value, activity_name: FeatureActivityName.Value, attributes: Map[String, String] = Map.empty[String, String] ) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 1e7d8d6f51..754b538452 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -9,16 +9,9 @@ import java.time.Instant import org.apache.spark.SparkContext import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} - import java.util.UUID import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ - -import java.util.Date import spray.json.{DeserializationException, RootJsonFormat} - -import scala.util.{Failure, Success, Try} -import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} -import com.microsoft.azure.synapse.ml.logging.Usage.JwtTokenExpiryMissingException import spray.json.JsonParser.ParsingException case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) @@ -26,7 +19,7 @@ object TokenUtils { var AADToken: String = "" val MwcWorkloadTypeMl = "ML" - def getAccessToken(): String = { + def getAccessToken: String = { if (checkTokenValid(this.AADToken)) this.AADToken else { @@ -58,12 +51,12 @@ object TokenUtils { } def checkTokenValid(token: String): Boolean = { - if (token == null || token.isEmpty()) { + if (token == null || token.isEmpty) { false } try{ val tokenParser = new FabricTokenParser(token) - val expiryEpoch = tokenParser.getExpiry() + val expiryEpoch = tokenParser.getExpiry val now = Instant.now().getEpochSecond now < expiryEpoch - 60 } catch { @@ -103,7 +96,7 @@ object TokenUtils { |"workloadType": "$workload_type" }""".stripMargin - val driverAADToken = getAccessToken() + val driverAADToken = getAccessToken val headers = Map( "Content-Type" -> "application/json", diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index fade818916..3340eeb41b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -15,19 +15,16 @@ import spray.json.DefaultJsonProtocol._ import spray.json.DefaultJsonProtocol.StringJsonFormat object UsageTelemetry { - val SC = SparkSession.builder().getOrCreate().sparkContext - val CapacityId = getHadoopConfig("trident.capacity.id") - val WorkspaceId = getHadoopConfig("trident.artifact.workspace.id") - val ArtifactId = getHadoopConfig("trident.artifact.id") - val OnelakeEndpoint = getHadoopConfig("trident.onelake.endpoint") - val Region = SC.getConf.get("spark.cluster.region", "") - val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() - - val SharedHost = getMlflowSharedHost(PbiEnv) + private val SC = SparkSession.builder().getOrCreate().sparkContext + private val CapacityId = getHadoopConfig("trident.capacity.id") + val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id") + private val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() + + private val SharedHost = getMlflowSharedHost(PbiEnv) val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" - val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) + private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) - val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" + private val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" def reportUsage(payload: FeatureUsagePayload): Unit = { if (sys.env.getOrElse(EmitUsage, "True") == "True") { try { @@ -60,7 +57,7 @@ object UsageTelemetry { // Add the protocol and the route for the certified event telemetry endpoint val url = "https://" + mlAdminEndpoint + "telemetry" - val driverAADToken = 
getAccessToken() + val driverAADToken = getAccessToken val headers = Map( "Content-Type" -> "application/json", @@ -115,9 +112,8 @@ object UsageTelemetry { val fetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + fetchClusterDetailUri - //val sessionToken = FabricUtils.getFabricContext()(TridentSessionToken) val headers = Map( - "Authorization" -> s"Bearer ${TokenUtils.getAccessToken()}", + "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", "RequestId" -> java.util.UUID.randomUUID().toString ) try{ From bb093cc5fd52be7d97fd460a04880e9c681e7e4a Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 11 Aug 2023 18:21:39 -0700 Subject: [PATCH 08/50] Refactoring to support single responsibility as much as possible and adding tests --- .../ml/logging/Usage/HostEndpointUtils.scala | 73 ++++++++++++++++ .../ml/logging/Usage/UsageConstants.scala | 2 +- .../synapse/ml/logging/Usage/UsageUtils.scala | 87 ++----------------- .../ml/logging/common/SparkHadoopUtils.scala | 23 +++++ .../synapse/ml/logging/common/WebUtils.scala | 3 +- .../ml/logging/FabricTokenParserTests.scala | 56 ++++++++++++ .../ml/logging/HostEndpointUtilsTests.scala | 22 +++++ .../ml/logging/SparkHadoopUtilsTests.scala | 22 +++++ .../ml/logging/resources/UsageTestData.json | 8 ++ 9 files changed, 213 insertions(+), 83 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala create mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala create mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala create mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala create mode 100644 
core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala new file mode 100644 index 0000000000..dc36c57873 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -0,0 +1,73 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.logging.Usage + +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WORKLOADS, WorkloadEndpointAutomatic} +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{ WorkloadEndpointMl, WorkspaceID, WebApi} +import spray.json.DefaultJsonProtocol.StringJsonFormat +import spray.json.JsValue + +object HostEndpointUtils { + def getMlflowSharedHost(pbienv: String): String = { + val pbiGlobalServiceEndpoints = Map( + "public" -> "https://api.powerbi.com/", + "fairfax" -> "https://api.powerbigov.us", + "mooncake" -> "https://api.powerbi.cn", + "blackforest" -> "https://app.powerbi.de", + "msit" -> "https://api.powerbi.com/", + "prod" -> "https://api.powerbi.com/", + "int3" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dxt" -> "https://powerbistagingapi.analysis.windows.net/", + "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dev" -> "https://onebox-redirect.analysis.windows-int.net/", + "console" -> "http://localhost:5001/", + "daily" -> "https://dailyapi.powerbi.com/") + + val defaultGlobalServiceEndpoint: String = "https://api.powerbi.com/" + val fetchClusterDetailUri: String = 
"powerbi/globalservice/v201606/clusterDetails" + + val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + fetchClusterDetailUri + val headers = Map( + "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", + "RequestId" -> java.util.UUID.randomUUID().toString + ) + try { + val response: JsValue = usageGet(url, headers) + response.asJsObject.fields("clusterUrl").convertTo[String] + } catch { + case e: Exception => + SynapseMLLogging.logMessage(s"getMlflowSharedHost: Can't get ml flow shared host. Exception = $e. (usage test)") + "" + } + } + + def getMlflowWorkloadHost(pbienv: String, capacityId: String, + workspaceId: String, + sharedHost: String = ""): String = { + val clusterUrl = if (sharedHost.isEmpty) { + getMlflowSharedHost(pbienv) + } else { + sharedHost + } + try { + val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) + if (mwcToken != null && mwcToken.TargetUriHost != null) { + mwcToken.TargetUriHost + } else { + "" + } + } catch { + case ex: Exception => + "" + } + } + + def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceID: String): String = { + val mlWorkloadEndpoint = s"$wlHost/$WebApi/$Capacities/$capacityId/$WORKLOADS/" + + s"$WorkloadEndpointMl/$endpoint/$WorkloadEndpointAutomatic/${WorkspaceID}/$workspaceID/" + mlWorkloadEndpoint + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index 421ecb0cae..f839c73ccf 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -35,5 +35,5 @@ object FabricConstants { val WorkloadEndpointLlmPlugin = "LlmPlugin" val WorkloadEndpointAutomatic = "Automatic" val WorkloadEndpointRegistry = "Registry" - val 
WorkloadEndpointAdmin = "MLAdmin" + val WorkloadEndpointAdmin = "MLAdmin" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 3340eeb41b..e6f6bc1a4d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -3,8 +3,9 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopConfig import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken import java.util.UUID @@ -14,10 +15,12 @@ import spray.json._ import spray.json.DefaultJsonProtocol._ import spray.json.DefaultJsonProtocol.StringJsonFormat +import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ + object UsageTelemetry { private val SC = SparkSession.builder().getOrCreate().sparkContext - private val CapacityId = getHadoopConfig("trident.capacity.id") - val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id") + private val CapacityId = getHadoopConfig("trident.capacity.id", SC) + val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id", SC) private val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() private val SharedHost = getMlflowSharedHost(PbiEnv) @@ -40,11 +43,8 @@ object UsageTelemetry { } def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { - SynapseMLLogging.logMessage(s"reportUsageTelemetry: feature_name: $featureName, " + - s"activity_name: 
$activityName, attributes: $attributes") if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false") == "false") { val attributesJson = attributes.toJson.compactPrint - SynapseMLLogging.logMessage(s"reportUsageTelemetry: attributesJson = $attributesJson") val data = s"""{ |"timestamp":${Instant.now().getEpochSecond}, @@ -53,7 +53,7 @@ object UsageTelemetry { |"attributes":$attributesJson |}""".stripMargin - val mlAdminEndpoint = getMLWorkloadEndpoint(WorkloadEndpointAdmin) + val mlAdminEndpoint = getMLWorkloadEndpoint(WlHost, CapacityId, WorkloadEndpointAdmin, WorkspaceId) // Add the protocol and the route for the certified event telemetry endpoint val url = "https://" + mlAdminEndpoint + "telemetry" @@ -79,77 +79,4 @@ object UsageTelemetry { } } } - - def getHadoopConfig(key: String): String = { - if (SC == null) { - "" - } else { - val value = SC.hadoopConfiguration.get(key, "") - if (value.isEmpty) { - SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") - } - value - } - } - - def getMlflowSharedHost(pbienv: String): String = { - val pbiGlobalServiceEndpoints = Map( - "public" -> "https://api.powerbi.com/", - "fairfax" -> "https://api.powerbigov.us", - "mooncake" -> "https://api.powerbi.cn", - "blackforest" -> "https://app.powerbi.de", - "msit" -> "https://api.powerbi.com/", - "prod" -> "https://api.powerbi.com/", - "int3" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dxt" -> "https://powerbistagingapi.analysis.windows.net/", - "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dev" -> "https://onebox-redirect.analysis.windows-int.net/", - "console" -> "http://localhost:5001/", - "daily" -> "https://dailyapi.powerbi.com/") - - - val defaultGlobalServiceEndpoint: String = "https://api.powerbi.com/" - val fetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" - - val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + 
fetchClusterDetailUri - val headers = Map( - "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", - "RequestId" -> java.util.UUID.randomUUID().toString - ) - try{ - val response: JsValue = usageGet(url, headers) - response.asJsObject.fields("clusterUrl").convertTo[String] - } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"getMlflowSharedHost: Can't get ml flow shared host. Exception = $e. (usage test)") - "" - } - } - - def getMlflowWorkloadHost(pbienv: String, capacityId: String, - workspaceId: String, - sharedHost: String = ""): String = { - val clusterUrl = if (sharedHost.isEmpty) { - getMlflowSharedHost(pbienv) - } else { - sharedHost - } - try { - val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) - if (mwcToken != null && mwcToken.TargetUriHost != null) { - mwcToken.TargetUriHost - } else { - "" - } - } catch { - case ex: Exception => - "" - } - } - - def getMLWorkloadEndpoint(endpoint: String): String = { - val mlWorkloadEndpoint = s"${this.WlHost}/$WebApi/$Capacities/${this.CapacityId}/$WORKLOADS/" + - s"$WorkloadEndpointMl/$endpoint/$WorkloadEndpointAutomatic/${WorkspaceID}/${this.WorkspaceId}/" - mlWorkloadEndpoint - } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala new file mode 100644 index 0000000000..1b384c69c9 --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala @@ -0,0 +1,23 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ +package com.microsoft.azure.synapse.ml.logging.common + +import org.apache.spark.SparkContext + +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging + +object SparkHadoopUtils { + + def getHadoopConfig(key: String, SC: SparkContext): String = { + if (SC == null) { + "" + } else { + val value = SC.hadoopConfiguration.get(key, "") + if (value.isEmpty) { + SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") + } + value + } + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 60048958a5..3eb51fb97e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -7,9 +7,8 @@ import org.apache.http.client.methods.{HttpGet, HttpPost} import org.apache.http.entity.StringEntity import spray.json.{JsArray, JsObject, JsValue, _} import com.microsoft.azure.synapse.ml.io.http.RESTHelpers - - import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging + object WebUtils { def usagePost(url: String, body: String, headerPayload: Map[String, String]): JsValue = { val request = new HttpPost(url) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala new file mode 100644 index 0000000000..5b31a70aca --- /dev/null +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala @@ -0,0 +1,56 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ +package com.microsoft.azure.synapse.ml.logging + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} + +import scala.io.Source +import spray.json._ + +class FabricTokenParserTests extends TestBase { + + case class Token(name: String, payload: String) + + object TokenJsonProtocol extends DefaultJsonProtocol { + implicit val TokenFormat: RootJsonFormat[Token] = jsonFormat2(Token) + } + + import TokenJsonProtocol._ + + test("JWT Token Expiry Check"){ + val source = Source.fromFile("../resources/UsageTestData.json") + try { + val jsonString = source.mkString + val parsedTokens = jsonString.parseJson + val tokens = parsedTokens.convertTo[Seq[Token]] + val token = tokens(0) + val fabricTokenParser = new FabricTokenParser(token.payload) + val exp: Long = fabricTokenParser.getExpiry + assert(exp > 0L) + } finally { + source.close() + } + } + + test("Invalid JWT Token Check."){ + val source = Source.fromFile("../resources/UsageTestData.json") + try { + val jsonString = source.mkString + val parsedTokens = jsonString.parseJson + val tokens = parsedTokens.convertTo[Seq[Token]] + val token = tokens(1) + + var exceptionThrown = false + try { + val fabricTokenParser = new FabricTokenParser(token.payload) + } catch { + case _: InvalidJwtTokenException => exceptionThrown = true + } + assert(exceptionThrown, "InvalidJwtTokenException was thrown.") + } finally { + source.close() + } + } +} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala new file mode 100644 index 0000000000..373e6b951b --- /dev/null +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala @@ -0,0 +1,22 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.logging + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WorkloadEndpointAdmin} +import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ + +class UsageUtilsTests extends TestBase { + + val target = "f32fae846ed04406944c01e26087aa9b.pbidedicated.windows-int.net/webapi/Capacities/" + + "f32fae84-6ed0-4406-944c-01e26087aa9b/workloads/ML/MLAdmin/Automatic/" + + "workspaceid/c1aaa432-2b6e-4325-acca-1aac063d9a6e/" + val capacityId = "f32fae84-6ed0-4406-944c-01e26087aa9b" + val wlHost = "f32fae846ed04406944c01e26087aa9b.pbidedicated.windows-int.net" + val workspaceId = "c1aaa432-2b6e-4325-acca-1aac063d9a6e" + test("ML Workload Endpoint Check"){ + val url = getMLWorkloadEndpoint(this.wlHost, this.capacityId, WorkloadEndpointAdmin, this.workspaceId) + assert(url == target) + } +} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala new file mode 100644 index 0000000000..fbad1d25dc --- /dev/null +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala @@ -0,0 +1,22 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ +package com.microsoft.azure.synapse.ml.logging + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopConfig + +class SparkHadoopUtilsTests extends TestBase { + test("Hadoop Configuration Check (capacity id, and workspace id)"){ + sc.hadoopConfiguration.set("trident.capacity.id", "f32fae84-6ed0-4406-944c-01e26087aa9b") + val capacityId = getHadoopConfig("trident.capacity.id", sc) + val splittedCapacityId: Array[String] = capacityId.split("-") + + assert(splittedCapacityId.length == 4) + assert(splittedCapacityId(0).length == 8) + assert(splittedCapacityId(1).length == 4) + assert(splittedCapacityId(2).length == 4) + assert(splittedCapacityId(3).length == 4) + assert(splittedCapacityId(4).length == 12) + } +} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json new file mode 100644 index 0000000000..2236ced1f3 --- /dev/null +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json @@ -0,0 +1,8 @@ +[ + { + "ValidJWT": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9.OhNT7-dNRzH7JJpPtfs8f-TX1JmBYNrlePHOpQXVJq9U2MiFBougw8ea-DjRaE3lAIaBqYr9aKKlXdH_fnFF_d-OZ3REmFNPLASMKR-n8mUOQWBXwp8eaeL2A8IAD5GJkjjeu9VeTWr5ApI_AnE-gPfJbwQKpxbkSJbTcRQfVKGbaeOH_9iw5rrndUDZNURZeSW4UfS0X3SjBKQSE-J69c0yPinhZPwEQEpSvEbMIWmpHItKCew5URCuEj3cv6DU6lVEYeGlfzgJ5BTjhlW6imxOV2Ed-78pYZNmr-LcfSExa6CPtuQhYFdYCOuMVDgP53z1_8RTTy_bxb-KxfC4VQ" + }, + { + "InvalidJWT": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9" + } +] From 8a1539b8eb41d5587d699bbb9e2f661b8a1fbdee Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 14 Aug 2023 14:21:19 -0700 Subject: [PATCH 09/50] Checking an empty http response content, before parsing --- .../synapse/ml/logging/common/WebUtils.scala | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 3eb51fb97e..d029701a74 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -3,7 +3,7 @@ package com.microsoft.azure.synapse.ml.logging.common -import org.apache.http.client.methods.{HttpGet, HttpPost} +import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity import spray.json.{JsArray, JsObject, JsValue, _} import com.microsoft.azure.synapse.ml.io.http.RESTHelpers @@ -17,7 +17,9 @@ object WebUtils { request.addHeader(k, v) request.setEntity(new StringEntity(body)) - RESTHelpers.sendAndParseJson(request) + + val response = RESTHelpers.safeSend(request) + parseResponse(response) } def usageGet(url: String, headerPayload: Map[String, String]): JsValue = { @@ -29,6 +31,18 @@ object WebUtils { case e: IllegalArgumentException => SynapseMLLogging.logMessage(s"WebUtils::usageGet: Getting error setting in the request header. Exception = $e") } - RESTHelpers.sendAndParseJson(request) + val response = RESTHelpers.safeSend(request) + parseResponse(response) + } + + private def parseResponse(response: CloseableHttpResponse): JsValue = { + if (response.getEntity.getContent.available() == 0) { + JsObject() + } + else { + val output = RESTHelpers.parseResult(response).parseJson + response.close() + output + } } } From 070ccd25ac886c93bce345c1bbe9f5e5b3488e10 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Wed, 23 Aug 2023 13:41:50 -0700 Subject: [PATCH 10/50] Fixing the early http client termination. At this point we are successfully emitting telemetry. 
--- .../Usage/FabricTokenServiceClient.scala | 1 - .../ml/logging/Usage/HostEndpointUtils.scala | 12 ++++++---- .../synapse/ml/logging/common/WebUtils.scala | 23 ++++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index fee223ac9d..65b4445f18 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -62,7 +62,6 @@ class FabricTokenServiceClient { } } catch { case e: Exception => - println(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") } response.asJsObject.fields("content").toString().getBytes("UTF-8").toString diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index dc36c57873..2c3a0e0167 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -6,9 +6,9 @@ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WORKLOADS, WorkloadEndpointAutomatic} -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{ WorkloadEndpointMl, WorkspaceID, WebApi} +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, 
WorkspaceID} import spray.json.DefaultJsonProtocol.StringJsonFormat -import spray.json.JsValue +import spray.json.{JsValue, JsonParser} object HostEndpointUtils { def getMlflowSharedHost(pbienv: String): String = { @@ -34,14 +34,16 @@ object HostEndpointUtils { "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", "RequestId" -> java.util.UUID.randomUUID().toString ) + var response: JsValue = JsonParser("{}") try { - val response: JsValue = usageGet(url, headers) - response.asJsObject.fields("clusterUrl").convertTo[String] + response = usageGet(url, headers) } catch { case e: Exception => - SynapseMLLogging.logMessage(s"getMlflowSharedHost: Can't get ml flow shared host. Exception = $e. (usage test)") + SynapseMLLogging.logMessage(s"HostEndpointUtils.getMlflowSharedHost: " + + s"Can't get ml flow shared host. Exception = $e. (usage test)") "" } + response.asJsObject.fields("clusterUrl").convertTo[String] } def getMlflowWorkloadHost(pbienv: String, capacityId: String, diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index d029701a74..c95a197aae 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -3,6 +3,7 @@ package com.microsoft.azure.synapse.ml.logging.common +import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity import spray.json.{JsArray, JsObject, JsValue, _} @@ -18,7 +19,7 @@ object WebUtils { request.setEntity(new StringEntity(body)) - val response = RESTHelpers.safeSend(request) + val response = RESTHelpers.safeSend(request, close = false) parseResponse(response) } @@ -31,18 +32,24 @@ object WebUtils { case e: IllegalArgumentException => SynapseMLLogging.logMessage(s"WebUtils::usageGet: 
Getting error setting in the request header. Exception = $e") } - val response = RESTHelpers.safeSend(request) + val response = RESTHelpers.safeSend(request, close = false) parseResponse(response) } private def parseResponse(response: CloseableHttpResponse): JsValue = { - if (response.getEntity.getContent.available() == 0) { - JsObject() + var content: String = "" + try { + content = IOUtils.toString(response.getEntity.getContent, "utf-8") } - else { - val output = RESTHelpers.parseResult(response).parseJson - response.close() - output + catch { + case e: Exception => + SynapseMLLogging.logMessage(s"RestHelpers::parseResult: getting exception parsing response." + + s"Exception = $e") + } + if (content.nonEmpty) { + content.parseJson + } else { + JsObject() } } } From 3df2f45d94003483ccdf17cadbb8a50091757227 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Wed, 23 Aug 2023 22:42:12 -0700 Subject: [PATCH 11/50] Fixing typos --- .../azure/synapse/ml/logging/Usage/TokenUtils.scala | 4 ---- .../azure/synapse/ml/logging/Usage/UsageUtils.scala | 6 +----- .../azure/synapse/ml/logging/common/WebUtils.scala | 4 +++- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 754b538452..79b8832639 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -106,10 +106,6 @@ object TokenUtils { try{ val response = usagePost(url, payLoad, headers) - /*if (response.asJsObject.fields("status_code").convertTo[String] != 200 - || response.asJsObject.fields("content").convertTo[String].isEmpty) { - throw new Exception("Fetch access token error") - }*/ var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] targetUriHost = s"https://$targetUriHost" 
response.asJsObject.fields.updated("TargetUriHost", targetUriHost) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index e6f6bc1a4d..7e11ea3c64 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -32,7 +32,7 @@ object UsageTelemetry { if (sys.env.getOrElse(EmitUsage, "True") == "True") { try { reportUsageTelemetry(payload.feature_name.toString, - payload.activity_name.toString.replace('_', '/'), + payload.activity_name.toString, payload.attributes) } catch { case runtimeError: Exception => @@ -68,10 +68,6 @@ object UsageTelemetry { var response: JsValue = JsonParser("{}") try { response = usagePost(url, data, headers) - /*if (response.asJsObject.fields("status_code").convertTo[String] != 200 - || response.asJsObject.fields("content").toString().isEmpty) { - throw new Exception("Fetch access token error") - }*/ } catch { case e: Exception => SynapseMLLogging.logMessage(s"UsageUtils.reportUsageTelemetry: Error occurred while emitting usage data. " + diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index c95a197aae..9232b8faec 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -33,7 +33,9 @@ object WebUtils { SynapseMLLogging.logMessage(s"WebUtils::usageGet: Getting error setting in the request header. 
Exception = $e") } val response = RESTHelpers.safeSend(request, close = false) - parseResponse(response) + val result = parseResponse(response) + response.close() + result } private def parseResponse(response: CloseableHttpResponse): JsValue = { From 7fad214a11e96deba22f22457aaca563f291fe0e Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Thu, 24 Aug 2023 10:35:55 -0700 Subject: [PATCH 12/50] Fixing test failure. --- core/src/test/resources/UsageTestData.json | 10 ++++++++++ .../synapse/ml/logging/FabricTokenParserTests.scala | 8 +++++--- .../synapse/ml/logging/SparkHadoopUtilsTests.scala | 2 +- .../synapse/ml/logging/resources/UsageTestData.json | 8 -------- 4 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 core/src/test/resources/UsageTestData.json delete mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json diff --git a/core/src/test/resources/UsageTestData.json b/core/src/test/resources/UsageTestData.json new file mode 100644 index 0000000000..2bc1f19b30 --- /dev/null +++ b/core/src/test/resources/UsageTestData.json @@ -0,0 +1,10 @@ +[ + { + "valid": "true", + "payload": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9.OhNT7-dNRzH7JJpPtfs8f-TX1JmBYNrlePHOpQXVJq9U2MiFBougw8ea-DjRaE3lAIaBqYr9aKKlXdH_fnFF_d-OZ3REmFNPLASMKR-n8mUOQWBXwp8eaeL2A8IAD5GJkjjeu9VeTWr5ApI_AnE-gPfJbwQKpxbkSJbTcRQfVKGbaeOH_9iw5rrndUDZNURZeSW4UfS0X3SjBKQSE-J69c0yPinhZPwEQEpSvEbMIWmpHItKCew5URCuEj3cv6DU6lVEYeGlfzgJ5BTjhlW6imxOV2Ed-78pYZNmr-LcfSExa6CPtuQhYFdYCOuMVDgP53z1_8RTTy_bxb-KxfC4VQ" + }, + { + "valid": "false", + "payload": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9" + } +] diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala index 5b31a70aca..7ee7663991 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala @@ -11,7 +11,7 @@ import spray.json._ class FabricTokenParserTests extends TestBase { - case class Token(name: String, payload: String) + case class 
Token(valid: String, payload: String) object TokenJsonProtocol extends DefaultJsonProtocol { implicit val TokenFormat: RootJsonFormat[Token] = jsonFormat2(Token) @@ -20,7 +20,8 @@ class FabricTokenParserTests extends TestBase { import TokenJsonProtocol._ test("JWT Token Expiry Check"){ - val source = Source.fromFile("../resources/UsageTestData.json") + val filePath = getClass.getResource("/UsageTestData.json") + val source = Source.fromURL(filePath) try { val jsonString = source.mkString val parsedTokens = jsonString.parseJson @@ -35,7 +36,8 @@ class FabricTokenParserTests extends TestBase { } test("Invalid JWT Token Check."){ - val source = Source.fromFile("../resources/UsageTestData.json") + val filePath = getClass.getResource("/UsageTestData.json") + val source = Source.fromURL(filePath) try { val jsonString = source.mkString val parsedTokens = jsonString.parseJson diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala index fbad1d25dc..4054437654 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala @@ -12,7 +12,7 @@ class SparkHadoopUtilsTests extends TestBase { val capacityId = getHadoopConfig("trident.capacity.id", sc) val splittedCapacityId: Array[String] = capacityId.split("-") - assert(splittedCapacityId.length == 4) + assert(splittedCapacityId.length == 5) assert(splittedCapacityId(0).length == 8) assert(splittedCapacityId(1).length == 4) assert(splittedCapacityId(2).length == 4) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json deleted file mode 100644 index 2236ced1f3..0000000000 --- 
a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/resources/UsageTestData.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "ValidJWT": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9.OhNT7-dNRzH7JJpPtfs8f-TX1JmBYNrlePHOpQXVJq9U2MiFBougw8ea-DjRaE3lAIaBqYr9aKKlXdH_fnFF_d-OZ3REmFNPLASMKR-n8mUOQWBXwp8eaeL2A8IAD5GJkjjeu9VeTWr5ApI_AnE-gPfJbwQKpxbkSJbTcRQfVKGbaeOH_9iw5rrndUDZNURZeSW4UfS0X3SjBKQSE-J69c0yPinhZPwEQEpSvEbMIWmpHItKCew5URCuEj3cv6DU6lVEYeGlfzgJ5BTjhlW6imxOV2Ed-78pYZNmr-LcfSExa6CPtuQhYFdYCOuMVDgP53z1_8RTTy_bxb-KxfC4VQ" - }, - { - "InvalidJWT": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9" - } -] From f37a89f2ca5308577d98d4e1d3f5a48da71afdf7 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Wed, 6 Sep 2023 15:49:04 -0700 Subject: [PATCH 13/50] At this point addressing just pr comments. 
--- .../ml/logging/Usage/FabricTokenParser.scala | 32 ++++++------- .../Usage/FabricTokenServiceClient.scala | 27 +++++++---- .../ml/logging/Usage/FabricUtils.scala | 46 ++++++++----------- .../logging/Usage/FeatureUsagePayload.scala | 2 +- .../ml/logging/Usage/HostEndpointUtils.scala | 6 +-- .../synapse/ml/logging/Usage/TokenUtils.scala | 2 +- .../ml/logging/Usage/UsageConstants.scala | 4 +- .../ml/logging/common/SparkHadoopUtils.scala | 9 ++-- .../synapse/ml/logging/common/WebUtils.scala | 16 +++---- .../ml/logging/FabricTokenParserTests.scala | 26 ++++------- 10 files changed, 79 insertions(+), 91 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala index 2912949ca0..4f184e3f06 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -9,22 +9,7 @@ class InvalidJwtTokenException(message: String) extends Exception(message) class JwtTokenExpiryMissingException(message: String) extends Exception(message) class FabricTokenParser(JWToken: String) { val tokens: Array[String] = JWToken.split("\\.") - private var parsedToken: JsValue = JsObject.empty - - if (tokens.length == 3) { - // Getting the JWT payload which is second member of [header].[payload].[signature] - val payload = tokens(1) - // Removing whitespace and url safe characters encoded that might have been added to token - val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") - val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) - val decodedJson = new String(decodedPayload) - parsedToken = decodedJson.parseJson - } - else { - throw new InvalidJwtTokenException(s"Invalid JWT token. 
Here is the token = {$JWToken}") - println("Invalid JWT token input.") - } - + private var parsedToken: JsValue = tokenCheckAndDecode(tokens) def getExpiry: Long ={ val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } exp match { @@ -35,4 +20,19 @@ class FabricTokenParser(JWToken: String) { s"Here is the token = {$JWToken}") } } + + private def tokenCheckAndDecode(tokens: Array[String]): JsValue ={ + if (tokens.length == 3) { + // Getting the JWT payload which is second member of [header].[payload].[signature] + val payload = tokens(1) + // Removing whitespace and url safe characters encoded that might have been added to token + val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") + val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) + val decodedJson = new String(decodedPayload) + decodedJson.parseJson + } + else { + throw new InvalidJwtTokenException(s"Invalid JWT token. 
Here is the token = {$JWToken}") + } + } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index 65b4445f18..a9f56d524d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -3,18 +3,20 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import java.util.UUID +import java.io.IOException +import java.lang.management.ManagementFactory import java.net.URL import java.net.InetAddress -import java.lang.management.ManagementFactory +import java.util.UUID import spray.json.DefaultJsonProtocol.StringJsonFormat import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import spray.json.{JsArray, JsObject, JsValue, _} import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import org.apache.hc.client5.http.ClientProtocolException class FabricTokenServiceClient { - val resourceMapping = Map( + private val resourceMapping = Map( "https://storage.azure.com" -> "storage", "storage" -> "storage", "https://analysis.windows.net/powerbi/api" -> "pbi", @@ -41,9 +43,8 @@ class FabricTokenServiceClient { } val resource = resourceMapping.getOrElse(resourceParam, "") val rid = UUID.randomUUID().toString() - //to do workloadEndpoint val targetUrl = new URL(workloadEndpoint) - var headers = Map( + val headers: Map[String, String] = Map( "x-ms-cluster-identifier" -> clusterIdentifier, "x-ms-workload-resource-moniker" -> clusterIdentifier, "Content-Type" -> "application/json;charset=utf-8", @@ -53,17 +54,25 @@ class FabricTokenServiceClient { "x-ms-client-request-id" -> rid ) var url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" - var 
response: JsValue = JsonParser("") try { - response = usageGet(url, headers) + val response: JsValue = usageGet(url, headers) if (response.asJsObject.fields("status_code").convertTo[String] != 200 || response.asJsObject.fields("content").convertTo[String].isEmpty) { throw new Exception("Fetch access token error") } + response.asJsObject.fields("content").toString().getBytes("UTF-8").toString } catch { + case e: IOException => + SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Problems in executing" + + s" http request or the connection might have been aborted. Exception = $e.") + "" + case e: ClientProtocolException => + SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. " + + s"HTTP protocol error. Exception = $e.") + "" case e: Exception => - SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Exception = $e. (usage test)") + SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. 
Exception = $e.") + "" } - response.asJsObject.fields("content").toString().getBytes("UTF-8").toString } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index 72b8b1c54f..1678ae9a24 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -3,6 +3,7 @@ package com.microsoft.azure.synapse.ml.logging.Usage +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import spray.json._ import scala.util.matching.Regex @@ -27,39 +28,30 @@ object FabricUtils { TridentContext } else { try { - val contextFile = scala.io.Source.fromFile(FabricConstants.ContextFilePath) - try { - val lines = contextFile.getLines().toList - for (line <- lines) { - if (line.split('=').length == 2) { - val Array(k, v) = line.split('=') - TridentContext += (k.trim -> v.trim) - } + val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)){ + source => source.getLines().toList + }.get + for (line <- linesContextFile) { + if (line.split('=').length == 2) { + val Array(k, v) = line.split('=') + TridentContext += (k.trim -> v.trim) } } - finally { - contextFile.close() - } - val tokenServiceFile = Source.fromFile(FabricConstants.TokenServiceFilePath) - try { - var fileContent: String = tokenServiceFile.mkString - fileContent = cleanJson(fileContent) - val tokenServiceConfigJson = fileContent.parseJson + val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile( + FabricConstants.TokenServiceFilePath)) { + source => cleanJson(source.mkString).parseJson.convertTo[TokenServiceConfig] + }.get + + TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> tokenServiceConfig.tokenServiceEndpoint) + 
TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) + TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) + TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) - val tokenServiceConfig = tokenServiceConfigJson.convertTo[TokenServiceConfig] - TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> tokenServiceConfig.tokenServiceEndpoint) - TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) - TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) - TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) - } - finally { - tokenServiceFile.close() - } TridentContext } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"Error reading Fabric context file: $e") + case e: NullPointerException => + SynapseMLLogging.logMessage(s"Error reading Fabric context file: Trident context file path is missing. 
$e") throw e } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala index d992356d4d..1edac2b304 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -5,4 +5,4 @@ package com.microsoft.azure.synapse.ml.logging.Usage case class FeatureUsagePayload(feature_name: UsageFeatureNames.Value, activity_name: FeatureActivityName.Value, - attributes: Map[String, String] = Map.empty[String, String] ) + attributes: Map[String, String]) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index 2c3a0e0167..e45a3458bf 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -5,7 +5,7 @@ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WORKLOADS, WorkloadEndpointAutomatic} +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, Workloads, WorkloadEndpointAutomatic} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, WorkspaceID} import spray.json.DefaultJsonProtocol.StringJsonFormat import spray.json.{JsValue, JsonParser} @@ -55,7 +55,7 @@ object HostEndpointUtils { sharedHost } try { - val mwcToken: MwcToken = TokenUtils.getMWCToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) + val mwcToken: MwcToken = 
TokenUtils.getMwcToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) if (mwcToken != null && mwcToken.TargetUriHost != null) { mwcToken.TargetUriHost } else { @@ -68,7 +68,7 @@ object HostEndpointUtils { } def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceID: String): String = { - val mlWorkloadEndpoint = s"$wlHost/$WebApi/$Capacities/$capacityId/$WORKLOADS/" + + val mlWorkloadEndpoint = s"$wlHost/$WebApi/$Capacities/$capacityId/$Workloads/" + s"$WorkloadEndpointMl/$endpoint/$WorkloadEndpointAutomatic/${WorkspaceID}/$workspaceID/" mlWorkloadEndpoint } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 79b8832639..1983a5ba1a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -86,7 +86,7 @@ object TokenUtils { } } - def getMWCToken(shared_host: String, WorkspaceId: String, capacity_id: String, + def getMwcToken(shared_host: String, WorkspaceId: String, capacity_id: String, workload_type: String): MwcToken = { val url: String = shared_host + "/metadata/v201606/generatemwctokenv2" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index f839c73ccf..7c6666884c 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -28,12 +28,12 @@ object FabricConstants { val TridentSessionToken = "trident.session.token" val WebApi = "webapi" val Capacities = "Capacities" - val WORKLOADS = "workloads" + val Workloads = "workloads" val WorkspaceID = "workspaceid" val WorkloadEndpointMl = "ML" 
val WorkloadEndpointLlmPlugin = "LlmPlugin" val WorkloadEndpointAutomatic = "Automatic" val WorkloadEndpointRegistry = "Registry" - val WorkloadEndpointAdmin = "MLAdmin" + val WorkloadEndpointAdmin = "MLAdmin" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala index 1b384c69c9..1fc741bf9a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala @@ -9,15 +9,12 @@ import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging object SparkHadoopUtils { - def getHadoopConfig(key: String, SC: SparkContext): String = { - if (SC == null) { - "" - } else { - val value = SC.hadoopConfiguration.get(key, "") + def getHadoopConfig(key: String, sc: SparkContext): String = { + val value = sc.hadoopConfiguration.get(key, "") if (value.isEmpty) { SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") + throw new IllegalArgumentException(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") } value - } } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 9232b8faec..c628ae2e07 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -11,10 +11,10 @@ import com.microsoft.azure.synapse.ml.io.http.RESTHelpers import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging object WebUtils { - def usagePost(url: String, body: String, headerPayload: Map[String, String]): JsValue = { + def usagePost(url: String, body: String, headers: Map[String, String]): JsValue = { val request = new HttpPost(url) - 
for ((k, v) <- headerPayload) + for ((k, v) <- headers) request.addHeader(k, v) request.setEntity(new StringEntity(body)) @@ -23,15 +23,11 @@ object WebUtils { parseResponse(response) } - def usageGet(url: String, headerPayload: Map[String, String]): JsValue = { + def usageGet(url: String, headers: Map[String, String]): JsValue = { val request = new HttpGet(url) - try { - for ((k, v) <- headerPayload) - request.addHeader(k, v) - } catch { - case e: IllegalArgumentException => - SynapseMLLogging.logMessage(s"WebUtils::usageGet: Getting error setting in the request header. Exception = $e") - } + for ((k, v) <- headers) + request.addHeader(k, v) + val response = RESTHelpers.safeSend(request, close = false) val result = parseResponse(response) response.close() diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala index 7ee7663991..7e112a7647 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala @@ -3,6 +3,7 @@ package com.microsoft.azure.synapse.ml.logging +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities import com.microsoft.azure.synapse.ml.core.test.base.TestBase import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} @@ -37,22 +38,15 @@ class FabricTokenParserTests extends TestBase { test("Invalid JWT Token Check."){ val filePath = getClass.getResource("/UsageTestData.json") - val source = Source.fromURL(filePath) - try { - val jsonString = source.mkString - val parsedTokens = jsonString.parseJson - val tokens = parsedTokens.convertTo[Seq[Token]] - val token = tokens(1) - - var exceptionThrown = false - try { - val fabricTokenParser = new FabricTokenParser(token.payload) - } catch { - case _: InvalidJwtTokenException => exceptionThrown = 
true - } - assert(exceptionThrown, "InvalidJwtTokenException was thrown.") - } finally { - source.close() + val jsonString = StreamUtilities.usingSource(scala.io.Source.fromURL(filePath)) { source => + source.mkString + }.get + val parsedTokens = jsonString.parseJson + val tokens = parsedTokens.convertTo[Seq[Token]] + val token = tokens(1) + + assertThrows[InvalidJwtTokenException]{ + val fabricTokenParser = new FabricTokenParser(token.payload) } } } From a58a659916932758e837615119777a069fd3cb2c Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Thu, 7 Sep 2023 14:15:11 -0700 Subject: [PATCH 14/50] Addressing PR comments --- .../ml/logging/Usage/FabricUtils.scala | 60 +++++++++---------- .../ml/logging/Usage/HostEndpointUtils.scala | 15 ++--- .../synapse/ml/logging/Usage/TokenUtils.scala | 53 ++++++++-------- .../synapse/ml/logging/Usage/UsageUtils.scala | 13 +--- .../ml/logging/SparkHadoopUtilsTests.scala | 14 ++--- 5 files changed, 73 insertions(+), 82 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index 1678ae9a24..a2d538062a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -21,39 +21,37 @@ object TokenServiceConfigProtocol extends DefaultJsonProtocol { import TokenServiceConfigProtocol._ object FabricUtils { - private var TridentContext: Map[String, String] = Map[String, String]() + //private var TridentContext: Map[String, String] = Map[String, String]() def getFabricContext: Map[String, String] = { - if (TridentContext.nonEmpty) { - TridentContext - } else { - try { - val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)){ - source => source.getLines().toList - }.get - for (line <- linesContextFile) { - if 
(line.split('=').length == 2) { - val Array(k, v) = line.split('=') - TridentContext += (k.trim -> v.trim) - } - } - - val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile( - FabricConstants.TokenServiceFilePath)) { - source => cleanJson(source.mkString).parseJson.convertTo[TokenServiceConfig] - }.get - - TridentContext += (FabricConstants.SynapseTokenServiceEndpoint -> tokenServiceConfig.tokenServiceEndpoint) - TridentContext += (FabricConstants.SynapseClusterType -> tokenServiceConfig.clusterType) - TridentContext += (FabricConstants.SynapseClusterIdentifier -> tokenServiceConfig.clusterName) - TridentContext += (FabricConstants.TridentSessionToken -> tokenServiceConfig.sessionToken) - - TridentContext - } catch { - case e: NullPointerException => - SynapseMLLogging.logMessage(s"Error reading Fabric context file: Trident context file path is missing. $e") - throw e - } + try { + val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)) { + source => source.getLines().toList + }.get + + val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile + (FabricConstants.TokenServiceFilePath)) { + source => cleanJson(source.mkString).parseJson.convertTo[TokenServiceConfig] + }.get + + val tridentContext: Map[String, String] = linesContextFile + .filter(line => line.split('=').length == 2) + .map { line => + val Array(k, v) = line.split('=') + (k.trim, v.trim) + }.toMap + .++(Seq( + (FabricConstants.SynapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), + (FabricConstants.SynapseClusterType, tokenServiceConfig.clusterType), + (FabricConstants.SynapseClusterIdentifier, tokenServiceConfig.clusterName), + (FabricConstants.TridentSessionToken, tokenServiceConfig.sessionToken) + ).toMap) + + tridentContext + } catch { + case e: NullPointerException => + SynapseMLLogging.logMessage(s"Error reading Fabric context file: Trident context file path is missing. 
$e") + throw e } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index e45a3458bf..5da99a38aa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -54,15 +54,12 @@ object HostEndpointUtils { } else { sharedHost } - try { - val mwcToken: MwcToken = TokenUtils.getMwcToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) - if (mwcToken != null && mwcToken.TargetUriHost != null) { - mwcToken.TargetUriHost - } else { - "" - } - } catch { - case ex: Exception => + val mwcToken: Option[MwcToken] = TokenUtils.getMwcToken(clusterUrl, + workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) + mwcToken match { + case Some(token) => + token.TargetUriHost + case None => "" } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 1983a5ba1a..e5f9cc683a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -20,7 +20,7 @@ object TokenUtils { val MwcWorkloadTypeMl = "ML" def getAccessToken: String = { - if (checkTokenValid(this.AADToken)) + if (isTokenValid(Option(this.AADToken))) this.AADToken else { refreshAccessToken() @@ -50,24 +50,26 @@ object TokenUtils { methodMirror(tokenType).asInstanceOf[String] } - def checkTokenValid(token: String): Boolean = { - if (token == null || token.isEmpty) { - false - } - try{ - val tokenParser = new FabricTokenParser(token) - val expiryEpoch = tokenParser.getExpiry - val now = Instant.now().getEpochSecond - now < expiryEpoch - 60 - } catch { - case e: InvalidJwtTokenException => 
- SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token used to trigger telemetry " + - s"endpoint is invalid. Exception = $e") - false - case e: JwtTokenExpiryMissingException => - SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token misses expiry. " + - s"Exception = $e") - false + private def isTokenValid(tokenOption: Option[String]): Boolean = { + tokenOption match { + case Some(token) if token.nonEmpty => + try { + val tokenParser = new FabricTokenParser(token) + val expiryEpoch = tokenParser.getExpiry + val now = Instant.now().getEpochSecond + now < expiryEpoch - 60 + } catch { + case e: InvalidJwtTokenException => + SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token used to trigger telemetry " + + s"endpoint is invalid. Exception = $e") + false + case e: JwtTokenExpiryMissingException => + SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token misses expiry. " + + s"Exception = $e") + false + } + case _ => + false // No value is present or the value is empty } } @@ -87,7 +89,7 @@ object TokenUtils { } def getMwcToken(shared_host: String, WorkspaceId: String, capacity_id: String, - workload_type: String): MwcToken = { + workload_type: String): Option[MwcToken]= { val url: String = shared_host + "/metadata/v201606/generatemwctokenv2" val payLoad = s"""{ @@ -111,22 +113,23 @@ object TokenUtils { response.asJsObject.fields.updated("TargetUriHost", targetUriHost) implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) - response.convertTo[MwcToken] + val mwcToken = response.convertTo[MwcToken] + Some(mwcToken) } catch { case e: NoSuchElementException => SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: Cannot retrieve targetUriHost from MWC Token.") - throw e + None case e: DeserializationException => SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of response is not of type MwcToken.") - throw e + None case e: ParsingException => 
SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of json response is formed correctly.") - throw e + None case e: Exception => SynapseMLLogging.logMessage(s"getMWCTok: Failed to fetch MWC token that is required to " + s"get cluster details: $e.") - throw e + None } } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 7e11ea3c64..5c6d25061e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -42,7 +42,8 @@ object UsageTelemetry { } } - def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { + private def reportUsageTelemetry(featureName: String, activityName: String, + attributes: Map[String,String] = Map()): Unit = { if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false") == "false") { val attributesJson = attributes.toJson.compactPrint val data = @@ -64,15 +65,7 @@ object UsageTelemetry { "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) - - var response: JsValue = JsonParser("{}") - try { - response = usagePost(url, data, headers) - } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"UsageUtils.reportUsageTelemetry: Error occurred while emitting usage data. " + - s"Exception = $e. 
(usage test)") - } + usagePost(url, data, headers) } } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala index 4054437654..af2f31b76b 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala @@ -10,13 +10,13 @@ class SparkHadoopUtilsTests extends TestBase { test("Hadoop Configuration Check (capacity id, and workspace id)"){ sc.hadoopConfiguration.set("trident.capacity.id", "f32fae84-6ed0-4406-944c-01e26087aa9b") val capacityId = getHadoopConfig("trident.capacity.id", sc) - val splittedCapacityId: Array[String] = capacityId.split("-") + val splitCapacityId: Array[String] = capacityId.split("-") - assert(splittedCapacityId.length == 5) - assert(splittedCapacityId(0).length == 8) - assert(splittedCapacityId(1).length == 4) - assert(splittedCapacityId(2).length == 4) - assert(splittedCapacityId(3).length == 4) - assert(splittedCapacityId(4).length == 12) + assert(splitCapacityId.length == 5) + assert(splitCapacityId(0).length == 8) + assert(splitCapacityId(1).length == 4) + assert(splitCapacityId(2).length == 4) + assert(splitCapacityId(3).length == 4) + assert(splitCapacityId(4).length == 12) } } From 1a707a8b1fc2d1a5b93deb1071a435757421924a Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Sat, 9 Sep 2023 13:07:20 -0700 Subject: [PATCH 15/50] Removing token used for test and created a dummy token creator. 
--- core/src/test/resources/UsageTestData.json | 10 ----- .../ml/logging/FabricTokenParserTests.scala | 45 ++++++++++--------- 2 files changed, 25 insertions(+), 30 deletions(-) delete mode 100644 core/src/test/resources/UsageTestData.json diff --git a/core/src/test/resources/UsageTestData.json b/core/src/test/resources/UsageTestData.json deleted file mode 100644 index 2bc1f19b30..0000000000 --- a/core/src/test/resources/UsageTestData.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "valid": "true", - "payload": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9.OhNT7-dNRzH7JJpPtfs8f-TX1JmBYNrlePHOpQXVJq9U2MiFBougw8ea-DjRaE3
lAIaBqYr9aKKlXdH_fnFF_d-OZ3REmFNPLASMKR-n8mUOQWBXwp8eaeL2A8IAD5GJkjjeu9VeTWr5ApI_AnE-gPfJbwQKpxbkSJbTcRQfVKGbaeOH_9iw5rrndUDZNURZeSW4UfS0X3SjBKQSE-J69c0yPinhZPwEQEpSvEbMIWmpHItKCew5URCuEj3cv6DU6lVEYeGlfzgJ5BTjhlW6imxOV2Ed-78pYZNmr-LcfSExa6CPtuQhYFdYCOuMVDgP53z1_8RTTy_bxb-KxfC4VQ" - }, - { - "valid": "false", - "payload": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSIsImtpZCI6IklMX04xYlBjUlA0alpVdzJlWjNyODd5YXZkVSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MtaW50Lm5ldC9wb3dlcmJpL2FwaSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MtcHBlLm5ldC8wNDEyNWEzNS1kZmJhLTQ5YTEtYjRmZC05OGM0NzNiZWIwNjUvIiwiaWF0IjoxNjkxMTY3MTgxLCJuYmYiOjE2OTExNjcxODEsImV4cCI6MTY5MTE3MTEwOSwiYWNjdCI6MCwiYWNyIjoiMSIsImFpbyI6IkFUUUF5LzhYQUFBQXFydkEvTG5vSTh4MUZEMUZtYjcxVHBIOUtMVU44N21xYTJCc1Z0NlNHVlRqaGlQcGhIeU5SQ3VjZXJ1TmUvSnQiLCJhbXIiOlsicHdkIl0sImFwcGlkIjoiMDAwMDAwMDktMDAwMC0wMDAwLWMwMDAtMDAwMDAwMDAwMDAwIiwiYXBwaWRhY3IiOiIyIiwiZmFtaWx5X25hbWUiOiJFZG9nIiwiZ2l2ZW5fbmFtZSI6IkFkbWluIiwiaXBhZGRyIjoiMjYwMTo2NDY6YzYwMDo4YTQwOjQ4OWU6MWM1Zjo4MDYwOjk2NmQiLCJuYW1lIjoiRWRvZyBBZG1pbnMiLCJvaWQiOiIzMzA0MThjZi03YTU3LTQxNzEtOTA4Mi1kNzE2MTg5OTBmNjEiLCJwdWlkIjoiMTAwM0RGRkQwMTk5RjY2RCIsInJoIjoiMC5BQUVBTlZvU0JMcmZvVW0wX1pqRWM3NndaUWtBQUFBQUFBQUF3QUFBQUFBQUFBQUJBSE0uIiwic2NwIjoidXNlcl9pbXBlcnNvbmF0aW9uIiwic3ViIjoiYVl3cTFadERSSEtlQ011QU1kOGI0bVE0NjlRRTZscmZKcDJ2aXpMM2dRMCIsInRpZCI6IjA0MTI1YTM1LWRmYmEtNDlhMS1iNGZkLTk4YzQ3M2JlYjA2NSIsInVuaXF1ZV9uYW1lIjoiRWRvZ0FkbWluQFRyaWRlbnRFZG9nLmNjc2N0cC5uZXQiLCJ1cG4iOiJFZG9nQWRtaW5AVHJpZGVudEVkb2cuY2NzY3RwLm5ldCIsInV0aSI6IjB6Nmt6ajhrNTBpNHp4OVh4WkFEQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjYyZTkwMzk0LTY5ZjUtNDIzNy05MTkwLTAxMjE3NzE0NWUxMCIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfcGwiOiJlbi1VUyJ9" - } -] diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala index 7e112a7647..6b5200c306 100644 --- 
a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala @@ -21,32 +21,37 @@ class FabricTokenParserTests extends TestBase { import TokenJsonProtocol._ test("JWT Token Expiry Check"){ - val filePath = getClass.getResource("/UsageTestData.json") - val source = Source.fromURL(filePath) - try { - val jsonString = source.mkString - val parsedTokens = jsonString.parseJson - val tokens = parsedTokens.convertTo[Seq[Token]] - val token = tokens(0) - val fabricTokenParser = new FabricTokenParser(token.payload) + val fabricTokenParser = new FabricTokenParser(createDummyToken(true)) val exp: Long = fabricTokenParser.getExpiry assert(exp > 0L) - } finally { - source.close() - } } test("Invalid JWT Token Check."){ - val filePath = getClass.getResource("/UsageTestData.json") - val jsonString = StreamUtilities.usingSource(scala.io.Source.fromURL(filePath)) { source => - source.mkString - }.get - val parsedTokens = jsonString.parseJson - val tokens = parsedTokens.convertTo[Seq[Token]] - val token = tokens(1) - assertThrows[InvalidJwtTokenException]{ - val fabricTokenParser = new FabricTokenParser(token.payload) + val fabricTokenParser = new FabricTokenParser(createDummyToken(false)) } } + + def createDummyToken(createValidToken: Boolean): String = { + val claims = """{ + "iss": "issuer", + "sub": "subject", + "aud": "audience", + "exp": 1691171109, + "userId": "123456789" + }""" + + val header = encodeBase64URLSafeString ("{\"alg\":\"RS256\",\"typ\":\"JWT\"}".getBytes ("UTF-8") ) + val payload = encodeBase64URLSafeString (claims.getBytes ("UTF-8") ) + val dummySignature = "dummy-signature" // You can replace this with an actual signature if needed + + if(createValidToken) + s"$header.$payload.$dummySignature" + else + s"$header.$payload" + } + + def encodeBase64URLSafeString(bytes: Array[Byte]): String = { + 
java.util.Base64.getUrlEncoder.encodeToString(bytes) + } } From db38753fc115be39feccef6349a8b96a85e0bea7 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Wed, 20 Sep 2023 12:50:10 -0700 Subject: [PATCH 16/50] Adding abstract classes to represent certified event payload, adding test for it, addressing PR comments. --- .../synapse/ml/logging/SynapseMLLogging.scala | 32 +++++++--- .../Usage/FabricTokenServiceClient.scala | 33 ++-------- .../ml/logging/Usage/FabricUtils.scala | 10 +-- .../logging/Usage/FeatureUsagePayload.scala | 32 +++++++++- .../ml/logging/Usage/HostEndpointUtils.scala | 18 ++---- .../synapse/ml/logging/Usage/TokenUtils.scala | 61 ++++++------------- .../ml/logging/Usage/UsageConstants.scala | 11 ---- .../synapse/ml/logging/Usage/UsageUtils.scala | 11 ++-- .../ml/logging/common/CommonUtils.scala | 35 +++++++++++ .../synapse/ml/logging/common/WebUtils.scala | 6 +- .../ml/logging/FeatureUsagePayloadTest.scala | 19 ++++++ .../ml/logging/HostEndpointUtilsTests.scala | 17 +++--- 12 files changed, 155 insertions(+), 130 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala create mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index b5333454ac..727e98e593 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -4,12 +4,12 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo +import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.common.SASScrubber import org.apache.spark.internal.Logging -import spray.json.{DefaultJsonProtocol, 
RootJsonFormat, NullOptions} - import scala.collection.JavaConverters._ import scala.collection.mutable +import spray.json.{DefaultJsonProtocol, RootJsonFormat, NullOptions} case class SynapseMLLogInfo(uid: String, className: String, @@ -76,20 +76,34 @@ trait SynapseMLLogging extends Logging { logBase("constructor", None) } - def logFit[T](f: => T, columns: Int): T = { - logVerb("fit", f, columns) + def logFit[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true, + certifiedEventAttributes: Map[String, String] = Map()): T = { + logVerb("fit", f, columns, logCertifiedEvent, certifiedEventAttributes) } - def logTrain[T](f: => T, columns: Int): T = { - logVerb("train", f, columns) + def logTrain[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true, + certifiedEventAttributes: Map[String, String] = Map()): T = { + logVerb("train", f, columns, logCertifiedEvent, certifiedEventAttributes) } - def logTransform[T](f: => T, columns: Int): T = { - logVerb("transform", f, columns) + def logTransform[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true, + certifiedEventAttributes: Map[String, String] = Map()): T = { + logVerb("transform", f, columns, logCertifiedEvent, certifiedEventAttributes) } - def logVerb[T](verb: String, f: => T, columns: Int = -1): T = { + def logVerb[T](verb: String, f: => T, columns: Int = -1, logCertifiedEvent: Boolean = true, + certifiedEventAttributes: Map[String, String] = Map()): T = { logBase(verb, if(columns == -1) None else Some(columns)) try { + // Begin emitting certified event. 
+ if(logCertifiedEvent) + { + import com.microsoft.azure.synapse.ml.logging.Usage.FeatureSynapseML + import com.microsoft.azure.synapse.ml.logging.Usage.UsageTelemetry.reportUsage + import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload + val certifiedEventPayload = new FeatureUsagePayload(new FeatureSynapseML, + CommonUtils.getCertifiedEventActivity(verb), certifiedEventAttributes) + reportUsage(certifiedEventPayload) + } f } catch { case e: Exception => { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index a9f56d524d..05da3fed70 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -3,17 +3,13 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import java.io.IOException +import com.microsoft.azure.synapse.ml.logging.common.CommonUtils +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import java.lang.management.ManagementFactory import java.net.URL import java.net.InetAddress import java.util.UUID -import spray.json.DefaultJsonProtocol.StringJsonFormat -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import spray.json.{JsArray, JsObject, JsValue, _} -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import org.apache.hc.client5.http.ClientProtocolException class FabricTokenServiceClient { private val resourceMapping = Map( @@ -53,26 +49,9 @@ class FabricTokenServiceClient { "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", "x-ms-client-request-id" -> rid ) - var url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" - 
try { - val response: JsValue = usageGet(url, headers) - if (response.asJsObject.fields("status_code").convertTo[String] != 200 - || response.asJsObject.fields("content").convertTo[String].isEmpty) { - throw new Exception("Fetch access token error") - } - response.asJsObject.fields("content").toString().getBytes("UTF-8").toString - } catch { - case e: IOException => - SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Problems in executing" + - s" http request or the connection might have been aborted. Exception = $e.") - "" - case e: ClientProtocolException => - SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. " + - s"HTTP protocol error. Exception = $e.") - "" - case e: Exception => - SynapseMLLogging.logMessage(s"getAccessToken: Failed to fetch cluster details. Exception = $e.") - "" - } + val url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" + + val response: JsValue = CommonUtils.requestGet(url, headers, "content") + response.toString().getBytes("UTF-8").toString } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index a2d538062a..996eb6630b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -6,8 +6,6 @@ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.core.env.StreamUtilities import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import spray.json._ -import scala.util.matching.Regex -import scala.io.Source case class TokenServiceConfig(tokenServiceEndpoint: String, clusterType: String, @@ -31,7 +29,7 @@ object FabricUtils { val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile (FabricConstants.TokenServiceFilePath)) { 
- source => cleanJson(source.mkString).parseJson.convertTo[TokenServiceConfig] + source => source.mkString.parseJson.convertTo[TokenServiceConfig] }.get val tridentContext: Map[String, String] = linesContextFile @@ -54,10 +52,4 @@ object FabricUtils { throw e } } - - private def cleanJson(s: String): String = { - val pattern: Regex = ",[ \t\r\n]+}".r - val cleanedJson = pattern.replaceAllIn(s, "}") - cleanedJson - } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala index 1edac2b304..e41785450b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -3,6 +3,34 @@ package com.microsoft.azure.synapse.ml.logging.Usage -case class FeatureUsagePayload(feature_name: UsageFeatureNames.Value, - activity_name: FeatureActivityName.Value, +case class FeatureUsagePayload(feature_name: UsageFeatureName, + activity_name: FeatureActivityName, attributes: Map[String, String]) + +abstract class UsageFeatureName{ + def getFeatureName: String +} + +class FeatureSynapseML extends UsageFeatureName { + override def getFeatureName: String = "SynapseML" +} + +abstract class FeatureActivityName{ + def getFeatureActivityName: String = "Invalid" +} + +class FeatureActivityFit extends FeatureActivityName{ + override def getFeatureActivityName: String = "Fit" +} + +class FeatureActivityTransform extends FeatureActivityName{ + override def getFeatureActivityName: String = "Transform" +} + +class FeatureActivityTrain extends FeatureActivityName{ + override def getFeatureActivityName: String = "Train" +} + +class FeatureActivityInvalid extends FeatureActivityName{ + override def getFeatureActivityName: String = "Invalid" +} diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index 5da99a38aa..d8973ba816 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -3,12 +3,11 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, Workloads, WorkloadEndpointAutomatic} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, WorkspaceID} import spray.json.DefaultJsonProtocol.StringJsonFormat -import spray.json.{JsValue, JsonParser} +import spray.json.{JsValue} object HostEndpointUtils { def getMlflowSharedHost(pbienv: String): String = { @@ -34,16 +33,9 @@ object HostEndpointUtils { "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", "RequestId" -> java.util.UUID.randomUUID().toString ) - var response: JsValue = JsonParser("{}") - try { - response = usageGet(url, headers) - } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"HostEndpointUtils.getMlflowSharedHost: " + - s"Can't get ml flow shared host. Exception = $e. 
(usage test)") - "" - } - response.asJsObject.fields("clusterUrl").convertTo[String] + + val response: JsValue = CommonUtils.requestGet(url, headers, "clusterUrl") + response.convertTo[String] } def getMlflowWorkloadHost(pbienv: String, capacityId: String, diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index e5f9cc683a..5204f575b7 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -3,16 +3,15 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import scala.reflect.runtime.currentMirror -import scala.reflect.runtime.universe._ +import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import java.time.Instant +import java.util.UUID import org.apache.spark.SparkContext -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import scala.reflect.runtime.currentMirror +import scala.reflect.runtime.universe._ import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} -import java.util.UUID -import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ -import spray.json.{DeserializationException, RootJsonFormat} -import spray.json.JsonParser.ParsingException +import spray.json.RootJsonFormat case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { @@ -74,17 +73,12 @@ object TokenUtils { } private def refreshAccessToken(): Unit = { - try { - if (SparkContext.getOrCreate() != null) { - val token = getAccessToken("pbi") - AADToken = token - } else { - val token = new FabricTokenServiceClient().getAccessToken("pbi") - AADToken = token - } - } catch { - case e: Exception => - SynapseMLLogging.logMessage(s"refreshAccessTok: failed to refresh pbi tok. Exception: {$e}. 
(usage test)") + if (SparkContext.getOrCreate() != null) { + val token = getAccessToken("pbi") + AADToken = token + } else { + val token = new FabricTokenServiceClient().getAccessToken("pbi") + AADToken = token } } @@ -106,30 +100,13 @@ object TokenUtils { "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) - try{ - val response = usagePost(url, payLoad, headers) - var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] - targetUriHost = s"https://$targetUriHost" - response.asJsObject.fields.updated("TargetUriHost", targetUriHost) + val response = usagePost(url, payLoad, headers) + var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] + targetUriHost = s"https://$targetUriHost" + response.asJsObject.fields.updated("TargetUriHost", targetUriHost) - implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) - val mwcToken = response.convertTo[MwcToken] - Some(mwcToken) - } - catch { - case e: NoSuchElementException => - SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: Cannot retrieve targetUriHost from MWC Token.") - None - case e: DeserializationException => - SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of response is not of type MwcToken.") - None - case e: ParsingException => - SynapseMLLogging.logMessage(s"TokenUtils.getMWCToken: The structure of json response is formed correctly.") - None - case e: Exception => - SynapseMLLogging.logMessage(s"getMWCTok: Failed to fetch MWC token that is required to " + - s"get cluster details: $e.") - None - } + implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) + val mwcToken = response.convertTo[MwcToken] + Some(mwcToken) } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index 7c6666884c..eb11f76a22 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -2,17 +2,6 @@ // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.azure.synapse.ml.logging.Usage -object UsageFeatureNames extends Enumeration { - type UsageFeatureNames = Value - val Predict = Value(1) -} - -object FeatureActivityName extends Enumeration { - type FeatureActivityName = Value - val API0Transform = Value(1) - val API0SQL = Value(2) - val API0UDF = Value(3) -} object FabricConstants { val MlKustoTableName = "SynapseMLLogs" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 5c6d25061e..a733ef5034 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -4,19 +4,18 @@ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopConfig -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.{usageGet, usagePost} +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usagePost import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken -import java.util.UUID import java.time.Instant +import java.util.UUID import org.apache.spark.sql.SparkSession import spray.json._ import spray.json.DefaultJsonProtocol._ import spray.json.DefaultJsonProtocol.StringJsonFormat -import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ - object UsageTelemetry { private 
val SC = SparkSession.builder().getOrCreate().sparkContext private val CapacityId = getHadoopConfig("trident.capacity.id", SC) @@ -31,8 +30,8 @@ object UsageTelemetry { def reportUsage(payload: FeatureUsagePayload): Unit = { if (sys.env.getOrElse(EmitUsage, "True") == "True") { try { - reportUsageTelemetry(payload.feature_name.toString, - payload.activity_name.toString, + reportUsageTelemetry(payload.feature_name.getFeatureName, + payload.activity_name.getFeatureActivityName, payload.attributes) } catch { case runtimeError: Exception => diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala new file mode 100644 index 0000000000..447cf4686a --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala @@ -0,0 +1,35 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ +package com.microsoft.azure.synapse.ml.logging.common + +import com.microsoft.azure.synapse.ml.logging.Usage.FeatureActivityName +import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ +import com.microsoft.azure.synapse.ml.logging.Usage.{FeatureActivityFit, FeatureActivityTrain, + FeatureActivityTransform, FeatureActivityInvalid} +import scala.util.{Success, Try} +import spray.json.{JsValue} +import spray.json.DefaultJsonProtocol.IntJsonFormat +import spray.json.DefaultJsonProtocol.StringJsonFormat + +object CommonUtils { + def requestGet(url: String, headers: Map[String, String], property: String): JsValue = { + val response: JsValue = usageGet(url, headers) + + val statusCode = Try(response.asJsObject.fields("status_code").convertTo[Int]) + val propertyValue = Try(response.asJsObject.fields(property).convertTo[String]) + + (statusCode, propertyValue) match { + case (Success(code), Success(value)) if code == 200 && !value.isEmpty => response.asJsObject.fields(property) + case _ => throw new Exception("Fetch access token error") + } + } + def getCertifiedEventActivity(verb: String): FeatureActivityName = { + verb match { + case "train" => new FeatureActivityTrain + case "fit" => new FeatureActivityFit + case "transform" => new FeatureActivityTransform + case _ => new FeatureActivityInvalid + } + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index c628ae2e07..0d57063057 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -3,12 +3,12 @@ package com.microsoft.azure.synapse.ml.logging.common +import com.microsoft.azure.synapse.ml.io.http.RESTHelpers +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import org.apache.commons.io.IOUtils import 
org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity -import spray.json.{JsArray, JsObject, JsValue, _} -import com.microsoft.azure.synapse.ml.io.http.RESTHelpers -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import spray.json.{JsObject, JsValue, _} object WebUtils { def usagePost(url: String, body: String, headers: Map[String, String]): JsValue = { diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala new file mode 100644 index 0000000000..e288d3a232 --- /dev/null +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala @@ -0,0 +1,19 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.logging + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.logging.Usage.{FeatureSynapseML, FeatureActivityTransform, + FeatureActivityTrain, FeatureActivityFit} + +class FeatureUsagePayloadTest extends TestBase { + test("FeatureName"){ + assert(new FeatureSynapseML().getFeatureName == "SynapseML") + } + test("FeatureActivityName") { + assert(new FeatureActivityTransform().getFeatureActivityName == "Transform") + assert(new FeatureActivityTrain().getFeatureActivityName == "Train") + assert(new FeatureActivityFit().getFeatureActivityName == "Fit") + } +} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala index 373e6b951b..cd69986d97 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala @@ -4,17 +4,18 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WorkloadEndpointAdmin} -import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ class UsageUtilsTests extends TestBase { - val target = "f32fae846ed04406944c01e26087aa9b.pbidedicated.windows-int.net/webapi/Capacities/" + - "f32fae84-6ed0-4406-944c-01e26087aa9b/workloads/ML/MLAdmin/Automatic/" + - "workspaceid/c1aaa432-2b6e-4325-acca-1aac063d9a6e/" - val capacityId = "f32fae84-6ed0-4406-944c-01e26087aa9b" - val wlHost = "f32fae846ed04406944c01e26087aa9b.pbidedicated.windows-int.net" - val workspaceId = "c1aaa432-2b6e-4325-acca-1aac063d9a6e" + import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WorkloadEndpointAdmin} + import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ + + val target = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net/webapi/Capacities/" + + "c528701c-8f94-42c0-b65a-1660171c306c/workloads/ML/MLAdmin/Automatic/" + + "workspaceid/89b9b330-6eac-4ee1-b225-590dfd68e4be/" + val capacityId = "c528701c-8f94-42c0-b65a-1660171c306c" + val wlHost = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net" + val workspaceId = "89b9b330-6eac-4ee1-b225-590dfd68e4be" test("ML Workload Endpoint Check"){ val url = getMLWorkloadEndpoint(this.wlHost, this.capacityId, WorkloadEndpointAdmin, this.workspaceId) assert(url == target) From 2619dbb4dcb3b27c8585abd6e3a3a44b7647e60c Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 22 Sep 2023 15:34:13 -0700 Subject: [PATCH 17/50] Turning code immutable as much as possible and removing few tests and classes that were replaced by alternative approach. 
--- .../synapse/ml/logging/SynapseMLLogging.scala | 9 ++-- .../ml/logging/Usage/FabricTokenParser.scala | 2 +- .../ml/logging/Usage/FabricUtils.scala | 54 ++++++++----------- .../logging/Usage/FeatureUsagePayload.scala | 32 +---------- .../ml/logging/Usage/HostEndpointUtils.scala | 11 ++-- .../synapse/ml/logging/Usage/TokenUtils.scala | 15 +++--- .../ml/logging/Usage/UsageConstants.scala | 1 + .../synapse/ml/logging/Usage/UsageUtils.scala | 11 ++-- .../ml/logging/common/CommonUtils.scala | 14 +---- .../synapse/ml/logging/common/WebUtils.scala | 10 +--- .../ml/logging/FeatureUsagePayloadTest.scala | 19 ------- 11 files changed, 52 insertions(+), 126 deletions(-) delete mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 727e98e593..1f0b20bfa0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -6,6 +6,8 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.common.SASScrubber +import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload +import com.microsoft.azure.synapse.ml.logging.Usage.UsageTelemetry.reportUsage import org.apache.spark.internal.Logging import scala.collection.JavaConverters._ import scala.collection.mutable @@ -97,11 +99,8 @@ trait SynapseMLLogging extends Logging { // Begin emitting certified event. 
if(logCertifiedEvent) { - import com.microsoft.azure.synapse.ml.logging.Usage.FeatureSynapseML - import com.microsoft.azure.synapse.ml.logging.Usage.UsageTelemetry.reportUsage - import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload - val certifiedEventPayload = new FeatureUsagePayload(new FeatureSynapseML, - CommonUtils.getCertifiedEventActivity(verb), certifiedEventAttributes) + val certifiedEventPayload = new FeatureUsagePayload("SynapseML", + verb, certifiedEventAttributes) reportUsage(certifiedEventPayload) } f diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala index 4f184e3f06..c8d3bd7a37 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -9,7 +9,7 @@ class InvalidJwtTokenException(message: String) extends Exception(message) class JwtTokenExpiryMissingException(message: String) extends Exception(message) class FabricTokenParser(JWToken: String) { val tokens: Array[String] = JWToken.split("\\.") - private var parsedToken: JsValue = tokenCheckAndDecode(tokens) + private val parsedToken: JsValue = tokenCheckAndDecode(tokens) def getExpiry: Long ={ val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } exp match { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index 996eb6630b..620830d9b6 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -19,37 +19,29 @@ object TokenServiceConfigProtocol extends DefaultJsonProtocol { import 
TokenServiceConfigProtocol._ object FabricUtils { - //private var TridentContext: Map[String, String] = Map[String, String]() - def getFabricContext: Map[String, String] = { - try { - val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)) { - source => source.getLines().toList - }.get - - val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile - (FabricConstants.TokenServiceFilePath)) { - source => source.mkString.parseJson.convertTo[TokenServiceConfig] - }.get - - val tridentContext: Map[String, String] = linesContextFile - .filter(line => line.split('=').length == 2) - .map { line => - val Array(k, v) = line.split('=') - (k.trim, v.trim) - }.toMap - .++(Seq( - (FabricConstants.SynapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), - (FabricConstants.SynapseClusterType, tokenServiceConfig.clusterType), - (FabricConstants.SynapseClusterIdentifier, tokenServiceConfig.clusterName), - (FabricConstants.TridentSessionToken, tokenServiceConfig.sessionToken) - ).toMap) - - tridentContext - } catch { - case e: NullPointerException => - SynapseMLLogging.logMessage(s"Error reading Fabric context file: Trident context file path is missing. 
$e") - throw e - } + val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)) { + source => source.getLines().toList + }.get + + val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile + (FabricConstants.TokenServiceFilePath)) { + source => source.mkString.parseJson.convertTo[TokenServiceConfig] + }.get + + val tridentContext: Map[String, String] = linesContextFile + .filter(line => line.split('=').length == 2) + .map { line => + val Array(k, v) = line.split('=') + (k.trim, v.trim) + }.toMap + .++(Seq( + (FabricConstants.SynapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), + (FabricConstants.SynapseClusterType, tokenServiceConfig.clusterType), + (FabricConstants.SynapseClusterIdentifier, tokenServiceConfig.clusterName), + (FabricConstants.TridentSessionToken, tokenServiceConfig.sessionToken) + ).toMap) + + tridentContext } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala index e41785450b..e04bdcf705 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala @@ -3,34 +3,6 @@ package com.microsoft.azure.synapse.ml.logging.Usage -case class FeatureUsagePayload(feature_name: UsageFeatureName, - activity_name: FeatureActivityName, +case class FeatureUsagePayload(feature_name: String, + activity_name: String, attributes: Map[String, String]) - -abstract class UsageFeatureName{ - def getFeatureName: String -} - -class FeatureSynapseML extends UsageFeatureName { - override def getFeatureName: String = "SynapseML" -} - -abstract class FeatureActivityName{ - def getFeatureActivityName: String = "Invalid" -} - -class FeatureActivityFit extends FeatureActivityName{ - override def 
getFeatureActivityName: String = "Fit" -} - -class FeatureActivityTransform extends FeatureActivityName{ - override def getFeatureActivityName: String = "Transform" -} - -class FeatureActivityTrain extends FeatureActivityName{ - override def getFeatureActivityName: String = "Train" -} - -class FeatureActivityInvalid extends FeatureActivityName{ - override def getFeatureActivityName: String = "Invalid" -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index d8973ba816..64b8d22faa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -3,11 +3,13 @@ package com.microsoft.azure.synapse.ml.logging.Usage +import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.common.CommonUtils -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, Workloads, WorkloadEndpointAutomatic} +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WorkloadEndpointAutomatic, Workloads} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, WorkspaceID} +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import spray.json.DefaultJsonProtocol.StringJsonFormat -import spray.json.{JsValue} +import spray.json.{JsValue, JsonParser} object HostEndpointUtils { def getMlflowSharedHost(pbienv: String): String = { @@ -33,9 +35,8 @@ object HostEndpointUtils { "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", "RequestId" -> java.util.UUID.randomUUID().toString ) - - val response: JsValue = CommonUtils.requestGet(url, headers, "clusterUrl") - response.convertTo[String] + val response = usageGet(url, headers) + 
response.asJsObject.fields("clusterUrl").convertTo[String] } def getMlflowWorkloadHost(pbienv: String, capacityId: String, diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 5204f575b7..a4828f8b21 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -15,15 +15,15 @@ import spray.json.RootJsonFormat case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { - var AADToken: String = "" + var AADToken: Option[String] = None val MwcWorkloadTypeMl = "ML" def getAccessToken: String = { - if (isTokenValid(Option(this.AADToken))) - this.AADToken + if (isTokenValid(AADToken)) + this.AADToken.get else { refreshAccessToken() - this.AADToken + this.AADToken.get } } @@ -75,10 +75,10 @@ object TokenUtils { private def refreshAccessToken(): Unit = { if (SparkContext.getOrCreate() != null) { val token = getAccessToken("pbi") - AADToken = token + AADToken = Some(token) } else { val token = new FabricTokenServiceClient().getAccessToken("pbi") - AADToken = token + AADToken = Some(token) } } @@ -101,8 +101,7 @@ object TokenUtils { ) val response = usagePost(url, payLoad, headers) - var targetUriHost = response.asJsObject.fields("TargetUriHost").convertTo[String] - targetUriHost = s"https://$targetUriHost" + val targetUriHost = s"https://${response.asJsObject.fields("TargetUriHost").convertTo[String]}" response.asJsObject.fields.updated("TargetUriHost", targetUriHost) implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index eb11f76a22..ed27e9ee0c 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -6,6 +6,7 @@ package com.microsoft.azure.synapse.ml.logging.Usage object FabricConstants { val MlKustoTableName = "SynapseMLLogs" val EmitUsage = "EmitUsage" + val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" val ContextFilePath = "/home/trusted-service-user/.trident-context" val TokenServiceFilePath = "/opt/token-service/tokenservice.config.json" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index a733ef5034..1851699fb0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -26,12 +26,11 @@ object UsageTelemetry { val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) - private val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" def reportUsage(payload: FeatureUsagePayload): Unit = { - if (sys.env.getOrElse(EmitUsage, "True") == "True") { + if (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") { try { - reportUsageTelemetry(payload.feature_name.getFeatureName, - payload.activity_name.getFeatureActivityName, + reportUsageTelemetry(payload.feature_name, + payload.activity_name, payload.attributes) } catch { case runtimeError: Exception => @@ -43,13 +42,13 @@ object UsageTelemetry { private def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { - if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false") == "false") { + if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false").toLowerCase == "false") { val 
attributesJson = attributes.toJson.compactPrint val data = s"""{ |"timestamp":${Instant.now().getEpochSecond}, |"feature_name":"$featureName", - |"activity_name":"${activityName.replace('0', '/')}", + |"activity_name":"$activityName", |"attributes":$attributesJson |}""".stripMargin diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala index 447cf4686a..022f3c9a5e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala @@ -3,10 +3,7 @@ package com.microsoft.azure.synapse.ml.logging.common -import com.microsoft.azure.synapse.ml.logging.Usage.FeatureActivityName import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ -import com.microsoft.azure.synapse.ml.logging.Usage.{FeatureActivityFit, FeatureActivityTrain, - FeatureActivityTransform, FeatureActivityInvalid} import scala.util.{Success, Try} import spray.json.{JsValue} import spray.json.DefaultJsonProtocol.IntJsonFormat @@ -21,15 +18,8 @@ object CommonUtils { (statusCode, propertyValue) match { case (Success(code), Success(value)) if code == 200 && !value.isEmpty => response.asJsObject.fields(property) - case _ => throw new Exception("Fetch access token error") - } - } - def getCertifiedEventActivity(verb: String): FeatureActivityName = { - verb match { - case "train" => new FeatureActivityTrain - case "fit" => new FeatureActivityFit - case "transform" => new FeatureActivityTransform - case _ => new FeatureActivityInvalid + case _ => throw new Exception(s"CommonUtils.requestGet: Failed with " + + s"code=$statusCode. 
Property looked for was = $property") } } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 0d57063057..0e97c9e1e3 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -35,15 +35,7 @@ object WebUtils { } private def parseResponse(response: CloseableHttpResponse): JsValue = { - var content: String = "" - try { - content = IOUtils.toString(response.getEntity.getContent, "utf-8") - } - catch { - case e: Exception => - SynapseMLLogging.logMessage(s"RestHelpers::parseResult: getting exception parsing response." + - s"Exception = $e") - } + val content: String = IOUtils.toString(response.getEntity.getContent, "utf-8") if (content.nonEmpty) { content.parseJson } else { diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala deleted file mode 100644 index e288d3a232..0000000000 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FeatureUsagePayloadTest.scala +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging - -import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.Usage.{FeatureSynapseML, FeatureActivityTransform, - FeatureActivityTrain, FeatureActivityFit} - -class FeatureUsagePayloadTest extends TestBase { - test("FeatureName"){ - assert(new FeatureSynapseML().getFeatureName == "SynapseML") - } - test("FeatureActivityName") { - assert(new FeatureActivityTransform().getFeatureActivityName == "Transform") - assert(new FeatureActivityTrain().getFeatureActivityName == "Train") - assert(new FeatureActivityFit().getFeatureActivityName == "Fit") - } -} From 309885186a018f73ce4d46ddd4f4eb4c847cc189 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:22:02 -0700 Subject: [PATCH 18/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala Co-authored-by: Mark Hamilton --- .../microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 1851699fb0..ae85bb50cd 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -56,7 +56,6 @@ object UsageTelemetry { // Add the protocol and the route for the certified event telemetry endpoint val url = "https://" + mlAdminEndpoint + "telemetry" - val driverAADToken = getAccessToken val headers = Map( "Content-Type" -> "application/json", From 2e65c8782f3b9ad4061a43b4e12b27e25a68569a Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:22:20 -0700 Subject: [PATCH 19/50] Update 
core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala Co-authored-by: Mark Hamilton --- .../microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index ae85bb50cd..022c7e1b80 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -59,7 +59,7 @@ object UsageTelemetry { val headers = Map( "Content-Type" -> "application/json", - "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, + "Authorization" -> s"""Bearer ${getAccessToken}""".stripMargin, "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) usagePost(url, data, headers) From b1558ed269c8ab51af68a93b6d2f50dd2b97646c Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:37:28 -0700 Subject: [PATCH 20/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala Co-authored-by: Mark Hamilton --- .../microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index a4828f8b21..7e46ffb44e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -105,7 +105,6 @@ object TokenUtils { response.asJsObject.fields.updated("TargetUriHost", targetUriHost) implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) - val mwcToken = 
response.convertTo[MwcToken] - Some(mwcToken) + Some(response.convertTo[MwcToken]) } } From b7a78c5fbab1c069e50cf0bfe905d2605aa7fcb1 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:38:06 -0700 Subject: [PATCH 21/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala Co-authored-by: Mark Hamilton --- .../microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 7e46ffb44e..06fa5ec254 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -77,8 +77,7 @@ object TokenUtils { val token = getAccessToken("pbi") AADToken = Some(token) } else { - val token = new FabricTokenServiceClient().getAccessToken("pbi") - AADToken = Some(token) + AADToken = Some(new FabricTokenServiceClient().getAccessToken("pbi")) } } From 400dbe8076e84a238dd40f7cd1c22e043cd08cbd Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:38:31 -0700 Subject: [PATCH 22/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala Co-authored-by: Mark Hamilton --- .../microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 06fa5ec254..12bcb8b9f9 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -74,8 +74,7 @@ object TokenUtils { private def refreshAccessToken(): Unit = { if (SparkContext.getOrCreate() != null) { - val token = getAccessToken("pbi") - AADToken = Some(token) + AADToken = Some(getAccessToken("pbi")) } else { AADToken = Some(new FabricTokenServiceClient().getAccessToken("pbi")) } From fd3a62a8db187d52fcafb6fb53cb9db18f99dc33 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 22:08:23 -0700 Subject: [PATCH 23/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala Co-authored-by: Mark Hamilton --- .../azure/synapse/ml/logging/Usage/HostEndpointUtils.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index 64b8d22faa..0a81f3fd61 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -35,8 +35,7 @@ object HostEndpointUtils { "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", "RequestId" -> java.util.UUID.randomUUID().toString ) - val response = usageGet(url, headers) - response.asJsObject.fields("clusterUrl").convertTo[String] + usageGet(url, headers).asJsObject.fields("clusterUrl").convertTo[String] } def getMlflowWorkloadHost(pbienv: String, capacityId: String, From 01a63014afd12ffaba8f626028b8200824fe7d51 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya <34048254+saileshbaidya@users.noreply.github.com> Date: Sat, 23 Sep 2023 22:09:07 -0700 Subject: [PATCH 24/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala Co-authored-by: Mark Hamilton --- 
.../synapse/ml/logging/Usage/FabricTokenServiceClient.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index 05da3fed70..66bbdaf84d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -51,7 +51,6 @@ class FabricTokenServiceClient { ) val url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" - val response: JsValue = CommonUtils.requestGet(url, headers, "content") - response.toString().getBytes("UTF-8").toString + CommonUtils.requestGet(url, headers, "content").toString().getBytes("UTF-8").toString } } From f3a0ba0364cb6d808d6868ecda268757c0abe7ec Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Sun, 24 Sep 2023 21:26:59 -0700 Subject: [PATCH 25/50] porting some api realted to web calls to WebUtils.scala, making few calls succinct, addressing potential null exceptions, etc. 
--- .../ml/logging/Usage/FabricTokenParser.scala | 2 +- .../Usage/FabricTokenServiceClient.scala | 10 ++++---- .../ml/logging/Usage/HostEndpointUtils.scala | 22 ++++++++-------- .../synapse/ml/logging/Usage/TokenUtils.scala | 2 +- .../ml/logging/Usage/UsageConstants.scala | 3 --- .../synapse/ml/logging/Usage/UsageUtils.scala | 15 +++++++---- .../ml/logging/common/CommonUtils.scala | 25 ------------------- .../ml/logging/common/SparkHadoopUtils.scala | 19 ++++++++------ .../synapse/ml/logging/common/WebUtils.scala | 19 +++++++++++++- .../ml/logging/SparkHadoopUtilsTests.scala | 2 +- 10 files changed, 57 insertions(+), 62 deletions(-) delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala index c8d3bd7a37..fd1e17adfa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -8,7 +8,7 @@ import spray.json._ class InvalidJwtTokenException(message: String) extends Exception(message) class JwtTokenExpiryMissingException(message: String) extends Exception(message) class FabricTokenParser(JWToken: String) { - val tokens: Array[String] = JWToken.split("\\.") + private val tokens: Array[String] = JWToken.split("\\.") private val parsedToken: JsValue = tokenCheckAndDecode(tokens) def getExpiry: Long ={ val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index 05da3fed70..169c35525e 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -3,8 +3,8 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ +import com.microsoft.azure.synapse.ml.logging.common.WebUtils.requestGet import java.lang.management.ManagementFactory import java.net.URL import java.net.InetAddress @@ -35,9 +35,9 @@ class FabricTokenServiceClient { def getAccessToken(resourceParam: String): String = { if (!resourceMapping.contains(resourceParam)) { - throw new Exception(s"$resourceParam not supported") + throw new IllegalArgumentException(s"$resourceParam not supported") } - val resource = resourceMapping.getOrElse(resourceParam, "") + val resource: Option[String] = resourceMapping.get(resourceParam) val rid = UUID.randomUUID().toString() val targetUrl = new URL(workloadEndpoint) val headers: Map[String, String] = Map( @@ -49,9 +49,9 @@ class FabricTokenServiceClient { "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", "x-ms-client-request-id" -> rid ) - val url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=$resource" + val url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=${resource.get}" - val response: JsValue = CommonUtils.requestGet(url, headers, "content") + val response: JsValue = requestGet(url, headers, "content") response.toString().getBytes("UTF-8").toString } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index 64b8d22faa..c0692050b2 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -3,13 +3,10 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WorkloadEndpointAutomatic, Workloads} import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, WorkspaceID} import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet import spray.json.DefaultJsonProtocol.StringJsonFormat -import spray.json.{JsValue, JsonParser} object HostEndpointUtils { def getMlflowSharedHost(pbienv: String): String = { @@ -39,21 +36,22 @@ object HostEndpointUtils { response.asJsObject.fields("clusterUrl").convertTo[String] } - def getMlflowWorkloadHost(pbienv: String, capacityId: String, - workspaceId: String, - sharedHost: String = ""): String = { - val clusterUrl = if (sharedHost.isEmpty) { - getMlflowSharedHost(pbienv) - } else { - sharedHost + def getMlflowWorkloadHost(pbienv: String, capacityId: String, workspaceId: String, + sharedHost: Option[String] = None): Option[String] = { + val clusterUrl = sharedHost match { + case Some(value) => + value + case None => + getMlflowSharedHost(pbienv) } + val mwcToken: Option[MwcToken] = TokenUtils.getMwcToken(clusterUrl, workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) mwcToken match { case Some(token) => - token.TargetUriHost + Some(token.TargetUriHost) case None => - "" + None } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index a4828f8b21..3fc5689406 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -15,7 +15,7 @@ import spray.json.RootJsonFormat case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) object TokenUtils { - var AADToken: Option[String] = None + private var AADToken: Option[String] = None val MwcWorkloadTypeMl = "ML" def getAccessToken: String = { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index ed27e9ee0c..c952385618 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -4,7 +4,6 @@ package com.microsoft.azure.synapse.ml.logging.Usage object FabricConstants { - val MlKustoTableName = "SynapseMLLogs" val EmitUsage = "EmitUsage" val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" @@ -22,8 +21,6 @@ object FabricConstants { val WorkspaceID = "workspaceid" val WorkloadEndpointMl = "ML" - val WorkloadEndpointLlmPlugin = "LlmPlugin" val WorkloadEndpointAutomatic = "Automatic" - val WorkloadEndpointRegistry = "Registry" val WorkloadEndpointAdmin = "MLAdmin" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 1851699fb0..62b1d723a1 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -18,13 +18,13 @@ import spray.json.DefaultJsonProtocol.StringJsonFormat object UsageTelemetry { private val SC = SparkSession.builder().getOrCreate().sparkContext - private val CapacityId = getHadoopConfig("trident.capacity.id", SC) - val WorkspaceId: String = 
getHadoopConfig("trident.artifact.workspace.id", SC) + private val CapacityId = getHadoopConfig("trident.capacity.id", Some(SC)) + private val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id", Some(SC)) private val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() private val SharedHost = getMlflowSharedHost(PbiEnv) - val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" - private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, SharedHost) + private val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" + private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) def reportUsage(payload: FeatureUsagePayload): Unit = { if (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") { @@ -52,7 +52,12 @@ object UsageTelemetry { |"attributes":$attributesJson |}""".stripMargin - val mlAdminEndpoint = getMLWorkloadEndpoint(WlHost, CapacityId, WorkloadEndpointAdmin, WorkspaceId) + val mlAdminEndpoint = WlHost match { + case Some(host) => + getMLWorkloadEndpoint(host, CapacityId, WorkloadEndpointAdmin, WorkspaceId) + case None => + throw new IllegalArgumentException("Workload host name is missing.") + } // Add the protocol and the route for the certified event telemetry endpoint val url = "https://" + mlAdminEndpoint + "telemetry" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala deleted file mode 100644 index 022f3c9a5e..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/CommonUtils.scala +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging.common - -import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ -import scala.util.{Success, Try} -import spray.json.{JsValue} -import spray.json.DefaultJsonProtocol.IntJsonFormat -import spray.json.DefaultJsonProtocol.StringJsonFormat - -object CommonUtils { - def requestGet(url: String, headers: Map[String, String], property: String): JsValue = { - val response: JsValue = usageGet(url, headers) - - val statusCode = Try(response.asJsObject.fields("status_code").convertTo[Int]) - val propertyValue = Try(response.asJsObject.fields(property).convertTo[String]) - - (statusCode, propertyValue) match { - case (Success(code), Success(value)) if code == 200 && !value.isEmpty => response.asJsObject.fields(property) - case _ => throw new Exception(s"CommonUtils.requestGet: Failed with " + - s"code=$statusCode. Property looked for was = $property") - } - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala index 1fc741bf9a..b7c9e824ee 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala @@ -4,17 +4,20 @@ package com.microsoft.azure.synapse.ml.logging.common import org.apache.spark.SparkContext - import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging +import org.apache.spark.sql.SparkSession object SparkHadoopUtils { - def getHadoopConfig(key: String, sc: SparkContext): String = { - val value = sc.hadoopConfiguration.get(key, "") - if (value.isEmpty) { - SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") - throw new IllegalArgumentException(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") - } - value + def getHadoopConfig(key: String, sc: Option[SparkContext]): 
String = { + lazy val hadoopConfig: String = sc match{ + case Some(value) => value.hadoopConfiguration.get(key, "") + case None => SparkSession.builder().getOrCreate().sparkContext.hadoopConfiguration.get(key, "") + } + if (hadoopConfig.isEmpty) { + SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") + throw new IllegalArgumentException(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") + } + hadoopConfig } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 0e97c9e1e3..6bdc1874e1 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -8,6 +8,8 @@ import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity +import scala.util.{Success, Try} +import spray.json.DefaultJsonProtocol.{IntJsonFormat, StringJsonFormat} import spray.json.{JsObject, JsValue, _} object WebUtils { @@ -20,7 +22,9 @@ object WebUtils { request.setEntity(new StringEntity(body)) val response = RESTHelpers.safeSend(request, close = false) - parseResponse(response) + val parsedResponse = parseResponse(response) + response.close() + parsedResponse } def usageGet(url: String, headers: Map[String, String]): JsValue = { @@ -42,4 +46,17 @@ object WebUtils { JsObject() } } + + def requestGet(url: String, headers: Map[String, String], property: String): JsValue = { + val response: JsValue = usageGet(url, headers) + + val statusCode = Try(response.asJsObject.fields("status_code").convertTo[Int]) + val propertyValue = Try(response.asJsObject.fields(property).convertTo[String]) + + (statusCode, propertyValue) match { + case 
(Success(code), Success(value)) if code == 200 && !value.isEmpty => response.asJsObject.fields(property) + case _ => throw new Exception(s"CommonUtils.requestGet: Failed with " + + s"code=$statusCode. Property looked for was = $property") + } + } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala index af2f31b76b..3983fb3910 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala @@ -9,7 +9,7 @@ import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopC class SparkHadoopUtilsTests extends TestBase { test("Hadoop Configuration Check (capacity id, and workspace id)"){ sc.hadoopConfiguration.set("trident.capacity.id", "f32fae84-6ed0-4406-944c-01e26087aa9b") - val capacityId = getHadoopConfig("trident.capacity.id", sc) + val capacityId = getHadoopConfig("trident.capacity.id", Some(sc)) val splitCapacityId: Array[String] = capacityId.split("-") assert(splitCapacityId.length == 5) From 0f33d72219ae1766df779775bdbc1535a4486a67 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 25 Sep 2023 10:55:48 -0700 Subject: [PATCH 26/50] Fixing build error that was partly introduced from missing to remove uavailable reference and partly syncing to remote. 
--- .../azure/synapse/ml/logging/SynapseMLLogging.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 696f4a59ce..ab9172e2d7 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -4,14 +4,16 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo -import com.microsoft.azure.synapse.ml.logging.common.CommonUtils import com.microsoft.azure.synapse.ml.logging.common.SASScrubber import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload import com.microsoft.azure.synapse.ml.logging.Usage.UsageTelemetry.reportUsage import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import spray.json.DefaultJsonProtocol._ +import spray.json._ + import scala.collection.JavaConverters._ import scala.collection.mutable -import spray.json.{DefaultJsonProtocol, RootJsonFormat, NullOptions} case class RequiredLogFields(uid: String, className: String, @@ -154,7 +156,7 @@ trait SynapseMLLogging extends Logging { reportUsage(certifiedEventPayload) } val ret = f - logBase(verb, columns, Some((System.nanoTime() - startTime) / 1e9)) + logBase(verb, Some(columns), Some((System.nanoTime() - startTime) / 1e9)) ret } catch { case e: Exception => From d7a15c8ef20f5c82e041c3b71f808f47847d2c9e Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 25 Sep 2023 14:30:13 -0700 Subject: [PATCH 27/50] refactoring to change few object into traits, and addressing few PR comments --- .../synapse/ml/logging/SynapseMLLogging.scala | 39 +++++++++---------- .../Usage/FabricTokenServiceClient.scala | 18 ++++----- .../ml/logging/Usage/FabricUtils.scala | 17 ++++---- 
.../ml/logging/Usage/HostEndpointUtils.scala | 12 +++--- .../synapse/ml/logging/Usage/TokenUtils.scala | 4 +- .../ml/logging/Usage/UsageConstants.scala | 34 ++++++++-------- .../synapse/ml/logging/Usage/UsageUtils.scala | 14 +++---- .../ml/logging/common/SparkHadoopUtils.scala | 2 +- .../synapse/ml/logging/common/WebUtils.scala | 2 +- .../ml/logging/HostEndpointUtilsTests.scala | 7 ++-- .../ml/logging/SparkHadoopUtilsTests.scala | 4 +- 11 files changed, 72 insertions(+), 81 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index ab9172e2d7..6bf2ff94b6 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -112,16 +112,23 @@ trait SynapseMLLogging extends Logging { protected def logBase(methodName: String, numCols: Option[Int], - executionSeconds: Option[Double] + executionSeconds: Option[Double], + logCertifiedEvent: Boolean = false ): Unit = { logBase(getPayload( methodName, numCols, executionSeconds, - None)) + None), logCertifiedEvent) } - protected def logBase(info: Map[String, String]): Unit = { + protected def logBase(info: Map[String, String], logCertifiedEvent: Boolean): Unit = { + if (logCertifiedEvent) { + val certifiedEventPayload = new FeatureUsagePayload(info.get("libraryName").get, + info.get("method").get, info -- Seq("libraryName", "method")) + reportUsage(certifiedEventPayload) + } + logInfo(info.toJson.compactPrint) } @@ -132,32 +139,22 @@ trait SynapseMLLogging extends Logging { } def logClass(): Unit = { - logBase("constructor", None, None) + logBase("Constructor", None, None, true) } - def logFit[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true, - certifiedEventAttributes: Map[String, String] = Map()): T = { - logVerb("fit", f, columns, logCertifiedEvent, 
certifiedEventAttributes) + def logFit[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true): T = { + logVerb("Fit", f, columns, logCertifiedEvent) } - def logTransform[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true, - certifiedEventAttributes: Map[String, String] = Map()): T = { - logVerb("transform", f, columns, logCertifiedEvent, certifiedEventAttributes) + def logTransform[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true): T = { + logVerb("Transform", f, columns, logCertifiedEvent) } - def logVerb[T](verb: String, f: => T, columns: Int = -1, logCertifiedEvent: Boolean = true, - certifiedEventAttributes: Map[String, String] = Map()): T = { + def logVerb[T](verb: String, f: => T, columns: Int = -1, logCertifiedEvent: Boolean = false): T = { val startTime = System.nanoTime() try { // Begin emitting certified event. - if(logCertifiedEvent) - { - val certifiedEventPayload = new FeatureUsagePayload("SynapseML", - verb, certifiedEventAttributes) - reportUsage(certifiedEventPayload) - } - val ret = f - logBase(verb, Some(columns), Some((System.nanoTime() - startTime) / 1e9)) - ret + logBase(verb, Some(columns), Some((System.nanoTime() - startTime) / 1e9), logCertifiedEvent) + f } catch { case e: Exception => logErrorBase(verb, e) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala index c8bc6ccf73..c735d5bcd9 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala @@ -3,15 +3,13 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.requestGet +import 
com.microsoft.azure.synapse.ml.logging.common.WebUtils import java.lang.management.ManagementFactory import java.net.URL import java.net.InetAddress import java.util.UUID -import spray.json.{JsArray, JsObject, JsValue, _} -class FabricTokenServiceClient { +class FabricTokenServiceClient extends FabricConstants with WebUtils { private val resourceMapping = Map( "https://storage.azure.com" -> "storage", "storage" -> "storage", @@ -27,11 +25,11 @@ class FabricTokenServiceClient { private val processDetail = ManagementFactory.getRuntimeMXBean.getName private val processName = processDetail.substring(processDetail.indexOf('@') + 1) - private val fabricContext = FabricUtils.getFabricContext - private val synapseTokenServiceEndpoint: String = fabricContext(synapseTokenServiceEndpoint) - private val workloadEndpoint = fabricContext(TridentLakehouseTokenServiceEndpoint) - private val sessionToken = fabricContext(TridentSessionToken) - private val clusterIdentifier = fabricContext(SynapseClusterIdentifier) + private val fabricContext = FabricUtils.FabricContext + private val synapseTokServiceEndpoint: String = fabricContext(synapseTokenServiceEndpoint) + private val workloadEndpoint = fabricContext(tridentLakehouseTokenServiceEndpoint) + private val sessionToken = fabricContext(tridentSessionToken) + private val clusterIdentifier = fabricContext(synapseClusterIdentifier) def getAccessToken(resourceParam: String): String = { if (!resourceMapping.contains(resourceParam)) { @@ -49,7 +47,7 @@ class FabricTokenServiceClient { "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", "x-ms-client-request-id" -> rid ) - val url = s"$synapseTokenServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=${resource.get}" + val url = s"$synapseTokServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=${resource.get}" requestGet(url, headers, "content").toString().getBytes("UTF-8").toString } diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala index 620830d9b6..b040d78345 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala @@ -4,7 +4,6 @@ package com.microsoft.azure.synapse.ml.logging.Usage import com.microsoft.azure.synapse.ml.core.env.StreamUtilities -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import spray.json._ case class TokenServiceConfig(tokenServiceEndpoint: String, @@ -18,14 +17,14 @@ object TokenServiceConfigProtocol extends DefaultJsonProtocol { import TokenServiceConfigProtocol._ -object FabricUtils { - def getFabricContext: Map[String, String] = { - val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(FabricConstants.ContextFilePath)) { +object FabricUtils extends FabricConstants { + lazy val FabricContext: Map[String, String] = { + val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(contextFilePath)) { source => source.getLines().toList }.get val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile - (FabricConstants.TokenServiceFilePath)) { + (tokenServiceFilePath)) { source => source.mkString.parseJson.convertTo[TokenServiceConfig] }.get @@ -36,10 +35,10 @@ object FabricUtils { (k.trim, v.trim) }.toMap .++(Seq( - (FabricConstants.SynapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), - (FabricConstants.SynapseClusterType, tokenServiceConfig.clusterType), - (FabricConstants.SynapseClusterIdentifier, tokenServiceConfig.clusterName), - (FabricConstants.TridentSessionToken, tokenServiceConfig.sessionToken) + (synapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), + (synapseClusterType, tokenServiceConfig.clusterType), + (synapseClusterIdentifier, 
tokenServiceConfig.clusterName), + (tridentSessionToken, tokenServiceConfig.sessionToken) ).toMap) tridentContext diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala index ef130a5010..d23f139afb 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala @@ -3,12 +3,10 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{Capacities, WorkloadEndpointAutomatic, Workloads} -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WebApi, WorkloadEndpointMl, WorkspaceID} -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usageGet +import com.microsoft.azure.synapse.ml.logging.common.WebUtils import spray.json.DefaultJsonProtocol.StringJsonFormat -object HostEndpointUtils { +object HostEndpointUtils extends FabricConstants with WebUtils { def getMlflowSharedHost(pbienv: String): String = { val pbiGlobalServiceEndpoints = Map( "public" -> "https://api.powerbi.com/", @@ -54,9 +52,9 @@ object HostEndpointUtils { } } - def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceID: String): String = { - val mlWorkloadEndpoint = s"$wlHost/$WebApi/$Capacities/$capacityId/$Workloads/" + - s"$WorkloadEndpointMl/$endpoint/$WorkloadEndpointAutomatic/${WorkspaceID}/$workspaceID/" + def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceId: String): String = { + val mlWorkloadEndpoint = s"$wlHost/$webApi/$capacities/$capacityId/$workloads/" + + s"$workloadEndpointMl/$endpoint/$workloadEndpointAutomatic/${workspaceID}/$workspaceId/" mlWorkloadEndpoint } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index 68567e4525..cf4a07ee2b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -3,7 +3,7 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.common.WebUtils._ +import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import java.time.Instant import java.util.UUID @@ -14,7 +14,7 @@ import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} import spray.json.RootJsonFormat case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) -object TokenUtils { +object TokenUtils extends WebUtils { private var AADToken: Option[String] = None val MwcWorkloadTypeMl = "ML" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index c952385618..d004dd10f6 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -3,24 +3,24 @@ package com.microsoft.azure.synapse.ml.logging.Usage -object FabricConstants { - val EmitUsage = "EmitUsage" - val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" +trait FabricConstants { + val emitUsage = "EmitUsage" + val fabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" - val ContextFilePath = "/home/trusted-service-user/.trident-context" - val TokenServiceFilePath = "/opt/token-service/tokenservice.config.json" + val contextFilePath = "/home/trusted-service-user/.trident-context" + val tokenServiceFilePath = "/opt/token-service/tokenservice.config.json" - val SynapseTokenServiceEndpoint = 
"synapse.tokenServiceEndpoint" - val SynapseClusterIdentifier = "synapse.clusterIdentifier" - val SynapseClusterType = "synapse.clusterType" - val TridentLakehouseTokenServiceEndpoint = "trident.lakehouse.tokenservice.endpoint" - val TridentSessionToken = "trident.session.token" - val WebApi = "webapi" - val Capacities = "Capacities" - val Workloads = "workloads" - val WorkspaceID = "workspaceid" + val synapseTokenServiceEndpoint = "synapse.tokenServiceEndpoint" + val synapseClusterIdentifier = "synapse.clusterIdentifier" + val synapseClusterType = "synapse.clusterType" + val tridentLakehouseTokenServiceEndpoint = "trident.lakehouse.tokenservice.endpoint" + val tridentSessionToken = "trident.session.token" + val webApi = "webapi" + val capacities = "Capacities" + val workloads = "workloads" + val workspaceID = "workspaceid" - val WorkloadEndpointMl = "ML" - val WorkloadEndpointAutomatic = "Automatic" - val WorkloadEndpointAdmin = "MLAdmin" + val workloadEndpointMl = "ML" + val workloadEndpointAutomatic = "Automatic" + val workloadEndpointAdmin = "MLAdmin" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index 8a9a353d69..c69d0506e3 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -3,12 +3,12 @@ package com.microsoft.azure.synapse.ml.logging.Usage -import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopConfig -import com.microsoft.azure.synapse.ml.logging.common.WebUtils.usagePost +import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants._ import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ import 
com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken +import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils + import java.time.Instant import java.util.UUID import org.apache.spark.sql.SparkSession @@ -16,7 +16,7 @@ import spray.json._ import spray.json.DefaultJsonProtocol._ import spray.json.DefaultJsonProtocol.StringJsonFormat -object UsageTelemetry { +object UsageTelemetry extends FabricConstants with SparkHadoopUtils with WebUtils { private val SC = SparkSession.builder().getOrCreate().sparkContext private val CapacityId = getHadoopConfig("trident.capacity.id", Some(SC)) private val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id", Some(SC)) @@ -27,7 +27,7 @@ object UsageTelemetry { private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) def reportUsage(payload: FeatureUsagePayload): Unit = { - if (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") { + if (sys.env.getOrElse(emitUsage, "true").toLowerCase == "true") { try { reportUsageTelemetry(payload.feature_name, payload.activity_name, @@ -42,7 +42,7 @@ object UsageTelemetry { private def reportUsageTelemetry(featureName: String, activityName: String, attributes: Map[String,String] = Map()): Unit = { - if (sys.env.getOrElse(FabricFakeTelemetryReportCalls,"false").toLowerCase == "false") { + if (sys.env.getOrElse(fabricFakeTelemetryReportCalls,"false").toLowerCase == "false") { val attributesJson = attributes.toJson.compactPrint val data = s"""{ @@ -54,7 +54,7 @@ object UsageTelemetry { val mlAdminEndpoint = WlHost match { case Some(host) => - getMLWorkloadEndpoint(host, CapacityId, WorkloadEndpointAdmin, WorkspaceId) + getMLWorkloadEndpoint(host, CapacityId, workloadEndpointAdmin, WorkspaceId) case None => throw new IllegalArgumentException("Workload host name is missing.") } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala index b7c9e824ee..e278708ad7 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala @@ -7,7 +7,7 @@ import org.apache.spark.SparkContext import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import org.apache.spark.sql.SparkSession -object SparkHadoopUtils { +trait SparkHadoopUtils { def getHadoopConfig(key: String, sc: Option[SparkContext]): String = { lazy val hadoopConfig: String = sc match{ diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index 6bdc1874e1..cda9d17d7b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -12,7 +12,7 @@ import scala.util.{Success, Try} import spray.json.DefaultJsonProtocol.{IntJsonFormat, StringJsonFormat} import spray.json.{JsObject, JsValue, _} -object WebUtils { +trait WebUtils { def usagePost(url: String, body: String, headers: Map[String, String]): JsValue = { val request = new HttpPost(url) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala index cd69986d97..4b261088b6 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala @@ -4,10 +4,9 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants -class UsageUtilsTests extends TestBase { - - 
import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants.{WorkloadEndpointAdmin} +class UsageUtilsTests extends TestBase with FabricConstants { import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ val target = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net/webapi/Capacities/" + @@ -17,7 +16,7 @@ class UsageUtilsTests extends TestBase { val wlHost = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net" val workspaceId = "89b9b330-6eac-4ee1-b225-590dfd68e4be" test("ML Workload Endpoint Check"){ - val url = getMLWorkloadEndpoint(this.wlHost, this.capacityId, WorkloadEndpointAdmin, this.workspaceId) + val url = getMLWorkloadEndpoint(this.wlHost, this.capacityId, workloadEndpointAdmin, this.workspaceId) assert(url == target) } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala index 3983fb3910..dd5c867f56 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala @@ -4,9 +4,9 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils.getHadoopConfig +import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils -class SparkHadoopUtilsTests extends TestBase { +class SparkHadoopUtilsTests extends TestBase with SparkHadoopUtils { test("Hadoop Configuration Check (capacity id, and workspace id)"){ sc.hadoopConfiguration.set("trident.capacity.id", "f32fae84-6ed0-4406-944c-01e26087aa9b") val capacityId = getHadoopConfig("trident.capacity.id", Some(sc)) From 71abb8cb58f3688bd52887410791b316ff9b5151 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 25 Sep 2023 14:55:35 -0700 Subject: [PATCH 28/50] changing the case of 
certified event activity name --- .../azure/synapse/ml/logging/SynapseMLLogging.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 6bf2ff94b6..4a38049c27 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -139,15 +139,15 @@ trait SynapseMLLogging extends Logging { } def logClass(): Unit = { - logBase("Constructor", None, None, true) + logBase("constructor", None, None, true) } def logFit[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true): T = { - logVerb("Fit", f, columns, logCertifiedEvent) + logVerb("fit", f, columns, logCertifiedEvent) } def logTransform[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true): T = { - logVerb("Transform", f, columns, logCertifiedEvent) + logVerb("transform", f, columns, logCertifiedEvent) } def logVerb[T](verb: String, f: => T, columns: Int = -1, logCertifiedEvent: Boolean = false): T = { val startTime = System.nanoTime() From 14cf6647d7aaa54074d95614c5ffb9b23a8ed4d9 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 25 Sep 2023 15:42:00 -0700 Subject: [PATCH 29/50] Removing some class and associated test. Turning some variable lazy. 
--- .../synapse/ml/logging/Usage/UsageUtils.scala | 11 ++++----- .../ml/logging/common/SparkHadoopUtils.scala | 23 ------------------- .../ml/logging/SparkHadoopUtilsTests.scala | 22 ------------------ 3 files changed, 5 insertions(+), 51 deletions(-) delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala delete mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index c69d0506e3..af2a940146 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -7,7 +7,6 @@ import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken -import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils import java.time.Instant import java.util.UUID @@ -16,13 +15,13 @@ import spray.json._ import spray.json.DefaultJsonProtocol._ import spray.json.DefaultJsonProtocol.StringJsonFormat -object UsageTelemetry extends FabricConstants with SparkHadoopUtils with WebUtils { +object UsageTelemetry extends FabricConstants with WebUtils { private val SC = SparkSession.builder().getOrCreate().sparkContext - private val CapacityId = getHadoopConfig("trident.capacity.id", Some(SC)) - private val WorkspaceId: String = getHadoopConfig("trident.artifact.workspace.id", Some(SC)) - private val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() + private lazy val CapacityId = SC.hadoopConfiguration.get("trident.capacity.id") + private lazy val WorkspaceId: String = 
SC.hadoopConfiguration.get("trident.artifact.workspace.id") + private lazy val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() - private val SharedHost = getMlflowSharedHost(PbiEnv) + private lazy val SharedHost = getMlflowSharedHost(PbiEnv) private val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala deleted file mode 100644 index e278708ad7..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/SparkHadoopUtils.scala +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging.common - -import org.apache.spark.SparkContext -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import org.apache.spark.sql.SparkSession - -trait SparkHadoopUtils { - - def getHadoopConfig(key: String, sc: Option[SparkContext]): String = { - lazy val hadoopConfig: String = sc match{ - case Some(value) => value.hadoopConfiguration.get(key, "") - case None => SparkSession.builder().getOrCreate().sparkContext.hadoopConfiguration.get(key, "") - } - if (hadoopConfig.isEmpty) { - SynapseMLLogging.logMessage(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") - throw new IllegalArgumentException(s"UsageUtils.getHadoopConfig: Hadoop configuration $key is empty.") - } - hadoopConfig - } -} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala deleted file mode 100644 index dd5c867f56..0000000000 --- 
a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/SparkHadoopUtilsTests.scala +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging - -import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.common.SparkHadoopUtils - -class SparkHadoopUtilsTests extends TestBase with SparkHadoopUtils { - test("Hadoop Configuration Check (capacity id, and workspace id)"){ - sc.hadoopConfiguration.set("trident.capacity.id", "f32fae84-6ed0-4406-944c-01e26087aa9b") - val capacityId = getHadoopConfig("trident.capacity.id", Some(sc)) - val splitCapacityId: Array[String] = capacityId.split("-") - - assert(splitCapacityId.length == 5) - assert(splitCapacityId(0).length == 8) - assert(splitCapacityId(1).length == 4) - assert(splitCapacityId(2).length == 4) - assert(splitCapacityId(3).length == 4) - assert(splitCapacityId(4).length == 12) - } -} From bd3508751f038326c88e3f127efbb23d92042b50 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Thu, 28 Sep 2023 14:59:22 -0700 Subject: [PATCH 30/50] Changing some parameter to Option and then removing unused imports --- .../ml/logging/Usage/FabricTokenParser.scala | 31 +++++++++++-------- .../synapse/ml/logging/Usage/UsageUtils.scala | 1 - .../synapse/ml/logging/common/WebUtils.scala | 1 - 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala index fd1e17adfa..c9d949e13e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala @@ -9,7 +9,7 @@ class InvalidJwtTokenException(message: String) 
extends Exception(message) class JwtTokenExpiryMissingException(message: String) extends Exception(message) class FabricTokenParser(JWToken: String) { private val tokens: Array[String] = JWToken.split("\\.") - private val parsedToken: JsValue = tokenCheckAndDecode(tokens) + private val parsedToken: JsValue = tokenCheckAndDecode(Some(tokens)) def getExpiry: Long ={ val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } exp match { @@ -21,18 +21,23 @@ class FabricTokenParser(JWToken: String) { } } - private def tokenCheckAndDecode(tokens: Array[String]): JsValue ={ - if (tokens.length == 3) { - // Getting the JWT payload which is second member of [header].[payload].[signature] - val payload = tokens(1) - // Removing whitespace and url safe characters encoded that might have been added to token - val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") - val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) - val decodedJson = new String(decodedPayload) - decodedJson.parseJson - } - else { - throw new InvalidJwtTokenException(s"Invalid JWT token. Here is the token = {$JWToken}") + private def tokenCheckAndDecode(tokens: Option[Array[String]]): JsValue ={ + tokens match { + case Some(tokens) => + if (tokens.length == 3) { + // Getting the JWT payload which is second member of [header].[payload].[signature] + val payload = tokens(1) + // Removing whitespace and url safe characters encoded that might have been added to token + val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") + val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) + val decodedJson = new String(decodedPayload) + decodedJson.parseJson + } + else { + throw new InvalidJwtTokenException(s"Invalid JWT token. 
Here is the token = {$JWToken}") + } + case None => + throw new NullPointerException("Invalid JWT token used for reporting usage data.") } } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala index af2a940146..8491fc79cd 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala @@ -22,7 +22,6 @@ object UsageTelemetry extends FabricConstants with WebUtils { private lazy val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() private lazy val SharedHost = getMlflowSharedHost(PbiEnv) - private val SharedEndpoint = f"{SharedHost}/metadata/workspaces/{WorkspaceId}/artifacts" private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) def reportUsage(payload: FeatureUsagePayload): Unit = { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala index cda9d17d7b..6377193b47 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala @@ -4,7 +4,6 @@ package com.microsoft.azure.synapse.ml.logging.common import com.microsoft.azure.synapse.ml.io.http.RESTHelpers -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity From 98771b77f01049c7cd0a2f50ffd7727d2385d652 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Mon, 2 Oct 2023 14:22:01 -0700 Subject: [PATCH 31/50] 1) Removing token extraction via fabric token service. 
--- .../Usage/FabricTokenServiceClient.scala | 54 ------------------- .../ml/logging/Usage/FabricUtils.scala | 46 ---------------- .../synapse/ml/logging/Usage/TokenUtils.scala | 7 +-- .../ml/logging/Usage/UsageConstants.scala | 7 --- 4 files changed, 1 insertion(+), 113 deletions(-) delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala deleted file mode 100644 index c735d5bcd9..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenServiceClient.scala +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging.Usage - -import com.microsoft.azure.synapse.ml.logging.common.WebUtils -import java.lang.management.ManagementFactory -import java.net.URL -import java.net.InetAddress -import java.util.UUID - -class FabricTokenServiceClient extends FabricConstants with WebUtils { - private val resourceMapping = Map( - "https://storage.azure.com" -> "storage", - "storage" -> "storage", - "https://analysis.windows.net/powerbi/api" -> "pbi", - "pbi" -> "pbi", - "https://vault.azure.net" -> "keyvault", - "keyvault" -> "keyvault", - "https://kusto.kusto.windows.net" -> "kusto", - "kusto" -> "kusto" - ) - - private val hostname = InetAddress.getLocalHost.getHostName - private val processDetail = ManagementFactory.getRuntimeMXBean.getName - private val processName = processDetail.substring(processDetail.indexOf('@') + 1) - - private val fabricContext = FabricUtils.FabricContext - private val synapseTokServiceEndpoint: String = fabricContext(synapseTokenServiceEndpoint) - private val workloadEndpoint = fabricContext(tridentLakehouseTokenServiceEndpoint) - private val sessionToken = fabricContext(tridentSessionToken) - private val clusterIdentifier = fabricContext(synapseClusterIdentifier) - - def getAccessToken(resourceParam: String): String = { - if (!resourceMapping.contains(resourceParam)) { - throw new IllegalArgumentException(s"$resourceParam not supported") - } - val resource: Option[String] = resourceMapping.get(resourceParam) - val rid = UUID.randomUUID().toString() - val targetUrl = new URL(workloadEndpoint) - val headers: Map[String, String] = Map( - "x-ms-cluster-identifier" -> clusterIdentifier, - "x-ms-workload-resource-moniker" -> clusterIdentifier, - "Content-Type" -> "application/json;charset=utf-8", - "x-ms-proxy-host" -> s"${targetUrl.getProtocol}://${targetUrl.getHost}", - "x-ms-partner-token" -> sessionToken, - "User-Agent" -> s"Trident Token Library - HostName:$hostname, ProcessName:$processName", - 
"x-ms-client-request-id" -> rid - ) - val url = s"$synapseTokServiceEndpoint/api/v1/proxy${targetUrl.getPath}/access?resource=${resource.get}" - - requestGet(url, headers, "content").toString().getBytes("UTF-8").toString - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala deleted file mode 100644 index b040d78345..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricUtils.scala +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging.Usage - -import com.microsoft.azure.synapse.ml.core.env.StreamUtilities -import spray.json._ - -case class TokenServiceConfig(tokenServiceEndpoint: String, - clusterType: String, - clusterName: String, - sessionToken: String) - -object TokenServiceConfigProtocol extends DefaultJsonProtocol { - implicit val TokenServiceConfigFormat: RootJsonFormat[TokenServiceConfig] = jsonFormat4(TokenServiceConfig) -} - -import TokenServiceConfigProtocol._ - -object FabricUtils extends FabricConstants { - lazy val FabricContext: Map[String, String] = { - val linesContextFile = StreamUtilities.usingSource(scala.io.Source.fromFile(contextFilePath)) { - source => source.getLines().toList - }.get - - val tokenServiceConfig = StreamUtilities.usingSource(scala.io.Source.fromFile - (tokenServiceFilePath)) { - source => source.mkString.parseJson.convertTo[TokenServiceConfig] - }.get - - val tridentContext: Map[String, String] = linesContextFile - .filter(line => line.split('=').length == 2) - .map { line => - val Array(k, v) = line.split('=') - (k.trim, v.trim) - }.toMap - .++(Seq( - (synapseTokenServiceEndpoint, tokenServiceConfig.tokenServiceEndpoint), - (synapseClusterType, tokenServiceConfig.clusterType), - 
(synapseClusterIdentifier, tokenServiceConfig.clusterName), - (tridentSessionToken, tokenServiceConfig.sessionToken) - ).toMap) - - tridentContext - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala index cf4a07ee2b..4e7c7da058 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala @@ -7,7 +7,6 @@ import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import java.time.Instant import java.util.UUID -import org.apache.spark.SparkContext import scala.reflect.runtime.currentMirror import scala.reflect.runtime.universe._ import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} @@ -73,11 +72,7 @@ object TokenUtils extends WebUtils { } private def refreshAccessToken(): Unit = { - if (SparkContext.getOrCreate() != null) { - AADToken = Some(getAccessToken("pbi")) - } else { - AADToken = Some(new FabricTokenServiceClient().getAccessToken("pbi")) - } + AADToken = Some(getAccessToken("pbi")) } def getMwcToken(shared_host: String, WorkspaceId: String, capacity_id: String, diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala index d004dd10f6..00bb3e0671 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala @@ -8,13 +8,6 @@ trait FabricConstants { val fabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" val contextFilePath = "/home/trusted-service-user/.trident-context" - val tokenServiceFilePath = "/opt/token-service/tokenservice.config.json" - - val 
synapseTokenServiceEndpoint = "synapse.tokenServiceEndpoint" - val synapseClusterIdentifier = "synapse.clusterIdentifier" - val synapseClusterType = "synapse.clusterType" - val tridentLakehouseTokenServiceEndpoint = "trident.lakehouse.tokenservice.endpoint" - val tridentSessionToken = "trident.session.token" val webApi = "webapi" val capacities = "Capacities" val workloads = "workloads" From 5e6ec48483d0e7e6adf01edd08cdf2180823a8a2 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Tue, 3 Oct 2023 14:32:21 -0400 Subject: [PATCH 32/50] some cleanup --- .../synapse/ml/logging/SynapseMLLogging.scala | 12 +++++---- .../logging/Usage/FeatureUsagePayload.scala | 8 ------ .../{Usage => fabric}/FabricTokenParser.scala | 9 ++++--- .../{Usage => fabric}/HostEndpointUtils.scala | 5 ++-- .../{Usage => fabric}/TokenUtils.scala | 25 +++++++++++-------- .../{Usage => fabric}/UsageConstants.scala | 2 +- .../{Usage => fabric}/UsageUtils.scala | 25 +++++++++++-------- .../logging/{common => fabric}/WebUtils.scala | 20 +++------------ .../ml/logging/FabricTokenParserTests.scala | 2 +- .../ml/logging/HostEndpointUtilsTests.scala | 4 +-- 10 files changed, 50 insertions(+), 62 deletions(-) delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{Usage => fabric}/FabricTokenParser.scala (94%) rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{Usage => fabric}/HostEndpointUtils.scala (93%) rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{Usage => fabric}/TokenUtils.scala (84%) rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{Usage => fabric}/UsageConstants.scala (91%) rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{Usage => fabric}/UsageUtils.scala (75%) rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/{common => fabric}/WebUtils.scala (60%) diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 4a38049c27..69416e1e90 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -5,8 +5,7 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo import com.microsoft.azure.synapse.ml.logging.common.SASScrubber -import com.microsoft.azure.synapse.ml.logging.Usage.FeatureUsagePayload -import com.microsoft.azure.synapse.ml.logging.Usage.UsageTelemetry.reportUsage +import com.microsoft.azure.synapse.ml.logging.fabric.UsageTelemetry.reportUsage import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import spray.json.DefaultJsonProtocol._ @@ -124,9 +123,11 @@ trait SynapseMLLogging extends Logging { protected def logBase(info: Map[String, String], logCertifiedEvent: Boolean): Unit = { if (logCertifiedEvent) { - val certifiedEventPayload = new FeatureUsagePayload(info.get("libraryName").get, - info.get("method").get, info -- Seq("libraryName", "method")) - reportUsage(certifiedEventPayload) + reportUsage( + info("libraryName"), + info("method"), + info -- Seq("libraryName", "method") + ) } logInfo(info.toJson.compactPrint) @@ -149,6 +150,7 @@ trait SynapseMLLogging extends Logging { def logTransform[T](f: => T, columns: Int, logCertifiedEvent: Boolean = true): T = { logVerb("transform", f, columns, logCertifiedEvent) } + def logVerb[T](verb: String, f: => T, columns: Int = -1, logCertifiedEvent: Boolean = false): T = { val startTime = System.nanoTime() try { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala deleted file mode 100644 index 
e04bdcf705..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FeatureUsagePayload.scala +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging.Usage - -case class FeatureUsagePayload(feature_name: String, - activity_name: String, - attributes: Map[String, String]) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala similarity index 94% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala index c9d949e13e..f4a5a788bc 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/FabricTokenParser.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala @@ -1,16 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.azure.synapse.ml.logging.Usage +package com.microsoft.azure.synapse.ml.logging.fabric import spray.json._ class InvalidJwtTokenException(message: String) extends Exception(message) + class JwtTokenExpiryMissingException(message: String) extends Exception(message) + class FabricTokenParser(JWToken: String) { private val tokens: Array[String] = JWToken.split("\\.") private val parsedToken: JsValue = tokenCheckAndDecode(Some(tokens)) - def getExpiry: Long ={ + + def getExpiry: Long = { val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } exp match { case Some(expValue) => @@ -21,7 +24,7 @@ class FabricTokenParser(JWToken: String) { } } - private def tokenCheckAndDecode(tokens: Option[Array[String]]): JsValue ={ + private def tokenCheckAndDecode(tokens: Option[Array[String]]): JsValue = { tokens match { case Some(tokens) => if (tokens.length == 3) { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala similarity index 93% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala index d23f139afb..7c77dd6c42 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala @@ -1,9 +1,8 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.azure.synapse.ml.logging.Usage +package com.microsoft.azure.synapse.ml.logging.fabric -import com.microsoft.azure.synapse.ml.logging.common.WebUtils import spray.json.DefaultJsonProtocol.StringJsonFormat object HostEndpointUtils extends FabricConstants with WebUtils { @@ -52,7 +51,7 @@ object HostEndpointUtils extends FabricConstants with WebUtils { } } - def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceId: String): String = { + def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceId: String): String = { val mlWorkloadEndpoint = s"$wlHost/$webApi/$capacities/$capacityId/$workloads/" + s"$workloadEndpointMl/$endpoint/$workloadEndpointAutomatic/${workspaceID}/$workspaceId/" mlWorkloadEndpoint diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala similarity index 84% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala index 4e7c7da058..3c7efd7195 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala @@ -1,9 +1,8 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.azure.synapse.ml.logging.Usage +package com.microsoft.azure.synapse.ml.logging.fabric -import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging import java.time.Instant import java.util.UUID @@ -12,7 +11,8 @@ import scala.reflect.runtime.universe._ import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} import spray.json.RootJsonFormat -case class MwcToken (TargetUriHost: String, CapacityObjectId: String, Token: String) +case class MwcToken(TargetUriHost: String, CapacityObjectId: String, Token: String) + object TokenUtils extends WebUtils { private var AADToken: Option[String] = None val MwcWorkloadTypeMl = "ML" @@ -75,14 +75,17 @@ object TokenUtils extends WebUtils { AADToken = Some(getAccessToken("pbi")) } - def getMwcToken(shared_host: String, WorkspaceId: String, capacity_id: String, - workload_type: String): Option[MwcToken]= { - val url: String = shared_host + "/metadata/v201606/generatemwctokenv2" - - val payLoad = s"""{ - |"capacityObjectId": "$capacity_id", - |"workspaceObjectId": "$WorkspaceId", - |"workloadType": "$workload_type" + def getMwcToken(sharedHost: String, + workspaceId: String, + capacityId: String, + workloadType: String): Option[MwcToken] = { + val url: String = sharedHost + "/metadata/v201606/generatemwctokenv2" + + val payLoad = + s"""{ + |"capacityObjectId": "$capacityId", + |"workspaceObjectId": "$workspaceId", + |"workloadType": "$workloadType" }""".stripMargin val driverAADToken = getAccessToken diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala similarity index 91% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala index 00bb3e0671..1b350fac95 100644 --- 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.azure.synapse.ml.logging.Usage +package com.microsoft.azure.synapse.ml.logging.fabric trait FabricConstants { val emitUsage = "EmitUsage" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala similarity index 75% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala index 8491fc79cd..7105e67d23 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/Usage/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala @@ -1,12 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.azure.synapse.ml.logging.Usage +package com.microsoft.azure.synapse.ml.logging.fabric -import com.microsoft.azure.synapse.ml.logging.common.WebUtils import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ -import com.microsoft.azure.synapse.ml.logging.Usage.TokenUtils.getAccessToken +import com.microsoft.azure.synapse.ml.logging.fabric.HostEndpointUtils._ +import com.microsoft.azure.synapse.ml.logging.fabric.TokenUtils.getAccessToken import java.time.Instant import java.util.UUID @@ -24,12 +23,15 @@ object UsageTelemetry extends FabricConstants with WebUtils { private lazy val SharedHost = getMlflowSharedHost(PbiEnv) private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) - def reportUsage(payload: FeatureUsagePayload): Unit = { + def reportUsage(featureName: String, + activityName: String, + attributes: Map[String, String]): Unit = { if (sys.env.getOrElse(emitUsage, "true").toLowerCase == "true") { try { - reportUsageTelemetry(payload.feature_name, - payload.activity_name, - payload.attributes) + reportUsageTelemetry( + featureName, + activityName, + attributes) } catch { case runtimeError: Exception => SynapseMLLogging.logMessage(s"UsageTelemetry::reportUsage: Hit issue emitting usage telemetry." 
+ @@ -38,9 +40,10 @@ object UsageTelemetry extends FabricConstants with WebUtils { } } - private def reportUsageTelemetry(featureName: String, activityName: String, - attributes: Map[String,String] = Map()): Unit = { - if (sys.env.getOrElse(fabricFakeTelemetryReportCalls,"false").toLowerCase == "false") { + private def reportUsageTelemetry(featureName: String, + activityName: String, + attributes: Map[String, String]): Unit = { + if (sys.env.getOrElse(fabricFakeTelemetryReportCalls, "false").toLowerCase == "false") { val attributesJson = attributes.toJson.compactPrint val data = s"""{ diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala similarity index 60% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala index 6377193b47..a6cc2b46a0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala @@ -1,14 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.azure.synapse.ml.logging.common +package com.microsoft.azure.synapse.ml.logging.fabric import com.microsoft.azure.synapse.ml.io.http.RESTHelpers import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity -import scala.util.{Success, Try} -import spray.json.DefaultJsonProtocol.{IntJsonFormat, StringJsonFormat} import spray.json.{JsObject, JsValue, _} trait WebUtils { @@ -16,7 +14,7 @@ trait WebUtils { val request = new HttpPost(url) for ((k, v) <- headers) - request.addHeader(k, v) + request.addHeader(k, v) request.setEntity(new StringEntity(body)) @@ -29,7 +27,7 @@ trait WebUtils { def usageGet(url: String, headers: Map[String, String]): JsValue = { val request = new HttpGet(url) for ((k, v) <- headers) - request.addHeader(k, v) + request.addHeader(k, v) val response = RESTHelpers.safeSend(request, close = false) val result = parseResponse(response) @@ -46,16 +44,4 @@ trait WebUtils { } } - def requestGet(url: String, headers: Map[String, String], property: String): JsValue = { - val response: JsValue = usageGet(url, headers) - - val statusCode = Try(response.asJsObject.fields("status_code").convertTo[Int]) - val propertyValue = Try(response.asJsObject.fields(property).convertTo[String]) - - (statusCode, propertyValue) match { - case (Success(code), Success(value)) if code == 200 && !value.isEmpty => response.asJsObject.fields(property) - case _ => throw new Exception(s"CommonUtils.requestGet: Failed with " + - s"code=$statusCode. 
Property looked for was = $property") - } - } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala index 6b5200c306..3d2d5f3030 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala @@ -5,7 +5,7 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.core.env.StreamUtilities import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.Usage.{FabricTokenParser, InvalidJwtTokenException} +import com.microsoft.azure.synapse.ml.logging.fabric.{FabricTokenParser, InvalidJwtTokenException} import scala.io.Source import spray.json._ diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala index 4b261088b6..7cfa5e299b 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala @@ -4,10 +4,10 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.Usage.FabricConstants +import com.microsoft.azure.synapse.ml.logging.fabric.FabricConstants class UsageUtilsTests extends TestBase with FabricConstants { - import com.microsoft.azure.synapse.ml.logging.Usage.HostEndpointUtils._ + import com.microsoft.azure.synapse.ml.logging.fabric.HostEndpointUtils._ val target = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net/webapi/Capacities/" + "c528701c-8f94-42c0-b65a-1660171c306c/workloads/ML/MLAdmin/Automatic/" + From 
5b66b5853525c1886225a90cf00b2ba234b627f0 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Tue, 3 Oct 2023 13:20:17 -0700 Subject: [PATCH 33/50] Removing token caching. --- .../ml/logging/fabric/HostEndpointUtils.scala | 2 +- .../ml/logging/fabric/TokenUtils.scala | 40 +------------------ .../ml/logging/fabric/UsageUtils.scala | 2 +- 3 files changed, 3 insertions(+), 41 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala index 7c77dd6c42..e6f857800a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala @@ -26,7 +26,7 @@ object HostEndpointUtils extends FabricConstants with WebUtils { val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + fetchClusterDetailUri val headers = Map( - "Authorization" -> s"Bearer ${TokenUtils.getAccessToken}", + "Authorization" -> s"Bearer ${TokenUtils.getAccessToken("pbi")}", "RequestId" -> java.util.UUID.randomUUID().toString ) usageGet(url, headers).asJsObject.fields("clusterUrl").convertTo[String] diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala index 3c7efd7195..189526636c 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala @@ -3,8 +3,6 @@ package com.microsoft.azure.synapse.ml.logging.fabric -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import java.time.Instant import java.util.UUID import scala.reflect.runtime.currentMirror import scala.reflect.runtime.universe._ @@ -17,15 +15,6 @@ object TokenUtils extends WebUtils { 
private var AADToken: Option[String] = None val MwcWorkloadTypeMl = "ML" - def getAccessToken: String = { - if (isTokenValid(AADToken)) - this.AADToken.get - else { - refreshAccessToken() - this.AADToken.get - } - } - def getAccessToken(tokenType: String): String = { val objectName = "com.microsoft.azure.trident.tokenlibrary.TokenLibrary" @@ -48,33 +37,6 @@ object TokenUtils extends WebUtils { methodMirror(tokenType).asInstanceOf[String] } - private def isTokenValid(tokenOption: Option[String]): Boolean = { - tokenOption match { - case Some(token) if token.nonEmpty => - try { - val tokenParser = new FabricTokenParser(token) - val expiryEpoch = tokenParser.getExpiry - val now = Instant.now().getEpochSecond - now < expiryEpoch - 60 - } catch { - case e: InvalidJwtTokenException => - SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token used to trigger telemetry " + - s"endpoint is invalid. Exception = $e") - false - case e: JwtTokenExpiryMissingException => - SynapseMLLogging.logMessage(s"TokenUtils::checkTokenValid: Token misses expiry. 
" + - s"Exception = $e") - false - } - case _ => - false // No value is present or the value is empty - } - } - - private def refreshAccessToken(): Unit = { - AADToken = Some(getAccessToken("pbi")) - } - def getMwcToken(sharedHost: String, workspaceId: String, capacityId: String, @@ -88,7 +50,7 @@ object TokenUtils extends WebUtils { |"workloadType": "$workloadType" }""".stripMargin - val driverAADToken = getAccessToken + val driverAADToken = getAccessToken("pbi") val headers = Map( "Content-Type" -> "application/json", diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala index 7105e67d23..293dbc1e87 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala @@ -65,7 +65,7 @@ object UsageTelemetry extends FabricConstants with WebUtils { val headers = Map( "Content-Type" -> "application/json", - "Authorization" -> s"""Bearer ${getAccessToken}""".stripMargin, + "Authorization" -> s"""Bearer ${getAccessToken("pbi")}""".stripMargin, "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) usagePost(url, data, headers) From 825c796921e5fd5fd12174715d5a904f45efb916 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 5 Oct 2023 14:07:43 -0400 Subject: [PATCH 34/50] neaten PR --- .../synapse/ml/logging/SynapseMLLogging.scala | 2 +- .../logging/fabric/CertifiedEventClient.scala | 113 ++++++++++++++++++ .../ml/logging/fabric/FabricTokenParser.scala | 46 ------- .../ml/logging/fabric/HostEndpointUtils.scala | 59 --------- .../{WebUtils.scala => RESTUtils.scala} | 2 +- .../ml/logging/fabric/TokenUtils.scala | 68 ----------- .../ml/logging/fabric/UsageConstants.scala | 19 --- .../ml/logging/fabric/UsageUtils.scala | 74 ------------ .../ml/logging/FabricTokenParserTests.scala | 57 --------- 
.../ml/logging/HostEndpointUtilsTests.scala | 22 ---- 10 files changed, 115 insertions(+), 347 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala rename core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/{WebUtils.scala => RESTUtils.scala} (98%) delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala delete mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala delete mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala delete mode 100644 core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 69416e1e90..3d77d46c79 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -5,7 +5,7 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo import com.microsoft.azure.synapse.ml.logging.common.SASScrubber -import com.microsoft.azure.synapse.ml.logging.fabric.UsageTelemetry.reportUsage +import com.microsoft.azure.synapse.ml.logging.fabric.CertifiedEventClient.reportUsage import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import spray.json.DefaultJsonProtocol._ diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala new file mode 100644 index 0000000000..42ca5d6f4b --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -0,0 +1,113 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.logging.fabric + +import org.apache.spark.sql.SparkSession +import spray.json.DefaultJsonProtocol.{StringJsonFormat, _} +import spray.json._ + +import java.time.Instant +import java.util.UUID +import scala.reflect.runtime.currentMirror +import scala.reflect.runtime.universe._ + +object CertifiedEventClient extends RESTUtils { + + private val EmitUsage = "EmitUsage" + + private val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" + + private val PbiGlobalServiceEndpoints = Map( + "public" -> "https://api.powerbi.com/", + "fairfax" -> "https://api.powerbigov.us", + "mooncake" -> "https://api.powerbi.cn", + "blackforest" -> "https://app.powerbi.de", + "msit" -> "https://api.powerbi.com/", + "prod" -> "https://api.powerbi.com/", + "int3" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dxt" -> "https://powerbistagingapi.analysis.windows.net/", + "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", + "dev" -> "https://onebox-redirect.analysis.windows-int.net/", + "console" -> "http://localhost:5001/", + "daily" -> "https://dailyapi.powerbi.com/") + + + private lazy val CertifiedEventUri = getCertifiedEventUri + + private def getAccessToken: String = { + val objectName = "com.microsoft.azure.trident.tokenlibrary.TokenLibrary" + val mirror = currentMirror + val module = mirror.staticModule(objectName) + val obj = mirror.reflectModule(module).instance + val objType = 
mirror.reflect(obj).symbol.toType + val methodName = "getAccessToken" + val methodSymbols = objType.decl(TermName(methodName)).asTerm.alternatives + val argType = typeOf[String] + val selectedMethodSymbol = methodSymbols.find { m => + m.asMethod.paramLists match { + case List(List(param)) => param.typeSignature =:= argType + case _ => false + } + }.getOrElse(throw new NoSuchMethodException(s"Method $methodName with argument type $argType not found")) + val methodMirror = mirror.reflect(obj).reflectMethod(selectedMethodSymbol.asMethod) + methodMirror("pbi").asInstanceOf[String] + } + + private def getHeaders: Map[String, String] = { + Map( + "Authorization" -> s"""Bearer $getAccessToken""".stripMargin, + "RequestId" -> java.util.UUID.randomUUID().toString, + "Content-Type" -> "application/json", + "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString + ) + } + + private def getCertifiedEventUri: String = { + val sc = SparkSession.builder().getOrCreate().sparkContext + val workspaceId = sc.hadoopConfiguration.get("trident.artifact.workspace.id") + val capacityId = sc.hadoopConfiguration.get("trident.capacity.id") + val pbiEnv = sc.getConf.get("spark.trident.pbienv").toLowerCase() + + val clusterDetailUrl = s"${PbiGlobalServiceEndpoints(pbiEnv)}powerbi/globalservice/v201606/clusterDetails" + val headers = getHeaders + + val clusterUrl = usageGet(clusterDetailUrl, headers) + .asJsObject.fields("clusterUrl").convertTo[String] + val tokenUrl: String = s"$clusterUrl/metadata/v201606/generatemwctokenv2" + + val payload = + s"""{ + |"capacityObjectId": "$capacityId", + |"workspaceObjectId": "$workspaceId", + |"workloadType": "ML" + |}""".stripMargin + + + val host = usagePost(tokenUrl, payload, headers) + .asJsObject.fields("TargetUriHost").convertTo[String] + + s"https://$host/webapi/Capacities/$capacityId/workloads/ML/MLAdmin/Automatic/workspaceid/$workspaceId/telemetry" + } + + + private[ml] def reportUsage(featureName: String, + activityName: String, + 
attributes: Map[String, String]): Unit = { + + val shouldReport = (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") && + (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") + + if (shouldReport) { + val payload = + s"""{ + |"timestamp":${Instant.now().getEpochSecond}, + |"feature_name":"$featureName", + |"activity_name":"$activityName", + |"attributes":${attributes.toJson.compactPrint} + |}""".stripMargin + + usagePost(CertifiedEventUri, payload, getHeaders) + } + } +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala deleted file mode 100644 index f4a5a788bc..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/FabricTokenParser.scala +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging.fabric - -import spray.json._ - -class InvalidJwtTokenException(message: String) extends Exception(message) - -class JwtTokenExpiryMissingException(message: String) extends Exception(message) - -class FabricTokenParser(JWToken: String) { - private val tokens: Array[String] = JWToken.split("\\.") - private val parsedToken: JsValue = tokenCheckAndDecode(Some(tokens)) - - def getExpiry: Long = { - val exp: Option[Long] = parsedToken.asJsObject.fields.get("exp").collect { case JsNumber(value) => value.toLong } - exp match { - case Some(expValue) => - expValue - case None => - throw new JwtTokenExpiryMissingException(s"JWT token does not have expiration set. 
" + - s"Here is the token = {$JWToken}") - } - } - - private def tokenCheckAndDecode(tokens: Option[Array[String]]): JsValue = { - tokens match { - case Some(tokens) => - if (tokens.length == 3) { - // Getting the JWT payload which is second member of [header].[payload].[signature] - val payload = tokens(1) - // Removing whitespace and url safe characters encoded that might have been added to token - val sanitizedPayload = payload.replace('-', '+').replace('_', '/').replaceAll("\\.", "").replaceAll("\\s", "") - val decodedPayload = java.util.Base64.getDecoder.decode(sanitizedPayload) - val decodedJson = new String(decodedPayload) - decodedJson.parseJson - } - else { - throw new InvalidJwtTokenException(s"Invalid JWT token. Here is the token = {$JWToken}") - } - case None => - throw new NullPointerException("Invalid JWT token used for reporting usage data.") - } - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala deleted file mode 100644 index e6f857800a..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/HostEndpointUtils.scala +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging.fabric - -import spray.json.DefaultJsonProtocol.StringJsonFormat - -object HostEndpointUtils extends FabricConstants with WebUtils { - def getMlflowSharedHost(pbienv: String): String = { - val pbiGlobalServiceEndpoints = Map( - "public" -> "https://api.powerbi.com/", - "fairfax" -> "https://api.powerbigov.us", - "mooncake" -> "https://api.powerbi.cn", - "blackforest" -> "https://app.powerbi.de", - "msit" -> "https://api.powerbi.com/", - "prod" -> "https://api.powerbi.com/", - "int3" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dxt" -> "https://powerbistagingapi.analysis.windows.net/", - "edog" -> "https://biazure-int-edog-redirect.analysis-df.windows.net/", - "dev" -> "https://onebox-redirect.analysis.windows-int.net/", - "console" -> "http://localhost:5001/", - "daily" -> "https://dailyapi.powerbi.com/") - - val defaultGlobalServiceEndpoint: String = "https://api.powerbi.com/" - val fetchClusterDetailUri: String = "powerbi/globalservice/v201606/clusterDetails" - - val url = pbiGlobalServiceEndpoints.getOrElse(pbienv, defaultGlobalServiceEndpoint) + fetchClusterDetailUri - val headers = Map( - "Authorization" -> s"Bearer ${TokenUtils.getAccessToken("pbi")}", - "RequestId" -> java.util.UUID.randomUUID().toString - ) - usageGet(url, headers).asJsObject.fields("clusterUrl").convertTo[String] - } - - def getMlflowWorkloadHost(pbienv: String, capacityId: String, workspaceId: String, - sharedHost: Option[String] = None): Option[String] = { - val clusterUrl = sharedHost match { - case Some(value) => - value - case None => - getMlflowSharedHost(pbienv) - } - - val mwcToken: Option[MwcToken] = TokenUtils.getMwcToken(clusterUrl, - workspaceId, capacityId, TokenUtils.MwcWorkloadTypeMl) - mwcToken match { - case Some(token) => - Some(token.TargetUriHost) - case None => - None - } - } - - def getMLWorkloadEndpoint(wlHost: String, capacityId: String, endpoint: String, workspaceId: String): String = { - val 
mlWorkloadEndpoint = s"$wlHost/$webApi/$capacities/$capacityId/$workloads/" + - s"$workloadEndpointMl/$endpoint/$workloadEndpointAutomatic/${workspaceID}/$workspaceId/" - mlWorkloadEndpoint - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/RESTUtils.scala similarity index 98% rename from core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/RESTUtils.scala index a6cc2b46a0..12121f321c 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/WebUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/RESTUtils.scala @@ -9,7 +9,7 @@ import org.apache.http.client.methods.{CloseableHttpResponse, HttpGet, HttpPost} import org.apache.http.entity.StringEntity import spray.json.{JsObject, JsValue, _} -trait WebUtils { +trait RESTUtils { def usagePost(url: String, body: String, headers: Map[String, String]): JsValue = { val request = new HttpPost(url) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala deleted file mode 100644 index 189526636c..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/TokenUtils.scala +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging.fabric - -import java.util.UUID -import scala.reflect.runtime.currentMirror -import scala.reflect.runtime.universe._ -import spray.json.DefaultJsonProtocol.{StringJsonFormat, jsonFormat3} -import spray.json.RootJsonFormat - -case class MwcToken(TargetUriHost: String, CapacityObjectId: String, Token: String) - -object TokenUtils extends WebUtils { - private var AADToken: Option[String] = None - val MwcWorkloadTypeMl = "ML" - - def getAccessToken(tokenType: String): String = { - - val objectName = "com.microsoft.azure.trident.tokenlibrary.TokenLibrary" - val mirror = currentMirror - val module = mirror.staticModule(objectName) - val obj = mirror.reflectModule(module).instance - val objType = mirror.reflect(obj).symbol.toType - val methodName = "getAccessToken" - val methodSymbols = objType.decl(TermName(methodName)).asTerm.alternatives - val argType = typeOf[String] - - val selectedMethodSymbol = methodSymbols.find { m => - m.asMethod.paramLists match { - case List(List(param)) => param.typeSignature =:= argType - case _ => false - } - }.getOrElse(throw new NoSuchMethodException(s"Method $methodName with argument type $argType not found")) - - val methodMirror = mirror.reflect(obj).reflectMethod(selectedMethodSymbol.asMethod) - methodMirror(tokenType).asInstanceOf[String] - } - - def getMwcToken(sharedHost: String, - workspaceId: String, - capacityId: String, - workloadType: String): Option[MwcToken] = { - val url: String = sharedHost + "/metadata/v201606/generatemwctokenv2" - - val payLoad = - s"""{ - |"capacityObjectId": "$capacityId", - |"workspaceObjectId": "$workspaceId", - |"workloadType": "$workloadType" - }""".stripMargin - - val driverAADToken = getAccessToken("pbi") - - val headers = Map( - "Content-Type" -> "application/json", - "Authorization" -> s"""Bearer $driverAADToken""".stripMargin, - "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString - ) - - val response = usagePost(url, payLoad, 
headers) - val targetUriHost = s"https://${response.asJsObject.fields("TargetUriHost").convertTo[String]}" - response.asJsObject.fields.updated("TargetUriHost", targetUriHost) - - implicit val mwcTokenFormat: RootJsonFormat[MwcToken] = jsonFormat3(MwcToken) - Some(response.convertTo[MwcToken]) - } -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala deleted file mode 100644 index 1b350fac95..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageConstants.scala +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging.fabric - -trait FabricConstants { - val emitUsage = "EmitUsage" - val fabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" - - val contextFilePath = "/home/trusted-service-user/.trident-context" - val webApi = "webapi" - val capacities = "Capacities" - val workloads = "workloads" - val workspaceID = "workspaceid" - - val workloadEndpointMl = "ML" - val workloadEndpointAutomatic = "Automatic" - val workloadEndpointAdmin = "MLAdmin" -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala deleted file mode 100644 index 293dbc1e87..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/UsageUtils.scala +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging.fabric - -import com.microsoft.azure.synapse.ml.logging.SynapseMLLogging -import com.microsoft.azure.synapse.ml.logging.fabric.HostEndpointUtils._ -import com.microsoft.azure.synapse.ml.logging.fabric.TokenUtils.getAccessToken - -import java.time.Instant -import java.util.UUID -import org.apache.spark.sql.SparkSession -import spray.json._ -import spray.json.DefaultJsonProtocol._ -import spray.json.DefaultJsonProtocol.StringJsonFormat - -object UsageTelemetry extends FabricConstants with WebUtils { - private val SC = SparkSession.builder().getOrCreate().sparkContext - private lazy val CapacityId = SC.hadoopConfiguration.get("trident.capacity.id") - private lazy val WorkspaceId: String = SC.hadoopConfiguration.get("trident.artifact.workspace.id") - private lazy val PbiEnv = SC.getConf.get("spark.trident.pbienv", "").toLowerCase() - - private lazy val SharedHost = getMlflowSharedHost(PbiEnv) - private val WlHost = getMlflowWorkloadHost(PbiEnv, CapacityId, WorkspaceId, Some(SharedHost)) - - def reportUsage(featureName: String, - activityName: String, - attributes: Map[String, String]): Unit = { - if (sys.env.getOrElse(emitUsage, "true").toLowerCase == "true") { - try { - reportUsageTelemetry( - featureName, - activityName, - attributes) - } catch { - case runtimeError: Exception => - SynapseMLLogging.logMessage(s"UsageTelemetry::reportUsage: Hit issue emitting usage telemetry." + - s" Exception = $runtimeError. 
(usage test)") - } - } - } - - private def reportUsageTelemetry(featureName: String, - activityName: String, - attributes: Map[String, String]): Unit = { - if (sys.env.getOrElse(fabricFakeTelemetryReportCalls, "false").toLowerCase == "false") { - val attributesJson = attributes.toJson.compactPrint - val data = - s"""{ - |"timestamp":${Instant.now().getEpochSecond}, - |"feature_name":"$featureName", - |"activity_name":"$activityName", - |"attributes":$attributesJson - |}""".stripMargin - - val mlAdminEndpoint = WlHost match { - case Some(host) => - getMLWorkloadEndpoint(host, CapacityId, workloadEndpointAdmin, WorkspaceId) - case None => - throw new IllegalArgumentException("Workload host name is missing.") - } - - // Add the protocol and the route for the certified event telemetry endpoint - val url = "https://" + mlAdminEndpoint + "telemetry" - - val headers = Map( - "Content-Type" -> "application/json", - "Authorization" -> s"""Bearer ${getAccessToken("pbi")}""".stripMargin, - "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString - ) - usagePost(url, data, headers) - } - } -} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala deleted file mode 100644 index 3d2d5f3030..0000000000 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/FabricTokenParserTests.scala +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.azure.synapse.ml.logging - -import com.microsoft.azure.synapse.ml.core.env.StreamUtilities -import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.fabric.{FabricTokenParser, InvalidJwtTokenException} - -import scala.io.Source -import spray.json._ - -class FabricTokenParserTests extends TestBase { - - case class Token(valid: String, payload: String) - - object TokenJsonProtocol extends DefaultJsonProtocol { - implicit val TokenFormat: RootJsonFormat[Token] = jsonFormat2(Token) - } - - import TokenJsonProtocol._ - - test("JWT Token Expiry Check"){ - val fabricTokenParser = new FabricTokenParser(createDummyToken(true)) - val exp: Long = fabricTokenParser.getExpiry - assert(exp > 0L) - } - - test("Invalid JWT Token Check."){ - assertThrows[InvalidJwtTokenException]{ - val fabricTokenParser = new FabricTokenParser(createDummyToken(false)) - } - } - - def createDummyToken(createValidToken: Boolean): String = { - val claims = """{ - "iss": "issuer", - "sub": "subject", - "aud": "audience", - "exp": 1691171109, - "userId": "123456789" - }""" - - val header = encodeBase64URLSafeString ("{\"alg\":\"RS256\",\"typ\":\"JWT\"}".getBytes ("UTF-8") ) - val payload = encodeBase64URLSafeString (claims.getBytes ("UTF-8") ) - val dummySignature = "dummy-signature" // You can replace this with an actual signature if needed - - if(createValidToken) - s"$header.$payload.$dummySignature" - else - s"$header.$payload" - } - - def encodeBase64URLSafeString(bytes: Array[Byte]): String = { - java.util.Base64.getUrlEncoder.encodeToString(bytes) - } -} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala deleted file mode 100644 index 7cfa5e299b..0000000000 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/logging/HostEndpointUtilsTests.scala +++ /dev/null @@ -1,22 +0,0 @@ 
-// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.logging - -import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.logging.fabric.FabricConstants - -class UsageUtilsTests extends TestBase with FabricConstants { - import com.microsoft.azure.synapse.ml.logging.fabric.HostEndpointUtils._ - - val target = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net/webapi/Capacities/" + - "c528701c-8f94-42c0-b65a-1660171c306c/workloads/ML/MLAdmin/Automatic/" + - "workspaceid/89b9b330-6eac-4ee1-b225-590dfd68e4be/" - val capacityId = "c528701c-8f94-42c0-b65a-1660171c306c" - val wlHost = "c528701c8f9442c0b65a1660171c306c.pbidedicated.windows-int.net" - val workspaceId = "89b9b330-6eac-4ee1-b225-590dfd68e4be" - test("ML Workload Endpoint Check"){ - val url = getMLWorkloadEndpoint(this.wlHost, this.capacityId, workloadEndpointAdmin, this.workspaceId) - assert(url == target) - } -} From b23941ab2cc299e4d9bed6188dcf3fb7697a1b7b Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 5 Oct 2023 14:26:55 -0400 Subject: [PATCH 35/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala --- .../azure/synapse/ml/logging/fabric/CertifiedEventClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index 42ca5d6f4b..6b399b25ee 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -57,7 +57,7 @@ object CertifiedEventClient extends RESTUtils { private def getHeaders: Map[String, String] = { Map( 
"Authorization" -> s"""Bearer $getAccessToken""".stripMargin, - "RequestId" -> java.util.UUID.randomUUID().toString, + "RequestId" -> UUID.randomUUID().toString, "Content-Type" -> "application/json", "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString ) From 94d41caca4fc047c3c5e223033f0ea6d52f52cf3 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 5 Oct 2023 14:27:42 -0400 Subject: [PATCH 36/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala --- .../azure/synapse/ml/logging/fabric/CertifiedEventClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index 6b399b25ee..49e338f149 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -56,7 +56,7 @@ object CertifiedEventClient extends RESTUtils { private def getHeaders: Map[String, String] = { Map( - "Authorization" -> s"""Bearer $getAccessToken""".stripMargin, + "Authorization" -> s"Bearer $getAccessToken", "RequestId" -> UUID.randomUUID().toString, "Content-Type" -> "application/json", "x-ms-workload-resource-moniker" -> UUID.randomUUID().toString From 2c904981a74cf6f21fdde0a724cf3e86e8668133 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Thu, 5 Oct 2023 18:03:47 -0400 Subject: [PATCH 37/50] add futures --- .../synapse/ml/logging/SynapseMLLogging.scala | 15 +++++++++++++-- .../ml/logging/fabric/CertifiedEventClient.scala | 11 +++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index 
3d77d46c79..e16a9becf9 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -5,7 +5,7 @@ package com.microsoft.azure.synapse.ml.logging import com.microsoft.azure.synapse.ml.build.BuildInfo import com.microsoft.azure.synapse.ml.logging.common.SASScrubber -import com.microsoft.azure.synapse.ml.logging.fabric.CertifiedEventClient.reportUsage +import com.microsoft.azure.synapse.ml.logging.fabric.CertifiedEventClient.logToCertifiedEvents import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import spray.json.DefaultJsonProtocol._ @@ -13,6 +13,8 @@ import spray.json._ import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.Future case class RequiredLogFields(uid: String, className: String, @@ -123,7 +125,16 @@ trait SynapseMLLogging extends Logging { protected def logBase(info: Map[String, String], logCertifiedEvent: Boolean): Unit = { if (logCertifiedEvent) { - reportUsage( + // Future { + // logToCertifiedEvents( + // info("libraryName"), + // info("method"), + // info -- Seq("libraryName", "method") + // ) + // }.failed.map { + // case e: Exception => logErrorBase("certifiedEventLogging", e) + // } + logToCertifiedEvents( info("libraryName"), info("method"), info -- Seq("libraryName", "method") diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index 42ca5d6f4b..e695eba5f0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -9,6 +9,7 @@ import spray.json._ import java.time.Instant import java.util.UUID +import 
scala.concurrent.ExecutionContext import scala.reflect.runtime.currentMirror import scala.reflect.runtime.universe._ @@ -91,12 +92,14 @@ object CertifiedEventClient extends RESTUtils { } - private[ml] def reportUsage(featureName: String, - activityName: String, - attributes: Map[String, String]): Unit = { + private[ml] def logToCertifiedEvents(featureName: String, + activityName: String, + attributes: Map[String, String]): Unit = { - val shouldReport = (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") && + val shouldReport = ( + (sys.env.getOrElse(EmitUsage, "false").toLowerCase == "true") && (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") + ) if (shouldReport) { val payload = From a9de0577cf2dfe2796cecd3c3b88bdd94ac504a5 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Thu, 5 Oct 2023 16:29:34 -0700 Subject: [PATCH 38/50] Adding Fabric environment check and using it to decide emitting certified event --- .../ml/logging/common/PlatformDetails.scala | 42 +++++++++++++++++++ .../logging/fabric/CertifiedEventClient.scala | 9 ++-- 2 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala new file mode 100644 index 0000000000..ed70770d1d --- /dev/null +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -0,0 +1,42 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
+ +package com.microsoft.azure.synapse.ml.logging.common + +import org.apache.spark.sql.SparkSession +object Constants { + val PlatformSynapseInternal = "synapse_internal" + val PlatformSynapse = "synapse" + val PlatformBinder = "binder" + val PlatformDatabricks = "databricks" + val PlatformUnknown = "unknown" + val SecretStore = "mmlspark-build-keys" + val SynapseProjectName = "Microsoft.ProjectArcadia" +} +object PlatformDetection { + + import Constants._ + + def currentPlatform(): String = { + val azureService = sys.env.get("AZURE_SERVICE") + azureService match { + case Some(serviceName) if serviceName == SynapseProjectName => + val spark = SparkSession.builder.getOrCreate() + val clusterType = spark.conf.get("spark.cluster.type") + if (clusterType == "synapse") PlatformSynapse else PlatformSynapseInternal + case _ if new java.io.File("/dbfs").exists() => PlatformDatabricks + case _ if sys.env.get("BINDER_LAUNCH_HOST").isDefined => PlatformBinder + case _ => PlatformUnknown + } + } + + def runningOnSynapseInternal(): Boolean = currentPlatform() == PlatformSynapseInternal + + def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse + + // def runningOnBinder(): Boolean = currentPlatform() == PlatformBinder + + def runningOnDatabricks(): Boolean = currentPlatform() == PlatformDatabricks + + def runningOnFabric(): Boolean = runningOnSynapseInternal || runningOnSynapse || runningOnDatabricks +} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index a6ce5cd614..084a158be5 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -13,6 +13,8 @@ import scala.concurrent.ExecutionContext import scala.reflect.runtime.currentMirror import 
scala.reflect.runtime.universe._ +import com.microsoft.azure.synapse.ml.logging.common.PlatformDetection + object CertifiedEventClient extends RESTUtils { private val EmitUsage = "EmitUsage" @@ -96,12 +98,13 @@ object CertifiedEventClient extends RESTUtils { activityName: String, attributes: Map[String, String]): Unit = { - val shouldReport = ( + val shouldeEmitCertifiedEvent = ( (sys.env.getOrElse(EmitUsage, "false").toLowerCase == "true") && - (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") + (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") && + (PlatformDetection.runningOnFabric) ) - if (shouldReport) { + if (shouldeEmitCertifiedEvent) { val payload = s"""{ |"timestamp":${Instant.now().getEpochSecond}, From c9877236f6599788c57d77fa5e92f46f94afc657 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Fri, 6 Oct 2023 17:12:48 -0700 Subject: [PATCH 39/50] Adding platform check before emitting CE. Turning calls to log CE asynchronous. 
--- .../synapse/ml/logging/SynapseMLLogging.scala | 23 ++++++++----------- .../ml/logging/common/PlatformDetails.scala | 15 ++++-------- .../logging/fabric/CertifiedEventClient.scala | 13 +++++------ 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala index e16a9becf9..4cab7a971b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/SynapseMLLogging.scala @@ -125,20 +125,15 @@ trait SynapseMLLogging extends Logging { protected def logBase(info: Map[String, String], logCertifiedEvent: Boolean): Unit = { if (logCertifiedEvent) { - // Future { - // logToCertifiedEvents( - // info("libraryName"), - // info("method"), - // info -- Seq("libraryName", "method") - // ) - // }.failed.map { - // case e: Exception => logErrorBase("certifiedEventLogging", e) - // } - logToCertifiedEvents( - info("libraryName"), - info("method"), - info -- Seq("libraryName", "method") - ) + Future { + logToCertifiedEvents( + info("libraryName"), + info("method"), + info -- Seq("libraryName", "method") + ) + }.failed.map { + case e: Exception => logErrorBase("certifiedEventLogging", e) + } } logInfo(info.toJson.compactPrint) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala index ed70770d1d..2af0243490 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -10,14 +10,13 @@ object Constants { val PlatformBinder = "binder" val PlatformDatabricks = "databricks" val PlatformUnknown = "unknown" - val SecretStore = "mmlspark-build-keys" val 
SynapseProjectName = "Microsoft.ProjectArcadia" } -object PlatformDetection { +object PlatformDetails { import Constants._ - def currentPlatform(): String = { + private def currentPlatform(): String = { val azureService = sys.env.get("AZURE_SERVICE") azureService match { case Some(serviceName) if serviceName == SynapseProjectName => @@ -30,13 +29,9 @@ object PlatformDetection { } } - def runningOnSynapseInternal(): Boolean = currentPlatform() == PlatformSynapseInternal + private def runningOnSynapseInternal(): Boolean = currentPlatform() == PlatformSynapseInternal - def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse + private def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse - // def runningOnBinder(): Boolean = currentPlatform() == PlatformBinder - - def runningOnDatabricks(): Boolean = currentPlatform() == PlatformDatabricks - - def runningOnFabric(): Boolean = runningOnSynapseInternal || runningOnSynapse || runningOnDatabricks + private[ml] def runningOnFabric(): Boolean = runningOnSynapseInternal || runningOnSynapse } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index 084a158be5..f30d019526 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -9,11 +9,10 @@ import spray.json._ import java.time.Instant import java.util.UUID -import scala.concurrent.ExecutionContext import scala.reflect.runtime.currentMirror import scala.reflect.runtime.universe._ -import com.microsoft.azure.synapse.ml.logging.common.PlatformDetection +import com.microsoft.azure.synapse.ml.logging.common.PlatformDetails.runningOnFabric object CertifiedEventClient extends RESTUtils { @@ -98,13 +97,13 @@ object CertifiedEventClient extends RESTUtils { 
activityName: String, attributes: Map[String, String]): Unit = { - val shouldeEmitCertifiedEvent = ( - (sys.env.getOrElse(EmitUsage, "false").toLowerCase == "true") && - (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") && - (PlatformDetection.runningOnFabric) + val shouldEmitCertifiedEvent = ( + (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") && + (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") + && runningOnFabric ) - if (shouldeEmitCertifiedEvent) { + if (shouldEmitCertifiedEvent) { val payload = s"""{ |"timestamp":${Instant.now().getEpochSecond}, From 8e38b6c8e7382941c74df9e869b1ea3e541158b6 Mon Sep 17 00:00:00 2001 From: Sailesh Baidya Date: Sun, 8 Oct 2023 11:48:21 -0700 Subject: [PATCH 40/50] Modifying logic to determine if platform is Fabric only if it is Synapse internal --- .../azure/synapse/ml/logging/common/PlatformDetails.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala index 2af0243490..4ba1011c82 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -33,5 +33,5 @@ object PlatformDetails { private def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse - private[ml] def runningOnFabric(): Boolean = runningOnSynapseInternal || runningOnSynapse + private[ml] def runningOnFabric(): Boolean = runningOnSynapseInternal } From 3f0fe99d8125f0d8e1316ed1b8995f63b6dfed4f Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:30:50 -0400 Subject: [PATCH 41/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala --- .../azure/synapse/ml/logging/common/PlatformDetails.scala 
| 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala index 4ba1011c82..af8647f278 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -4,17 +4,13 @@ package com.microsoft.azure.synapse.ml.logging.common import org.apache.spark.sql.SparkSession -object Constants { +object PlatformDetails { val PlatformSynapseInternal = "synapse_internal" val PlatformSynapse = "synapse" val PlatformBinder = "binder" val PlatformDatabricks = "databricks" val PlatformUnknown = "unknown" val SynapseProjectName = "Microsoft.ProjectArcadia" -} -object PlatformDetails { - - import Constants._ private def currentPlatform(): String = { val azureService = sys.env.get("AZURE_SERVICE") From 7b6afc20a728963012c744ac3a77626dd18b90d3 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:31:15 -0400 Subject: [PATCH 42/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala --- .../azure/synapse/ml/logging/common/PlatformDetails.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala index af8647f278..6762eaac73 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -4,6 +4,7 @@ package com.microsoft.azure.synapse.ml.logging.common import org.apache.spark.sql.SparkSession + object PlatformDetails { val PlatformSynapseInternal = "synapse_internal" val PlatformSynapse = "synapse" From 
6a760a91cf5d16c68c44e47b305d7275d1e9126d Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:33:09 -0400 Subject: [PATCH 43/50] Apply suggestions from code review --- .../azure/synapse/ml/logging/common/PlatformDetails.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala index 6762eaac73..ce00a8d1fa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/common/PlatformDetails.scala @@ -13,7 +13,7 @@ object PlatformDetails { val PlatformUnknown = "unknown" val SynapseProjectName = "Microsoft.ProjectArcadia" - private def currentPlatform(): String = { + def currentPlatform(): String = { val azureService = sys.env.get("AZURE_SERVICE") azureService match { case Some(serviceName) if serviceName == SynapseProjectName => @@ -26,9 +26,9 @@ object PlatformDetails { } } - private def runningOnSynapseInternal(): Boolean = currentPlatform() == PlatformSynapseInternal + def runningOnSynapseInternal(): Boolean = currentPlatform() == PlatformSynapseInternal - private def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse + def runningOnSynapse(): Boolean = currentPlatform() == PlatformSynapse - private[ml] def runningOnFabric(): Boolean = runningOnSynapseInternal + def runningOnFabric(): Boolean = runningOnSynapseInternal } From 2a257a6240b430853a691b50fe13c256a0c3071a Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:38:07 -0400 Subject: [PATCH 44/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala --- .../synapse/ml/logging/fabric/CertifiedEventClient.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git 
a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index f30d019526..3b472f5620 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -97,13 +97,7 @@ object CertifiedEventClient extends RESTUtils { activityName: String, attributes: Map[String, String]): Unit = { - val shouldEmitCertifiedEvent = ( - (sys.env.getOrElse(EmitUsage, "true").toLowerCase == "true") && - (sys.env.getOrElse(FabricFakeTelemetryReportCalls, "false").toLowerCase == "false") - && runningOnFabric - ) - - if (shouldEmitCertifiedEvent) { + if (runningOnFabric) { val payload = s"""{ |"timestamp":${Instant.now().getEpochSecond}, From 75571ab1c35a58c20370bb90cc0f9a360b64cf24 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:38:39 -0400 Subject: [PATCH 45/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala --- .../azure/synapse/ml/logging/fabric/CertifiedEventClient.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index 3b472f5620..a544acab77 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -16,9 +16,6 @@ import com.microsoft.azure.synapse.ml.logging.common.PlatformDetails.runningOnFa object CertifiedEventClient extends RESTUtils { - private val EmitUsage = "EmitUsage" - - private val FabricFakeTelemetryReportCalls = "fabric_fake_usage_telemetry" private val PbiGlobalServiceEndpoints = Map( 
"public" -> "https://api.powerbi.com/", From 30933467c4173c192dbce67a947df7bc9be5090e Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:38:57 -0400 Subject: [PATCH 46/50] Update core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala --- .../azure/synapse/ml/logging/fabric/CertifiedEventClient.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala index a544acab77..8c832301f9 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/fabric/CertifiedEventClient.scala @@ -16,7 +16,6 @@ import com.microsoft.azure.synapse.ml.logging.common.PlatformDetails.runningOnFa object CertifiedEventClient extends RESTUtils { - private val PbiGlobalServiceEndpoints = Map( "public" -> "https://api.powerbi.com/", "fairfax" -> "https://api.powerbigov.us", From 629dfa7ab75fecd0b7b904fba92a2b419c8f61f6 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:39:26 -0400 Subject: [PATCH 47/50] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d201cc7af5..d2a5a21ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,4 @@ metastore_db/ **/build/* **/dist/* **/*.egg-info/* + From a755d643bfc698ee8c7e35c52a7bec51c1db78a6 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:39:54 -0400 Subject: [PATCH 48/50] Update tools/docgen/docgen/manifest.yaml --- tools/docgen/docgen/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/docgen/docgen/manifest.yaml b/tools/docgen/docgen/manifest.yaml index ff5b87ced0..96848562c6 100644 --- a/tools/docgen/docgen/manifest.yaml +++ b/tools/docgen/docgen/manifest.yaml @@ -90,7 
+90,7 @@ channels: - path: Explore Algorithms/OpenAI/OpenAI.ipynb metadata: title: Azure OpenAI for big data - description: Use Azure OpenAI service to solve a large number of natural language tasks through prompting the completion API. + description: Use Azure OpenAI service to solve a large number of natural language tasks through prompting the completion API. ms.topic: how-to ms.custom: build-2023 ms.reviewer: jessiwang From 8811a5288d447860a2cfd6386ca31bd99d08c447 Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:40:11 -0400 Subject: [PATCH 49/50] Update environment.yml --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 97c23c11af..e121a61bde 100644 --- a/environment.yml +++ b/environment.yml @@ -50,3 +50,4 @@ dependencies: - pypandoc - markdownify - traitlets + From 5ef7c598ff7ba8e31f12254e2889b60578b511fe Mon Sep 17 00:00:00 2001 From: Mark Hamilton Date: Mon, 9 Oct 2023 13:40:33 -0400 Subject: [PATCH 50/50] Update environment.yml --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index e121a61bde..97c23c11af 100644 --- a/environment.yml +++ b/environment.yml @@ -50,4 +50,3 @@ dependencies: - pypandoc - markdownify - traitlets -