From 49bf0945ae1a5c5b871a5e89948c61d013f95450 Mon Sep 17 00:00:00 2001 From: Farrukh Masud Date: Mon, 11 Nov 2024 10:42:05 -0800 Subject: [PATCH 1/5] Disabling failing test --- .../azure/synapse/ml/nbtest/DatabricksRapidsTests.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala index b549a153cf..267e44d36c 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala @@ -4,14 +4,10 @@ package com.microsoft.azure.synapse.ml.nbtest import com.microsoft.azure.synapse.ml.nbtest.DatabricksUtilities._ +import org.scalatest.Ignore -import com.microsoft.azure.synapse.ml.build.BuildInfo -import com.microsoft.azure.synapse.ml.core.env.FileUtilities -import com.microsoft.azure.synapse.ml.nbtest.DatabricksUtilities._ - -import java.io.File -import scala.collection.mutable.ListBuffer +@Ignore class DatabricksRapidsTests extends DatabricksTestHelper { val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 1, GpuPoolId, RapidsInitScripts) From 93524dd1eb308ce838bab357071d3ed516a43962 Mon Sep 17 00:00:00 2001 From: Farrukh Masud Date: Mon, 11 Nov 2024 12:06:15 -0800 Subject: [PATCH 2/5] Increasing timeout for rapids test --- .../azure/synapse/ml/nbtest/DatabricksRapidsTests.scala | 6 ++---- .../azure/synapse/ml/nbtest/DatabricksUtilities.scala | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala index 267e44d36c..e3d8f21191 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala @@ -4,15 +4,13 @@ package com.microsoft.azure.synapse.ml.nbtest import com.microsoft.azure.synapse.ml.nbtest.DatabricksUtilities._ -import org.scalatest.Ignore - -@Ignore class DatabricksRapidsTests extends DatabricksTestHelper { val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 1, GpuPoolId, RapidsInitScripts) - databricksTestHelper(clusterId, GPULibraries, RapidsNotebooks) + // We want to wait for 40 minutes for each test to complete + databricksTestHelper(clusterId, GPULibraries, RapidsNotebooks, retries = Seq.fill(60 * 40)(1000).toArray) protected override def afterAll(): Unit = { afterAllHelper(clusterId, RapidsClusterName) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index 55e0fbdfce..eea263eb19 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -427,10 +427,11 @@ abstract class DatabricksTestHelper extends TestBase { def databricksTestHelper(clusterId: String, libraries: String, - notebooks: Seq[File]): Unit = { + notebooks: Seq[File], + retries: Array[Int] = Seq.fill(60 * 20)(1000).toArray): Unit = { println("Checking if cluster is active") - tryWithRetries(Seq.fill(60 * 20)(1000).toArray) { () => + tryWithRetries(retries) { () => assert(isClusterActive(clusterId)) } From af55f1b09d9d39c1df1fc8eb496406c762a55707 Mon Sep 17 00:00:00 2001 From: Farrukh Masud Date: Mon, 11 Nov 2024 15:10:59 -0800 Subject: [PATCH 3/5] Increasing timeout all --- .../azure/synapse/ml/nbtest/DatabricksRapidsTests.scala | 2 +- .../azure/synapse/ml/nbtest/DatabricksUtilities.scala | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git 
a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala index e3d8f21191..aadd6c9788 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala @@ -10,7 +10,7 @@ class DatabricksRapidsTests extends DatabricksTestHelper { val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 1, GpuPoolId, RapidsInitScripts) // We want to wait for 40 minutes for each test to complete - databricksTestHelper(clusterId, GPULibraries, RapidsNotebooks, retries = Seq.fill(60 * 40)(1000).toArray) + databricksTestHelper(clusterId, GPULibraries, RapidsNotebooks) protected override def afterAll(): Unit = { afterAllHelper(clusterId, RapidsClusterName) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index eea263eb19..3a34d6391f 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -427,11 +427,10 @@ abstract class DatabricksTestHelper extends TestBase { def databricksTestHelper(clusterId: String, libraries: String, - notebooks: Seq[File], - retries: Array[Int] = Seq.fill(60 * 20)(1000).toArray): Unit = { + notebooks: Seq[File]): Unit = { println("Checking if cluster is active") - tryWithRetries(retries) { () => + tryWithRetries(Seq.fill(60 * 40)(1000).toArray) { () => assert(isClusterActive(clusterId)) } From 7896dc326a06eefa017009d312a28367c04bd3e4 Mon Sep 17 00:00:00 2001 From: Farrukh Masud Date: Tue, 12 Nov 2024 09:44:06 -0800 Subject: [PATCH 4/5] Removing the databricks rapids test correct way --- .../azure/synapse/ml/nbtest/DatabricksRapidsTests.scala | 8 
+++++++- .../azure/synapse/ml/nbtest/DatabricksUtilities.scala | 2 +- pipeline.yaml | 2 -- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala index aadd6c9788..b549a153cf 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksRapidsTests.scala @@ -5,11 +5,17 @@ package com.microsoft.azure.synapse.ml.nbtest import com.microsoft.azure.synapse.ml.nbtest.DatabricksUtilities._ +import com.microsoft.azure.synapse.ml.build.BuildInfo +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.nbtest.DatabricksUtilities._ + +import java.io.File +import scala.collection.mutable.ListBuffer + class DatabricksRapidsTests extends DatabricksTestHelper { val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 1, GpuPoolId, RapidsInitScripts) - // We want to wait for 40 minutes for each test to complete databricksTestHelper(clusterId, GPULibraries, RapidsNotebooks) protected override def afterAll(): Unit = { diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index 3a34d6391f..55e0fbdfce 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -430,7 +430,7 @@ abstract class DatabricksTestHelper extends TestBase { notebooks: Seq[File]): Unit = { println("Checking if cluster is active") - tryWithRetries(Seq.fill(60 * 40)(1000).toArray) { () => + tryWithRetries(Seq.fill(60 * 20)(1000).toArray) { () => assert(isClusterActive(clusterId)) } diff --git a/pipeline.yaml 
b/pipeline.yaml index 22c581587e..854b054c7f 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -143,8 +143,6 @@ jobs: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" databricks-gpu: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" - databricks-rapids: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" synapse: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" # ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: From 90f7042d108183c444aa029a3b6b2033cf5c55bc Mon Sep 17 00:00:00 2001 From: Farrukh Masud Date: Tue, 12 Nov 2024 11:37:33 -0800 Subject: [PATCH 5/5] Adding comments --- pipeline.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pipeline.yaml b/pipeline.yaml index 854b054c7f..c3db92d3f7 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -143,6 +143,10 @@ jobs: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" databricks-gpu: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" +# databricks-rapids tests have been disabled because these tests are failing. +# These tests will be re-enabled once the issue is fixed. +# databricks-rapids: +# TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" synapse: TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" # ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: