diff --git a/pkg/controller/mpi_job_controller.go b/pkg/controller/mpi_job_controller.go index 02026944..042ed53e 100644 --- a/pkg/controller/mpi_job_controller.go +++ b/pkg/controller/mpi_job_controller.go @@ -111,10 +111,6 @@ const ( // From: k8s.io/kubernetes/pkg/apis/core/validation/events.go eventMessageLimit = 1024 - // jobBackoffLimitExceededReason is the reason that the k8s job controller - // uses when the backoff limit is exceeded. - jobBackoffLimitExceededReason = "BackoffLimitExceeded" - openMPISlotsEnv = "OMPI_MCA_orte_set_default_slots" intelMPISlotsEnv = "I_MPI_PERHOST" ) @@ -1149,7 +1145,7 @@ func (c *MPIJobController) updateMPIJobFailedStatus(mpiJob *kubeflow.MPIJob, lau if msg == "" { msg = fmt.Sprintf("MPIJob %s/%s has failed", mpiJob.Namespace, mpiJob.Name) } - if reason == jobBackoffLimitExceededReason { + if reason == batchv1.JobReasonBackoffLimitExceeded { // Concatenate the reason and message from the last failed Pod. var lastFailedPod *corev1.Pod for _, p := range launcherPods { diff --git a/pkg/controller/mpi_job_controller_test.go b/pkg/controller/mpi_job_controller_test.go index 3594d354..cf51131f 100644 --- a/pkg/controller/mpi_job_controller_test.go +++ b/pkg/controller/mpi_job_controller_test.go @@ -635,7 +635,7 @@ func TestLauncherFailed(t *testing.T) { launcher.Status.Conditions = append(launcher.Status.Conditions, batchv1.JobCondition{ Type: batchv1.JobFailed, Status: corev1.ConditionTrue, - Reason: jobBackoffLimitExceededReason, + Reason: batchv1.JobReasonBackoffLimitExceeded, Message: "Job has reached the specified backoff limit", }) launcher.Status.Failed = 2 @@ -668,7 +668,7 @@ func TestLauncherFailed(t *testing.T) { msg := fmt.Sprintf("MPIJob %s/%s is created.", mpiJob.Namespace, mpiJob.Name) updateMPIJobConditions(mpiJobCopy, kubeflow.JobCreated, corev1.ConditionTrue, mpiJobCreatedReason, msg) msg = "Job has reached the specified backoff limit: second message" - updateMPIJobConditions(mpiJobCopy, kubeflow.JobFailed, corev1.ConditionTrue, jobBackoffLimitExceededReason+"/FailedReason2", msg) + updateMPIJobConditions(mpiJobCopy, kubeflow.JobFailed, corev1.ConditionTrue, batchv1.JobReasonBackoffLimitExceeded+"/FailedReason2", msg) f.expectUpdateMPIJobStatusAction(mpiJobCopy)