diff --git a/server/src/main/java/io/orkes/conductor/server/service/OrkesSweeperProperties.java b/server/src/main/java/io/orkes/conductor/server/service/OrkesSweeperProperties.java index 9cec440..37f4b1d 100644 --- a/server/src/main/java/io/orkes/conductor/server/service/OrkesSweeperProperties.java +++ b/server/src/main/java/io/orkes/conductor/server/service/OrkesSweeperProperties.java @@ -20,11 +20,12 @@ import lombok.ToString; @Configuration -@ConfigurationProperties("conductor.app.sweeper") +@ConfigurationProperties("conductor.orkes.sweeper") @Getter @Setter @ToString public class OrkesSweeperProperties { + private int frequencyMillis = 10; private int sweepBatchSize = 5; private int queuePopTimeout = 100; } diff --git a/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweepWorker.java b/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweepWorker.java new file mode 100644 index 0000000..09aeea9 --- /dev/null +++ b/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweepWorker.java @@ -0,0 +1,252 @@ +/* + * Copyright 2023 Orkes, Inc. + *
+ * Licensed under the Orkes Community License (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *
+ * https://github.com/orkes-io/licenses/blob/main/community/LICENSE.txt + *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package io.orkes.conductor.server.service;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Component;
+
+import com.netflix.conductor.core.config.ConductorProperties;
+import com.netflix.conductor.core.exception.NotFoundException;
+import com.netflix.conductor.core.execution.WorkflowExecutor;
+import com.netflix.conductor.core.execution.tasks.SystemTaskRegistry;
+import com.netflix.conductor.core.execution.tasks.WorkflowSystemTask;
+import com.netflix.conductor.core.utils.QueueUtils;
+import com.netflix.conductor.core.utils.Utils;
+import com.netflix.conductor.dao.ExecutionDAO;
+import com.netflix.conductor.dao.QueueDAO;
+import com.netflix.conductor.metrics.Monitors;
+import com.netflix.conductor.model.TaskModel;
+import com.netflix.conductor.model.WorkflowModel;
+import com.netflix.conductor.service.ExecutionLockService;
+
+import io.orkes.conductor.metrics.MetricsCollector;
+
+import lombok.extern.slf4j.Slf4j;
+
+import static com.netflix.conductor.core.config.SchedulerConfiguration.SWEEPER_EXECUTOR_NAME;
+import static com.netflix.conductor.core.utils.Utils.DECIDER_QUEUE;
+
+@Component
+@ConditionalOnProperty(name = "conductor.orkes.sweeper.enabled", havingValue = "true")
+@Slf4j
+public class OrkesWorkflowSweepWorker {
+
+ // Queue access: used below to check for, push and remove queue messages.
+ private final QueueDAO queueDAO;
+ // Read for the workflow offset timeout when a workflow is pushed back to the decider queue.
+ private final ConductorProperties properties;
+ // Runs decide(...) on a workflow.
+ private final WorkflowExecutor workflowExecutor;
+ // NOTE(review): not referenced by the helper methods visible in this part of the file.
+ private final ExecutionDAO executionDAO;
+ // Supplies the "repairTaskReQueued" counter incremented when a task is re-queued.
+ private final MetricsCollector metricsCollector;
+ // Used to tell whether a task type is a system task and to look that system task up.
+ private final SystemTaskRegistry systemTaskRegistry;
+ // NOTE(review): not referenced by the helper methods visible in this part of the file.
+ private final ExecutionLockService executionLockService;
+
+ /**
+  * Creates the sweep worker. Each argument is stored in the field of the same name; no other
+  * work is done here.
+  */
+ public OrkesWorkflowSweepWorker(
+         QueueDAO queueDAO,
+         WorkflowExecutor workflowExecutor,
+         ExecutionDAO executionDAO,
+         MetricsCollector metricsCollector,
+         SystemTaskRegistry systemTaskRegistry,
+         ExecutionLockService executionLockService,
+         ConductorProperties properties) {
+     // Assignments follow the parameter order above.
+     this.queueDAO = queueDAO;
+     this.workflowExecutor = workflowExecutor;
+     this.executionDAO = executionDAO;
+     this.metricsCollector = metricsCollector;
+     this.systemTaskRegistry = systemTaskRegistry;
+     this.executionLockService = executionLockService;
+     this.properties = properties;
+ }
+
+ @Async(SWEEPER_EXECUTOR_NAME)
+ public CompletableFutureDECIDER_QUEUE
*/
+ private WorkflowModel decideAndRemove(WorkflowModel workflow) {
+     // Run the decider; a null result is handed straight back to the caller.
+     final WorkflowModel decided = workflowExecutor.decide(workflow);
+     // Only a workflow that reached a terminal status is dropped from the decider queue.
+     if (decided != null && decided.getStatus().isTerminal()) {
+         queueDAO.remove(DECIDER_QUEUE, decided.getWorkflowId());
+     }
+     return decided;
+ }
+
+ /**
+  * Re-queues a task whose queue message is missing.
+  *
+  * <p>Nothing is pushed unless {@link #shouldTaskExistInQueue(TaskModel)} accepts the task and
+  * the task's queue does not already contain the task id.
+  *
+  * @param task the task to check
+  * @return {@code true} if the task id was pushed to its queue, {@code false} otherwise
+  */
+ private boolean ensurePendingTaskIsInQueue(TaskModel task) {
+     if (!shouldTaskExistInQueue(task)) {
+         return false;
+     }
+     final String queueName = QueueUtils.getQueueName(task);
+     final boolean alreadyQueued = queueDAO.containsMessage(queueName, task.getTaskId());
+     if (alreadyQueued) {
+         return false;
+     }
+     // Message is missing: push the task id back, passing the task's callbackAfterSeconds.
+     queueDAO.push(queueName, task.getTaskId(), task.getCallbackAfterSeconds());
+     log.info(
+             "Task {} in workflow {} re-queued for repairs",
+             task.getTaskId(),
+             task.getWorkflowInstanceId());
+     metricsCollector.getCounter("repairTaskReQueued", task.getTaskDefName()).increment();
+     return true;
+ }
+
+ /**
+  * Pushes a workflow id onto the decider queue when the queue does not already contain it.
+  *
+  * @param workflowId id of the workflow to check; a null or empty id is ignored
+  * @return {@code true} if the id was pushed, {@code false} otherwise
+  */
+ private boolean ensureWorkflowExistsInDecider(String workflowId) {
+     if (StringUtils.isEmpty(workflowId)) {
+         return false;
+     }
+     final String deciderQueue = Utils.DECIDER_QUEUE;
+     if (queueDAO.containsMessage(deciderQueue, workflowId)) {
+         return false;
+     }
+     // Missing from the decider queue: push it with the configured workflow offset timeout.
+     final long offsetSeconds = properties.getWorkflowOffsetTimeout().getSeconds();
+     queueDAO.push(deciderQueue, workflowId, offsetSeconds);
+     log.info("Workflow {} re-queued for repairs", workflowId);
+     Monitors.recordQueueMessageRepushFromRepairService(deciderQueue);
+     return true;
+ }
+
+ /**
+  * Decides whether a task is expected to have a message in its task queue.
+  *
+  * <ul>
+  *   <li>Non-system task: only when its status is {@code SCHEDULED}.
+  *   <li>System task that is not async: never.
+  *   <li>Async system task: when its status is {@code SCHEDULED}, or when it is
+  *       {@code IN_PROGRESS} and {@code isAsyncComplete(task)} is false.
+  * </ul>
+  *
+  * @param task the task to inspect
+  * @return {@code true} if the task should be present in its queue
+  */
+ private boolean shouldTaskExistInQueue(TaskModel task) {
+     final TaskModel.Status status = task.getStatus();
+     final boolean scheduled = status == TaskModel.Status.SCHEDULED;
+
+     if (!systemTaskRegistry.isSystemTask(task.getTaskType())) {
+         return scheduled;
+     }
+
+     final WorkflowSystemTask systemTask = systemTaskRegistry.get(task.getTaskType());
+     if (!systemTask.isAsync()) {
+         return false;
+     }
+     // SCHEDULED satisfies both original conditions whether or not the task is async-complete.
+     if (scheduled) {
+         return true;
+     }
+     // Otherwise the task must be IN_PROGRESS and must not be async-complete. This is the
+     // simplified form of "!asyncComplete || (asyncComplete && SCHEDULED)" combined with
+     // "IN_PROGRESS || SCHEDULED", and evaluates isAsyncComplete at most once.
+     return status == TaskModel.Status.IN_PROGRESS && !systemTask.isAsyncComplete(task);
+ }
+}
diff --git a/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweeper.java b/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweeper.java
index 21df5c8..eb9fb6a 100644
--- a/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweeper.java
+++ b/server/src/main/java/io/orkes/conductor/server/service/OrkesWorkflowSweeper.java
@@ -12,245 +12,85 @@
*/
package io.orkes.conductor.server.service;
-import java.util.Collections;
import java.util.List;
-import java.util.concurrent.Executor;
-import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
+import java.util.concurrent.CompletableFuture;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.scheduling.annotation.EnableScheduling;
+import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import com.netflix.conductor.core.LifecycleAwareComponent;
import com.netflix.conductor.core.config.ConductorProperties;
-import com.netflix.conductor.core.exception.NotFoundException;
-import com.netflix.conductor.core.execution.OrkesWorkflowExecutor;
-import com.netflix.conductor.core.execution.tasks.SystemTaskRegistry;
-import com.netflix.conductor.core.execution.tasks.WorkflowSystemTask;
-import com.netflix.conductor.core.utils.QueueUtils;
-import com.netflix.conductor.core.utils.Utils;
-import com.netflix.conductor.dao.ExecutionDAO;
import com.netflix.conductor.dao.QueueDAO;
import com.netflix.conductor.metrics.Monitors;
-import com.netflix.conductor.model.TaskModel;
-import com.netflix.conductor.model.WorkflowModel;
-import io.orkes.conductor.metrics.MetricsCollector;
-
-import com.google.common.util.concurrent.Uninterruptibles;
import lombok.extern.slf4j.Slf4j;
-import static com.netflix.conductor.core.config.SchedulerConfiguration.SWEEPER_EXECUTOR_NAME;
import static com.netflix.conductor.core.utils.Utils.DECIDER_QUEUE;
@Component
+@EnableScheduling
@ConditionalOnProperty(name = "conductor.orkes.sweeper.enabled", havingValue = "true")
@Slf4j
public class OrkesWorkflowSweeper extends LifecycleAwareComponent {
private final QueueDAO queueDAO;
- private final ConductorProperties properties;
private final OrkesSweeperProperties sweeperProperties;
- private final OrkesWorkflowExecutor workflowExecutor;
- private final ExecutionDAO executionDAO;
- private final MetricsCollector metricsCollector;
- private final SystemTaskRegistry systemTaskRegistry;
+ private final OrkesWorkflowSweepWorker sweepWorker;
public OrkesWorkflowSweeper(
- @Qualifier(SWEEPER_EXECUTOR_NAME) Executor sweeperExecutor,
+ OrkesWorkflowSweepWorker sweepWorker,
QueueDAO queueDAO,
- OrkesWorkflowExecutor workflowExecutor,
- ExecutionDAO executionDAO,
- MetricsCollector metricsCollector,
- SystemTaskRegistry systemTaskRegistry,
ConductorProperties properties,
OrkesSweeperProperties sweeperProperties) {
+ this.sweepWorker = sweepWorker;
this.queueDAO = queueDAO;
- this.executionDAO = executionDAO;
- this.metricsCollector = metricsCollector;
- this.systemTaskRegistry = systemTaskRegistry;
- this.properties = properties;
this.sweeperProperties = sweeperProperties;
- this.workflowExecutor = workflowExecutor;
log.info("Initializing sweeper with {} threads", properties.getSweeperThreadCount());
- for (int i = 0; i < properties.getSweeperThreadCount(); i++) {
- sweeperExecutor.execute(this::pollAndSweep);
- }
}
- private void pollAndSweep() {
+ // Reuse com.netflix.conductor.core.config.SchedulerConfiguration
+ @Scheduled(
+ fixedDelayString = "${conductor.orkes.sweeper.frequencyMillis:10}",
+ initialDelayString = "${conductor.orkes.sweeper.frequencyMillis:10}")
+ public void pollAndSweep() {
try {
- while (true) {
- try {
- if (!isRunning()) {
- log.trace("Component stopped, skip workflow sweep");
- } else {
- List