diff --git a/src/slurm_plugin/clustermgtd.py b/src/slurm_plugin/clustermgtd.py index 4d042f8d..ce22febd 100644 --- a/src/slurm_plugin/clustermgtd.py +++ b/src/slurm_plugin/clustermgtd.py @@ -1207,6 +1207,7 @@ def _is_node_in_replacement_valid(self, node: SlurmNode, check_node_is_valid): If check_node_is_valid=True, check whether a node is in replacement, If check_node_is_valid=False, check whether a node is replacement timeout. """ + log.debug(f"Checking if node is in replacement {node}") if ( node.is_backing_instance_valid( self._config.ec2_instance_missing_max_count, @@ -1215,9 +1216,15 @@ def _is_node_in_replacement_valid(self, node: SlurmNode, check_node_is_valid): ) and node.name in self._static_nodes_in_replacement ): - time_is_expired = time_is_up( - node.instance.launch_time, self._current_time, grace_time=self._config.node_replacement_timeout + # Set `time_is_expired` to `False` if `node.instance` is `None` since we don't have a launch time yet + time_is_expired = ( + False + if not node.instance + else time_is_up( + node.instance.launch_time, self._current_time, grace_time=self._config.node_replacement_timeout + ) ) + log.debug(f"Node {node} is in replacement and timer expired? {time_is_expired}, instance? {node.instance}") return not time_is_expired if check_node_is_valid else time_is_expired return False diff --git a/src/slurm_plugin/slurm_resources.py b/src/slurm_plugin/slurm_resources.py index e65f5050..a2378a0f 100644 --- a/src/slurm_plugin/slurm_resources.py +++ b/src/slurm_plugin/slurm_resources.py @@ -474,7 +474,7 @@ def is_backing_instance_valid( if log_warn_if_unhealthy: logger.warning( f"Incrementing missing EC2 instance count for node {self.name} to " - f"{nodes_without_backing_instance_count_map[self.name]}." + f"{nodes_without_backing_instance_count_map[self.name].count}." ) else: # Remove the slurm node from the map since the instance is healthy