diff --git a/codalab/worker/docker_utils.py b/codalab/worker/docker_utils.py index edffc7c9d..c547ab94a 100644 --- a/codalab/worker/docker_utils.py +++ b/codalab/worker/docker_utils.py @@ -165,10 +165,14 @@ def get_nvidia_devices(self, use_docker=True): remove=True, ) gpu_info = output.decode() - if gpu_info.find("==========") != -1: # need to remove header - gpus = gpu_info.split("\n")[15:-1] - else: - gpus = gpu_info.split("\n")[:-1] + GPU_REGEX = r"(\d+), (?:GPU-)?([a-fA-F0-9-]+)" + gpus = {} + for line in gpu_info.splitlines(): + match = re.match(GPU_REGEX, line) + if match: + idx = match.group(1) + uuid = match.group(2) + gpus[idx] = uuid else: # use the singularity runtime to run nvidia-smi @@ -177,10 +181,10 @@ def get_nvidia_devices(self, use_docker=True): # if output['return_code'] != 0: # raise SingularityError # gpus = output['message'] - gpus = [] + gpus = {} # Get newline delimited gpu-index, gpu-uuid list logger.info("GPUs: " + str(gpus)) - return {gpu.split(',')[0].strip(): gpu.split(',')[1].strip() for gpu in gpus} + return gpus @wrap_exception('Unable to fetch Docker container ip') def get_container_ip(self, network_name: str, container_id: str):