feat(gpu): auto-detect GPU (CUDA/MPS/cpu), remove hard-coded (#20)
TODO: amd64 base image with anything else we need for apple silicon
gadicc committed Feb 5, 2023
1 parent 92047c1 commit 682a342
Showing 5 changed files with 44 additions and 12 deletions.
12 changes: 7 additions & 5 deletions api/app.py
@@ -20,6 +20,7 @@
 from download import download_model, normalize_model_id
 import traceback
 from precision import MODEL_REVISION, MODEL_PRECISION
+from device import device, device_id, device_name

 RUNTIME_DOWNLOADS = os.getenv("RUNTIME_DOWNLOADS") == "1"
 USE_DREAMBOOTH = os.getenv("USE_DREAMBOOTH") == "1"
@@ -58,7 +59,7 @@ def init():
         "init",
         "start",
         {
-            "device": torch.cuda.get_device_name(),
+            "device": device_name,
             "hostname": os.getenv("HOSTNAME"),
             "model_id": MODEL_ID,
             "diffusers": __version__,
@@ -329,7 +330,7 @@ def inference(all_inputs: dict) -> dict:
         last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})

     # Run the model
-    # with autocast("cuda"):
+    # with autocast(device_id):
     #     image = pipeline(**model_inputs).images[0]

     if call_inputs.get("train", None) == "dreambooth":
@@ -357,10 +358,10 @@ def inference(all_inputs: dict) -> dict:
     # Do this after dreambooth as dreambooth accepts a seed int directly.
     seed = model_inputs.get("seed", None)
     if seed == None:
-        generator = torch.Generator(device="cuda")
+        generator = torch.Generator(device=device)
         generator.seed()
     else:
-        generator = torch.Generator(device="cuda").manual_seed(seed)
+        generator = torch.Generator(device=device).manual_seed(seed)
     del model_inputs["seed"]

     model_inputs.update({"generator": generator})
@@ -375,7 +376,7 @@ def inference(all_inputs: dict) -> dict:
     # autocast im2img and inpaint which are broken in 0.4.0, 0.4.1
     # still broken in 0.5.1
     elif call_inputs.get("PIPELINE") != "StableDiffusionPipeline":
-        with autocast("cuda"):
+        with autocast(device_id):
             images = pipeline(**model_inputs).images
     else:
         images = pipeline(**model_inputs).images
@@ -403,6 +404,7 @@ def inference(all_inputs: dict) -> dict:
     else:
         result = result | {"image_base64": images_base64[0]}

+    # TODO, move and generalize in device.py
     mem_usage = 0
     if torch.cuda.is_available():
         mem_usage = torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated()
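The hunk above leaves a TODO to move the memory probe into device.py and generalize it across backends. A minimal sketch of what that helper could look like — get_mem_usage is a hypothetical name, not part of this commit, and the non-CUDA branches simply report 0.0 since PyTorch exposed no comparable allocator counters for MPS or CPU at the time:

import torch

def get_mem_usage() -> float:
    # Fraction of peak CUDA memory currently allocated; 0.0 off-CUDA.
    if torch.cuda.is_available():
        peak = torch.cuda.max_memory_allocated()
        # Guard against division by zero before anything has been allocated.
        return torch.cuda.memory_allocated() / peak if peak else 0.0
    # No equivalent counters for MPS/CPU here, so report 0.0.
    return 0.0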
7 changes: 2 additions & 5 deletions api/convert_to_diffusers.py
@@ -3,6 +3,7 @@
 import subprocess
 import torch
 from utils import Storage
+from device import device_id

 MODEL_ID = os.environ.get("MODEL_ID", None)
 CHECKPOINT_DIR = "/root/.cache/checkpoints"
@@ -36,17 +37,13 @@ def main(model_id: str, checkpoint_url: str, checkpoint_config_url: str):
         "./diffusers/scripts/convert_original_stable_diffusion_to_diffusers.py"
     )

-    gpu = False
-    if torch.cuda.is_available():
-        gpu = True
-
     print("Converting " + fname + " to diffusers model " + model_id + "...", flush=True)

     subprocess.run(
         ["pip", "install", "omegaconf", "pytorch_lightning", "tensorboard"], check=True
     )
     subprocess.run(["apt-get", "install", "-y", "wget"], check=True)
-    if not gpu:
+    if device_id == "cpu":
         subprocess.run(
             [
                 "sed",
32 changes: 32 additions & 0 deletions api/device.py
@@ -0,0 +1,32 @@
+import torch
+
+if torch.cuda.is_available():
+    print("[device] CUDA (Nvidia) detected")
+    device_id = "cuda"
+    device_name = torch.cuda.get_device_name()
+elif torch.backends.mps.is_available():
+    print("[device] MPS (MacOS Metal, Apple M1, etc) detected")
+    device_id = "mps"
+    device_name = "MPS"
+else:
+    print("[device] CPU only - no GPU detected")
+    device_id = "cpu"
+    device_name = "CPU only"
+
+    if not torch.backends.cuda.is_built():
+        print(
+            "CUDA not available because the current PyTorch install was not "
+            "built with CUDA enabled."
+        )
+    if torch.backends.mps.is_built():
+        print(
+            "MPS not available because the current MacOS version is not 12.3+ "
+            "and/or you do not have an MPS-enabled device on this machine."
+        )
+    else:
+        print(
+            "MPS not available because the current PyTorch install was not "
+            "built with MPS enabled."
+        )
+
+device = torch.device(device_id)
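For reference, a minimal hypothetical consumer of the new module (not part of the commit), mirroring the call sites patched elsewhere in this changeset:

import torch
from device import device, device_id, device_name

print(f"running on {device_name} ({device_id})")
x = torch.zeros(4, device=device)  # models move the same way: model.to(device)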
2 changes: 1 addition & 1 deletion api/download.py
@@ -101,7 +101,7 @@ def download_model(
             "model_revision": model_revision,
         }
     )
-    # This would be quicker to just model.to("cuda") afterwards, but
+    # This would be quicker to just model.to(device) afterwards, but
     # this conveniently logs all the timings (and doesn't happen often)
     print("download")
     send("download", "start", {})
3 changes: 2 additions & 1 deletion api/loadModel.py
@@ -3,6 +3,7 @@
 from diffusers import pipelines as _pipelines, StableDiffusionPipeline
 from getScheduler import getScheduler, DEFAULT_SCHEDULER
 from precision import torch_dtype_from_precision
+from device import device
 import time

 HF_AUTH_TOKEN = os.getenv("HF_AUTH_TOKEN")
@@ -70,7 +71,7 @@ def loadModel(model_id: str, load=True, precision=None, revision=None):

     if load:
         to_gpu = time.time()
-        model.to("cuda")
+        model.to(device)
         to_gpu = round((time.time() - to_gpu) * 1000)
         print(f"Loaded from disk in {from_pretrained} ms, to gpu in {to_gpu} ms")
     else:
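The inline stopwatch above (record time.time(), move the model, round the delta to milliseconds) recurs in this codebase; a small hypothetical context manager wrapping the same pattern, not part of the commit:

import time
from contextlib import contextmanager

@contextmanager
def timed(label: str):
    # Print the wall-clock duration of the wrapped block in milliseconds.
    start = time.time()
    yield
    print(f"{label} in {round((time.time() - start) * 1000)} ms")

# Usage: with timed("to gpu"): model.to(device)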
