feat(gpu): auto-detect GPU (CUDA/MPS/cpu), remove hard-coded (#20)
TODO: amd64 base image with anything else we need for apple silicon
gadicc committed Feb 5, 2023
1 parent 92047c1 commit 682a342
Showing 5 changed files with 44 additions and 12 deletions.
12 changes: 7 additions & 5 deletions api/app.py
@@ -20,6 +20,7 @@
 from download import download_model, normalize_model_id
 import traceback
 from precision import MODEL_REVISION, MODEL_PRECISION
+from device import device, device_id, device_name

 RUNTIME_DOWNLOADS = os.getenv("RUNTIME_DOWNLOADS") == "1"
 USE_DREAMBOOTH = os.getenv("USE_DREAMBOOTH") == "1"
@@ -58,7 +59,7 @@ def init():
         "init",
         "start",
         {
-            "device": torch.cuda.get_device_name(),
+            "device": device_name,
             "hostname": os.getenv("HOSTNAME"),
             "model_id": MODEL_ID,
             "diffusers": __version__,
@@ -329,7 +330,7 @@ def inference(all_inputs: dict) -> dict:
         last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})

     # Run the model
-    # with autocast("cuda"):
+    # with autocast(device_id):
     #     image = pipeline(**model_inputs).images[0]

     if call_inputs.get("train", None) == "dreambooth":
@@ -357,10 +358,10 @@ def inference(all_inputs: dict) -> dict:
     # Do this after dreambooth as dreambooth accepts a seed int directly.
     seed = model_inputs.get("seed", None)
     if seed == None:
-        generator = torch.Generator(device="cuda")
+        generator = torch.Generator(device=device)
         generator.seed()
     else:
-        generator = torch.Generator(device="cuda").manual_seed(seed)
+        generator = torch.Generator(device=device).manual_seed(seed)
     del model_inputs["seed"]

     model_inputs.update({"generator": generator})
@@ -375,7 +376,7 @@ def inference(all_inputs: dict) -> dict:
     # autocast im2img and inpaint which are broken in 0.4.0, 0.4.1
     # still broken in 0.5.1
     elif call_inputs.get("PIPELINE") != "StableDiffusionPipeline":
-        with autocast("cuda"):
+        with autocast(device_id):
             images = pipeline(**model_inputs).images
     else:
         images = pipeline(**model_inputs).images
@@ -403,6 +404,7 @@ def inference(all_inputs: dict) -> dict:
     else:
         result = result | {"image_base64": images_base64[0]}

+    # TODO, move and generalize in device.py
     mem_usage = 0
     if torch.cuda.is_available():
         mem_usage = torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated()
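The hunk above leaves a TODO to move the memory probe into device.py and generalize it across backends. A minimal sketch of what that helper could look like — get_mem_usage is a hypothetical name, not part of this commit, and the non-CUDA branches simply report 0.0 since PyTorch exposed no comparable allocator counters for MPS or CPU at the time:

import torch

def get_mem_usage() -> float:
    # Fraction of peak CUDA memory currently allocated; 0.0 off-CUDA.
    if torch.cuda.is_available():
        peak = torch.cuda.max_memory_allocated()
        # Guard against division by zero before anything has been allocated.
        return torch.cuda.memory_allocated() / peak if peak else 0.0
    # No equivalent counters for MPS/CPU here, so report 0.0.
    return 0.0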
7 changes: 2 additions & 5 deletions api/convert_to_diffusers.py
@@ -3,6 +3,7 @@
 import subprocess
 import torch
 from utils import Storage
+from device import device_id

 MODEL_ID = os.environ.get("MODEL_ID", None)
 CHECKPOINT_DIR = "/root/.cache/checkpoints"
@@ -36,17 +37,13 @@ def main(model_id: str, checkpoint_url: str, checkpoint_config_url: str):
         "./diffusers/scripts/convert_original_stable_diffusion_to_diffusers.py"
     )

-    gpu = False
-    if torch.cuda.is_available():
-        gpu = True
-
     print("Converting " + fname + " to diffusers model " + model_id + "...", flush=True)

     subprocess.run(
         ["pip", "install", "omegaconf", "pytorch_lightning", "tensorboard"], check=True
     )
     subprocess.run(["apt-get", "install", "-y", "wget"], check=True)
-    if not gpu:
+    if device_id == "cpu":
         subprocess.run(
             [
                 "sed",
32 changes: 32 additions & 0 deletions api/device.py
@@ -0,0 +1,32 @@
+import torch
+
+if torch.cuda.is_available():
+    print("[device] CUDA (Nvidia) detected")
+    device_id = "cuda"
+    device_name = torch.cuda.get_device_name()
+elif torch.backends.mps.is_available():
+    print("[device] MPS (MacOS Metal, Apple M1, etc) detected")
+    device_id = "mps"
+    device_name = "MPS"
+else:
+    print("[device] CPU only - no GPU detected")
+    device_id = "cpu"
+    device_name = "CPU only"
+
+    if not torch.backends.cuda.is_built():
+        print(
+            "CUDA not available because the current PyTorch install was not "
+            "built with CUDA enabled."
+        )
+    if torch.backends.mps.is_built():
+        print(
+            "MPS not available because the current MacOS version is not 12.3+ "
+            "and/or you do not have an MPS-enabled device on this machine."
+        )
+    else:
+        print(
+            "MPS not available because the current PyTorch install was not "
+            "built with MPS enabled."
+        )
+
+device = torch.device(device_id)
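For reference, a minimal hypothetical consumer of the new module (not part of the commit), mirroring the call sites patched elsewhere in this changeset:

import torch
from device import device, device_id, device_name

print(f"running on {device_name} ({device_id})")
x = torch.zeros(4, device=device)  # models move the same way: model.to(device)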
2 changes: 1 addition & 1 deletion api/download.py
@@ -101,7 +101,7 @@ def download_model(
             "model_revision": model_revision,
         }
     )
-    # This would be quicker to just model.to("cuda") afterwards, but
+    # This would be quicker to just model.to(device) afterwards, but
     # this conveniently logs all the timings (and doesn't happen often)
     print("download")
     send("download", "start", {})
3 changes: 2 additions & 1 deletion api/loadModel.py
@@ -3,6 +3,7 @@
 from diffusers import pipelines as _pipelines, StableDiffusionPipeline
 from getScheduler import getScheduler, DEFAULT_SCHEDULER
 from precision import torch_dtype_from_precision
+from device import device
 import time

 HF_AUTH_TOKEN = os.getenv("HF_AUTH_TOKEN")
@@ -70,7 +71,7 @@ def loadModel(model_id: str, load=True, precision=None, revision=None):

     if load:
         to_gpu = time.time()
-        model.to("cuda")
+        model.to(device)
         to_gpu = round((time.time() - to_gpu) * 1000)
         print(f"Loaded from disk in {from_pretrained} ms, to gpu in {to_gpu} ms")
     else:
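The inline stopwatch above (record time.time(), move the model, round the delta to milliseconds) recurs in this codebase; a small hypothetical context manager wrapping the same pattern, not part of the commit:

import time
from contextlib import contextmanager

@contextmanager
def timed(label: str):
    # Print the wall-clock duration of the wrapped block in milliseconds.
    start = time.time()
    yield
    print(f"{label} in {round((time.time() - start) * 1000)} ms")

# Usage: with timed("to gpu"): model.to(device)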
