From 191f64e95ad43570118137cf1cbdc329baa05a85 Mon Sep 17 00:00:00 2001
From: Hui <tzattack@gmail.com>
Date: Mon, 29 Apr 2024 22:31:22 +0800
Subject: [PATCH] Add support for the macOS GPU (PyTorch MPS backend)

---
 inference.py                                | 15 ++++++++++++---
 src/facerender/modules/dense_motion.py      | 19 ++++++++++---------
 src/facerender/modules/keypoint_detector.py |  2 +-
 src/facerender/modules/util.py              |  5 ++++-
 4 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/inference.py b/inference.py
index a0b00790..bf1b13e1 100644
--- a/inference.py
+++ b/inference.py
@@ -1,8 +1,11 @@
-from glob import glob
+import os
 import shutil
+
+os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
+
 import torch
 from time import  strftime
-import os, sys, time
+import sys
 from argparse import ArgumentParser
 
 from src.utils.preprocess import CropAndExtract
@@ -136,10 +139,16 @@ def main(args):
 
     args = parser.parse_args()
 
-    if torch.cuda.is_available() and not args.cpu:
+    if args.cpu:
+        args.device = "cpu"
+    elif torch.backends.mps.is_available():
+        args.device = "mps"
+    elif torch.cuda.is_available():
         args.device = "cuda"
     else:
         args.device = "cpu"
 
+    print(f"Using device: {args.device}")
+
     main(args)
 
diff --git a/src/facerender/modules/dense_motion.py b/src/facerender/modules/dense_motion.py
index a286ead2..8e9db5fd 100644
--- a/src/facerender/modules/dense_motion.py
+++ b/src/facerender/modules/dense_motion.py
@@ -35,23 +35,24 @@ def create_sparse_motions(self, feature, kp_driving, kp_source):
         bs, _, d, h, w = feature.shape
         identity_grid = make_coordinate_grid((d, h, w), type=kp_source['value'].type())
         identity_grid = identity_grid.view(1, 1, d, h, w, 3)
-        coordinate_grid = identity_grid - kp_driving['value'].view(bs, self.num_kp, 1, 1, 1, 3)
-        
+        coordinate_grid = identity_grid - kp_driving['value'].view(bs, self.num_kp, 1, 1, 1, 3).to(identity_grid.device)
+
         # if 'jacobian' in kp_driving:
         if 'jacobian' in kp_driving and kp_driving['jacobian'] is not None:
             jacobian = torch.matmul(kp_source['jacobian'], torch.inverse(kp_driving['jacobian']))
             jacobian = jacobian.unsqueeze(-3).unsqueeze(-3).unsqueeze(-3)
             jacobian = jacobian.repeat(1, 1, d, h, w, 1, 1)
             coordinate_grid = torch.matmul(jacobian, coordinate_grid.unsqueeze(-1))
-            coordinate_grid = coordinate_grid.squeeze(-1)                  
+            coordinate_grid = coordinate_grid.squeeze(-1)
 
 
-        driving_to_source = coordinate_grid + kp_source['value'].view(bs, self.num_kp, 1, 1, 1, 3)    # (bs, num_kp, d, h, w, 3)
+        driving_to_source = (coordinate_grid +
+                             kp_source['value'].view(bs, self.num_kp, 1, 1, 1, 3).to(coordinate_grid.device))    # (bs, num_kp, d, h, w, 3)
 
         #adding background feature
         identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1, 1)
         sparse_motions = torch.cat([identity_grid, driving_to_source], dim=1)                #bs num_kp+1 d h w 3
-        
+
         # sparse_motions = driving_to_source
 
         return sparse_motions
@@ -90,7 +91,7 @@ def forward(self, feature, kp_driving, kp_source):
 
         heatmap = self.create_heatmap_representations(deformed_feature, kp_driving, kp_source)
 
-        input_ = torch.cat([heatmap, deformed_feature], dim=2)
+        input_ = torch.cat([heatmap.to(deformed_feature.device), deformed_feature], dim=2)
         input_ = input_.view(bs, -1, d, h, w)
 
         # input = deformed_feature.view(bs, -1, d, h, w)      # (bs, num_kp+1 * c, d, h, w)
@@ -102,9 +103,9 @@ def forward(self, feature, kp_driving, kp_source):
         mask = F.softmax(mask, dim=1)
         out_dict['mask'] = mask
         mask = mask.unsqueeze(2)                                   # (bs, num_kp+1, 1, d, h, w)
-        
-        zeros_mask = torch.zeros_like(mask)   
-        mask = torch.where(mask < 1e-3, zeros_mask, mask) 
+
+        zeros_mask = torch.zeros_like(mask)
+        mask = torch.where(mask < 1e-3, zeros_mask, mask).to(sparse_motion.device)
 
         sparse_motion = sparse_motion.permute(0, 1, 5, 2, 3, 4)    # (bs, num_kp+1, 3, d, h, w)
         deformation = (sparse_motion * mask).sum(dim=1)            # (bs, 3, d, h, w)
diff --git a/src/facerender/modules/keypoint_detector.py b/src/facerender/modules/keypoint_detector.py
index 62a38a96..c8d1f845 100644
--- a/src/facerender/modules/keypoint_detector.py
+++ b/src/facerender/modules/keypoint_detector.py
@@ -47,7 +47,7 @@ def gaussian2kp(self, heatmap):
         """
         shape = heatmap.shape
         heatmap = heatmap.unsqueeze(-1)
-        grid = make_coordinate_grid(shape[2:], heatmap.type()).unsqueeze_(0).unsqueeze_(0)
+        grid = make_coordinate_grid(shape[2:], heatmap.type()).to(heatmap.device).unsqueeze_(0).unsqueeze_(0)
         value = (heatmap * grid).sum(dim=(2, 3, 4))
         kp = {'value': value}
 
diff --git a/src/facerender/modules/util.py b/src/facerender/modules/util.py
index b916deef..cfb7bc14 100644
--- a/src/facerender/modules/util.py
+++ b/src/facerender/modules/util.py
@@ -26,7 +26,7 @@ def kp2gaussian(kp, spatial_size, kp_variance):
     shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 1, 3)
     mean = mean.view(*shape)
 
-    mean_sub = (coordinate_grid - mean)
+    mean_sub = (coordinate_grid - mean.to(coordinate_grid.device))
 
     out = torch.exp(-0.5 * (mean_sub ** 2).sum(-1) / kp_variance)
 
@@ -53,6 +53,9 @@ def make_coordinate_grid_2d(spatial_size, type):
 
 def make_coordinate_grid(spatial_size, type):
     d, h, w = spatial_size
+
+    if torch.backends.mps.is_available():
+        type = torch.float
     x = torch.arange(w).type(type)
     y = torch.arange(h).type(type)
     z = torch.arange(d).type(type)