From 69a744a7abe3b21b1c594ba2068a79faa8ca3493 Mon Sep 17 00:00:00 2001
From: Shohruh
Date: Tue, 5 Mar 2024 11:12:05 +0900
Subject: [PATCH] Initial commit

---
 main.py                  | 219 +++++++++++++++++++++++++++++++++++++++
 models/nets.py           | 158 ++++++++++++++++++++++++++++
 outputs/weights/step.csv | 101 ++++++++++++++++++
 utils/datasets.py        | 111 ++++++++++++++++++++
 utils/util.py            | 190 +++++++++++++++++++++++++++++++++
 5 files changed, 779 insertions(+)
 create mode 100644 main.py
 create mode 100644 models/nets.py
 create mode 100644 outputs/weights/step.csv
 create mode 100644 utils/datasets.py
 create mode 100644 utils/util.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..a15f1a2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,219 @@
+import argparse
+import csv
+import time
+
+import cv2
+import numpy as np
+import torch
+import tqdm
+from PIL import Image
+from face_detection import RetinaFace
+from torch.utils.data import DataLoader
+
+from utils.datasets import Datasets
+from utils.util import *
+
+
+def train(args):
+    model = load_model(args, True).cuda()
+    dataset = Datasets(args.data_dir, '300W_LP', get_transforms(True), True)
+    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
+
+    criterion = GeodesicLoss().cuda()
+    optimizer = torch.optim.Adam(model.parameters(), args.lr)
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=0.5)
+
+    best_loss = float('inf')
+    with open(f'{args.save_dir}/weights/step.csv', 'w') as log:
+        logger = csv.DictWriter(log, fieldnames=['epoch', 'Loss', 'Pitch', 'Yaw', 'Roll'])
+        logger.writeheader()
+        for epoch in range(args.epochs):
+            print(('\n' + '%10s' * 3) % ('epoch', 'memory', 'loss'))
+            p_bar = tqdm.tqdm(loader, total=len(loader))
+            model.train()
+            total_loss = 0
+            for samples, labels in p_bar:
+                samples = samples.cuda()
+                labels = labels.cuda()
+                optimizer.zero_grad()
+                outputs = model(samples)
+                loss = criterion(outputs, labels)
+                loss.backward()
+                optimizer.step()
+
+                total_loss += loss.item()
+                memory = f'{torch.cuda.memory_reserved() / 1E9:.3g}G'
+                s = ('%10s' * 2 + '%10.3g') % (f'{epoch + 1}/{args.epochs}', memory, loss.item())
+                p_bar.set_description(s)
+
+            avg_loss = total_loss / len(loader)
+            val_loss, val_pitch, val_yaw, val_roll = test(args, model)
+            scheduler.step()
+
+            logger.writerow({'epoch': str(epoch + 1).zfill(3),
+                             'Loss': f'{avg_loss:.3f}',
+                             'Pitch': f'{val_pitch:.3f}',
+                             'Yaw': f'{val_yaw:.3f}',
+                             'Roll': f'{val_roll:.3f}'})
+            log.flush()
+            if val_loss < best_loss:
+                best_loss = val_loss
+                torch.save(model.state_dict(), f'{args.save_dir}/weights/best.pt')
+                print(f'Epoch {epoch + 1}: new best model saved with val_loss {best_loss:.3f}')
+
+        torch.save(model.state_dict(), f'{args.save_dir}/weights/last.pt')
+
+    torch.cuda.empty_cache()
+    print('Training completed.')
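With milestones=[10, 20] and gamma=0.5, the learning rate used above starts at --lr (1e-4 by default), halves to 5e-5 after epoch 10, and halves again to 2.5e-5 after epoch 20. A minimal sketch to verify the schedule in isolation; the dummy parameter exists only to satisfy the optimizer:

    import torch

    param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.Adam([param], lr=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=0.5)

    for epoch in range(1, 26):
        print(epoch, optimizer.param_groups[0]['lr'])  # lr in effect during this epoch
        optimizer.step()
        scheduler.step()
    # 1e-4 for epochs 1-10, 5e-5 for 11-20, 2.5e-5 from epoch 21 on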
+@torch.no_grad()
+def test(args, model=None):
+    dataset = Datasets(args.data_dir, 'AFLW2K', get_transforms(False), False)
+    loader = DataLoader(dataset, batch_size=64)
+    if model is None:
+        model = load_model(args, False).cuda()
+    model.half()
+    model.eval()
+
+    total, y_error, p_error, r_error = 0, 0.0, 0.0, 0.0
+    for sample, label in tqdm.tqdm(loader, ('%10s' * 3) % ('Pitch', 'Yaw', 'Roll')):
+        sample = sample.cuda().half()
+        total += label.size(0)
+
+        # Ground-truth Euler angles in degrees
+        p_gt = label[:, 0].float() * 180 / np.pi
+        y_gt = label[:, 1].float() * 180 / np.pi
+        r_gt = label[:, 2].float() * 180 / np.pi
+
+        output = model(sample)
+        euler = compute_euler(output) * 180 / np.pi
+        p_pred = euler[:, 0].cpu()
+        y_pred = euler[:, 1].cpu()
+        r_pred = euler[:, 2].cpu()
+
+        # Wrapped absolute error: take the smallest distance among the candidate
+        # offsets so that e.g. -179 and +179 degrees differ by 2, not 358
+        p_error += torch.sum(torch.min(torch.stack((torch.abs(p_gt - p_pred),
+                                                    torch.abs(p_pred + 360 - p_gt),
+                                                    torch.abs(p_pred - 360 - p_gt),
+                                                    torch.abs(p_pred + 180 - p_gt),
+                                                    torch.abs(p_pred - 180 - p_gt))), 0)[0])
+
+        y_error += torch.sum(torch.min(torch.stack((torch.abs(y_gt - y_pred),
+                                                    torch.abs(y_pred + 360 - y_gt),
+                                                    torch.abs(y_pred - 360 - y_gt),
+                                                    torch.abs(y_pred + 180 - y_gt),
+                                                    torch.abs(y_pred - 180 - y_gt))), 0)[0])
+
+        r_error += torch.sum(torch.min(torch.stack((torch.abs(r_gt - r_pred),
+                                                    torch.abs(r_pred + 360 - r_gt),
+                                                    torch.abs(r_pred - 360 - r_gt),
+                                                    torch.abs(r_pred + 180 - r_gt),
+                                                    torch.abs(r_pred - 180 - r_gt))), 0)[0])
+
+    p_error, y_error, r_error = p_error / total, y_error / total, r_error / total
+    avg_error = (p_error + y_error + r_error) / 3
+    print(('%10.3g' * 3) % (p_error, y_error, r_error))
+
+    model.float()  # restore full precision for continued training
+    return avg_error, p_error, y_error, r_error
+
+
+@torch.no_grad()
+def inference(args):
+    model = load_model(args, False).cuda()
+    model.eval()
+    detector = RetinaFace(0)
+
+    cap = cv2.VideoCapture(0)
+    # Check that the webcam is opened correctly
+    if not cap.isOpened():
+        raise IOError('Cannot open webcam')
+    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    out = cv2.VideoWriter(f'{args.save_dir}/output.avi', cv2.VideoWriter_fourcc(*'MJPG'), 25,
+                          (frame_width, frame_height))
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        faces = detector(frame)
+        for box, landmarks, score in faces:
+            # Skip low-confidence detections
+            if score < .95:
+                continue
+            x_min = int(box[0])
+            y_min = int(box[1])
+            x_max = int(box[2])
+            y_max = int(box[3])
+            bbox_width = abs(x_max - x_min)
+            bbox_height = abs(y_max - y_min)
+
+            x_min = max(0, x_min - int(0.2 * bbox_height))
+            y_min = max(0, y_min - int(0.2 * bbox_width))
+            x_max = x_max + int(0.2 * bbox_height)
+            y_max = y_max + int(0.2 * bbox_width)
+
+            img = frame[y_min:y_max, x_min:x_max]
+            img = Image.fromarray(img).convert('RGB')
+            img = get_transforms(False)(img)
+            img = img.unsqueeze(0).cuda()
+
+            start = time.time()
+            R_pred = model(img)
+            end = time.time()
+            print('Head pose estimation: %.2f ms' % ((end - start) * 1000.))
+
+            euler = compute_euler(R_pred) * 180 / np.pi
+            p_pred_deg = euler[:, 0].cpu()
+            y_pred_deg = euler[:, 1].cpu()
+            r_pred_deg = euler[:, 2].cpu()
+
+            # utils.draw_axis(frame, y_pred_deg, p_pred_deg, r_pred_deg, left + int(.5 * (right - left)), top, size=100)
+            plot_pose_cube(frame, y_pred_deg, p_pred_deg, r_pred_deg,
+                           x_min + int(.5 * (x_max - x_min)), y_min + int(.5 * (y_max - y_min)),
+                           size=bbox_width)
+
+        cv2.imshow('Demo', frame)
+        out.write(frame)
+        if cv2.waitKey(5) & 0xFF == 27:  # ESC quits the demo
+            break
+
+    cap.release()
+    out.release()
+    cv2.destroyAllWindows()
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Head Pose Estimation')
+    parser.add_argument('--model-name', type=str, default='RepVGG-A2')
+    parser.add_argument('--data-dir', type=str, default='../../Datasets/HPE')
+    parser.add_argument('--save-dir', type=str, default='./outputs')
+    parser.add_argument('--epochs', type=int, default=100)
+    parser.add_argument('--lr', type=float, default=0.0001)
+    parser.add_argument('--batch-size', type=int, default=64)
+    parser.add_argument('--train', action='store_true')
+    parser.add_argument('--test', action='store_true')
+    parser.add_argument('--inference', action='store_true')
+
+    args = parser.parse_args()
+    if args.train:
+        train(args)
+    if args.test:
+        test(args)
+    if args.inference:
+        inference(args)
+
+
+if __name__ == '__main__':
+    main()
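The three accumulations in test() repeat one wrapped-distance pattern. A hypothetical helper (not part of the patch, the name is illustrative) that captures it:

    import torch

    def wrapped_abs_error(gt, pred):
        # Smallest absolute difference over the same candidate offsets used in
        # test(), so -179 and +179 degrees are 2 degrees apart, not 358
        candidates = torch.stack((torch.abs(gt - pred),
                                  torch.abs(pred + 360 - gt),
                                  torch.abs(pred - 360 - gt),
                                  torch.abs(pred + 180 - gt),
                                  torch.abs(pred - 180 - gt)))
        return torch.sum(torch.min(candidates, 0)[0])

    # e.g. p_error += wrapped_abs_error(p_gt, p_pred)

With the flags defined in main(), typical invocations would be `python main.py --train`, `python main.py --test`, and `python main.py --inference`, with `--data-dir` pointing at a folder containing the 300W_LP and AFLW2K subsets.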
diff --git a/models/nets.py b/models/nets.py
new file mode 100644
index 0000000..601653d
--- /dev/null
+++ b/models/nets.py
@@ -0,0 +1,158 @@
+import torch
+import torch.nn as nn
+
+from utils import util
+
+
+def conv_bn(inp, oup, kernel_size, stride, padding, groups=1):
+    result = nn.Sequential()
+    result.add_module('conv', nn.Conv2d(inp, oup, kernel_size, stride, padding, groups=groups, bias=False))
+    result.add_module('bn', nn.BatchNorm2d(oup))
+    return result
+
+
+class RepVGGBlock(nn.Module):
+    def __init__(self, inp, oup, k, s=1, p=0, d=1, gr=1, padding_mode='zeros', deploy=False):
+        super(RepVGGBlock, self).__init__()
+        self.inp = inp
+        self.groups = gr
+        self.deploy = deploy
+        self.nonlinearity = nn.ReLU()
+        self.se = nn.Identity()
+
+        assert k == 3
+        assert p == 1
+
+        padding = p - k // 2  # padding of the 1x1 branch, so all branches keep the same spatial size
+
+        if deploy:
+            # Single re-parameterized conv used at inference time
+            self.rbr_reparam = nn.Conv2d(inp, oup, k, s, p, d, gr, bias=True, padding_mode=padding_mode)
+        else:
+            # Three training-time branches: identity (BN only), 3x3 conv + BN, 1x1 conv + BN
+            self.rbr_identity = nn.BatchNorm2d(inp) if oup == inp and s == 1 else None
+            self.rbr_dense = conv_bn(inp, oup, k, s, p, groups=gr)
+            self.rbr_1x1 = conv_bn(inp, oup, 1, s, padding, groups=gr)
+
+    def forward(self, x):
+        if hasattr(self, 'rbr_reparam'):
+            return self.nonlinearity(self.se(self.rbr_reparam(x)))
+
+        if self.rbr_identity is None:
+            out = 0
+        else:
+            out = self.rbr_identity(x)
+
+        return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + out))
+
+
+class RepVGG(nn.Module):
+    def __init__(self, layers, width=None, num_cls=1000, gr_map=None, deploy=False):
+        super(RepVGG, self).__init__()
+        self.deploy = deploy
+        self.cur_layer_idx = 1
+        self.gr_map = gr_map or dict()
+
+        assert len(width) == 4
+        assert 0 not in self.gr_map
+
+        self.inp = min(64, int(64 * width[0]))
+
+        self.stage0 = RepVGGBlock(3, self.inp, 3, 2, 1, deploy=self.deploy)
+        self.stage1 = self._make_stage(int(64 * width[0]), layers[0], stride=2)
+        self.stage2 = self._make_stage(int(128 * width[1]), layers[1], stride=2)
+        self.stage3 = self._make_stage(int(256 * width[2]), layers[2], stride=2)
+        self.stage4 = self._make_stage(int(512 * width[3]), layers[3], stride=2)
+        self.gap = nn.AdaptiveAvgPool2d(output_size=1)
+        self.linear = nn.Linear(int(512 * width[3]), num_cls)
+
+    def _make_stage(self, oup, layer, stride):
+        # The first block of a stage downsamples; the rest keep stride 1
+        strides = [stride] + [1] * (layer - 1)
+        layers = []
+        for stride in strides:
+            cur_groups = self.gr_map.get(self.cur_layer_idx, 1)
+            layers.append(RepVGGBlock(self.inp, oup, 3, stride, p=1, gr=cur_groups, deploy=self.deploy))
+            self.inp = oup
+            self.cur_layer_idx += 1
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = self.stage0(x)
+        out = self.stage1(out)
+        out = self.stage2(out)
+        out = self.stage3(out)
+        out = self.stage4(out)
+        out = self.gap(out)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def create_model(backbone_name, num_cls=1000):
+    optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
+    g2_map = {layer: 2 for layer in optional_groupwise_layers}
+    g4_map = {layer: 4 for layer in optional_groupwise_layers}
+    default_group_map = None
+    net_configs = {
+        'RepVGG-A0': ([2, 4, 14, 1], [0.75, 0.75, 0.75, 2.5], default_group_map),
+        'RepVGG-A1': ([2, 4, 14, 1], [1, 1, 1, 2.5], default_group_map),
+        'RepVGG-A2': ([2, 4, 14, 1], [1.5, 1.5, 1.5, 2.75], default_group_map),
+        'RepVGG-B0': ([4, 6, 16, 1], [1, 1, 1, 2.5], default_group_map),
+        'RepVGG-B1': ([4, 6, 16, 1], [2, 2, 2, 4], default_group_map),
+        'RepVGG-B1g2': ([4, 6, 16, 1], [2, 2, 2, 4], g2_map),
+        'RepVGG-B1g4': ([4, 6, 16, 1], [2, 2, 2, 4], g4_map),
+        'RepVGG-B2': ([4, 6, 16, 1], [2.5, 2.5, 2.5, 5], default_group_map),
+        'RepVGG-B2g2': ([4, 6, 16, 1], [2.5, 2.5, 2.5, 5], g2_map),
+        'RepVGG-B2g4': ([4, 6, 16, 1], [2.5, 2.5, 2.5, 5], g4_map),
+        'RepVGG-B3': ([4, 6, 16, 1], [3, 3, 3, 5], default_group_map),
+        'RepVGG-B3g2': ([4, 6, 16, 1], [3, 3, 3, 5], g2_map),
+        'RepVGG-B3g4': ([4, 6, 16, 1], [3, 3, 3, 5], g4_map),
+    }
+
+    def model_constructor(deploy):
+        configs = net_configs.get(backbone_name)
+        if configs is None:
+            raise ValueError(f'Network {backbone_name} is not supported.')
+        layers, width, gr_map = configs
+        return RepVGG(layers, width, num_cls, gr_map, deploy=deploy)
+
+    return model_constructor
+
+
+class HPE(nn.Module):
+    def __init__(self, model_name, weight, deploy, pretrained=True):
+        super(HPE, self).__init__()
+        repvgg = create_model(model_name)
+        backbone = repvgg(deploy)
+        if pretrained:
+            checkpoint = torch.load(weight, map_location='cpu')
+            if 'state_dict' in checkpoint:
+                checkpoint = checkpoint['state_dict']
+            # Strip the 'module.' prefix left over from nn.DataParallel checkpoints
+            ckpt = {k.replace('module.', ''): v for k, v in checkpoint.items()}
+            backbone.load_state_dict(ckpt)
+
+        self.layer0 = backbone.stage0
+        self.layer1 = backbone.stage1
+        self.layer2 = backbone.stage2
+        self.layer3 = backbone.stage3
+        self.layer4 = backbone.stage4
+        self.gap = nn.AdaptiveAvgPool2d(output_size=1)
+
+        # The out_channels of the last conv in stage4 gives the feature dimension
+        last_channel = 0
+        for n, m in self.layer4.named_modules():
+            if ('rbr_dense' in n or 'rbr_reparam' in n) and isinstance(m, nn.Conv2d):
+                last_channel = m.out_channels
+
+        # Regress the 6D rotation representation (two 3D column vectors)
+        self.linear_reg = nn.Linear(last_channel, 6)
+
+    def forward(self, x):
+        x = self.layer0(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.gap(x)
+        x = torch.flatten(x, 1)
+        x = self.linear_reg(x)
+        return util.compute_rotation(x)
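RepVGGBlock keeps three branches at training time and is meant to be folded into the single rbr_reparam conv for deployment. That conversion is not part of this patch; as a rough sketch of the conv + BN folding it would build on (standard algebra, the function name is illustrative):

    import torch
    import torch.nn as nn

    def fuse_conv_bn(conv, bn):
        # Fold a BatchNorm that follows a bias-free conv into the conv itself:
        # y = gamma * (conv(x) - mean) / std + beta
        std = (bn.running_var + bn.eps).sqrt()
        scale = bn.weight / std                           # per-output-channel factor
        weight = conv.weight * scale.reshape(-1, 1, 1, 1)
        bias = bn.bias - bn.running_mean * scale
        return weight, bias

The full RepVGG conversion additionally pads the fused 1x1 kernel to 3x3 and expresses the identity branch as a 3x3 identity kernel, so the three branches can be summed into one conv.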
diff --git a/outputs/weights/step.csv b/outputs/weights/step.csv
new file mode 100644
index 0000000..b951718
--- /dev/null
+++ b/outputs/weights/step.csv
@@ -0,0 +1,101 @@
+epoch,Loss,Pitch,Yaw,Roll
+001,0.085,6.628,4.732,4.674
+002,0.050,5.570,5.128,4.183
+003,0.042,5.877,4.997,4.244
+004,0.037,5.616,4.860,3.812
+005,0.034,5.476,4.771,4.200
+006,0.027,5.533,4.533,4.070
+007,0.024,5.290,4.494,3.708
+008,0.023,5.322,4.355,3.793
+009,0.022,5.228,4.404,3.722
+010,0.021,5.098,4.121,3.480
+011,0.018,5.210,4.278,3.661
+012,0.018,5.186,4.184,3.592
+013,0.017,5.276,4.076,3.689
+014,0.017,5.085,4.072,3.553
+015,0.016,5.130,4.017,3.524
+016,0.016,5.434,4.132,3.880
+017,0.016,5.309,4.076,3.722
+018,0.016,5.055,4.021,3.434
+019,0.015,5.132,3.937,3.636
+020,0.015,5.182,3.980,3.675
+021,0.015,5.202,3.976,3.605
+022,0.015,5.021,3.922,3.524
+023,0.015,5.176,4.018,3.565
+024,0.014,5.133,3.986,3.590
+025,0.014,5.237,3.940,3.723
+026,0.014,5.212,3.895,3.682
+027,0.014,5.387,3.916,3.841
+028,0.014,5.118,3.864,3.610
+029,0.014,5.197,3.937,3.592
+030,0.014,5.205,3.959,3.683
+031,0.014,5.143,3.860,3.634
+032,0.013,5.055,3.821,3.536
+033,0.013,5.117,3.830,3.580
+034,0.013,5.277,3.886,3.652
+035,0.013,5.099,3.850,3.637
+036,0.013,5.030,3.985,3.523
+037,0.013,5.209,3.901,3.692
+038,0.013,5.103,3.998,3.571
+039,0.013,5.182,3.949,3.746
+040,0.013,5.028,3.906,3.533
+041,0.013,5.184,3.972,3.767
+042,0.012,5.144,3.861,3.618
+043,0.012,5.164,3.878,3.705
+044,0.012,5.042,3.854,3.533
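The logged errors plateau from roughly epoch 60 onward at about 5.0 / 3.8 / 3.5 degrees MAE for pitch / yaw / roll. A quick way to inspect these logs, assuming pandas and matplotlib are available in the environment (neither is a dependency of this patch):

    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv('outputs/weights/step.csv')
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    df.plot(x='epoch', y='Loss', ax=ax1, title='Training loss (geodesic)')
    df.plot(x='epoch', y=['Pitch', 'Yaw', 'Roll'], ax=ax2, title='AFLW2000 MAE (degrees)')
    plt.tight_layout()
    plt.show()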
+045,0.012,5.109,3.853,3.587
+046,0.012,5.184,3.917,3.717
+047,0.012,5.135,3.833,3.691
+048,0.012,5.185,3.892,3.697
+049,0.012,5.163,3.960,3.681
+050,0.012,5.107,3.794,3.569
+051,0.012,5.082,3.852,3.578
+052,0.012,5.048,3.820,3.557
+053,0.012,4.965,3.761,3.511
+054,0.012,4.977,3.853,3.533
+055,0.012,4.992,3.805,3.502
+056,0.011,4.989,3.854,3.527
+057,0.011,4.986,3.845,3.572
+058,0.011,5.043,3.796,3.561
+059,0.011,4.987,3.748,3.466
+060,0.011,4.915,3.724,3.444
+061,0.011,4.974,3.869,3.507
+062,0.011,5.030,3.767,3.556
+063,0.011,5.021,3.832,3.573
+064,0.011,5.038,3.883,3.549
+065,0.011,5.126,3.878,3.691
+066,0.011,5.014,3.778,3.557
+067,0.011,5.088,3.871,3.641
+068,0.011,5.033,3.835,3.491
+069,0.011,4.966,3.803,3.530
+070,0.011,5.098,3.755,3.625
+071,0.011,5.087,3.823,3.562
+072,0.011,5.043,3.739,3.557
+073,0.011,5.138,3.912,3.673
+074,0.010,5.146,3.827,3.642
+075,0.010,5.037,3.795,3.567
+076,0.010,5.047,3.767,3.585
+077,0.010,4.999,3.807,3.559
+078,0.010,5.034,3.801,3.585
+079,0.010,5.008,3.737,3.507
+080,0.010,5.048,3.795,3.558
+081,0.010,4.974,3.880,3.528
+082,0.010,5.083,3.826,3.601
+083,0.010,5.197,3.936,3.738
+084,0.010,4.969,3.852,3.505
+085,0.010,4.976,3.783,3.489
+086,0.010,4.953,3.787,3.524
+087,0.010,5.139,3.810,3.682
+088,0.010,5.017,3.808,3.525
+089,0.010,4.979,3.896,3.539
+090,0.010,5.059,3.828,3.631
+091,0.010,5.166,3.854,3.761
+092,0.010,4.990,3.772,3.567
+093,0.010,5.001,3.798,3.601
+094,0.010,4.894,3.771,3.507
+095,0.010,5.076,3.859,3.608
+096,0.010,5.099,3.784,3.677
+097,0.010,4.964,3.844,3.531
+098,0.010,5.101,3.849,3.647
+099,0.010,5.015,3.921,3.635
+100,0.010,5.142,3.810,3.655
diff --git a/utils/datasets.py b/utils/datasets.py
new file mode 100644
index 0000000..9690724
--- /dev/null
+++ b/utils/datasets.py
@@ -0,0 +1,111 @@
+import os
+from pathlib import Path
+
+import numpy as np
+import scipy.io as sio
+import torch
+from PIL import Image, ImageFilter
+from torch.utils.data import Dataset
+
+
+class Datasets(Dataset):
+    def __init__(self, data_dir, data_name, transform=None, train_mode=True):
+        self.data_dir = data_dir
+        self.transform = transform
+        self.train_mode = train_mode
+        file_path = Path(f'{self.data_dir}/{data_name}/files.txt')
+        if not file_path.exists():
+            self.load_label(f'{self.data_dir}/{data_name}/')
+        self.samples = open(file_path).read().splitlines()
+
+    def __getitem__(self, idx):
+        image = Image.open(f'{self.samples[idx]}.jpg')
+        image = image.convert('RGB')
+        label = sio.loadmat(f'{self.samples[idx]}.mat')
+        pt2d = label['pt2d']
+        x_min = min(pt2d[0, :])
+        y_min = min(pt2d[1, :])
+        x_max = max(pt2d[0, :])
+        y_max = max(pt2d[1, :])
+
+        # Loosely crop the face: a random margin for training, a fixed one for evaluation
+        if self.train_mode:
+            k = np.random.random_sample() * 0.2 + 0.2
+            x_min -= 0.6 * k * abs(x_max - x_min)
+            y_min -= 2 * k * abs(y_max - y_min)
+            x_max += 0.6 * k * abs(x_max - x_min)
+            y_max += 0.6 * k * abs(y_max - y_min)
+        else:
+            k = 0.20
+            x_min -= 2 * k * abs(x_max - x_min)
+            y_min -= 2 * k * abs(y_max - y_min)
+            x_max += 2 * k * abs(x_max - x_min)
+            y_max += 0.6 * k * abs(y_max - y_min)
+
+        image = image.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
+        pose = label['Pose_Para'][0][:3]
+        pitch, yaw, roll = pose[0], pose[1], pose[2]
+
+        # Random horizontal flip negates yaw and roll
+        rnd = np.random.random_sample()
+        if self.train_mode and rnd < 0.5:
+            yaw = -yaw
+            roll = -roll
+            image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+        rnd = np.random.random_sample()
+        if self.train_mode and rnd < 0.05:
+            image = image.filter(ImageFilter.BLUR)
+
+        if self.transform is not None:
+            image = self.transform(image)
+
+        # Training targets are rotation matrices; evaluation targets are Euler angles
+        if self.train_mode:
+            return image, torch.FloatTensor(self.get_rotation(pitch, yaw, roll))
+        else:
+            return image, torch.FloatTensor([pitch, yaw, roll])
+
+    def __len__(self):
+        return len(self.samples)
+
+    @staticmethod
+    def get_rotation(x, y, z):
+        # x (pitch)
+        rotate_x = np.array([[1, 0, 0],
+                             [0, np.cos(x), -np.sin(x)],
+                             [0, np.sin(x), np.cos(x)]])
+        # y (yaw)
+        rotate_y = np.array([[np.cos(y), 0, np.sin(y)],
+                             [0, 1, 0],
+                             [-np.sin(y), 0, np.cos(y)]])
+        # z (roll)
+        rotate_z = np.array([[np.cos(z), -np.sin(z), 0],
+                             [np.sin(z), np.cos(z), 0],
+                             [0, 0, 1]])
+
+        # R = Rz @ Ry @ Rx
+        return rotate_z.dot(rotate_y.dot(rotate_x))
+
+    @staticmethod
+    def load_label(data_dir):
+        f_counter, rej_counter = 0, 0
+        with open(f'{data_dir}files.txt', 'w') as file:
+            for root, dirs, files in os.walk(data_dir):
+                for f in files:
+                    if f.lower().endswith('.jpg'):
+                        mat = sio.loadmat(os.path.join(root, f[:-4] + '.mat'))
+                        pose = mat['Pose_Para'][0][:3]
+                        pitch = pose[0] * 180 / np.pi
+                        yaw = pose[1] * 180 / np.pi
+                        roll = pose[2] * 180 / np.pi
+                        # Keep only samples whose pose angles are within +/- 99 degrees
+                        if all(abs(angle) <= 99 for angle in (pitch, yaw, roll)):
+                            if f_counter > 0:
+                                file.write('\n')
+                            file.write(os.path.join(root, f[:-4]))
+                            f_counter += 1
+                        else:
+                            rej_counter += 1
+
+# data_dir = '../../../Datasets/HPE'
+# Datasets(data_dir, '300W_LP', transform=None)[0]
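get_rotation builds R = Rz(roll) @ Ry(yaw) @ Rx(pitch), and compute_euler in utils/util.py inverts that z-y-x composition. A quick sanity check of the convention, assuming only numpy:

    import numpy as np
    from utils.datasets import Datasets

    pitch, yaw, roll = 0.1, -0.4, 0.25  # radians
    R = Datasets.get_rotation(pitch, yaw, roll)

    # Invert the composition the same way compute_euler() does
    sy = np.hypot(R[0, 0], R[1, 0])
    assert np.isclose(np.arctan2(R[2, 1], R[2, 2]), pitch)  # x
    assert np.isclose(np.arctan2(-R[2, 0], sy), yaw)        # y
    assert np.isclose(np.arctan2(R[1, 0], R[0, 0]), roll)   # z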
diff --git a/utils/util.py b/utils/util.py
new file mode 100644
index 0000000..a63cdb8
--- /dev/null
+++ b/utils/util.py
@@ -0,0 +1,190 @@
+from pathlib import Path
+
+import torch
+import torchvision.transforms as T
+
+
+class GeodesicLoss(torch.nn.Module):
+    """Geodesic distance on SO(3): theta = arccos((trace(m1 @ m2^T) - 1) / 2)."""
+
+    def __init__(self, eps=1e-7):
+        super().__init__()
+        self.eps = eps
+
+    def forward(self, m1, m2):
+        m = torch.bmm(m1, m2.transpose(1, 2))  # batch*3*3
+
+        cos = (m[:, 0, 0] + m[:, 1, 1] + m[:, 2, 2] - 1) / 2
+        theta = torch.acos(torch.clamp(cos, -1 + self.eps, 1 - self.eps))
+
+        return torch.mean(theta)
+
+
+def compute_euler(rotation_matrices):
+    R = rotation_matrices
+    sy = torch.sqrt(R[:, 0, 0] * R[:, 0, 0] + R[:, 1, 0] * R[:, 1, 0])
+    singular = (sy < 1e-6).float()
+
+    x = torch.atan2(R[:, 2, 1], R[:, 2, 2])
+    y = torch.atan2(-R[:, 2, 0], sy)
+    z = torch.atan2(R[:, 1, 0], R[:, 0, 0])
+
+    # Gimbal-lock fallback (cos(y) near 0): z is unobservable, so set it to 0
+    xs = torch.atan2(-R[:, 1, 2], R[:, 1, 1])
+    ys = torch.atan2(-R[:, 2, 0], sy)
+    zs = torch.zeros_like(z)
+
+    out_euler = torch.zeros(R.shape[0], 3, device=R.device)
+    out_euler[:, 0] = x * (1 - singular) + xs * singular
+    out_euler[:, 1] = y * (1 - singular) + ys * singular
+    out_euler[:, 2] = z * (1 - singular) + zs * singular
+
+    return out_euler
+
+
+def normalize_vector(v):
+    # Clamp the magnitude to avoid division by zero for (near-)zero vectors
+    v_mag = torch.sqrt(v.pow(2).sum(1)).clamp(min=1e-8)  # batch
+    return v / v_mag.view(-1, 1)
+
+
+# u, v: batch*3
+def cross_product(u, v):
+    batch = u.shape[0]
+    i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1]
+    j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2]
+    k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0]
+
+    out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1)  # batch*3
+
+    return out
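Since the trace of a rotation by angle theta is 1 + 2*cos(theta), the loss between a rotation and the identity recovers the rotation angle itself. A small check, assuming only torch:

    import math
    import torch
    from utils.util import GeodesicLoss

    theta = 0.3
    Rz = torch.tensor([[math.cos(theta), -math.sin(theta), 0.],
                       [math.sin(theta),  math.cos(theta), 0.],
                       [0., 0., 1.]]).unsqueeze(0)  # batch of 1
    eye = torch.eye(3).unsqueeze(0)

    loss = GeodesicLoss()(Rz, eye)
    print(loss)  # ~0.3 radians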
+
+def compute_rotation(poses):
+    # Map the 6D rotation representation to a rotation matrix via Gram-Schmidt
+    x_raw = poses[:, 0:3]  # batch*3
+    y_raw = poses[:, 3:6]  # batch*3
+
+    x = normalize_vector(x_raw)  # batch*3
+    z = cross_product(x, y_raw)  # batch*3
+    z = normalize_vector(z)  # batch*3
+    y = cross_product(z, x)  # batch*3
+
+    x = x.view(-1, 3, 1)
+    y = y.view(-1, 3, 1)
+    z = z.view(-1, 3, 1)
+    matrix = torch.cat((x, y, z), 2)  # batch*3*3
+    return matrix
+
+
+def download_weights(model_name, save_dir):
+    import os
+    import gdown
+    net_name = {
+        'RepVGG-A0': '13Gn8rq1PztoMEgK7rCOPMUYHjGzk-w11',
+        'RepVGG-A1': '19lX6lNKSwiO5STCvvu2xRTKcPsSfWAO1',
+        'RepVGG-A2': '1PvtYTOX4gd-1VHX8LoT7s6KIyfTKOf8G',
+        'RepVGG-B0': '18g7YziprUky7cX6L6vMJ_874PP8tbtKx',
+        'RepVGG-B1': '1VlCfXXiaJjNjzQBy3q7C3H2JcxoL0fms',
+        'RepVGG-B1g2': '1PL-m9n3g0CEPrSpf3KwWEOf9_ZG-Ux1Z',
+        'RepVGG-B1g4': '1WXxhyRDTgUjgkofRV1bLnwzTsFWRwZ0k',
+        'RepVGG-B2': '1cFgWJkmf9U1L1UmJsA8UT__kyd3xuY_y',
+        'RepVGG-B2g4': '1LZ61o5XH6u1n3_tXIgKII7XqKoqqracI',
+        'RepVGG-B3': '1wBpq5317iPKk3-qblBHnx35bY_WumAlU',
+        'RepVGG-B3g4': '1s7PxIP-oYB1a94_qzHyzfXAbbI24GYQ8',
+    }
+    weight_id = net_name.get(model_name)
+    if weight_id is None:
+        raise ValueError(f'No pretrained weights available for {model_name}.')
+    url = f'https://drive.google.com/uc?id={weight_id}'
+    output = os.path.join(save_dir, 'weights', model_name + '.pth')
+    os.makedirs(os.path.dirname(output), exist_ok=True)
+    gdown.download(url, output, quiet=False)
+
+
+def load_model(args, for_training=True):
+    from models.nets import HPE
+
+    file_path = Path(f'{args.save_dir}/weights/{args.model_name}.pth')
+    if not file_path.exists():
+        print('Downloading ImageNet weights...')
+        download_weights(args.model_name, args.save_dir)
+
+    model = HPE(args.model_name, file_path, False, for_training).cuda()
+    if not for_training:
+        saved_dict = torch.load(f'{args.save_dir}/weights/best.pt')
+        if 'model_state_dict' in saved_dict:
+            model.load_state_dict(saved_dict['model_state_dict'])
+        else:
+            model.load_state_dict(saved_dict)
+    return model
+
+
+def get_transforms(for_training=True):
+    if for_training:
+        return T.Compose([
+            T.RandomResizedCrop(size=224, scale=(0.8, 1)),
+            T.ToTensor(),
+            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        ])
+    else:
+        return T.Compose([
+            T.Resize(256),
+            T.CenterCrop(224),
+            T.ToTensor(),
+            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        ])
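compute_rotation is the Gram-Schmidt mapping from the 6D rotation representation (Zhou et al., "On the Continuity of Rotation Representations in Neural Networks") to a rotation matrix, so the output is orthonormal by construction. A quick check, assuming only torch:

    import torch
    from utils.util import compute_rotation

    poses = torch.randn(4, 6)  # arbitrary 6D outputs, as the network head produces
    R = compute_rotation(poses)  # (4, 3, 3)

    # R @ R^T should be the identity, and det(R) should be +1
    eye = torch.eye(3).expand(4, 3, 3)
    print(torch.allclose(torch.bmm(R, R.transpose(1, 2)), eye, atol=1e-5))  # True
    print(torch.det(R))  # all ~1.0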
+
+
+def plot_pose_cube(img, yaw, pitch, roll, tdx=None, tdy=None, size=150.):
+    import cv2
+    import numpy as np
+    from math import sin, cos
+    p = pitch * np.pi / 180
+    y = -(yaw * np.pi / 180)
+    r = roll * np.pi / 180
+    if tdx is not None and tdy is not None:
+        face_x = tdx - 0.50 * size
+        face_y = tdy - 0.50 * size
+    else:
+        height, width = img.shape[:2]
+        face_x = width / 2 - 0.5 * size
+        face_y = height / 2 - 0.5 * size
+
+    x1 = size * (cos(y) * cos(r)) + face_x
+    y1 = size * (cos(p) * sin(r) + cos(r) * sin(p) * sin(y)) + face_y
+    x2 = size * (-cos(y) * sin(r)) + face_x
+    y2 = size * (cos(p) * cos(r) - sin(p) * sin(y) * sin(r)) + face_y
+    x3 = size * (sin(y)) + face_x
+    y3 = size * (-cos(y) * sin(p)) + face_y
+
+    # Draw base in red
+    cv2.line(img, (int(face_x), int(face_y)), (int(x1), int(y1)), (0, 0, 255), 3)
+    cv2.line(img, (int(face_x), int(face_y)), (int(x2), int(y2)), (0, 0, 255), 3)
+    cv2.line(img, (int(x2), int(y2)), (int(x2 + x1 - face_x), int(y2 + y1 - face_y)), (0, 0, 255), 3)
+    cv2.line(img, (int(x1), int(y1)), (int(x1 + x2 - face_x), int(y1 + y2 - face_y)), (0, 0, 255), 3)
+    # Draw pillars in blue
+    cv2.line(img, (int(face_x), int(face_y)), (int(x3), int(y3)), (255, 0, 0), 2)
+    cv2.line(img, (int(x1), int(y1)), (int(x1 + x3 - face_x), int(y1 + y3 - face_y)), (255, 0, 0), 2)
+    cv2.line(img, (int(x2), int(y2)), (int(x2 + x3 - face_x), int(y2 + y3 - face_y)), (255, 0, 0), 2)
+    cv2.line(img, (int(x2 + x1 - face_x), int(y2 + y1 - face_y)),
+             (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (255, 0, 0), 2)
+    # Draw top in green
+    cv2.line(img, (int(x3 + x1 - face_x), int(y3 + y1 - face_y)),
+             (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (0, 255, 0), 2)
+    cv2.line(img, (int(x2 + x3 - face_x), int(y2 + y3 - face_y)),
+             (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (0, 255, 0), 2)
+    cv2.line(img, (int(x3), int(y3)), (int(x3 + x1 - face_x), int(y3 + y1 - face_y)), (0, 255, 0), 2)
+    cv2.line(img, (int(x3), int(y3)), (int(x3 + x2 - face_x), int(y3 + y2 - face_y)), (0, 255, 0), 2)
+
+    return img
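The drawing helper can also be used standalone on a static image; a minimal sketch, where the file path and angles are placeholders:

    import cv2
    from utils.util import plot_pose_cube

    img = cv2.imread('face.jpg')  # hypothetical input image
    img = plot_pose_cube(img, yaw=20., pitch=-10., roll=5., tdx=160, tdy=120, size=100)
    cv2.imwrite('face_pose.jpg', img)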