Merge pull request #3 from PPierzc/main

Additional Evaluation Logic
sinzlab · Jul 20, 2022 · 1ea9683 · 1ea9683
2 parents 6186e10 + 489e2c8
commit 1ea9683
Show file tree

Hide file tree

Showing 27 changed files with 1,395 additions and 59 deletions.
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -78,7 +78,7 @@ services:
     volumes:
       - .:/src/propose
       - ./tests:/tests
-    entrypoint: [ "pytest" ]
+    entrypoint: [ "pytest", "/tests/"]
 
   notebook_server:
     <<: *common

diff --git a/experiments/human36m/mpii-prod-xlarge_lr_decr.yaml b/experiments/human36m/mpii-prod-xlarge_lr_decr.yaml
@@ -0,0 +1,46 @@
+seed: 0
+checkpoint_every: 10
+use_pretrained: mpii-prod-xlarge:latest
+
+tags:
+  - mpii
+  - human36m
+group: prod
+
+dataset:
+  dirname: "/data/human36m/processed"
+  mpii: true
+
+train:
+  optimizer:
+    lr: 1.0e-5
+    weight_decay: 0
+  lr_scheduler:
+    patience: 10
+    cooldown: 5
+    mode: "min"
+    factor: 0.1
+    threshold: 1.0e-2
+    min_lr: 1.0e-6
+  batch_size: 200
+  epochs: 200
+
+model:
+  num_layers: 14
+  context_features: 68
+  hidden_features: 262
+  relations:
+      - x
+      - c
+      - r
+      - x->x
+      - x<-x
+      - c->x
+      - r->x
+
+embedding:
+    name: "sage"
+    config:
+      input_dim: 2
+      hidden_dim: 177
+      output_dim: 68
diff --git a/propose/datasets/human36m/Human36mDataset.py b/propose/datasets/human36m/Human36mDataset.py
@@ -18,14 +18,15 @@
 
 def tensor_to_graph(inputs, context, root, edges, context_edges, root_edges):
     """
-    Convert a tensor to a graph.
-    :param inputs: tensor of shape (batch_size, num_nodes, num_features)
-    :param context: tensor of shape (batch_size, num_nodes, num_context_features)
-    :param root: tensor of shape (batch_size, num_nodes)
-    :param edges: tensor of shape (batch_size, num_edges, 2)
-    :param context_edges: tensor of shape (batch_size, num_context_edges, 2)
-    :param root_edges: tensor of shape (batch_size, num_root_edges, 2)
-    :return: HeteroData
+    It takes in the inputs, context, root, and edges, and returns a HeteroData object
+
+    :param inputs: the input tensor
+    :param context: the context nodes
+    :param root: the root node
+    :param edges: the edges between the nodes in the graph
+    :param context_edges: the edges from the context to the inputs
+    :param root_edges: the edges from the root node to the other nodes
+    :return: A hetero data object.
     """
     data = HeteroData()
 
@@ -44,6 +45,13 @@ def tensor_to_graph(inputs, context, root, edges, context_edges, root_edges):
 
 
 def tensor_to_human36m_graph(inputs, context, context_edges):
+    """
+    It takes the input tensors, and converts them to a graph
+
+    :param inputs: the input tensor, which is a tensor of shape (num_frames, num_joints, 3)
+    :param context: the context of the graph, which is the same as the input to the model
+    :param context_edges: the edges that are used to compute the context
+    """
     pose = Human36mPose(np.zeros((1, 17, 3)))
     edges = torch.LongTensor(pose.edges).T
 
@@ -270,9 +278,18 @@ def __init__(
             self.base_data.append(base_data)
 
     def __len__(self):
+        """
+        The function returns the length of the data attribute of the object
+        :return: The length of the data.
+        """
         return len(self.data)
 
     def __getitem__(self, item):
+        """
+        The function returns the data, base data, and a dictionary of the action, camera, subject, occlusion, and center3d
+
+        :param item: the index of the item we want to get
+        """
         if self.return_matrix:
             return (
                 self.data[item]["x"]["x"],
@@ -299,6 +316,16 @@ def __getitem__(self, item):
 
     @classmethod
     def remove_root_edges(cls, edges, context_edges, num_context_samples):
+        """
+        We remove the root edges from the full edges, and then we subtract 1 from the full edges and context edges to
+        make them zero-indexed
+
+        :param cls: the class of the object
+        :param edges: the edges of the full graph
+        :param context_edges: the edges that are in the context graph
+        :param num_context_samples: The number of samples in the context
+        :return: The edges are being returned with the root edges removed.
+        """
         full_edges = edges[:, torch.where(edges[0] != 0)[0]]
         context_edges = context_edges[:, torch.where(context_edges[1] != 0)[0]]
         root_edges = edges[:, torch.where(edges[0] == 0)[0]]
@@ -311,6 +338,14 @@ def remove_root_edges(cls, edges, context_edges, num_context_samples):
         return full_edges, root_edges, context_edges
 
     def _sample_context(self, gaussfit, num_context_samples):
+        """
+        Given a gaussian fit, sample from the gaussian distribution and return the samples
+
+        :param gaussfit: the output of the neural network, which is a 16x6 tensor. The first column is the probability of
+        the gaussian, the next two are the mean, and the last three are the covariance matrix
+        :param num_context_samples: number of samples to draw from the context distribution
+        :return: The samples are being returned.
+        """
         mean = torch.stack([gaussfit[:, 1], gaussfit[:, 2]], dim=1)
         cov = torch.stack([gaussfit[:, 3], gaussfit[:, 5]], dim=1).unsqueeze(
             2
@@ -321,6 +356,14 @@ def _sample_context(self, gaussfit, num_context_samples):
         return samples.view(samples.shape[0] * samples.shape[1], samples.shape[2])
 
     def _add_variance(self, pose2d, gaussfit):
+        """
+        It takes in a pose2d and a gaussfit, and if use_variance is true, it returns a concatenation of pose2d and the
+        square of the third and sixth columns of gaussfit. Otherwise, it just returns pose2d
+
+        :param pose2d: the 2D pose
+        :param gaussfit: the output of the gaussian fitting function
+        :return: The pose2d is being returned.
+        """
         if self.use_variance:
             res = torch.cat(
                 [
@@ -504,6 +547,10 @@ def __init__(
             self.base_data.append(base_data)
 
     def __len__(self):
+        """
+        The function returns the length of the data attribute of the object
+        :return: The length of the data.
+        """
         return len(self.data)
 
     def __getitem__(self, item):
@@ -527,6 +574,14 @@ def __getitem__(self, item):
         )  # returns: full data, base data
 
     def remove_root_edges(self, edges, context_edges):
+        """
+        It takes in the edges and context edges, and returns the full edges, root edges, and context edges
+
+        :param edges: the edges of the graph, in the form of a 2xN tensor, where N is the number of edges. The first
+        row is the source node, the second row is the destination node
+        :param context_edges: the edges that are in the context of the current node
+        :return: The full_edges, root_edges, and context_edges are being returned.
+        """
         full_edges = edges[:, torch.where(edges[0] != 0)[0]]
         context_edges = context_edges[:, 1:]
         root_edges = edges[:, torch.where(edges[0] == 0)[0]]

diff --git a/propose/evaluation/__init__.py b/propose/evaluation/__init__.py
diff --git a/propose/evaluation/mpjpe.py b/propose/evaluation/mpjpe.py
@@ -0,0 +1,117 @@
+import torch
+import numpy as np
+
+
+def mpjpe(pred, gt, dim=None, mean=True):
+    """
+    `mpjpe` is the mean per joint position error, which is the mean of the Euclidean distance between the predicted 3D
+    joint positions and the ground truth 3D joint positions
+
+    Used in Protocol-I for Human3.6M dataset evaluation.
+
+    :param pred: the predicted 3D pose
+    :param gt: ground truth
+    :param dim: the dimension to average over. If None, the average is taken over all dimensions
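+    :param mean: If True, the per-joint position errors are averaged over `dim` (or over all dimensions when `dim` is
+    None). If False, the unreduced per-joint position errors are returned, defaults to True (optional)
+    :return: The averaged error if `mean` is True, otherwise the per-joint position errors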
+    """
+    pjpe = ((pred - gt) ** 2).sum(-1) ** 0.5
+
+    if not mean:
+        return pjpe
+
+    # if pjpe is torch.Tensor use dim if numpy.array use axis
+    if isinstance(pjpe, torch.Tensor):
+        if dim is None:
+            return pjpe.mean()
+        return pjpe.mean(dim=dim)
+
+    if dim is None:
+        return np.mean(pjpe)
+
+    return np.mean(pjpe, axis=dim)
+
+
+def pa_mpjpe(
+    p_gt: torch.TensorType, p_pred: torch.TensorType, dim: int = None, mean: bool = True
+):
+    """
+    PA-MPJPE is the Procrustes-aligned mean per joint position error, which is the mean of the Euclidean distance
+    between the predicted 3D joint positions and the ground truth 3D joint positions, after aligning the prediction to
+    the ground truth by centering, scaling and rotating it.
+
+    Used in Protocol-II for Human3.6M dataset evaluation.
+
+    Code adapted from:
+    https://github.com/twehrbein/Probabilistic-Monocular-3D-Human-Pose-Estimation-with-Normalizing-Flows/
+
+    :param p_gt: the ground truth 3D pose
+    :type p_gt: torch.TensorType
+    :param p_pred: predicted 3D pose
+    :type p_pred: torch.TensorType
+    :param dim: currently not used by the implementation; the error is always averaged with dim=0
+    :type dim: int
+    :param mean: currently not used by the implementation; the averaged error is always returned
+    (optional)
+    :return: The MPJPE between the ground truth and the Procrustes-aligned prediction.
+    """
+    if not isinstance(p_pred, torch.Tensor):
+        p_pred = torch.Tensor(p_pred)
+
+    if not isinstance(p_gt, torch.Tensor):
+        p_gt = torch.Tensor(p_gt)
+
+    og_gt = p_gt.clone()
+
+    p_gt = p_gt.repeat(1, p_pred.shape[1], 1)
+
+    p_gt = p_gt.permute(1, 2, 0).contiguous()
+    p_pred = p_pred.permute(1, 2, 0).contiguous()
+
+    # Moving the tensors to the CPU as the following code is more efficient on the CPU
+    p_pred = p_pred.cpu()
+    p_gt = p_gt.cpu()
+
+    mu_gt = p_gt.mean(dim=2)
+    mu_pred = p_pred.mean(dim=2)
+
+    p_gt = p_gt - mu_gt[:, :, None]
+    p_pred = p_pred - mu_pred[:, :, None]
+
+    ss_gt = (p_gt**2.0).sum(dim=(1, 2))
+    ss_pred = (p_pred**2.0).sum(dim=(1, 2))
+
+    # centred Frobenius norm
+    norm_gt = torch.sqrt(ss_gt)
+    norm_pred = torch.sqrt(ss_pred)
+
+    # scale to equal (unit) norm
+    p_gt /= norm_gt[:, None, None]
+    p_pred /= norm_pred[:, None, None]
+
+    # optimum rotation matrix of Y
+    A = torch.bmm(p_gt, p_pred.transpose(1, 2))
+
+    U, s, V = torch.svd(A, some=True)
+
+    # Computing the rotation matrix.
+    T = torch.bmm(V, U.transpose(1, 2))
+
+    detT = torch.det(T)
+    sign = torch.sign(detT)
+    V[:, :, -1] *= sign[:, None]
+    s[:, -1] *= sign
+    T = torch.bmm(V, U.transpose(1, 2))
+
+    # Computing the trace of TA as the sum of the sign-corrected singular values of A.
+    traceTA = s.sum(dim=1)
+
+    # transformed coords
+    scale = norm_gt * traceTA
+
+    p_pred_projected = (
+        scale[:, None, None] * torch.bmm(p_pred.transpose(1, 2), T) + mu_gt[:, None, :]
+    )
+
+    return mpjpe(og_gt, p_pred_projected.permute(1, 0, 2), dim=0)
diff --git a/propose/evaluation/pck.py b/propose/evaluation/pck.py
@@ -0,0 +1,39 @@
+import torch
+
+human36m_joints_to_use = [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16]
+
+
+def pck(
+    poses_gt: torch.Tensor,
+    poses_pred: torch.Tensor,
+    threshold: float = 150,
+    return_distances: bool = False,
+) -> torch.BoolTensor:
+    """
+    It computes, for each frame, the fraction of joints whose predicted position is within a threshold distance of the
+    ground truth position
+
+    :param poses_gt: the ground truth poses with only the joints of interest (frames x joints x 3)
+    :type poses_gt: torch.Tensor
+    :param poses_pred: the predicted poses with only the joints of interest (frames x joints x 3)
+    :type poses_pred: torch.Tensor
+    :param threshold: The threshold for the distance between the predicted and ground truth pose, defaults to 150
+    :type threshold: float (optional)
+    :param return_distances: If True, returns the distances between the predicted and ground truth pose, defaults to False
+    :type return_distances: bool (optional)
+    """
+    if not isinstance(poses_pred, torch.Tensor):
+        poses_pred = torch.Tensor(poses_pred)
+
+    if not isinstance(poses_gt, torch.Tensor):
+        poses_gt = torch.Tensor(poses_gt)
+
+    distances = torch.sqrt(torch.sum((poses_gt - poses_pred) ** 2, dim=-1))
+
+    if return_distances:
+        return distances
+
+    n_correct_joints = torch.count_nonzero(distances < threshold, dim=1)
+    correct_poses = n_correct_joints / poses_gt.shape[1]
+
+    return correct_poses
diff --git a/propose/models/flows/CondGraphFlow.py b/propose/models/flows/CondGraphFlow.py
@@ -97,6 +97,17 @@ def from_pretrained(cls, artifact_name):
         flow = cls.build_model(artifact.metadata)
 
         artifact_dir = artifact.download()
-        flow.load_state_dict(torch.load(artifact_dir + "/model.pt"))
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        flow.load_state_dict(
+            torch.load(artifact_dir + "/model.pt", map_location=torch.device(device))
+        )
 
         return flow
+
+    def set_device(self):
+        if torch.cuda.is_available():
+            self.to("cuda:0")
+            return True
+
+        return False
diff --git a/propose/poses/metadata/__init__.py b/propose/poses/metadata/__init__.py
diff --git a/propose/training/supervised.py b/propose/training/supervised.py
@@ -5,7 +5,7 @@
 
 from torch_geometric.loader.dataloader import Collater
 
-from propose.utils.mpjpe import mpjpe
+from propose.evaluation.mpjpe import mpjpe
 
 
 def supervised_trainer(