From b595b988131d59fce756af50f9e177b376075fde Mon Sep 17 00:00:00 2001 From: Dheemant Dixit <102575300+Dheemant-Dixit@users.noreply.github.com> Date: Thu, 24 Aug 2023 06:13:22 +0530 Subject: [PATCH] Added GOT10K (#147) * GOT10K dataset added * Added got10k * Added got10k and some changes * Changes to got10k and requirements --------- Co-authored-by: EC2 Default User --- experiments/datasets/load_got10k.ipynb | 231 ++++++++++++++++++++++ requirements.txt | 3 +- trailmet/datasets/__init__.py | 1 + trailmet/datasets/tracking/__init__.py | 53 +++++ trailmet/datasets/tracking/got10kdata.py | 239 +++++++++++++++++++++++ 5 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 experiments/datasets/load_got10k.ipynb create mode 100644 trailmet/datasets/tracking/__init__.py create mode 100644 trailmet/datasets/tracking/got10kdata.py diff --git a/experiments/datasets/load_got10k.ipynb b/experiments/datasets/load_got10k.ipynb new file mode 100644 index 0000000..bc364d1 --- /dev/null +++ b/experiments/datasets/load_got10k.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"./../../\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import matplotlib.pyplot as plt\n", + "from torchvision import transforms\n", + "from trailmet.datasets.tracking import TrackingDatasetFactory" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "root_dir = \"./../../../got10kdata/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "got_dataset = TrackingDatasetFactory.create_dataset(\n", + " name=\"got10k\", root=root_dir, split_types=[\"train\", \"val\", \"test\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "got_dataset[\"train\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'train_size': 93350, 'val_size': 1800, 'test_size': 180, 'note': ''}\n" + ] + } + ], + "source": [ + "print(got_dataset[\"info\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train samples: 93350\n", + "Val samples: 1800\n", + "Test samples: 180\n" + ] + } + ], + "source": [ + "# getting the size of the different splits of the data\n", + "print(\"Train samples: \", got_dataset[\"info\"][\"train_size\"])\n", + "print(\"Val samples: \", got_dataset[\"info\"][\"val_size\"])\n", + "print(\"Test samples: \", got_dataset[\"info\"][\"test_size\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No. of training batches: 1459\n", + "No. of validation batches: 29\n", + "No. of test batches: 3\n" + ] + } + ], + "source": [ + "# Construct dataloaders\n", + "train_loader = torch.utils.data.DataLoader(\n", + " got_dataset[\"train\"],\n", + " batch_size=64,\n", + " sampler=got_dataset[\"train_sampler\"],\n", + " num_workers=0,\n", + ")\n", + "val_loader = torch.utils.data.DataLoader(\n", + " got_dataset[\"val\"],\n", + " batch_size=64,\n", + " sampler=got_dataset[\"val_sampler\"],\n", + " num_workers=0,\n", + ")\n", + "test_loader = torch.utils.data.DataLoader(\n", + " got_dataset[\"test\"],\n", + " batch_size=64,\n", + " sampler=got_dataset[\"test_sampler\"],\n", + " num_workers=0,\n", + ")\n", + "\n", + "print(\"No. of training batches: \", len(train_loader))\n", + "print(\"No. 
of validation batches: \", len(val_loader))\n", + "print(\"No. of test batches: \", len(test_loader))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[395., 340., 532., 407.]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "got_dataset[\"test\"][0][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature batch shape: torch.Size([64, 3, 127, 127])\n", + "Labels batch shape: torch.Size([64, 3, 239, 239])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Display image and label.\n", + "train_features, train_labels = next(iter(train_loader))\n", + "print(f\"Feature batch shape: {train_features.size()}\")\n", + "print(f\"Labels batch shape: {train_labels.size()}\")\n", + "img = train_features[7, 0, :, :].squeeze()\n", + "label = train_labels[0]\n", + "plt.imshow(img, cmap=\"gray\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "trailmet", + "language": "python", + "name": "trailmet" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index f8fcd2c..ccc1625 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ codecarbon==2.2.3 -pandas==2.0.2 +pandas==2.0.3 tqdm==4.65.0 timm==0.9.2 matplotlib==3.7.1 @@ -11,3 +11,4 @@ scikit-learn==1.2.2 pytest==7.3.1 torch_pruning==1.1.9 wandb==0.15.4 +got10k==0.1.3 diff --git a/trailmet/datasets/__init__.py b/trailmet/datasets/__init__.py index 4a89614..450b656 100644 --- a/trailmet/datasets/__init__.py +++ b/trailmet/datasets/__init__.py @@ -20,3 +20,4 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
# Patch context preserved from the original (collapsed) diff text:
#  from .classification import DatasetFactory
# +from .tracking import TrackingDatasetFactory
# diff --git a/trailmet/datasets/tracking/__init__.py b/trailmet/datasets/tracking/__init__.py
# new file mode 100644
# index 0000000..0064ff1
# --- /dev/null
# +++ b/trailmet/datasets/tracking/__init__.py
# @@ -0,0 +1,53 @@

# MIT License
#
# Copyright (c) 2023 Transmute AI Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from .got10kdata import GOT10kDataset


class TrackingDatasetFactory(object):
    """Generic entry point for building tracking datasets by name.

    Only ``'got10k'`` is recognised at the moment; every keyword argument is
    forwarded unchanged to the matching dataset wrapper.
    """

    @staticmethod
    def create_dataset(**kwargs):
        """Build the dataset dictionary for the requested tracking dataset.

        Args:
            name (str): Dataset name. Currently only ``'got10k'`` is handled.
            root (str): Root directory handed to the dataset wrapper.
            **kwargs: Any further options; they are passed straight to the
                wrapper's constructor together with ``name`` and ``root``.

        Returns:
            dict: The wrapper's dataset dictionary after ``stack_dataset()``
            and ``build_dict_info()`` have populated it.

        Raises:
            AssertionError: If ``name`` or ``root`` is not supplied.
            ValueError: If ``name`` does not match a known dataset.
        """
        assert 'name' in kwargs, 'should provide dataset name'
        assert 'root' in kwargs, 'should provide dataset root'
        name = kwargs['name']

        if name == 'got10k':
            obj_dfactory = GOT10kDataset(**kwargs)
        else:
            # ValueError is still caught by callers that catch Exception.
            raise ValueError(f'unknown dataset: {name}')

        # stack_dataset() fills the sampler entries in place; the dictionary
        # returned by build_dict_info() is that same, fully populated object.
        obj_dfactory.stack_dataset()
        return obj_dfactory.build_dict_info()


# Patch context preserved from the original (collapsed) diff text:
# diff --git a/trailmet/datasets/tracking/got10kdata.py b/trailmet/datasets/tracking/got10kdata.py
# new file mode 100644
# index 0000000..43f9f54
# --- /dev/null
# +++ b/trailmet/datasets/tracking/got10kdata.py
# @@ -0,0 +1,239 @@

# MIT License
#
# Copyright (c) 2023 Transmute AI Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# https://github.com/got-10k/siamfc
from __future__ import absolute_import, division, print_function

import os
import sys
import numpy as np
import torch
from collections import namedtuple
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.transforms import Compose, CenterCrop, RandomCrop, ToTensor
from PIL import Image, ImageStat, ImageOps

from got10k.datasets import GOT10k


class RandomStretch(object):
    """Rescale a PIL image by a random factor close to 1.

    On every call a factor is drawn uniformly from
    ``[1 - max_stretch, 1 + max_stretch]`` and applied to both width and
    height.

    Args:
        max_stretch (float): Half-width of the interval the factor is drawn
            from.
        interpolation (str): ``'bilinear'`` or ``'bicubic'``.
    """

    # Accepted interpolation names and the PIL filter each one selects.
    _RESAMPLE = {'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC}

    def __init__(self, max_stretch=0.05, interpolation='bilinear'):
        assert interpolation in ['bilinear', 'bicubic']
        self.max_stretch = max_stretch
        self.interpolation = interpolation

    def __call__(self, img):
        """Return ``img`` resized by a freshly drawn stretch factor."""
        scale = 1.0 + np.random.uniform(-self.max_stretch, self.max_stretch)
        size = np.round(np.array(img.size, float) * scale).astype(int)
        # A table lookup cannot leave the filter unbound (the previous
        # if/elif could when the constructor assert is stripped by -O);
        # hand PIL plain Python ints rather than numpy scalars.
        method = self._RESAMPLE[self.interpolation]
        return img.resize(tuple(int(v) for v in size), method)


class Pairwise(Dataset):
    """Dataset of (exemplar, instance) image pairs cut from sequences.

    ``seq_dataset[i]`` must yield ``(img_files, anno)`` where ``anno`` is a
    2-D array with one box per frame; columns 2 and 3 are used as box width
    and height. Each item picks two frames of one sequence, crops a square
    region around the annotated box in each, and returns both crops as
    tensors scaled to the 0-255 range.

    Recognised keyword options (others are ignored): ``pairs_per_seq``,
    ``max_dist``, ``exemplar_sz``, ``instance_sz``, ``context``.
    """

    # How many times another sequence is drawn when the selected one has no
    # frame that passes the minimum-area filter.
    _MAX_RESAMPLE = 10

    def __init__(self, seq_dataset, **kargs):
        super(Pairwise, self).__init__()
        self.cfg = self.parse_args(**kargs)

        self.seq_dataset = seq_dataset
        # Fixed random visiting order of the sequences (global NumPy RNG).
        self.indices = np.random.permutation(len(seq_dataset))
        # augmentation for exemplar and instance images
        self.transform_z = Compose([
            RandomStretch(max_stretch=0.05),
            CenterCrop(self.cfg.instance_sz - 8),
            RandomCrop(self.cfg.instance_sz - 2 * 8),
            CenterCrop(self.cfg.exemplar_sz),
            ToTensor(),
        ])
        self.transform_x = Compose([
            RandomStretch(max_stretch=0.05),
            CenterCrop(self.cfg.instance_sz - 8),
            RandomCrop(self.cfg.instance_sz - 2 * 8),
            ToTensor(),
        ])

    def parse_args(self, **kargs):
        """Merge recognised overrides into the defaults.

        Returns:
            namedtuple: Read-only configuration with the fields
            ``pairs_per_seq``, ``max_dist``, ``exemplar_sz``,
            ``instance_sz`` and ``context``. Unknown keys in ``kargs`` are
            silently dropped.
        """
        # default parameters
        cfg = {
            'pairs_per_seq': 10,
            'max_dist': 100,
            'exemplar_sz': 127,
            'instance_sz': 255,
            'context': 0.5,
        }
        cfg.update({key: val for key, val in kargs.items() if key in cfg})
        return namedtuple('GenericDict', cfg.keys())(**cfg)

    def __getitem__(self, index):
        """Return one ``(exemplar, instance)`` tensor pair.

        ``index`` is wrapped modulo the number of sequences, so the
        ``pairs_per_seq`` passes implied by ``__len__`` revisit the same
        sequences with newly sampled frames.
        """
        index = self.indices[index % len(self.seq_dataset)]
        img_files, anno = self._filter_small(*self.seq_dataset[index])

        # A sequence whose frames are all below the area threshold used to
        # trip a bare assert in _sample_pair; draw another sequence instead.
        # If none is found the original assertion still fires.
        retries = 0
        while len(img_files) == 0 and retries < self._MAX_RESAMPLE:
            index = np.random.choice(len(self.seq_dataset))
            img_files, anno = self._filter_small(*self.seq_dataset[index])
            retries += 1

        rand_z, rand_x = self._sample_pair(len(img_files))

        exemplar_image = self._load_patch(img_files[rand_z], anno[rand_z])
        instance_image = self._load_patch(img_files[rand_x], anno[rand_x])
        # ToTensor scales to [0, 1]; multiply back to the 0-255 range.
        exemplar_image = 255.0 * self.transform_z(exemplar_image)
        instance_image = 255.0 * self.transform_x(instance_image)

        return exemplar_image, instance_image

    def __len__(self):
        return self.cfg.pairs_per_seq * len(self.seq_dataset)

    @staticmethod
    def _filter_small(img_files, anno):
        """Drop frames whose box area (width * height) is below 10."""
        # remove too small objects
        valid = anno[:, 2:].prod(axis=1) >= 10
        return np.array(img_files)[valid], anno[valid, :]

    def _load_patch(self, img_file, box):
        """Open ``img_file`` and return the square crop around ``box``."""
        # The context manager closes the file handle once the pixels are
        # loaded; convert('RGB') keeps every sample three-channel.
        with Image.open(img_file) as image:
            image = image.convert('RGB')
        return self._crop_and_resize(image, box)

    def _sample_pair(self, n):
        """Pick two frame indices ``(z, x)`` with ``z <= x`` out of ``n``.

        For ``n >= 3`` the gap ``x - z`` is drawn from
        ``1 .. min(n - 1, max_dist)`` and ``z`` from the positions that keep
        ``x`` in range.
        """
        assert n > 0, 'cannot sample a frame pair from an empty sequence'
        if n == 1:
            return 0, 0
        elif n == 2:
            return 0, 1
        else:
            max_dist = min(n - 1, self.cfg.max_dist)
            rand_dist = np.random.choice(max_dist) + 1
            rand_z = np.random.choice(n - rand_dist)
            rand_x = rand_z + rand_dist

        return rand_z, rand_x

    def _crop_and_resize(self, image, box):
        """Crop a square region centred on ``box`` and resize it.

        Args:
            image (PIL.Image.Image): Source frame.
            box: Four values ``(left, top, width, height)``; left/top are
                treated as 1-indexed.

        Returns:
            PIL.Image.Image: Patch of size ``instance_sz x instance_sz``.
            Parts of the crop that fall outside the frame are filled with
            the frame's mean colour.
        """
        # convert box to 0-indexed and center based
        box = np.array(
            [
                box[0] - 1 + (box[2] - 1) / 2,
                box[1] - 1 + (box[3] - 1) / 2,
                box[2],
                box[3],
            ],
            dtype=np.float32,
        )
        center, target_sz = box[:2], box[2:]

        # exemplar and search sizes
        context = self.cfg.context * np.sum(target_sz)
        z_sz = np.sqrt(np.prod(target_sz + context))
        x_sz = z_sz * self.cfg.instance_sz / self.cfg.exemplar_sz

        # convert box to corners (0-indexed)
        size = round(x_sz)
        top_left = np.round(center - (size - 1) / 2)
        corners = np.concatenate((top_left, top_left + size))
        corners = np.round(corners).astype(int)

        # pad image if necessary
        pads = np.concatenate((-corners[:2], corners[2:] - image.size))
        npad = max(0, int(pads.max()))
        if npad > 0:
            avg_color = ImageStat.Stat(image).mean
            # PIL doesn't support float RGB image
            avg_color = tuple(int(round(c)) for c in avg_color)
            image = ImageOps.expand(image, border=npad, fill=avg_color)

        # crop image patch (shifted by the border that was just added)
        crop_box = tuple(int(v) for v in corners + npad)
        patch = image.crop(crop_box)

        # resize to instance_sz
        out_size = (self.cfg.instance_sz, self.cfg.instance_sz)
        patch = patch.resize(out_size, Image.BILINEAR)

        return patch


class GOT10kDataset:
    """Builds the GOT-10k splits and packs them into one dictionary.

    Args:
        name (str): Dataset name; samplers are only created when it equals
            ``'got10k'``.
        root (str): Directory passed to ``got10k.datasets.GOT10k``.
        split_types (iterable of str | str | None): Splits to load. ``None``
            loads ``'train'``, ``'val'`` and ``'test'``; a single string is
            treated as one split.
        shuffle (bool): Stored on the instance; not consulted by this class.
        random_seed (int | None): When given, seeds the generators of the
            train/val samplers so their order is reproducible. Pair sampling
            inside ``Pairwise`` still uses the global NumPy RNG.
    """

    _SPLITS = ('train', 'val', 'test')

    def __init__(
        self,
        name=None,
        root=None,
        split_types=None,
        shuffle=True,
        random_seed=None,
    ):
        self.name = name
        self.shuffle = shuffle
        self.random_seed = random_seed
        self.dataset_dict = {}

        # The default None used to be iterated directly (TypeError), and a
        # bare string would have been iterated character by character.
        if split_types is None:
            split_types = self._SPLITS
        elif isinstance(split_types, str):
            split_types = [split_types]

        for dataset_type in split_types:
            data = GOT10k(root, subset=dataset_type)
            # The test split stays a raw sequence dataset; train/val are
            # wrapped so that they yield image pairs.
            if dataset_type != 'test':
                data = Pairwise(data)
            self.dataset_dict[dataset_type] = data

    def build_dict_info(self):
        """
        Behavior:
            Adds the 'info' key to the output dictionary. It holds
            '<split>_size' for every split that was loaded (splits that were
            not requested get no entry) and an empty 'note' string.
        Returns:
            dataset_dict (dict): Updated with the 'info' key.
        """
        info = {
            f'{split}_size': len(self.dataset_dict[split])
            for split in self._SPLITS
            if split in self.dataset_dict
        }
        info['note'] = ''
        self.dataset_dict['info'] = info
        return self.dataset_dict

    def _make_sampler(self, indices):
        """Return a SubsetRandomSampler, seeded when random_seed is set."""
        generator = None
        if self.random_seed is not None:
            generator = torch.Generator()
            generator.manual_seed(self.random_seed)
        return SubsetRandomSampler(indices, generator=generator)

    def stack_dataset(self):
        """
        Behavior:
            Adds the sampler entries to the dataset dictionary. The keys
            'train_sampler', 'val_sampler' and 'test_sampler' are always
            present; for the 'got10k' dataset the train and val entries are
            random samplers over the full split (when that split was
            loaded), everything else stays None.
        Returns:
            dataset_dict (dict): Holds the loaded splits under 'train',
                                 'val' and 'test' together with the
                                 '<split>_sampler' entries.
        """

        # defining the samplers
        self.dataset_dict['train_sampler'] = None
        self.dataset_dict['val_sampler'] = None
        self.dataset_dict['test_sampler'] = None

        if self.name == 'got10k':
            # Index attributes keep their original names (train_idx /
            # valid_idx); a split that was not loaded is simply skipped
            # instead of raising KeyError.
            for split, idx_attr in (('train', 'train_idx'),
                                    ('val', 'valid_idx')):
                if split not in self.dataset_dict:
                    continue
                indices = range(len(self.dataset_dict[split]))
                setattr(self, idx_attr, indices)
                self.dataset_dict[f'{split}_sampler'] = self._make_sampler(
                    indices)

        return self.dataset_dict