init

kahnchana · Dec 27, 2021 · 1eaf35c · 1eaf35c
1 parent b71e65c
commit 1eaf35c
Show file tree

Hide file tree

Showing 48 changed files with 10,458 additions and 0 deletions.
diff --git a/datasets/DATASET.md b/datasets/DATASET.md
@@ -0,0 +1,26 @@
+# Dataset Preparation
+
+## Kinetics
+
+The Kinetics dataset can be downloaded from the following [link](https://github.com/cvdfoundation/kinetics-dataset).
+
+After all the videos have been downloaded, resize each video to a short-edge size of 256, then prepare the csv files for the training, validation, and testing sets as `train.csv`, `val.csv`, and `test.csv`. The format of each csv file is:
+
+```
+path_to_video_1 label_1
+path_to_video_2 label_2
+path_to_video_3 label_3
+...
+path_to_video_N label_N
+```
+
+## Something-Something V2
+1. Please download the dataset and annotations from the [dataset provider](https://20bn.com/datasets/something-something).
+
+2. Download the *frame list* from the following links: ([train](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/train.csv), [val](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/val.csv)).
+
+3. Extract the frames at 30 FPS. (We used ffmpeg-4.1.3 with command
+`ffmpeg -i "${video}" -r 30 -q:v 1 "${out_name}"`
+ in experiments.) Please put the frames in a structure consistent with the frame lists.
+
+Please put all annotation json files and the frame lists in the same folder, and set `DATA.PATH_TO_DATA_DIR` to the path. Set `DATA.PATH_PREFIX` to be the path to the folder containing extracted frames.
diff --git a/datasets/__init__.py b/datasets/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+from .kinetics import Kinetics # noqa
+from .ucf101 import UCF101
+from .hmdb51 import HMDB51
+# from .ssv2 import Ssv2 # noqa
diff --git a/datasets/build.py b/datasets/build.py
@@ -0,0 +1,30 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+from fvcore.common.registry import Registry
+
+DATASET_REGISTRY = Registry("DATASET")  # Name -> registered dataset object; looked up by name in build_dataset().
+DATASET_REGISTRY.__doc__ = """
+Registry for dataset.

+The registered object will be called with `obj(cfg, split)`.
+The call should return a `torch.utils.data.Dataset` object.
+"""
+
+
+def build_dataset(dataset_name, cfg, split):
+ """
+ Build a dataset, defined by `dataset_name`.
+ Args:
+ dataset_name (str): the name of the dataset to be constructed.
+ cfg (CfgNode): configs. Details can be found in
+ slowfast/config/defaults.py
+ split (str): the split of the data loader. Options include `train`,
+ `val`, and `test`.
+ Returns:
+ Dataset: a constructed dataset specified by dataset_name.
+ """
+ # Capitalize the the first letter of the dataset_name since the dataset_name
+ # in configs may be in lowercase but the name of dataset class should always
+ # start with an uppercase letter.
+ name = dataset_name.capitalize()
+ return DATASET_REGISTRY.get(name)(cfg, split)