data

ZhengyiLuo · Aug 20, 2024 · 1212f1a · 1212f1a
1 parent 8a129cf
commit 1212f1a
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -11,6 +11,15 @@ Official implementation of CVPR 2024 highlight paper: "Real-Time Simulated Avata
   <img src="assets/simxr_teaser.gif" />
 </div>
 
+## News 🚩
+
+[August 20, 2024] Data released!
+
+[August 5, 2024] Evaluation code released!
+
+[May 11, 2024] Skeleton code released!
+
+
 ### Dependencies
 1. Create a new conda environment and install PyTorch:
 
@@ -39,17 +48,19 @@ pip install -r requirement.txt
 ## Data 
 ### Aria 
 
-Processed Aria sequences can be found here for training and evaluation: [[Train]](https://drive.google.com/drive/folders/1hvwYZnPOowWnwuMgu28i_-KS9lSl9YUX?usp=drive_link) [[Test]](https://drive.google.com/drive/folders/1BWecE6BSDfrzUGXxlU_UOnk1gJ0vXVMJ?usp=drive_link)
+Processed Aria sequences can be found here for training and evaluation: [[Train]](https://drive.google.com/drive/folders/1ZsT4sgz3NUmpoMqcR35KJ-hFfuknrVQi?usp=drive_link) [[Test]](https://drive.google.com/drive/folders/10L8tARGzShPwzG1aJM3fPzIxuarEBAKW?usp=drive_link)
 
 ### Quest 2
 Processed real-world sequences can be found here for evaluations: [[Google Drive]](https://drive.google.com/drive/folders/1z6cviNR624UERdi8YrAMCyHbjMitsZO9?usp=sharing)
 
-Processed synthetic sequences can be found here for training: [[Google Drive]](https://drive.google.com/drive/folders/19AnRCFpO8ML82XGvNQOGtPYzbE1vvHQi?usp=sharing)
+Processed synthetic sequences can be found here for training and evaluation: [[Train]]() [[Test]]()
 
 ### Splitting Data 
 After downloading the data, you can split the long sequences in the train and test sets into shorter segments using the following commands: 
 
 ``` 
+python scripts/data_process/split_data_syn.py 
+python scripts/data_process/split_data_aria.py 
 
 ```
 

diff --git a/scripts/data_process/split_data_aria.py b/scripts/data_process/split_data_aria.py
@@ -0,0 +1,75 @@
+import glob
+import os
+import sys
+import pdb
+import os.path as osp
+sys.path.append(os.getcwd())
+
+from tqdm import tqdm
+import numpy as np
+import joblib
+import copy
+import cv2
+from multiprocessing import Pool
+import shutil
+
+
+
+
+
+def split_all_files(all_files, data_dir):
+    """Cut every sequence file in ``all_files`` into shorter segments.
+
+    Each file is loaded with joblib and is expected to hold a dict whose
+    first key is the take name, mapping to a dict of entries for that take.
+    The sequence length is the first dimension of the ``trans_orig`` entry.
+
+    * Sequences longer than 1000 frames are cut with ``np.array_split`` into
+      ``seq_len // 450 + 1`` near-equal contiguous segments, each dumped as
+      ``<take>_<start>_<end>.pkl``.
+    * Shorter sequences are dumped unchanged as ``<take>.pkl``.
+
+    Every output is written twice: the full data under
+    ``<data_dir>/<data_split>_seg/`` and a lighter copy without the
+    ``segmentation_mono`` and ``heatmaps`` entries under
+    ``<data_dir>/<data_split>_seg_motion/``.
+
+    NOTE: ``data_split`` is not a parameter; it is read from module scope and
+    must be set by the caller before this function runs.
+
+    Args:
+        all_files: iterable of paths to joblib pickle files.
+        data_dir: root directory that contains the two output folders.
+    """
+    # Entries copied as-is instead of being sliced per segment
+    # (presumably per-sequence metadata rather than per-frame arrays).
+    unsliced_keys = ('fps', 'scale', 'smpl_data', 'track_idx')
+    # Entries removed from the "motion only" copy.
+    motion_drop_keys = ('segmentation_mono', 'heatmaps')
+    seg_length = 450
+
+    for file in tqdm(all_files):
+        try:
+            data_entry = joblib.load(file)
+        except Exception:
+            # Skip unreadable files, but let KeyboardInterrupt/SystemExit
+            # propagate so the run can still be aborted.
+            print("bad file", file)
+            continue
+        take_key = list(data_entry.keys())[0]
+        take_data = data_entry[take_key]
+        seq_len = take_data['trans_orig'].shape[0]
+
+        if seq_len > 1000:
+            indxes = np.arange(seq_len)
+            splits = np.array_split(indxes, len(indxes) // seg_length + 1)
+
+            for split in splits:
+                seq_start, seq_end = split[0], split[-1]
+                data_dump = {
+                    k: v if k in unsliced_keys else v[seq_start:seq_end + 1]
+                    for k, v in take_data.items()
+                }
+                dump_key = f"{take_key}_{seq_start}_{seq_end}"
+
+                joblib.dump({dump_key: data_dump}, osp.join(data_dir, f"{data_split}_seg/{dump_key}.pkl"), compress = True)
+
+                # data_dump is a fresh dict, so dropping keys here does not
+                # touch the loaded take.
+                for key in motion_drop_keys:
+                    data_dump.pop(key, None)
+                joblib.dump({dump_key: data_dump}, osp.join(data_dir, f"{data_split}_seg_motion/{dump_key}.pkl"), compress = True)
+
+        else:
+            joblib.dump(data_entry, osp.join(data_dir, f"{data_split}_seg/{take_key}.pkl"))
+
+            # Fix: strip the heavy entries from this take itself. The original
+            # deleted 'heatmaps' from ``data_dump``, which is undefined (or
+            # stale from an earlier long sequence) in this branch.
+            for key in motion_drop_keys:
+                take_data.pop(key, None)
+
+            joblib.dump(data_entry, osp.join(data_dir, f"{data_split}_seg_motion/{take_key}.pkl"))
+
+
+# NOTE(review): this path points at the "syn" data although the script is
+# named split_data_aria.py -- confirm the intended directory before running.
+data_dir = "/hdd2/zen/data/SimXR/syn"
+###################### Splitting data into Segments ######################
+# ``data_split`` is intentionally a module-level name: split_all_files reads
+# it as a global, so it must be set before the worker pool is created.
+# NOTE(review): there is no ``if __name__ == "__main__":`` guard, so this
+# presumably relies on the "fork" start method -- confirm on other platforms.
+for data_split in ["train", "test"]:
+    all_files = glob.glob(osp.join(data_dir, f"{data_split}/*"))
+    os.makedirs(osp.join(data_dir, f"{data_split}_seg/"), exist_ok=True)
+    os.makedirs(osp.join(data_dir, f"{data_split}_seg_motion/"), exist_ok=True)
+
+    if not all_files:
+        # With no inputs the chunk size would be 0 and range() would raise
+        # "arg 3 must not be zero"; there is nothing to do for this split.
+        print("no files found for split", data_split)
+        continue
+
+    # Distribute the files over at most ``num_jobs`` roughly equal chunks,
+    # one chunk per split_all_files call.
+    num_jobs = 10
+    chunk = np.ceil(len(all_files) / num_jobs).astype(int)
+    jobs = [all_files[i:i + chunk] for i in range(0, len(all_files), chunk)]
+    job_args = [(job, data_dir) for job in jobs]
+    print(len(job_args))
+
+    # The context manager tears the pool down once starmap has returned, so
+    # worker processes are not left behind between the train and test passes.
+    with Pool(num_jobs) as pool:   # multi-processing
+        try:
+            pool.starmap(split_all_files, job_args)
+        except KeyboardInterrupt:
+            pool.terminate()
+            pool.join()