Commit

data
ZhengyiLuo committed Aug 20, 2024
1 parent 8a129cf commit 1212f1a
Showing 2 changed files with 88 additions and 2 deletions.
15 changes: 13 additions & 2 deletions README.md
@@ -11,6 +11,15 @@ Official implementation of CVPR 2024 highlight paper: "Real-Time Simulated Avata
<img src="assets/simxr_teaser.gif" />
</div>

## News 🚩

[August 20, 2024] Data released!

[August 5, 2024] Evaluation code released!

[May 11, 2024] Skeleton code released!


### Dependencies
1. Create a new conda environment and install PyTorch:

@@ -39,17 +48,19 @@ pip install -r requirement.txt
## Data
### Aria

Processed Aria sequences can be found here for training and evaluation: [[Train]](https://drive.google.com/drive/folders/1hvwYZnPOowWnwuMgu28i_-KS9lSl9YUX?usp=drive_link) [[Test]](https://drive.google.com/drive/folders/1BWecE6BSDfrzUGXxlU_UOnk1gJ0vXVMJ?usp=drive_link)
Processed Aria sequences can be found here for training and evaluation: [[Train]](https://drive.google.com/drive/folders/1ZsT4sgz3NUmpoMqcR35KJ-hFfuknrVQi?usp=drive_link) [[Test]](https://drive.google.com/drive/folders/10L8tARGzShPwzG1aJM3fPzIxuarEBAKW?usp=drive_link)

### Quest 2
Processed real-world sequences can be found here for evaluations: [[Google Drive]](https://drive.google.com/drive/folders/1z6cviNR624UERdi8YrAMCyHbjMitsZO9?usp=sharing)

Processed synthetic sequences can be found here for training: [[Google Drive]](https://drive.google.com/drive/folders/19AnRCFpO8ML82XGvNQOGtPYzbE1vvHQi?usp=sharing)
Processed synthetic sequences can be found here for training: [[Train]]() [[Test]]()

### Splitting Data
After downloading the data, you can split it into training and testing sets using the following commands:

```
python scripts/data_process/split_data_syn.py
python scripts/data_process/split_data_aria.py
```
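
To sanity-check the result, below is a minimal sketch (not part of the repo) of loading one segmented file and inspecting its contents; the `data_dir` path is a placeholder, and the output layout (`train_seg/`, `train_seg_motion/`) follows the split scripts above.

```
# Minimal sketch: inspect one segmented sequence produced by the split scripts.
# `data_dir` is a placeholder; point it at your own download location.
import glob
import os.path as osp

import joblib

data_dir = "data/SimXR/aria"
seg_files = sorted(glob.glob(osp.join(data_dir, "train_seg/*.pkl")))
entry = joblib.load(seg_files[0])          # {take_key: {field: array, ...}}
take_key = next(iter(entry))
seq = entry[take_key]
print(take_key, seq["trans_orig"].shape)   # shape[0] is the segment length in frames
print(sorted(seq.keys()))
```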

75 changes: 75 additions & 0 deletions scripts/data_process/split_data_aria.py
@@ -0,0 +1,75 @@
import glob
import os
import sys
import pdb
import os.path as osp
sys.path.append(os.getcwd())

from tqdm import tqdm
import numpy as np
import joblib
import copy
import cv2
from multiprocessing import Pool
import shutil





def split_all_files(all_files, data_dir):
    # data_split ("train" or "test") is a module-level variable set in the loop
    # below and inherited by the worker processes.
    for file in tqdm(all_files):
        try:
            data_entry = joblib.load(file)
        except Exception:
            print("bad file", file)
            continue
        take_key = list(data_entry.keys())[0]
        seq_len = data_entry[take_key]['trans_orig'].shape[0]

        if seq_len > 1000:
            # Long takes are cut into roughly 450-frame segments.
            indxes = np.arange(seq_len)
            seg_length = 450
            splits = np.array_split(indxes, len(indxes) // seg_length + 1)

            for split in splits:
                seq_start, seq_end = split[0], split[-1]
                # Slice per-frame fields; sequence-level metadata is copied as-is.
                data_dump = {k: v[seq_start:seq_end + 1] if k not in ['fps', 'scale', 'smpl_data', 'track_idx'] else v for k, v in data_entry[take_key].items()}
                dump_key = f"{take_key}_{seq_start}_{seq_end}"

                joblib.dump({dump_key: data_dump}, osp.join(data_dir, f"{data_split}_seg/{dump_key}.pkl"), compress=True)

                # Motion-only copy: drop image-space data before dumping.
                del data_dump['segmentation_mono']
                del data_dump['heatmaps']
                joblib.dump({dump_key: data_dump}, osp.join(data_dir, f"{data_split}_seg_motion/{dump_key}.pkl"), compress=True)

        else:
            # Short takes are kept as a single segment.
            joblib.dump(data_entry, osp.join(data_dir, f"{data_split}_seg/{take_key}.pkl"))

            del data_entry[take_key]['segmentation_mono']
            del data_entry[take_key]['heatmaps']

            joblib.dump(data_entry, osp.join(data_dir, f"{data_split}_seg_motion/{take_key}.pkl"))


data_dir = "/hdd2/zen/data/SimXR/syn"
###################### Splitting data into Segments ######################
for data_split in ["train", "test"]:
    all_files = glob.glob(osp.join(data_dir, f"{data_split}/*"))
    os.makedirs(osp.join(data_dir, f"{data_split}_seg/"), exist_ok=True)
    os.makedirs(osp.join(data_dir, f"{data_split}_seg_motion/"), exist_ok=True)

    # Chunk the file list so each worker process handles one chunk.
    jobs = all_files
    num_jobs = 10
    chunk = np.ceil(len(jobs) / num_jobs).astype(int)
    jobs = [jobs[i:i + chunk] for i in range(0, len(jobs), chunk)]
    job_args = [(jobs[i], data_dir) for i in range(len(jobs))]
    print(len(job_args))

    try:
        pool = Pool(num_jobs)  # multi-processing
        pool.starmap(split_all_files, job_args)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
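
For reference, a small standalone sketch (mirroring the segmentation rule in `split_all_files`, not additional repo code) of how a long take is cut into roughly 450-frame chunks:

```
import numpy as np

seq_len = 1350                     # example: one take with 1350 frames
indxes = np.arange(seq_len)
seg_length = 450
# Takes longer than 1000 frames are split into len // seg_length + 1 near-equal
# chunks by np.array_split, exactly as in split_all_files above.
splits = np.array_split(indxes, len(indxes) // seg_length + 1)
for split in splits:
    print(split[0], split[-1])     # inclusive start/end frame of a segment
```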
