From 9c176918de6a04a8f2b22c15b50074b0bbf86caa Mon Sep 17 00:00:00 2001
From: Bingxin
Date: Wed, 15 May 2024 10:44:08 +0200
Subject: [PATCH] [ADD] hypersim preprocessing scripts

---
 script/dataset_preprocess/hypersim/README.md  |  22 +++
 .../hypersim/hypersim_util.py                 |  95 ++++++++++
 .../hypersim/preprocess_hypersim.py           | 163 ++++++++++++++++++
 3 files changed, 280 insertions(+)
 create mode 100644 script/dataset_preprocess/hypersim/README.md
 create mode 100644 script/dataset_preprocess/hypersim/hypersim_util.py
 create mode 100644 script/dataset_preprocess/hypersim/preprocess_hypersim.py

diff --git a/script/dataset_preprocess/hypersim/README.md b/script/dataset_preprocess/hypersim/README.md
new file mode 100644
index 0000000..f7e89a0
--- /dev/null
+++ b/script/dataset_preprocess/hypersim/README.md
@@ -0,0 +1,22 @@
+# Hypersim preprocessing
+
+## Download
+
+Download the [Hypersim](https://github.com/apple/ml-hypersim) dataset using [this script](https://github.com/apple/ml-hypersim/blob/20f398f4387aeca73175494d6a2568f37f372150/code/python/tools/dataset_download_images.py).
+
+Download the scene split file from [here](https://github.com/apple/ml-hypersim/blob/main/evermotion_dataset/analysis/metadata_images_split_scene_v1.csv).
+
+## Process dataset
+
+Run the preprocessing script:
+
+```bash
+python script/dataset_preprocess/hypersim/preprocess_hypersim.py --split_csv /path/to/metadata_images_split_scene_v1.csv
+```
+
+(optional) Tar the processed data, for example:
+
+```bash
+cd data/Hypersim/processed/train
+tar -cf ../../hypersim_processed_train.tar .
+```
diff --git a/script/dataset_preprocess/hypersim/hypersim_util.py b/script/dataset_preprocess/hypersim/hypersim_util.py
new file mode 100644
index 0000000..2bedaed
--- /dev/null
+++ b/script/dataset_preprocess/hypersim/hypersim_util.py
@@ -0,0 +1,95 @@
+# Author: Bingxin Ke
+# Last modified: 2024-02-19
+
+
+import numpy as np
+
+
+# Adapted from https://github.com/apple/ml-hypersim/blob/main/code/python/tools/scene_generate_images_tonemap.py
+def tone_map(rgb, entity_id_map):
+    """Tone-map an HDR image into [0, 1] using a percentile brightness scale.
+
+    The image is scaled so that the 90th-percentile brightness of the valid
+    pixels maps to 0.8 after gamma correction, then clipped to [0, 1].
+
+    Args:
+        rgb: Color array indexed as [row, column, channel] with three channels.
+        entity_id_map: Per-pixel render entity ids; -1 marks invalid pixels,
+            which are excluded from the brightness statistics. Must not
+            contain 0.
+
+    Returns:
+        Tone-mapped array with the same shape as `rgb`, clipped to [0, 1].
+    """
+    assert (entity_id_map != 0).all()
+
+    gamma = 1.0 / 2.2  # standard gamma correction exponent
+    inv_gamma = 1.0 / gamma
+    percentile = (
+        90  # we want this percentile brightness value in the unmodified image...
+    )
+    brightness_nth_percentile_desired = 0.8  # ...to be this bright after scaling
+
+    valid_mask = entity_id_map != -1
+
+    if np.count_nonzero(valid_mask) == 0:
+        scale = 1.0  # if there are no valid pixels, then set scale to 1.0
+    else:
+        brightness = (
+            0.3 * rgb[:, :, 0] + 0.59 * rgb[:, :, 1] + 0.11 * rgb[:, :, 2]
+        )  # "CCIR601 YIQ" method for computing brightness
+        brightness_valid = brightness[valid_mask]
+
+        eps = 0.0001  # if the kth percentile brightness value in the unmodified image is less than this, set the scale to 0.0 to avoid divide-by-zero
+        brightness_nth_percentile_current = np.percentile(brightness_valid, percentile)
+
+        if brightness_nth_percentile_current < eps:
+            scale = 0.0
+        else:
+            # Snavely uses the following expression in the code at https://github.com/snavely/pbrs_tonemapper/blob/master/tonemap_rgbe.py:
+            # scale = np.exp(np.log(brightness_nth_percentile_desired)*inv_gamma - np.log(brightness_nth_percentile_current))
+            #
+            # Our expression below is equivalent, but is more intuitive, because it follows more directly from the expression:
+            # (scale*brightness_nth_percentile_current)^gamma = brightness_nth_percentile_desired
+
+            scale = (
+                np.power(brightness_nth_percentile_desired, inv_gamma)
+                / brightness_nth_percentile_current
+            )
+
+    rgb_color_tm = np.power(np.maximum(scale * rgb, 0), gamma)
+    rgb_color_tm = np.clip(rgb_color_tm, 0, 1)
+    return rgb_color_tm
+
+
+# According to https://github.com/apple/ml-hypersim/issues/9
+def dist_2_depth(width, height, flt_focal, distance):
+    """Convert per-pixel distance values into planar depth.
+
+    Args:
+        width: Image width in pixels.
+        height: Image height in pixels.
+        flt_focal: Focal length, in the same pixel units as the image plane.
+        distance: Per-pixel distance array laid out as [height, width]
+            (presumably distance to the camera center; see the linked issue).
+
+    Returns:
+        Array laid out as [height, width] holding the planar depth.
+    """
+    img_plane_x = (
+        np.linspace((-0.5 * width) + 0.5, (0.5 * width) - 0.5, width)
+        .reshape(1, width)
+        .repeat(height, 0)
+        .astype(np.float32)[:, :, None]
+    )
+    img_plane_y = (
+        np.linspace((-0.5 * height) + 0.5, (0.5 * height) - 0.5, height)
+        .reshape(height, 1)
+        .repeat(width, 1)
+        .astype(np.float32)[:, :, None]
+    )
+    img_plane_z = np.full([height, width, 1], flt_focal, np.float32)
+    img_plane = np.concatenate([img_plane_x, img_plane_y, img_plane_z], 2)
+
+    depth = distance / np.linalg.norm(img_plane, 2, 2) * flt_focal
+    return depth
diff --git a/script/dataset_preprocess/hypersim/preprocess_hypersim.py b/script/dataset_preprocess/hypersim/preprocess_hypersim.py
new file mode 100644
index 0000000..c818f01
--- /dev/null
+++ b/script/dataset_preprocess/hypersim/preprocess_hypersim.py
@@ -0,0 +1,163 @@
+# Author: Bingxin Ke
+# Last modified: 2024-02-19
+
+import argparse
+import os
+
+import cv2
+import h5py
+import numpy as np
+import pandas as pd
+from hypersim_util import dist_2_depth, tone_map
+from tqdm import tqdm
+
+IMG_WIDTH = 1024
+IMG_HEIGHT = 768
+FOCAL_LENGTH = 886.81
+UINT16_MAX = np.iinfo(np.uint16).max
+
+
+def load_hdf5_dataset(path, dtype):
+    """Return the "dataset" entry of the HDF5 file at `path` cast to `dtype`."""
+    # Explicit check instead of `assert`, which is stripped under `python -O`.
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"Missing input file: {path}")
+    with h5py.File(path, "r") as f:
+        return np.array(f["dataset"]).astype(dtype)
+
+
+def write_png(path, image):
+    """Write `image` to `path` and raise if OpenCV reports a failure."""
+    # cv2.imwrite reports failure through its return value.
+    if not cv2.imwrite(path, image):
+        raise OSError(f"Failed to write image: {path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--split_csv",
+        type=str,
+        default="data/Hypersim/metadata_images_split_scene_v1.csv",
+    )
+    parser.add_argument("--dataset_dir", type=str, default="data/Hypersim/raw_data")
+    parser.add_argument("--output_dir", type=str, default="data/Hypersim/processed")
+
+    args = parser.parse_args()
+
+    split_csv = args.split_csv
+    dataset_dir = args.dataset_dir
+    output_dir = args.output_dir
+
+    # %%
+    raw_meta_df = pd.read_csv(split_csv)
+    meta_df = raw_meta_df[raw_meta_df.included_in_public_release].copy()
+
+    # %%
+    for split in ["train", "val", "test"]:
+        split_output_dir = os.path.join(output_dir, split)
+        # NOTE(review): raises if the directory already exists; presumably a
+        # guard against overwriting an earlier run -- confirm.
+        os.makedirs(split_output_dir)
+
+        split_meta_df = meta_df[meta_df.split_partition_name == split].copy()
+        split_meta_df["rgb_path"] = None
+        split_meta_df["rgb_mean"] = np.nan
+        split_meta_df["rgb_std"] = np.nan
+        split_meta_df["rgb_min"] = np.nan
+        split_meta_df["rgb_max"] = np.nan
+        split_meta_df["depth_path"] = None
+        split_meta_df["depth_mean"] = np.nan
+        split_meta_df["depth_std"] = np.nan
+        split_meta_df["depth_min"] = np.nan
+        split_meta_df["depth_max"] = np.nan
+        split_meta_df["invalid_ratio"] = np.nan
+
+        for i, row in tqdm(split_meta_df.iterrows(), total=len(split_meta_df)):
+            # Load data
+            frame_prefix = f"frame.{row.frame_id:04d}"
+            final_dir = os.path.join(
+                dataset_dir,
+                row.scene_name,
+                "images",
+                f"scene_{row.camera_name}_final_hdf5",
+            )
+            geometry_dir = os.path.join(
+                dataset_dir,
+                row.scene_name,
+                "images",
+                f"scene_{row.camera_name}_geometry_hdf5",
+            )
+            rgb = load_hdf5_dataset(
+                os.path.join(final_dir, f"{frame_prefix}.color.hdf5"), float
+            )
+            dist_from_center = load_hdf5_dataset(
+                os.path.join(geometry_dir, f"{frame_prefix}.depth_meters.hdf5"), float
+            )
+            render_entity_id = load_hdf5_dataset(
+                os.path.join(geometry_dir, f"{frame_prefix}.render_entity_id.hdf5"), int
+            )
+
+            # Tone map
+            rgb_color_tm = tone_map(rgb, render_entity_id)
+            rgb_int = (rgb_color_tm * 255).astype(np.uint8)  # [H, W, RGB]
+
+            # Distance -> depth
+            plane_depth = dist_2_depth(
+                IMG_WIDTH, IMG_HEIGHT, FOCAL_LENGTH, dist_from_center
+            )
+            valid_mask = render_entity_id != -1
+
+            # Record invalid ratio
+            invalid_ratio = (~valid_mask).mean()
+            plane_depth[~valid_mask] = 0
+
+            # Save as png
+            scene_path = row.scene_name
+            os.makedirs(os.path.join(split_output_dir, scene_path), exist_ok=True)
+
+            rgb_name = f"rgb_{row.camera_name}_fr{row.frame_id:04d}.png"
+            rgb_path = os.path.join(scene_path, rgb_name)
+            write_png(
+                os.path.join(split_output_dir, rgb_path),
+                cv2.cvtColor(rgb_int, cv2.COLOR_RGB2BGR),
+            )
+
+            # Depth is stored in millimeters as 16-bit PNG. Clip before casting:
+            # out-of-range float-to-uint16 casts are not saturated, so depths
+            # above 65.535 m would otherwise be stored as unrelated values.
+            plane_depth = np.clip(plane_depth * 1000.0, 0, UINT16_MAX)
+            plane_depth = plane_depth.astype(np.uint16)
+            depth_name = f"depth_plane_{row.camera_name}_fr{row.frame_id:04d}.png"
+            depth_path = os.path.join(scene_path, depth_name)
+            write_png(os.path.join(split_output_dir, depth_path), plane_depth)
+
+            # Meta data
+            split_meta_df.at[i, "rgb_path"] = rgb_path
+            split_meta_df.at[i, "rgb_mean"] = np.mean(rgb_int)
+            split_meta_df.at[i, "rgb_std"] = np.std(rgb_int)
+            split_meta_df.at[i, "rgb_min"] = np.min(rgb_int)
+            split_meta_df.at[i, "rgb_max"] = np.max(rgb_int)
+
+            split_meta_df.at[i, "depth_path"] = depth_path
+            restored_depth = plane_depth / 1000.0
+            split_meta_df.at[i, "depth_mean"] = np.mean(restored_depth)
+            split_meta_df.at[i, "depth_std"] = np.std(restored_depth)
+            split_meta_df.at[i, "depth_min"] = np.min(restored_depth)
+            split_meta_df.at[i, "depth_max"] = np.max(restored_depth)
+
+            split_meta_df.at[i, "invalid_ratio"] = invalid_ratio
+
+        list_path = os.path.join(split_output_dir, f"filename_list_{split}.txt")
+        lines = [
+            f"{r} {d}"
+            for r, d in zip(split_meta_df["rgb_path"], split_meta_df["depth_path"])
+        ]
+        with open(list_path, "w") as f:
+            f.write("\n".join(lines))
+
+        # Passing the path lets pandas open the file itself.
+        split_meta_df.to_csv(
+            os.path.join(split_output_dir, f"filename_meta_{split}.csv"), header=True
+        )
+
+    print("Preprocess finished")