diff --git a/configs/upload_diffusercam_huggingface.yaml b/configs/upload_diffusercam_huggingface.yaml index 90bba9d9..f27d6bce 100644 --- a/configs/upload_diffusercam_huggingface.yaml +++ b/configs/upload_diffusercam_huggingface.yaml @@ -6,8 +6,8 @@ hydra: repo_id: "bezzam/DiffuserCam-Lensless-Mirflickr-Dataset" dir_diffuser: "/scratch/bezzam/DiffuserCam_mirflickr/dataset/diffuser_images" dir_lensed: "/scratch/bezzam/DiffuserCam_mirflickr/dataset/ground_truth_lensed" -psf_fp: "data/psf/diffusercam_psf.tiff" +psf_fp: "/home/bezzam/LenslessPiCam/data/psf/diffusercam_psf.tiff" hf_token: null file_ext: ".npy" n_files: null -n_jobs: 4 # for parallelizing conversion to PNG \ No newline at end of file +n_jobs: 8 # for parallelizing conversion to PNG diff --git a/docs/requirements.txt b/docs/requirements.txt index 148ae070..dfe5ea54 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx==4.0.1 +sphinx==5.0.2 sphinx_rtd_theme==0.4.3 docutils==0.16 # >0.17 doesn't render bullets numpy>=1.22 # so that default dtype are correctly rendered diff --git a/docs/source/data.rst b/docs/source/data.rst index 50b323c6..f0d9b72d 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -53,21 +53,12 @@ They both correspond to the PSF which can be found `here `__ -that we've prepared -`here `__ with -``scripts/prepare_mirflickr_subset.py``. The original dataset is quite -large (25000 files, 100 GB). So we've prepared a more manageable -dataset (200 files, 725 MB). It was prepared with the following script: - -.. code:: bash - - python scripts/prepare_mirflickr_subset.py \ - --data ~/Documents/DiffuserCam/DiffuserCam_Mirflickr_Dataset +The original dataset is available `here `__. +However, it is quite large (100 GB). We've prepared a more manageable (6GB) +and viewable version on `Hugging Face `__. 
3D data diff --git a/lensless/utils/io.py b/lensless/utils/io.py index 4f25b62a..2ed26675 100644 --- a/lensless/utils/io.py +++ b/lensless/utils/io.py @@ -536,17 +536,19 @@ def load_data( return psf, data -def save_image(img, fp, max_val=255): +def save_image(img, fp, max_val=255, normalize=True): """Save as uint8 image.""" img_tmp = img.copy() - if img_tmp.dtype == np.uint16: + if img_tmp.dtype == np.uint16 or img_tmp.dtype == np.uint8: img_tmp = img_tmp.astype(np.float32) - if img_tmp.dtype == np.float64 or img_tmp.dtype == np.float32: + if normalize: img_tmp -= img_tmp.min() img_tmp /= img_tmp.max() + + if img_tmp.dtype == np.float64 or img_tmp.dtype == np.float32: img_tmp *= max_val img_tmp = img_tmp.astype(np.uint8) diff --git a/scripts/data/upload_diffusercam_huggingface.py b/scripts/data/upload_diffusercam_huggingface.py index ab10b254..1d531a4b 100644 --- a/scripts/data/upload_diffusercam_huggingface.py +++ b/scripts/data/upload_diffusercam_huggingface.py @@ -22,7 +22,6 @@ import glob from lensless.utils.io import save_image import cv2 -import PIL from datasets import Dataset, DatasetDict, Image from huggingface_hub import upload_file from lensless.utils.dataset import natural_sort @@ -44,17 +43,12 @@ def upload_dataset(config): file_ext = config.file_ext n_files = config.n_files n_jobs = config.n_jobs + normalize = False assert hf_token is not None, "Please provide a HuggingFace token." 
start_time = time.time() - # load PSF, convert to RGB, save as PNG - psf_img = np.array(PIL.Image.open(psf_fp)) - psf_img = cv2.cvtColor(psf_img, cv2.COLOR_BGR2RGB) # convert to RGB - psf_fp_png = psf_fp.replace(".tiff", ".png") - save_image(psf_img, psf_fp_png) - # get all lensless-lensed pairs files_diffuser = glob.glob(os.path.join(dir_diffuser, "*" + file_ext)) files_lensed = glob.glob(os.path.join(dir_lensed, "*" + file_ext)) @@ -69,50 +63,78 @@ def upload_dataset(config): print(f"Only keeping {n_files} files...") common_files = common_files[:n_files] + # load PSF, convert to RGB, save as PNG + # psf_img = np.array(PIL.Image.open(psf_fp)) + psf_img = cv2.imread(psf_fp, cv2.IMREAD_UNCHANGED) + psf_img = cv2.cvtColor(psf_img, cv2.COLOR_BGR2RGB) # convert to RGB + psf_fp_png = psf_fp.replace(".tiff", ".png") + save_image(psf_img, psf_fp_png, normalize=True) # need normalize=True + # save as PNG dir_diffuser_png = dir_diffuser.replace("diffuser_images", "diffuser_png") os.makedirs(dir_diffuser_png, exist_ok=True) dir_lensed_png = dir_lensed.replace("ground_truth_lensed", "lensed_png") os.makedirs(dir_lensed_png, exist_ok=True) - diffuser_png_files = [] - lensed_png_files = [] # -- parallelize with joblib def save_png(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png): + diffuser_img = np.load(os.path.join(dir_diffuser, f)) diffuser_img = cv2.cvtColor(diffuser_img, cv2.COLOR_BGR2RGB) # convert to RGB diffuser_fn = os.path.join(dir_diffuser_png, f.replace(file_ext, ".png")) - diffuser_png_files.append(diffuser_fn) - save_image(diffuser_img, diffuser_fn) + save_image(diffuser_img, diffuser_fn, normalize=normalize) lensed_img = np.load(os.path.join(dir_lensed, f)) lensed_img = cv2.cvtColor(lensed_img, cv2.COLOR_BGR2RGB) # convert to RGB lensed_fn = os.path.join(dir_lensed_png, f.replace(file_ext, ".png")) - lensed_png_files.append(lensed_fn) - save_image(lensed_img, lensed_fn) + save_image(lensed_img, lensed_fn, normalize=normalize) 
Parallel(n_jobs=n_jobs)( delayed(save_png)(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png) for f in tqdm(common_files) ) + # get file paths + diffuser_files = [ + os.path.join(dir_diffuser_png, f.replace(file_ext, ".png")) for f in common_files + ] + lensed_files = [os.path.join(dir_lensed_png, f.replace(file_ext, ".png")) for f in common_files] + diffuser_files = natural_sort(diffuser_files) + lensed_files = natural_sort(lensed_files) + # step 1: create Dataset objects - dataset = Dataset.from_dict( - { - "lensless": diffuser_png_files, - "lensed": lensed_png_files, - } - ) - dataset = dataset.cast_column("lensless", Image()) - dataset = dataset.cast_column("lensed", Image()) + def create_dataset(diffuser_files, lensed_files): + dataset = Dataset.from_dict( + { + "lensless": diffuser_files, + "lensed": lensed_files, + } + ) + dataset = dataset.cast_column("lensless", Image()) + dataset = dataset.cast_column("lensed", Image()) + return dataset + + # per the original split, the test set is the first 1000 images; "im1" is missing from the dataset, so only 999 test files remain 
+ test_dataset = create_dataset(diffuser_files[:999], lensed_files[:999]) + train_dataset = create_dataset(diffuser_files[999:], lensed_files[999:]) # step 2: create DatasetDict dataset_dict = DatasetDict( { - "all": dataset, + "train": train_dataset, + "test": test_dataset, } ) + # step 3: push to hub + upload_file( + path_or_fileobj=psf_fp, + path_in_repo="psf.tiff", + repo_id=repo_id, + repo_type="dataset", + token=hf_token, + ) + # -- dataset dataset_dict.push_to_hub( repo_id, @@ -126,14 +148,14 @@ def save_png(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png): token=hf_token, ) upload_file( - path_or_fileobj=diffuser_png_files[0], + path_or_fileobj=diffuser_files[0], path_in_repo="lensless_example.png", repo_id=repo_id, repo_type="dataset", token=hf_token, ) upload_file( - path_or_fileobj=lensed_png_files[0], + path_or_fileobj=lensed_files[0], path_in_repo="lensed_example.png", repo_id=repo_id, repo_type="dataset",