Skip to content

Commit

Permalink
Clean up DiffuserCam dataset upload script. (#112)
Browse files Browse the repository at this point in the history
* Add option to add unrolled output to loss.

* Fix bug in initializing metric dict.

* Add option for adding noise.

* Add notebook with modular reconstruction results.

* Update notebook.

* Add support for PnP.

* Add trainable inverse.

* Add full demo for modular reconstruction.

* Cleaner plug-and-play interface.

* Add upload script for diffusercam.

* Finalize notebook.

* Update CHANGELOG.

* More options for saving image.

* Clean up upload script for DiffuserCam.

* Update documentation on Hugging Face dataset.

* Update sphinx.
  • Loading branch information
ebezzam authored Feb 8, 2024
1 parent f950fbf commit 7253505
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 44 deletions.
4 changes: 2 additions & 2 deletions configs/upload_diffusercam_huggingface.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ hydra:
repo_id: "bezzam/DiffuserCam-Lensless-Mirflickr-Dataset"
dir_diffuser: "/scratch/bezzam/DiffuserCam_mirflickr/dataset/diffuser_images"
dir_lensed: "/scratch/bezzam/DiffuserCam_mirflickr/dataset/ground_truth_lensed"
psf_fp: "data/psf/diffusercam_psf.tiff"
psf_fp: "/home/bezzam/LenslessPiCam/data/psf/diffusercam_psf.tiff"
hf_token: null
file_ext: ".npy"
n_files: null
n_jobs: 4 # for parallelizing conversion to PNG
n_jobs: 8 # for parallelizing conversion to PNG
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sphinx==4.0.1
sphinx==5.0.2
sphinx_rtd_theme==0.4.3
docutils==0.16 # >0.17 doesn't render bullets
numpy>=1.22 # so that default dtype are correctly rendered
Expand Down
19 changes: 5 additions & 14 deletions docs/source/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,12 @@ They both correspond to the PSF which can be found `here <https://drive.switch.c
distance of 2 mm).


DiffuserCam Lensless Mirflickr Dataset (DLMD)
---------------------------------------------
DiffuserCam Lensless Mirflickr Dataset
--------------------------------------

You can download a subset for the `DiffuserCam Lensless Mirflickr
Dataset <https://waller-lab.github.io/LenslessLearning/dataset.html>`__
that we've prepared
`here <https://drive.switch.ch/index.php/s/vmAZzryGI8U8rcE>`__ with
``scripts/prepare_mirflickr_subset.py``. The original dataset is quite
large (25,000 files, 100 GB), so we've prepared a more manageable
dataset (200 files, 725 MB). It was prepared with the following script:

.. code:: bash
python scripts/prepare_mirflickr_subset.py \
--data ~/Documents/DiffuserCam/DiffuserCam_Mirflickr_Dataset
The original dataset is available `here <https://waller-lab.github.io/LenslessLearning/dataset.html>`__.
However, it is quite large (100 GB). We've prepared a more manageable (6 GB)
and viewable version on `Hugging Face <https://huggingface.co/datasets/bezzam/DiffuserCam-Lensless-Mirflickr-Dataset>`__.


3D data
Expand Down
8 changes: 5 additions & 3 deletions lensless/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,17 +536,19 @@ def load_data(
return psf, data


def save_image(img, fp, max_val=255):
def save_image(img, fp, max_val=255, normalize=True):
"""Save as uint8 image."""

img_tmp = img.copy()

if img_tmp.dtype == np.uint16:
if img_tmp.dtype == np.uint16 or img_tmp.dtype == np.uint8:
img_tmp = img_tmp.astype(np.float32)

if img_tmp.dtype == np.float64 or img_tmp.dtype == np.float32:
if normalize:
img_tmp -= img_tmp.min()
img_tmp /= img_tmp.max()

if img_tmp.dtype == np.float64 or img_tmp.dtype == np.float32:
img_tmp *= max_val
img_tmp = img_tmp.astype(np.uint8)

Expand Down
70 changes: 46 additions & 24 deletions scripts/data/upload_diffusercam_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import glob
from lensless.utils.io import save_image
import cv2
import PIL
from datasets import Dataset, DatasetDict, Image
from huggingface_hub import upload_file
from lensless.utils.dataset import natural_sort
Expand All @@ -44,17 +43,12 @@ def upload_dataset(config):
file_ext = config.file_ext
n_files = config.n_files
n_jobs = config.n_jobs
normalize = False

assert hf_token is not None, "Please provide a HuggingFace token."

start_time = time.time()

# load PSF, convert to RGB, save as PNG
psf_img = np.array(PIL.Image.open(psf_fp))
psf_img = cv2.cvtColor(psf_img, cv2.COLOR_BGR2RGB) # convert to RGB
psf_fp_png = psf_fp.replace(".tiff", ".png")
save_image(psf_img, psf_fp_png)

# get all lensless-lensed pairs
files_diffuser = glob.glob(os.path.join(dir_diffuser, "*" + file_ext))
files_lensed = glob.glob(os.path.join(dir_lensed, "*" + file_ext))
Expand All @@ -69,50 +63,78 @@ def upload_dataset(config):
print(f"Only keeping {n_files} files...")
common_files = common_files[:n_files]

# load PSF, convert to RGB, save as PNG
# psf_img = np.array(PIL.Image.open(psf_fp))
psf_img = cv2.imread(psf_fp, cv2.IMREAD_UNCHANGED)
psf_img = cv2.cvtColor(psf_img, cv2.COLOR_BGR2RGB) # convert to RGB
psf_fp_png = psf_fp.replace(".tiff", ".png")
save_image(psf_img, psf_fp_png, normalize=True) # need normalize=True

# save as PNG
dir_diffuser_png = dir_diffuser.replace("diffuser_images", "diffuser_png")
os.makedirs(dir_diffuser_png, exist_ok=True)
dir_lensed_png = dir_lensed.replace("ground_truth_lensed", "lensed_png")
os.makedirs(dir_lensed_png, exist_ok=True)
diffuser_png_files = []
lensed_png_files = []

# -- parallelize with joblib
def save_png(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png):

diffuser_img = np.load(os.path.join(dir_diffuser, f))
diffuser_img = cv2.cvtColor(diffuser_img, cv2.COLOR_BGR2RGB) # convert to RGB
diffuser_fn = os.path.join(dir_diffuser_png, f.replace(file_ext, ".png"))
diffuser_png_files.append(diffuser_fn)
save_image(diffuser_img, diffuser_fn)
save_image(diffuser_img, diffuser_fn, normalize=normalize)

lensed_img = np.load(os.path.join(dir_lensed, f))
lensed_img = cv2.cvtColor(lensed_img, cv2.COLOR_BGR2RGB) # convert to RGB
lensed_fn = os.path.join(dir_lensed_png, f.replace(file_ext, ".png"))
lensed_png_files.append(lensed_fn)
save_image(lensed_img, lensed_fn)
save_image(lensed_img, lensed_fn, normalize=normalize)

Parallel(n_jobs=n_jobs)(
delayed(save_png)(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png)
for f in tqdm(common_files)
)

# get file paths
diffuser_files = [
os.path.join(dir_diffuser_png, f.replace(file_ext, ".png")) for f in common_files
]
lensed_files = [os.path.join(dir_lensed_png, f.replace(file_ext, ".png")) for f in common_files]
diffuser_files = natural_sort(diffuser_files)
lensed_files = natural_sort(lensed_files)

# step 1: create Dataset objects
dataset = Dataset.from_dict(
{
"lensless": diffuser_png_files,
"lensed": lensed_png_files,
}
)
dataset = dataset.cast_column("lensless", Image())
dataset = dataset.cast_column("lensed", Image())
def create_dataset(diffuser_files, lensed_files):
dataset = Dataset.from_dict(
{
"lensless": diffuser_files,
"lensed": lensed_files,
}
)
dataset = dataset.cast_column("lensless", Image())
dataset = dataset.cast_column("lensed", Image())
return dataset

# according to original split test files are up to idx=1000, for some reason im1 is missing?
test_dataset = create_dataset(diffuser_files[:999], lensed_files[:999])
train_dataset = create_dataset(diffuser_files[999:], lensed_files[999:])

# step 2: create DatasetDict
dataset_dict = DatasetDict(
{
"all": dataset,
"train": train_dataset,
"test": test_dataset,
}
)

# step 3: push to hub
upload_file(
path_or_fileobj=psf_fp,
path_in_repo="psf.tiff",
repo_id=repo_id,
repo_type="dataset",
token=hf_token,
)

# -- dataset
dataset_dict.push_to_hub(
repo_id,
Expand All @@ -126,14 +148,14 @@ def save_png(f, dir_diffuser, dir_diffuser_png, dir_lensed, dir_lensed_png):
token=hf_token,
)
upload_file(
path_or_fileobj=diffuser_png_files[0],
path_or_fileobj=diffuser_files[0],
path_in_repo="lensless_example.png",
repo_id=repo_id,
repo_type="dataset",
token=hf_token,
)
upload_file(
path_or_fileobj=lensed_png_files[0],
path_or_fileobj=lensed_files[0],
path_in_repo="lensed_example.png",
repo_id=repo_id,
repo_type="dataset",
Expand Down

0 comments on commit 7253505

Please sign in to comment.