Commit

Upload measured datasets to Hugging Face (#114)
* Fixes to run classify script.

* Fixes to dataset reconstruction.

* Add script for uploading measured dataset.

* Add extra files.

* Better configs.

* Update documentation.

* Rotate if necessary.

* Add badge to notebooks.

* Add hugging face badge.

* Improve dataset script to use data from Hugging Face.

* Add todo.

* Update CHANGELOG.
ebezzam authored Feb 21, 2024
1 parent 5669f4d commit 467c927
Showing 14 changed files with 369 additions and 105 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.rst
@@ -13,12 +13,12 @@ Unreleased
Added
~~~~~

- Nothing
- Script to upload measured datasets to Hugging Face: ``scripts/data/upload_dataset_huggingface.py``

Changed
~~~~~~~

- Nothing
- Dataset reconstruction script uses datasets from Hugging Face: ``scripts/recon/dataset.py``

Bugfix
~~~~~~
9 changes: 9 additions & 0 deletions README.rst
@@ -16,6 +16,15 @@ LenslessPiCam
:alt: Downloads


.. image:: https://colab.research.google.com/assets/colab-badge.svg
:target: https://drive.google.com/drive/folders/1nBDsg86RaZIqQM6qD-612k9v8gDrgdwB?usp=drive_link
:alt: notebooks

.. image:: https://huggingface.co/datasets/huggingface/badges/resolve/main/powered-by-huggingface-dark.svg
:target: https://huggingface.co/bezzam
:alt: huggingface


*A Hardware and Software Toolkit for Lensless Computational Imaging with a Raspberry Pi*
-----------------------------------------------------------------------------------------

32 changes: 32 additions & 0 deletions configs/recon_celeba_digicam.yaml
@@ -0,0 +1,32 @@
# python scripts/recon/dataset.py -cn recon_celeba_digicam
defaults:
- recon_dataset
- _self_

torch: True
torch_device: 'cuda:0'

repo_id: "bezzam/DigiCam-CelebA-10K"
split: "test" # "train", "test", "all"
psf_fn: "psf_measured.png" # in repo root
n_files: 25 # null for all files

preprocess:
flip_ud: True
flip_lr: True
downsample: 6

# to have different data shape than PSF
data_dim: null
# data_dim: [48, 64] # down 64
# data_dim: [506, 676] # down 6

algo: admm # "admm", "apgd", "null" to just copy over (resized) raw data
admm:
n_iter: 10

# extraction region of interest
# roi: null # top, left, bottom, right
# roi: [10, 300, 560, 705] # down 4
roi: [10, 190, 377, 490] # down 6
# roi: [5, 150, 280, 352] # down 8
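The commented-out ``roi`` values above are the same region expressed at different downsample factors, so the coordinates scale linearly with the factor. A quick sketch of converting between them (``scale_roi`` is a hypothetical helper, not part of the repo):

```python
def scale_roi(roi, from_down, to_down):
    """Rescale a (top, left, bottom, right) ROI measured at one
    downsample factor to another; coordinates shrink as downsampling grows."""
    return [round(v * from_down / to_down) for v in roi]

# The "down 4" ROI from the config, rescaled to "down 8":
print(scale_roi([10, 300, 560, 705], from_down=4, to_down=8))
# → [5, 150, 280, 352]
```

Up to rounding, this reproduces the "down 8" ROI commented in the config, which suggests the listed ROIs were derived the same way.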
39 changes: 13 additions & 26 deletions configs/recon_dataset.yaml
@@ -6,42 +6,29 @@ defaults:
torch: True
torch_device: 'cuda:0'

input:
# https://drive.switch.ch/index.php/s/NdgHlcDeHVDH5ww?path=%2Fpsf
psf: data/psf/adafruit_random_2mm_20231907.png
# https://drive.switch.ch/index.php/s/m89D1tFEfktQueS
raw_data: data/celeba_adafruit_random_2mm_20230720_1K

repo_id: "bezzam/DiffuserCam-Lensless-Mirflickr-Dataset"
split: "test" # "train", "test", "all"
psf_fn: "psf.png" # in repo root
output_folder: null # autocreate name if not specified
n_files: 25 # null for all files
output_folder: data/celeba_adafruit_recon

# extraction region of interest
roi: null # top, left, bottom, right
# -- values for `data/celeba_adafruit_random_2mm_20230720_1K`
# roi: [10, 300, 560, 705] # down 4
# roi: [6, 200, 373, 470] # down 6
# roi: [5, 150, 280, 352] # down 8

preprocess:
flip: True
flip_ud: True
flip_lr: False
downsample: 6

# to have different data shape than PSF
data_dim: null
# data_dim: [48, 64] # down 64
# data_dim: [506, 676] # down 6

display:
disp: -1
plot: False

algo: admm # "admm", "apgd", "null" to just copy over (resized) raw data

admm:
n_iter: 100
apgd:
n_jobs: 1 # run in parallel as algo is slow
max_iter: 500

admm:
n_iter: 10
# extraction region of interest
roi: null # top, left, bottom, right

save: False
display:
disp: -1
plot: False
2 changes: 1 addition & 1 deletion configs/sim_digicam_psf.yaml
@@ -33,7 +33,7 @@ sim:
flipud: True

# in practice, found that waveprop=True or False doesn't make a difference
waveprop: True
waveprop: False

# below are ignored if waveprop=False
scene2mask: 0.3 # [m]
2 changes: 1 addition & 1 deletion configs/train_celeba_classifier.yaml
@@ -12,7 +12,7 @@ data:

# -- raw
# https://drive.switch.ch/index.php/s/m89D1tFEfktQueS
measured: data/celeba_adafruit_random_2mm_20230720_10K
measured: /scratch/bezzam/celeba_adafruit_random_2mm_20230720_10K
raw: True

# # -- reconstructed
20 changes: 20 additions & 0 deletions configs/upload_dataset_huggingface.yaml
@@ -0,0 +1,20 @@
# python scripts/data/upload_dataset_huggingface.py
hydra:
job:
chdir: True # change to output folder

repo_id: null
hf_token: null
n_files: null
test_size: 0.15

lensless:
dir: null
ext: null

lensed:
dir: null
ext: null

# additional files to upload to root folder
files: null
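The config above only declares ``repo_id``, ``test_size``, and the lensless/lensed folders, so the upload script presumably pairs the measurement files and splits them before pushing (e.g. via the ``datasets`` library's ``train_test_split`` and ``push_to_hub``). A minimal, hypothetical sketch of the split step in plain Python:

```python
import random

def split_files(files, test_size=0.15, seed=0):
    """Deterministic train/test split of measurement filenames
    (illustrative only; the real script likely uses the `datasets` library)."""
    files = sorted(files)
    random.Random(seed).shuffle(files)  # fixed seed keeps the split reproducible
    n_test = int(len(files) * test_size)
    return files[n_test:], files[:n_test]

train, test = split_files([f"img_{i:04d}.png" for i in range(100)])
print(len(train), len(test))  # → 85 15
```

Sorting before shuffling makes the split independent of filesystem listing order, which matters when the same split must be reproduced on another machine.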
23 changes: 23 additions & 0 deletions configs/upload_digicam_10k.yaml
@@ -0,0 +1,23 @@
# python scripts/data/upload_dataset_huggingface.py -cn upload_digicam_10k
defaults:
- upload_dataset_huggingface
- _self_

repo_id: "bezzam/DigiCam-CelebA-10K"
test_size: 0.15

lensless:
dir: "/scratch/bezzam/celeba_adafruit_random_2mm_20230720_10K"
ext: ".png"

lensed:
dir: "/scratch/bezzam/celeba/img_align_celeba"
ext: ".jpg"
celeba_attr: "/scratch/bezzam/celeba/list_attr_celeba.txt"

files:
psf_measured: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_2mm_20231907.png"
psf_simulated: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_pattern_20230719_SIM_psf.png"
mask_pattern: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_pattern_20230719.npy"
mug_in_the_wild_12cm: "/home/bezzam/LenslessPiCam/data/raw_data/adafruit_mug_12cm_july21.png"
psf_12cm: "/home/bezzam/LenslessPiCam/data/psf/adafruit_psf_2mm_12p5cm_july21.png"
21 changes: 21 additions & 0 deletions configs/upload_digicam_26k.yaml
@@ -0,0 +1,21 @@
# python scripts/data/upload_dataset_huggingface.py -cn upload_digicam_26k
defaults:
- upload_dataset_huggingface
- _self_

repo_id: "bezzam/DigiCam-CelebA-26K"
test_size: 0.15

lensless:
dir: "/scratch/bezzam/celeba/celeba_adafruit_random_30cm_2mm_20231004_26K"
ext: ".png"

lensed:
dir: "/scratch/bezzam/celeba/img_align_celeba"
ext: ".jpg"
celeba_attr: "/scratch/bezzam/celeba/list_attr_celeba.txt"

files:
psf_measured: "/home/bezzam/LenslessPiCam/rpi_hq_adafruit_psf_2mm/raw_data_rgb.png"
psf_simulated: "/home/bezzam/LenslessPiCam/outputs/2024-02-21/10-07-17/adafruit_random_pattern_20231004_174047_SIM_psf.png"
mask_pattern: "/home/bezzam/LenslessPiCam/adafruit_random_pattern_20231004_174047.npy"
15 changes: 6 additions & 9 deletions docs/source/data.rst
@@ -42,15 +42,12 @@ use the correct PSF file for the data you're using!
Measured CelebA Dataset
-----------------------

You can download 1K measurements of the CelebA dataset done with
our lensless camera and a random pattern on the Adafruit LCD
`here (1.2 GB) <https://drive.switch.ch/index.php/s/m89D1tFEfktQueS>`__,
and a dataset with 10K measurements
`here (13.1 GB) <https://drive.switch.ch/index.php/s/9NNGCJs3DoBDGlY>`__.
They both correspond to the PSF which can be found `here <https://drive.switch.ch/index.php/s/NdgHlcDeHVDH5ww?path=%2Fpsf>`__
(``adafruit_random_2mm_20231907.png`` which is the PSF of
``adafruit_random_pattern_20230719.npy`` measured with a mask to sensor
distance of 2 mm).
You can download a dataset of `10K measurements <https://huggingface.co/datasets/bezzam/DigiCam-CelebA-10K>`__
and a dataset of `26K measurements <https://huggingface.co/datasets/bezzam/DigiCam-CelebA-26K>`__
from Hugging Face. The PSFs (measured and simulated) can be found under "Files and versions".
Both datasets were measured with `DigiCam <https://opg.optica.org/abstract.cfm?uri=pcAOP-2023-JTu4A.45>`__,
an LCD-based lensless camera whose pattern was set randomly. The images were captured of
a monitor placed 30 cm from the camera, with the LCD 2 mm from the sensor.
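Once hosted on Hugging Face, the datasets can be pulled with the ``datasets`` library. A hedged sketch (only the repo id and split names come from the docs above; streaming is used here to avoid downloading the full dataset):

```python
# Hypothetical usage sketch, assuming `pip install datasets`.
try:
    from datasets import load_dataset

    # streaming=True iterates samples without downloading everything up front
    ds = load_dataset("bezzam/DigiCam-CelebA-10K", split="test", streaming=True)
    sample = next(iter(ds))  # one measurement, as a dict of features
except Exception:  # `datasets` not installed, or no network access
    sample = None
```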


DiffuserCam Lensless Mirflickr Dataset
13 changes: 7 additions & 6 deletions lensless/eval/metric.py
@@ -298,12 +298,13 @@ def extract(
horizontal_crop = (0, estimate.shape[1])

# crop and rotate estimate image
estimate = rotate(
estimate[vertical_crop[0] : vertical_crop[1], horizontal_crop[0] : horizontal_crop[1]],
angle=rotation,
mode="nearest",
reshape=False,
)
if rotation:
estimate = rotate(
estimate[vertical_crop[0] : vertical_crop[1], horizontal_crop[0] : horizontal_crop[1]],
angle=rotation,
mode="nearest",
reshape=False,
)
estimate /= estimate.max()
estimate = np.clip(estimate, 0, 1)
if verbose:
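The change above wraps the rotation in ``if rotation:`` so the interpolation is skipped entirely when no rotation is requested. The logic in isolation (a hedged sketch; the real code lives in ``lensless/eval/metric.py`` and uses ``scipy.ndimage.rotate``):

```python
import numpy as np

def crop_and_normalize(estimate, vertical_crop, horizontal_crop, rotation=0):
    """Crop, optionally rotate, then normalize to [0, 1] (illustrative helper)."""
    estimate = estimate[vertical_crop[0]:vertical_crop[1],
                        horizontal_crop[0]:horizontal_crop[1]]
    if rotation:  # only pay for interpolation when a rotation is requested
        from scipy.ndimage import rotate
        estimate = rotate(estimate, angle=rotation, mode="nearest", reshape=False)
    estimate = estimate / estimate.max()
    return np.clip(estimate, 0, 1)

out = crop_and_normalize(np.arange(16.0).reshape(4, 4), (1, 3), (0, 2))
```

Note that `rotation=0` now also avoids the tiny interpolation artifacts `scipy.ndimage.rotate` can introduce even at angle 0.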
14 changes: 5 additions & 9 deletions scripts/classify/train_celeba_vit.py
@@ -4,7 +4,7 @@
First, set up the Hugging Face libraries:
```
pip install datasets transformers
pip install datasets transformers[torch] scikit-learn tensorboardX
```
Raw measurement datasets can be downloaded from SwitchDrive.
@@ -42,6 +42,8 @@
Other hyperparameters for classification can be found in
`configs/train_celeba_classifier.yaml`.
# TODO: update with Hugging Face dataset: https://huggingface.co/datasets/bezzam/DigiCam-CelebA-10K
"""

import warnings
@@ -197,19 +199,13 @@ def train_celeba_classifier(config):
ratio=(0.9, 1.1),
)
)
_train_transforms.append(
Resize(size),
CenterCrop(size),
)
_train_transforms += [Resize(size), CenterCrop(size)]
if config.augmentation.horizontal_flip:
if config.data.raw:
warnings.warn("Horizontal flip is not supported for raw data, Skipping!")
else:
_train_transforms.append(RandomHorizontalFlip())
_train_transforms.append(
ToTensor(),
normalize,
)
_train_transforms += [ToTensor(), normalize]
_train_transforms = Compose(_train_transforms)

_val_transforms = Compose(
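The training-script change replaces ``list.append(...)`` calls that passed two transforms with ``+=``; ``append`` takes exactly one argument, so the old calls would raise ``TypeError`` at runtime (consistent with the "Fixes to run classify script" commit message). A minimal reproduction in plain Python, with placeholder strings standing in for the transform objects:

```python
transforms = ["resize"]
try:
    transforms.append("center_crop", "to_tensor")  # TypeError: append() takes exactly one argument
except TypeError:
    pass
transforms += ["center_crop", "to_tensor"]  # the fix: extend the list with +=
print(transforms)  # → ['resize', 'center_crop', 'to_tensor']
```

``transforms.extend([...])`` would work equally well; ``+=`` on a list is equivalent.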