From 467c9270f8cd025732f1ebe612b0621103103627 Mon Sep 17 00:00:00 2001 From: Eric Bezzam Date: Wed, 21 Feb 2024 18:33:43 +0100 Subject: [PATCH] Upload measured datasets to Hugging Face (#114) * Fixes to run classify script. * Fixes to dataset reconstruction. * Add script for uploading measured dataset. * Add extra files. * Better configs. * Update documentation. * Rotate if necessary. * Add badge to notebooks. * Add hugging face badge. * Improve dataset script to use data from Hugging Face. * Add todo. * Update CHANGELOG. --- CHANGELOG.rst | 4 +- README.rst | 9 ++ configs/recon_celeba_digicam.yaml | 32 +++++ configs/recon_dataset.yaml | 39 ++---- configs/sim_digicam_psf.yaml | 2 +- configs/train_celeba_classifier.yaml | 2 +- configs/upload_dataset_huggingface.yaml | 20 +++ configs/upload_digicam_10k.yaml | 23 ++++ configs/upload_digicam_26k.yaml | 21 +++ docs/source/data.rst | 15 +-- lensless/eval/metric.py | 13 +- scripts/classify/train_celeba_vit.py | 14 +- scripts/data/upload_dataset_huggingface.py | 148 +++++++++++++++++++++ scripts/recon/dataset.py | 132 +++++++++++------- 14 files changed, 369 insertions(+), 105 deletions(-) create mode 100644 configs/recon_celeba_digicam.yaml create mode 100644 configs/upload_dataset_huggingface.yaml create mode 100644 configs/upload_digicam_10k.yaml create mode 100644 configs/upload_digicam_26k.yaml create mode 100644 scripts/data/upload_dataset_huggingface.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a7e199f6..813b4b25 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,12 +13,12 @@ Unreleased Added ~~~~~ -- Nothing +- Script to upload measured datasets to Hugging Face: ``scripts/data/upload_dataset_huggingface.py`` Changed ~~~~~ -- Nothing +- Dataset reconstruction script uses datasets from Hugging Face: ``scripts/recon/dataset.py`` Bugfix ~~~~~ diff --git a/README.rst b/README.rst index 97d607da..4e03c3fa 100644 --- a/README.rst +++ b/README.rst @@ -16,6 +16,15 @@ LenslessPiCam :alt: Downloads +.. 
image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://drive.google.com/drive/folders/1nBDsg86RaZIqQM6qD-612k9v8gDrgdwB?usp=drive_link + :alt: notebooks + +.. image:: https://huggingface.co/datasets/huggingface/badges/resolve/main/powered-by-huggingface-dark.svg + :target: https://huggingface.co/bezzam + :alt: huggingface + + *A Hardware and Software Toolkit for Lensless Computational Imaging with a Raspberry Pi* ----------------------------------------------------------------------------------------- diff --git a/configs/recon_celeba_digicam.yaml b/configs/recon_celeba_digicam.yaml new file mode 100644 index 00000000..f1e5c28e --- /dev/null +++ b/configs/recon_celeba_digicam.yaml @@ -0,0 +1,32 @@ +# python scripts/recon/dataset.py -cn recon_celeba_digicam +defaults: + - recon_dataset + - _self_ + +torch: True +torch_device: 'cuda:0' + +repo_id: "bezzam/DigiCam-CelebA-10K" +split: "test" # "train", "test", "all" +psf_fn: "psf_measured.png" # in repo root +n_files: 25 # null for all files + +preprocess: + flip_ud: True + flip_lr: True + downsample: 6 + + # to have different data shape than PSF + data_dim: null + # data_dim: [48, 64] # down 64 + # data_dim: [506, 676] # down 6 + +algo: admm # "admm", "apgd", "null" to just copy over (resized) raw data +admm: + n_iter: 10 + +# extraction region of interest +# roi: null # top, left, bottom, right +# roi: [10, 300, 560, 705] # down 4 +roi: [10, 190, 377, 490] # down 6 +# roi: [5, 150, 280, 352] # down 8 diff --git a/configs/recon_dataset.yaml b/configs/recon_dataset.yaml index f474aed5..fc14af9a 100644 --- a/configs/recon_dataset.yaml +++ b/configs/recon_dataset.yaml @@ -6,42 +6,29 @@ defaults: torch: True torch_device: 'cuda:0' -input: - # https://drive.switch.ch/index.php/s/NdgHlcDeHVDH5ww?path=%2Fpsf - psf: data/psf/adafruit_random_2mm_20231907.png - # https://drive.switch.ch/index.php/s/m89D1tFEfktQueS - raw_data: data/celeba_adafruit_random_2mm_20230720_1K - +repo_id: 
"bezzam/DiffuserCam-Lensless-Mirflickr-Dataset" +split: "test" # "train", "test", "all" +psf_fn: "psf.png" # in repo root +output_folder: null # autocreate name if not spe n_files: 25 # null for all files -output_folder: data/celeba_adafruit_recon - -# extraction region of interest -roi: null # top, left, bottom, right -# -- values for `data/celeba_adafruit_random_2mm_20230720_1K` -# roi: [10, 300, 560, 705] # down 4 -# roi: [6, 200, 373, 470] # down 6 -# roi: [5, 150, 280, 352] # down 8 preprocess: - flip: True + flip_ud: True + flip_lr: False downsample: 6 - # to have different data shape than PSF data_dim: null - # data_dim: [48, 64] # down 64 - # data_dim: [506, 676] # down 6 - -display: - disp: -1 - plot: False algo: admm # "admm", "apgd", "null" to just copy over (resized) raw data - +admm: + n_iter: 100 apgd: n_jobs: 1 # run in parallel as algo is slow max_iter: 500 -admm: - n_iter: 10 +# extraction region of interest +roi: null # top, left, bottom, right -save: False \ No newline at end of file +display: + disp: -1 + plot: False diff --git a/configs/sim_digicam_psf.yaml b/configs/sim_digicam_psf.yaml index e101767f..70bc416c 100644 --- a/configs/sim_digicam_psf.yaml +++ b/configs/sim_digicam_psf.yaml @@ -33,7 +33,7 @@ sim: flipud: True # in practice found waveprop=True or False doesn't make difference - waveprop: True + waveprop: False # below are ignored if waveprop=False scene2mask: 0.3 # [m] diff --git a/configs/train_celeba_classifier.yaml b/configs/train_celeba_classifier.yaml index 11a391c8..9b563217 100644 --- a/configs/train_celeba_classifier.yaml +++ b/configs/train_celeba_classifier.yaml @@ -12,7 +12,7 @@ data: # -- raw # https://drive.switch.ch/index.php/s/m89D1tFEfktQueS - measured: data/celeba_adafruit_random_2mm_20230720_10K + measured: /scratch/bezzam/celeba_adafruit_random_2mm_20230720_10K raw: True # # -- reconstructed diff --git a/configs/upload_dataset_huggingface.yaml b/configs/upload_dataset_huggingface.yaml new file mode 100644 index 
00000000..4059b41f --- /dev/null +++ b/configs/upload_dataset_huggingface.yaml @@ -0,0 +1,20 @@ +# python scripts/data/upload_dataset_huggingface.py +hydra: + job: + chdir: True # change to output folder + +repo_id: null +hf_token: null +n_files: null +test_size: 0.15 + +lensless: + dir: null + ext: null + +lensed: + dir: null + ext: null + +# additional files to upload to root folder +files: null \ No newline at end of file diff --git a/configs/upload_digicam_10k.yaml b/configs/upload_digicam_10k.yaml new file mode 100644 index 00000000..eca5afcd --- /dev/null +++ b/configs/upload_digicam_10k.yaml @@ -0,0 +1,23 @@ +# python scripts/data/upload_dataset_huggingface.py -cn upload_digicam_10k +defaults: + - upload_dataset_huggingface + - _self_ + +repo_id: "bezzam/DigiCam-CelebA-10K" +test_size: 0.15 + +lensless: + dir: "/scratch/bezzam/celeba_adafruit_random_2mm_20230720_10K" + ext: ".png" + +lensed: + dir: "/scratch/bezzam/celeba/img_align_celeba" + ext: ".jpg" + celeba_attr: "/scratch/bezzam/celeba/list_attr_celeba.txt" + +files: + psf_measured: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_2mm_20231907.png" + psf_simulated: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_pattern_20230719_SIM_psf.png" + mask_pattern: "/home/bezzam/LenslessPiCam/data/psf/adafruit_random_pattern_20230719.npy" + mug_in_the_wild_12cm: "/home/bezzam/LenslessPiCam/data/raw_data/adafruit_mug_12cm_july21.png" + psf_12cm: "/home/bezzam/LenslessPiCam/data/psf/adafruit_psf_2mm_12p5cm_july21.png" diff --git a/configs/upload_digicam_26k.yaml b/configs/upload_digicam_26k.yaml new file mode 100644 index 00000000..e10e2d56 --- /dev/null +++ b/configs/upload_digicam_26k.yaml @@ -0,0 +1,21 @@ +# python scripts/data/upload_dataset_huggingface.py -cn upload_digicam_26k +defaults: + - upload_dataset_huggingface + - _self_ + +repo_id: "bezzam/DigiCam-CelebA-26K" +test_size: 0.15 + +lensless: + dir: "/scratch/bezzam/celeba/celeba_adafruit_random_30cm_2mm_20231004_26K" + ext: ".png" + +lensed: 
+ dir: "/scratch/bezzam/celeba/img_align_celeba" + ext: ".jpg" + celeba_attr: "/scratch/bezzam/celeba/list_attr_celeba.txt" + +files: + psf_measured: "/home/bezzam/LenslessPiCam/rpi_hq_adafruit_psf_2mm/raw_data_rgb.png" + psf_simulated: "/home/bezzam/LenslessPiCam/outputs/2024-02-21/10-07-17/adafruit_random_pattern_20231004_174047_SIM_psf.png" + mask_pattern: "/home/bezzam/LenslessPiCam/adafruit_random_pattern_20231004_174047.npy" diff --git a/docs/source/data.rst b/docs/source/data.rst index f0d9b72d..ce01c960 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -42,15 +42,12 @@ use the correct PSF file for the data you're using! Measured CelebA Dataset ----------------------- -You can download 1K measurements of the CelebA dataset done with -our lensless camera and a random pattern on the Adafruit LCD -`here (1.2 GB) `__, -and a dataset with 10K measurements -`here (13.1 GB) `__. -They both correspond to the PSF which can be found `here `__ -(``adafruit_random_2mm_20231907.png`` which is the PSF of -``adafruit_random_pattern_20230719.npy`` measured with a mask to sensor -distance of 2 mm). +You can download a dataset of `10K measurements `__ +and a dataset of `26K measurements `__ +from Hugging Face. The PSFs (measured and simulated) can be found under "Files and versions". +Both datasets were measured with `DigiCam `__, +namely an LCD-based lensless camera, where the pattern was set randomly. The images were taken of +a monitor 30 cm away from the camera, and the LCD was 2 mm away from the sensor. 
DiffuserCam Lensless Mirflickr Dataset diff --git a/lensless/eval/metric.py b/lensless/eval/metric.py index ae11e0af..bd1746bb 100644 --- a/lensless/eval/metric.py +++ b/lensless/eval/metric.py @@ -298,12 +298,13 @@ def extract( horizontal_crop = (0, estimate.shape[1]) # crop and rotate estimate image - estimate = rotate( - estimate[vertical_crop[0] : vertical_crop[1], horizontal_crop[0] : horizontal_crop[1]], - angle=rotation, - mode="nearest", - reshape=False, - ) + if rotation: + estimate = rotate( + estimate[vertical_crop[0] : vertical_crop[1], horizontal_crop[0] : horizontal_crop[1]], + angle=rotation, + mode="nearest", + reshape=False, + ) estimate /= estimate.max() estimate = np.clip(estimate, 0, 1) if verbose: diff --git a/scripts/classify/train_celeba_vit.py b/scripts/classify/train_celeba_vit.py index 79a32e44..3d55bb0c 100644 --- a/scripts/classify/train_celeba_vit.py +++ b/scripts/classify/train_celeba_vit.py @@ -4,7 +4,7 @@ First, set-up HuggingFace libraries: ``` -pip install datasets transformers +pip install datasets transformers[torch] scikit-learn tensorboardX ``` Raw measurement datasets can be download from SwitchDrive. @@ -42,6 +42,8 @@ Other hyperparameters for classification can be found in `configs/train_celeba_classifier.yaml`. 
+# TODO: update with Hugging Face dataset: https://huggingface.co/datasets/bezzam/DigiCam-CelebA-10K + """ import warnings @@ -197,19 +199,13 @@ def train_celeba_classifier(config): ratio=(0.9, 1.1), ) ) - _train_transforms.append( - Resize(size), - CenterCrop(size), - ) + _train_transforms += [Resize(size), CenterCrop(size)] if config.augmentation.horizontal_flip: if config.data.raw: warnings.warn("Horizontal flip is not supported for raw data, Skipping!") else: _train_transforms.append(RandomHorizontalFlip()) - _train_transforms.append( - ToTensor(), - normalize, - ) + _train_transforms += [ToTensor(), normalize] _train_transforms = Compose(_train_transforms) _val_transforms = Compose( diff --git a/scripts/data/upload_dataset_huggingface.py b/scripts/data/upload_dataset_huggingface.py new file mode 100644 index 00000000..029b013d --- /dev/null +++ b/scripts/data/upload_dataset_huggingface.py @@ -0,0 +1,148 @@ +""" +Push dataset measured with LenslessPiCam to HuggingFace. + +```bash +# install +pip install datasets +pip install huggingface_hub + +# make a write token on HuggingFace + +# run +python scripts/data/upload_dataset_huggingface.py \ +hf_token=... \ +``` +""" + +import hydra +import time +import os +import glob +from datasets import Dataset, DatasetDict, Image +from huggingface_hub import upload_file +from lensless.utils.dataset import natural_sort + + +@hydra.main( + version_base=None, config_path="../../configs", config_name="upload_dataset_huggingface" +) +def upload_dataset(config): + + start_time = time.time() + + # parameters + repo_id = config.repo_id + hf_token = config.hf_token + n_files = config.n_files + assert hf_token is not None, "Please provide a HuggingFace token." 
+ + # get lensless files + files_lensless = glob.glob(os.path.join(config.lensless.dir, "*" + config.lensless.ext)) + files_lensless = natural_sort(files_lensless) + if n_files is not None: + print(f"Only keeping {n_files} files...") + files_lensless = files_lensless[:n_files] + + # get lensed files + files_lensed = glob.glob(os.path.join(config.lensed.dir, "*" + config.lensed.ext)) + + # only keep if in both + bn_lensless = [os.path.basename(f).split(".")[0] for f in files_lensless] + bn_lensed = [os.path.basename(f).split(".")[0] for f in files_lensed] + common_files = list(set(bn_lensless).intersection(bn_lensed)) + common_files = natural_sort(common_files) + print(f"Number of common files: {len(common_files)}") + + # get file paths + lensless_files = [ + os.path.join(config.lensless.dir, f + config.lensless.ext) for f in common_files + ] + lensed_files = [os.path.join(config.lensed.dir, f + config.lensed.ext) for f in common_files] + + # check for attribute + df_attr = None + if config.lensed.celeba_attr is not None: + # load attribute txt file with pandas + import pandas as pd + + fp = config.lensed.celeba_attr + df = pd.read_csv(fp, sep=r"\s+", header=1, index_col=0) + df_attr = df[: len(common_files)] + # convert -1 to 0 + df_attr = df_attr.replace(-1, 0) + # convert to boolean + df_attr = df_attr.astype(bool) + # to dict + df_attr = df_attr.to_dict(orient="list") + + # step 1: create Dataset objects + def create_dataset(lensless_files, lensed_files, df_attr=None): + dataset_dict = { + "lensless": lensless_files, + "lensed": lensed_files, + } + if df_attr is not None: + # combine dictionaries + dataset_dict = {**dataset_dict, **df_attr} + dataset = Dataset.from_dict(dataset_dict) + dataset = dataset.cast_column("lensless", Image()) + dataset = dataset.cast_column("lensed", Image()) + return dataset + + # train-test split + test_size = config.test_size + n_test = int(test_size * len(common_files)) + if df_attr is not None: + # split dict into train-test + 
df_attr_test = {k: v[:n_test] for k, v in df_attr.items()} + df_attr_train = {k: v[n_test:] for k, v in df_attr.items()} + else: + df_attr_test = None + df_attr_train = None + test_dataset = create_dataset(lensless_files[:n_test], lensed_files[:n_test], df_attr_test) + train_dataset = create_dataset(lensless_files[n_test:], lensed_files[n_test:], df_attr_train) + + # step 2: create DatasetDict + dataset_dict = DatasetDict( + { + "train": train_dataset, + "test": test_dataset, + } + ) + + # step 3: push to hub + if config.files is not None: + for f in config.files: + fp = config.files[f] + ext = os.path.splitext(fp)[1] + remote_fn = f"{f}{ext}" + upload_file( + path_or_fileobj=fp, + path_in_repo=remote_fn, + repo_id=repo_id, + repo_type="dataset", + token=hf_token, + ) + dataset_dict.push_to_hub(repo_id, token=hf_token) + + upload_file( + path_or_fileobj=lensless_files[0], + path_in_repo="lensless_example.png", + repo_id=repo_id, + repo_type="dataset", + token=hf_token, + ) + upload_file( + path_or_fileobj=lensed_files[0], + path_in_repo="lensed_example.png", + repo_id=repo_id, + repo_type="dataset", + token=hf_token, + ) + + # total time in minutes + print(f"Total time: {(time.time() - start_time) / 60} minutes") + + +if __name__ == "__main__": + upload_dataset() diff --git a/scripts/recon/dataset.py b/scripts/recon/dataset.py index 4c4192c5..8eb2b76c 100644 --- a/scripts/recon/dataset.py +++ b/scripts/recon/dataset.py @@ -1,12 +1,21 @@ """ -Apply ADMM reconstruction to folder. +Apply ADMM reconstruction to a dataset downloaded from HuggingFace. 
+ +By default, to 25 files from DiffuserCam MirFlickr test set: https://huggingface.co/datasets/bezzam/DiffuserCam-Lensless-Mirflickr-Dataset/viewer/default/test ``` python scripts/recon/dataset.py ``` +To apply to CelebA measured with DigiCam: https://huggingface.co/datasets/bezzam/DigiCam-CelebA-10K/viewer/default/test +You can run the following command: +```python +python scripts/recon/dataset.py -cn recon_celeba_digicam +``` + To run APGD, use the following command: ``` +# (first-time): pip install git+https://github.com/matthieumeo/pycsou.git@38e9929c29509d350a7ff12c514e2880fdc99d6e python scripts/recon/dataset.py algo=apgd ``` @@ -18,50 +27,60 @@ """ import hydra -from hydra.utils import to_absolute_path import os import time -import numpy as np -from lensless.utils.io import load_psf, load_image, save_image +from lensless.utils.io import load_psf, save_image from lensless import ADMM import torch -import glob from tqdm import tqdm -from lensless.recon.apgd import APGD from joblib import Parallel, delayed +import numpy as np +from datasets import load_dataset +from huggingface_hub import hf_hub_download +from lensless.utils.image import resize + + +def prep_data( + data, + psf, + bg=None, + flip_ud=False, + flip_lr=False, + use_torch=False, + torch_dtype=None, + torch_device=None, +): + data = np.array(data) + if flip_ud: + data = np.flipud(data) + if flip_lr: + data = np.fliplr(data) + data = data / data.max() + if data.shape[:2] != psf.shape[1:3]: + data = resize(data, shape=psf.shape) + if bg is not None: + data = data - bg + data = np.clip(data, a_min=0, a_max=data.max()) + if use_torch: + data = torch.from_numpy(data).type(torch_dtype).to(torch_device) + return data @hydra.main(version_base=None, config_path="../../configs", config_name="recon_dataset") -def admm_dataset(config): +def recon_dataset(config): + repo_id = config.repo_id algo = config.algo - # get raw data file paths - dataset = to_absolute_path(config.input.raw_data) - if not 
os.path.isdir(dataset): - print(f"No dataset found at {dataset}") - try: - from torchvision.datasets.utils import download_and_extract_archive - except ImportError: - exit() - msg = "Do you want to download the sample CelebA dataset measured with a random Adafruit LCD pattern (1.2 GB)?" - - # default to yes if no input is given - valid = input("%s (Y/n) " % msg).lower() != "n" - if valid: - url = "https://drive.switch.ch/index.php/s/m89D1tFEfktQueS/download" - filename = "celeba_adafruit_random_2mm_20230720_1K.zip" - download_and_extract_archive( - url, os.path.dirname(dataset), filename=filename, remove_finished=True - ) - data_fps = sorted(glob.glob(os.path.join(dataset, "*.png"))) + # load dataset + dataset = load_dataset(repo_id, split=config.split) + n_files = len(dataset) if config.n_files is not None: - data_fps = data_fps[: config.n_files] - n_files = len(data_fps) + n_files = min(n_files, config.n_files) + print(f"Reconstructing {n_files} files...") # load PSF - psf_fp = to_absolute_path(config.input.psf) - flip = config.preprocess.flip + psf_fp = hf_hub_download(repo_id=repo_id, filename=config.psf_fn, repo_type="dataset") dtype = config.input.dtype print("\nPSF:") psf, bg = load_psf( @@ -69,7 +88,8 @@ def admm_dataset(config): verbose=True, downsample=config.preprocess.downsample, return_bg=True, - flip=flip, + flip_lr=config.preprocess.flip_lr, + flip_ud=config.preprocess.flip_ud, dtype=dtype, ) print(f"Downsampled PSF shape: {psf.shape}") @@ -80,8 +100,10 @@ def admm_dataset(config): else: data_dim = psf.shape - # -- create output folder - output_folder = to_absolute_path(config.output_folder) + # create output folder + output_folder = config.output_folder + if output_folder is None: + output_folder = os.path.join(os.getcwd(), os.path.basename(repo_id)) if algo == "apgd": output_folder = output_folder + f"_apgd{config.apgd.max_iter}" elif algo == "admm": @@ -90,10 +112,13 @@ def admm_dataset(config): output_folder = output_folder + "_raw" output_folder 
= output_folder + f"_{data_dim[-3]}x{data_dim[-2]}" os.makedirs(output_folder, exist_ok=True) + print(f"Output folder: {output_folder}") # -- apply reconstruction if algo == "apgd": + from lensless.recon.apgd import APGD + start_time = time.time() def recover(i): @@ -101,13 +126,15 @@ def recover(i): # reconstruction object recon = APGD(psf=psf, **config.apgd) - data_fp = data_fps[i] - - # load data - data = load_image( - data_fp, flip=flip, bg=bg, as_4d=True, return_float=True, shape=data_dim + data = dataset[i]["lensless"] + data = prep_data( + data, + psf, + bg=bg, + flip_ud=config.preprocess.flip_ud, + flip_lr=config.preprocess.flip_lr, + use_torch=False, ) - data = data[0] # first depth # apply reconstruction recon.set_data(data) @@ -122,8 +149,7 @@ def recover(i): roi = config.roi img = img[roi[0] : roi[2], roi[1] : roi[3]] - bn = os.path.basename(data_fp) - output_fp = os.path.join(output_folder, bn) + output_fp = os.path.join(output_folder, f"{i}.png") save_image(img, output_fp) n_jobs = config.apgd.n_jobs @@ -149,16 +175,21 @@ def recover(i): start_time = time.time() for i in tqdm(range(n_files)): - data_fp = data_fps[i] - # load data - data = load_image( - data_fp, flip=flip, bg=bg, as_4d=True, return_float=True, shape=data_dim + # load and prepare data + data = dataset[i]["lensless"] + + data = prep_data( + data, + psf, + bg=bg, + flip_ud=config.preprocess.flip_ud, + flip_lr=config.preprocess.flip_lr, + use_torch=config.torch, + torch_dtype=torch_dtype, + torch_device=torch_device, ) - if config.torch: - data = torch.from_numpy(data).type(torch_dtype).to(torch_device) - if recon is not None: # set data @@ -188,8 +219,7 @@ def recover(i): if config.roi is not None: img = img[config.roi[0] : config.roi[2], config.roi[1] : config.roi[3]] - bn = os.path.basename(data_fp) - output_fp = os.path.join(output_folder, bn) + output_fp = os.path.join(output_folder, f"{i}.png") save_image(img, output_fp) print(f"Processing time : {time.time() - start_time} s") @@ 
-199,4 +229,4 @@ def recover(i): if __name__ == "__main__": - admm_dataset() + recon_dataset()