Skip to content

Commit

Permalink
[new] add dataset.utils.download_file
Browse files Browse the repository at this point in the history
  • Loading branch information
ShuntaroAoki committed Nov 24, 2023
1 parent 9564eb8 commit a3ac7d0
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
1 change: 1 addition & 0 deletions bdpy/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Dataset package."""
47 changes: 47 additions & 0 deletions bdpy/dataset/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Dataset utilities."""

from typing import Union

import hashlib
import urllib.request

from tqdm import tqdm

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

Hint: "python3 -m pip install types-tqdm"

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

(or run "mypy --install-types" to install all missing stub packages)

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

Hint: "python3 -m pip install types-tqdm"

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

(or run "mypy --install-types" to install all missing stub packages)

Check warning on line 8 in bdpy/dataset/utils.py

View workflow job for this annotation

GitHub Actions / type-check

See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports


def download_file(url: str, destination: str, progress_bar: bool = True, md5sum: Union[str, None] = None) -> None:
    """Download a file and optionally verify its MD5 checksum.

    Parameters
    ----------
    url: str
        File URL.
    destination: str
        Path to save the file.
    progress_bar: bool = True
        Show progress bar if True. No progress bar is created if False.
    md5sum: Union[str, None] = None
        Expected md5sum hash (hex string) of the file. The comparison is
        case-insensitive. The check is skipped if None.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `md5sum` is given and does not match the hash of the
        downloaded file.
    """
    if progress_bar:
        # The total size is taken from the report hook, so the URL is fetched
        # only once and a missing Content-Length header is not an error.
        with tqdm(unit='B', unit_scale=True, desc=destination, ncols=100) as pbar:

            def _report(block_num: int, block_size: int, total_size: int) -> None:
                downloaded = block_num * block_size
                if total_size > 0:
                    pbar.total = total_size
                    # The last block is usually partial; do not overshoot.
                    downloaded = min(downloaded, total_size)
                pbar.update(downloaded - pbar.n)

            urllib.request.urlretrieve(url, destination, _report)
    else:
        urllib.request.urlretrieve(url, destination)

    if md5sum is None:
        return

    md5_hash = hashlib.md5()
    with open(destination, 'rb') as f:
        # Hash in chunks so large files are not loaded into memory at once.
        for chunk in iter(lambda: f.read(65536), b''):
            md5_hash.update(chunk)
    md5sum_test = md5_hash.hexdigest()
    # hexdigest() is always lower-case; accept upper-case expected values too.
    if md5sum.lower() != md5sum_test:
        raise ValueError(f'md5sum mismatch. \nExpected: {md5sum}\nActual: {md5sum_test}')

4 comments on commit a3ac7d0

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
bdpy
   __init__.py330%9–11
bdpy/bdata
   __init__.py220%7–8
   bdata.py4024020%26–924
   featureselector.py64640%8–124
   metadata.py67670%8–154
   utils.py1131130%4–258
bdpy/dataform
   __init__.py440%7–10
   datastore.py1071070%7–265
   features.py2922920%8–540
   pd.py990%7–44
   sparse.py67670%6–126
bdpy/dataset
   utils.py21210%3–47
bdpy/distcomp
   __init__.py110%6
   distcomp.py92920%7–127
bdpy/dl
   caffe.py60600%4–129
bdpy/dl/torch
   __init__.py220%1–2
   base.py43430%6–105
   models.py3323320%4–874
   torch.py1091090%3–258
bdpy/evals
   metrics.py95950%3–179
bdpy/feature
   __init__.py110%3
   feature.py30300%1–74
bdpy/fig
   __init__.py440%6–9
   draw_group_image_set.py90900%3–182
   fig.py88880%16–164
   makeplots.py3363360%1–729
   tile_images.py59590%1–193
bdpy/ml
   __init__.py770%8–14
   crossvalidation.py59590%7–196
   ensemble.py13130%5–46
   learning.py3083080%4–613
   model.py1401400%4–285
   regress.py11110%6–38
   searchlight.py16160%4–51
bdpy/mri
   __init__.py770%7–13
   fmriprep.py4974970%4–866
   glm.py40400%4–95
   image.py24240%4–54
   load_epi.py28280%7–88
   load_mri.py19190%4–36
   roi.py2482480%4–499
   spm.py1581580%1–300
bdpy/opendata
   __init__.py110%1
   openneuro.py2102100%1–329
bdpy/preproc
   __init__.py330%8–10
   interface.py52520%8–217
   preprocessor.py1291290%8–236
   select_top.py22220%8–61
   util.py660%6–22
bdpy/recon
   utils.py55550%4–146
bdpy/recon/torch
   __init__.py110%1
   icnn.py1611610%15–478
bdpy/stats
   __init__.py110%13
   corr.py43430%6–112
bdpy/util
   __init__.py330%7–9
   info.py47470%4–79
   math.py13130%4–38
   utils.py36360%7–145
TOTAL485148510% 

Tests Skipped Failures Errors Time
110 0 💤 13 ❌ 6 🔥 7.628s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
bdpy
   __init__.py330%9–11
bdpy/bdata
   __init__.py220%7–8
   bdata.py4024020%26–924
   featureselector.py64640%8–124
   metadata.py67670%8–154
   utils.py1131130%4–258
bdpy/dataform
   __init__.py440%7–10
   datastore.py1071070%7–265
   features.py2922920%8–540
   pd.py990%7–44
   sparse.py67670%6–126
bdpy/dataset
   utils.py21210%3–47
bdpy/distcomp
   __init__.py110%6
   distcomp.py92920%7–127
bdpy/dl
   caffe.py60600%4–129
bdpy/dl/torch
   __init__.py220%1–2
   base.py43430%6–105
   models.py3323320%4–874
   torch.py1091090%3–258
bdpy/evals
   metrics.py95950%3–179
bdpy/feature
   __init__.py110%3
   feature.py30300%1–74
bdpy/fig
   __init__.py440%6–9
   draw_group_image_set.py90900%3–182
   fig.py88880%16–164
   makeplots.py3363360%1–729
   tile_images.py59590%1–193
bdpy/ml
   __init__.py770%8–14
   crossvalidation.py59590%7–196
   ensemble.py13130%5–46
   learning.py3083080%4–613
   model.py1401400%4–285
   regress.py11110%6–38
   searchlight.py16160%4–51
bdpy/mri
   __init__.py770%7–13
   fmriprep.py4974970%4–866
   glm.py40400%4–95
   image.py24240%4–54
   load_epi.py28280%7–88
   load_mri.py19190%4–36
   roi.py2482480%4–499
   spm.py1581580%1–300
bdpy/opendata
   __init__.py110%1
   openneuro.py2102100%1–329
bdpy/preproc
   __init__.py330%8–10
   interface.py52520%8–217
   preprocessor.py1291290%8–236
   select_top.py22220%8–61
   util.py660%6–22
bdpy/recon
   utils.py55550%4–146
bdpy/recon/torch
   __init__.py110%1
   icnn.py1611610%15–478
bdpy/stats
   __init__.py110%13
   corr.py43430%6–112
bdpy/util
   __init__.py330%7–9
   info.py47470%4–79
   math.py13130%4–38
   utils.py36360%7–145
TOTAL485148510% 

Tests Skipped Failures Errors Time
110 0 💤 13 ❌ 6 🔥 7.868s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
bdpy
   __init__.py330%9–11
bdpy/bdata
   __init__.py220%7–8
   bdata.py4024020%26–924
   featureselector.py64640%8–124
   metadata.py67670%8–154
   utils.py1131130%4–258
bdpy/dataform
   __init__.py440%7–10
   datastore.py1071070%7–265
   features.py2922920%8–540
   pd.py990%7–44
   sparse.py67670%6–126
bdpy/dataset
   utils.py21210%3–47
bdpy/distcomp
   __init__.py110%6
   distcomp.py92920%7–127
bdpy/dl
   caffe.py60600%4–129
bdpy/dl/torch
   __init__.py220%1–2
   base.py43430%6–105
   models.py3323320%4–874
   torch.py1091090%3–258
bdpy/evals
   metrics.py95950%3–179
bdpy/feature
   __init__.py110%3
   feature.py30300%1–74
bdpy/fig
   __init__.py440%6–9
   draw_group_image_set.py90900%3–182
   fig.py88880%16–164
   makeplots.py3363360%1–729
   tile_images.py59590%1–193
bdpy/ml
   __init__.py770%8–14
   crossvalidation.py59590%7–196
   ensemble.py13130%5–46
   learning.py3083080%4–613
   model.py1401400%4–285
   regress.py11110%6–38
   searchlight.py16160%4–51
bdpy/mri
   __init__.py770%7–13
   fmriprep.py4974970%4–866
   glm.py40400%4–95
   image.py24240%4–54
   load_epi.py28280%7–88
   load_mri.py19190%4–36
   roi.py2482480%4–499
   spm.py1581580%1–300
bdpy/opendata
   __init__.py110%1
   openneuro.py2102100%1–329
bdpy/preproc
   __init__.py330%8–10
   interface.py52520%8–217
   preprocessor.py1291290%8–236
   select_top.py22220%8–61
   util.py660%6–22
bdpy/recon
   utils.py55550%4–146
bdpy/recon/torch
   __init__.py110%1
   icnn.py1611610%15–478
bdpy/stats
   __init__.py110%13
   corr.py43430%6–112
bdpy/util
   __init__.py330%7–9
   info.py47470%4–79
   math.py13130%4–38
   utils.py36360%7–145
TOTAL485148510% 

Tests Skipped Failures Errors Time
110 0 💤 13 ❌ 6 🔥 8.579s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
bdpy
   __init__.py330%9–11
bdpy/bdata
   __init__.py220%7–8
   bdata.py4024020%26–924
   featureselector.py64640%8–124
   metadata.py67670%8–154
   utils.py1131130%4–258
bdpy/dataform
   __init__.py440%7–10
   datastore.py1071070%7–265
   features.py2922920%8–540
   pd.py990%7–44
   sparse.py67670%6–126
bdpy/dataset
   utils.py21210%3–47
bdpy/distcomp
   __init__.py110%6
   distcomp.py92920%7–127
bdpy/dl
   caffe.py60600%4–129
bdpy/dl/torch
   __init__.py220%1–2
   base.py43430%6–105
   models.py3323320%4–874
   torch.py1091090%3–258
bdpy/evals
   metrics.py95950%3–179
bdpy/feature
   __init__.py110%3
   feature.py30300%1–74
bdpy/fig
   __init__.py440%6–9
   draw_group_image_set.py90900%3–182
   fig.py88880%16–164
   makeplots.py3363360%1–729
   tile_images.py59590%1–193
bdpy/ml
   __init__.py770%8–14
   crossvalidation.py59590%7–196
   ensemble.py13130%5–46
   learning.py3083080%4–613
   model.py1401400%4–285
   regress.py11110%6–38
   searchlight.py16160%4–51
bdpy/mri
   __init__.py770%7–13
   fmriprep.py4974970%4–866
   glm.py40400%4–95
   image.py24240%4–54
   load_epi.py28280%7–88
   load_mri.py19190%4–36
   roi.py2482480%4–499
   spm.py1581580%1–300
bdpy/opendata
   __init__.py110%1
   openneuro.py2102100%1–329
bdpy/preproc
   __init__.py330%8–10
   interface.py52520%8–217
   preprocessor.py1291290%8–236
   select_top.py22220%8–61
   util.py660%6–22
bdpy/recon
   utils.py55550%4–146
bdpy/recon/torch
   __init__.py110%1
   icnn.py1611610%15–478
bdpy/stats
   __init__.py110%13
   corr.py43430%6–112
bdpy/util
   __init__.py330%7–9
   info.py47470%4–79
   math.py13130%4–38
   utils.py36360%7–145
TOTAL485148510% 

Tests Skipped Failures Errors Time
110 0 💤 13 ❌ 6 🔥 7.712s ⏱️

Please sign in to comment.