From 1541144e765191b9c340d31237d9c1889000c48e Mon Sep 17 00:00:00 2001 From: Milan Chandna Date: Wed, 14 Mar 2018 00:48:25 +0530 Subject: [PATCH] fixed upload issue for directory with single file --- HISTORY.rst | 4 ++++ azure/datalake/store/__init__.py | 2 +- azure/datalake/store/multithread.py | 4 +++- tests/test_multithread.py | 18 +++++++++++++++++- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 5a7440e..45435f3 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,10 @@ Release History =============== +0.0.19 (2018-03-14) +------------------- +* Fixed upload issue where destination filename was wrong while upload of directory with single file #208 + 0.0.18 (2018-02-05) ------------------- * Fixed read issue where whole file was cached while doing positional reads #198 diff --git a/azure/datalake/store/__init__.py b/azure/datalake/store/__init__.py index a6c7d5f..49b8a48 100644 --- a/azure/datalake/store/__init__.py +++ b/azure/datalake/store/__init__.py @@ -6,7 +6,7 @@ # license information. # -------------------------------------------------------------------------- -__version__ = "0.0.18" +__version__ = "0.0.19" from .core import AzureDLFileSystem from .multithread import ADLDownloader diff --git a/azure/datalake/store/multithread.py b/azure/datalake/store/multithread.py index d70d149..e6bcc77 100644 --- a/azure/datalake/store/multithread.py +++ b/azure/datalake/store/multithread.py @@ -441,6 +441,7 @@ def hash(self): def _setup(self): """ Create set of parameters to loop over """ + is_path_walk_empty = False if "*" not in self.lpath: out = os.walk(self.lpath) lfiles = sum(([os.path.join(dir, f) for f in fnames] for @@ -448,10 +449,11 @@ def _setup(self): if (not lfiles and os.path.exists(self.lpath) and not os.path.isdir(self.lpath)): lfiles = [self.lpath] + is_path_walk_empty = True else: lfiles = glob.glob(self.lpath) - if len(lfiles) > 1: + if len(lfiles) > 0 and not is_path_walk_empty: local_rel_lpath = str(AzureDLPath(self.lpath).globless_prefix) file_pairs = [(f, self.rpath / AzureDLPath(f).relative_to(local_rel_lpath)) for f in lfiles] elif lfiles: diff --git a/tests/test_multithread.py b/tests/test_multithread.py index 3675a69..7b56c58 100644 --- a/tests/test_multithread.py +++ b/tests/test_multithread.py @@ -226,7 +226,6 @@ def local_files(tempdir): f.write(b'0123456789') yield filenames - @my_vcr.use_cassette def test_upload_one(local_files, azure): with azure_teardown(azure): @@ -253,6 +252,23 @@ def test_upload_one(local_files, azure): azure.rm(test_dir / 'bigfile') +@my_vcr.use_cassette +def test_upload_single_file_in_dir(tempdir, azure): + with azure_teardown(azure): + lpath_dir = tempdir + lfilename = os.path.join(lpath_dir, 'singlefile') + with open(lfilename, 'wb') as f: + f.write(b'0123456789') + + # transfer client w/ deterministic temporary directory + from azure.datalake.store.multithread import put_chunk + client = ADLTransferClient(azure, transfer=put_chunk, + unique_temporary=False) + + up = ADLUploader(azure, test_dir / 'singlefiledir', lpath_dir, nthreads=1, + overwrite=True) + assert azure.info(test_dir / 'singlefiledir' / 'singlefile')['length'] == 10 + azure.rm(test_dir / 'singlefiledir' / 'singlefile') @my_vcr.use_cassette def test_upload_one_empty_file(local_files, azure):