From 2dc4fb7521de64c23b7e70655b522596706d28c9 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Thu, 6 Jun 2024 10:07:16 +0300 Subject: [PATCH] Remove statuses from `ls_with_sizes` and do not mask failures on POSIX. (#5043) [SC-23179](https://app.shortcut.com/tiledb-inc/story/23179) [SC-48851](https://app.shortcut.com/tiledb-inc/story/48851) [SC-48854](https://app.shortcut.com/tiledb-inc/story/48854) This PR updates the signature of `ls_with_sizes` in the `VFS` class and the specific VFS implementations[^1] to indicate errors by throwing exceptions instead of returning `Status`. On top of that, `Posix::ls_with_sizes` was updated to fail if `opendir` failed with an error code other than `ENOENT` (see also https://github.com/TileDB-Inc/TileDB/pull/5037#discussion_r1624775918). Also in `Posix::ls_with_sizes`, the pointer returned by `opendir` was updated to be placed inside a smart pointer, making sure it gets freed. This caused errors in the `find_head_api_violations.py` script, which were fixed. [^1]: HDFS was not updated to minimize risk. --- TYPE: BUG DESC: Do not mask failures when listing a directory fails on POSIX. --------- Co-authored-by: Luc Rancourt --- scripts/find_heap_api_violations.py | 7 +- test/src/unit-vfs.cc | 8 +-- test/support/src/vfs_helpers.cc | 14 ++-- tiledb/sm/array/array_directory.cc | 4 +- tiledb/sm/filesystem/azure.cc | 21 ++---- tiledb/sm/filesystem/azure.h | 3 +- tiledb/sm/filesystem/filesystem_base.h | 4 +- tiledb/sm/filesystem/gcs.cc | 22 +++---- tiledb/sm/filesystem/gcs.h | 3 +- tiledb/sm/filesystem/mem_filesystem.cc | 18 ++---- tiledb/sm/filesystem/mem_filesystem.h | 14 +++- tiledb/sm/filesystem/posix.cc | 88 ++++++++++++++++++++------ tiledb/sm/filesystem/posix.h | 4 +- tiledb/sm/filesystem/s3.cc | 30 ++++----- tiledb/sm/filesystem/s3.h | 7 +- tiledb/sm/filesystem/vfs.cc | 64 +++++++------------ tiledb/sm/filesystem/vfs.h | 3 +- tiledb/sm/filesystem/win.cc | 15 ++--- tiledb/sm/filesystem/win.h | 3 +- 19 files changed, 167 insertions(+), 165 deletions(-) diff --git a/scripts/find_heap_api_violations.py b/scripts/find_heap_api_violations.py index 1bc28dd8463..044dcad0446 100755 --- a/scripts/find_heap_api_violations.py +++ b/scripts/find_heap_api_violations.py @@ -92,8 +92,7 @@ # Contains per-file exceptions to violations of "make_shared". make_shared_exceptions = { - "*": ["tdb::make_shared"], - "*": ["tiledb::common::make_shared"], + "*": ["tdb::make_shared", "tiledb::common::make_shared"], } # Match C++ unique_ptr objects. @@ -103,7 +102,7 @@ unique_ptr_exceptions = { "*": ["tdb_unique_ptr", "tiledb_unique_ptr", "tdb::pmr::unique_ptr"], "zstd_compressor.h": ["std::unique_ptr ctx_;", "std::unique_ptr ctx_;"], - "posix.cc": ["static std::unique_ptr cwd_(getcwd(nullptr, 0), free);"], + "posix.cc": ["std::unique_ptr", "static std::unique_ptr cwd_(getcwd(nullptr, 0), free);"], "curl.h": ["std::unique_ptr"], "pmr.h": ["std::unique_ptr", "unique_ptr make_unique("], } @@ -148,7 +147,7 @@ class Violation: def iter_file_violations(file_path: str) -> Iterable[Violation]: - with open(file_path) as f: + with open(file_path, encoding="utf-8") as f: for line_num, line in enumerate(f): line = line.strip() for checker in violation_checkers: diff --git a/test/src/unit-vfs.cc b/test/src/unit-vfs.cc index d2117965ec8..1999bd8e5d5 100644 --- a/test/src/unit-vfs.cc +++ b/test/src/unit-vfs.cc @@ -339,9 +339,7 @@ TEMPLATE_LIST_TEST_CASE( std::string s = "abcdef"; require_tiledb_ok(vfs.write(ls_file, s.data(), s.size())); require_tiledb_ok(vfs.close_file(ls_file)); - auto&& [status, opt_children] = vfs.ls_with_sizes(ls_dir); - require_tiledb_ok(status); - auto children = opt_children.value(); + auto children = vfs.ls_with_sizes(ls_dir); #ifdef _WIN32 // Normalization only for Windows ls_file = URI(tiledb::sm::path_win::uri_from_path(ls_file.to_string())); @@ -599,9 +597,7 @@ TEST_CASE("VFS: test ls_with_sizes", "[vfs][ls-with-sizes]") { require_tiledb_ok(vfs_ls.write(URI(subdir_file), s2.data(), s2.size())); // List - auto&& [status, rv] = vfs_ls.ls_with_sizes(URI(dir)); - auto children = *rv; - require_tiledb_ok(status); + auto children = vfs_ls.ls_with_sizes(URI(dir)); #ifdef _WIN32 // Normalization only for Windows diff --git a/test/support/src/vfs_helpers.cc b/test/support/src/vfs_helpers.cc index 326cc583b5d..2dcb9fda794 100644 --- a/test/support/src/vfs_helpers.cc +++ b/test/support/src/vfs_helpers.cc @@ -451,12 +451,16 @@ VFSTestBase::VFSTestBase( } VFSTestBase::~VFSTestBase() { - if (vfs_.supports_uri_scheme(temp_dir_)) { - bool is_dir = false; - vfs_.is_dir(temp_dir_, &is_dir).ok(); - if (is_dir) { - vfs_.remove_dir(temp_dir_).ok(); + try { + if (vfs_.supports_uri_scheme(temp_dir_)) { + bool is_dir = false; + vfs_.is_dir(temp_dir_, &is_dir).ok(); + if (is_dir) { + vfs_.remove_dir(temp_dir_).ok(); + } } + } catch (const std::exception& e) { + // Suppress exceptions in destructors. } } diff --git a/tiledb/sm/array/array_directory.cc b/tiledb/sm/array/array_directory.cc index 48c0a65e49e..e031ed5f4a9 100644 --- a/tiledb/sm/array/array_directory.cc +++ b/tiledb/sm/array/array_directory.cc @@ -583,9 +583,7 @@ const std::set& ArrayDirectory::dir_names() { } std::vector ArrayDirectory::ls(const URI& uri) const { - auto&& [st, opt_dir_entries] = resources_.get().vfs().ls_with_sizes(uri); - throw_if_not_ok(st); - auto dir_entries = opt_dir_entries.value(); + auto dir_entries = resources_.get().vfs().ls_with_sizes(uri); auto dirs = dir_names(); std::vector uris; diff --git a/tiledb/sm/filesystem/azure.cc b/tiledb/sm/filesystem/azure.cc index 6ffbbffa926..7b45be982fa 100644 --- a/tiledb/sm/filesystem/azure.cc +++ b/tiledb/sm/filesystem/azure.cc @@ -585,32 +585,26 @@ Status Azure::ls( const int max_paths) const { assert(paths); - auto&& [st, entries] = ls_with_sizes(uri, delimiter, max_paths); - RETURN_NOT_OK(st); - - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(uri, delimiter, max_paths)) { paths->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> Azure::ls_with_sizes( +std::vector Azure::ls_with_sizes( const URI& uri, const std::string& delimiter, int max_paths) const { const auto& c = client(); const URI uri_dir = uri.add_trailing_slash(); if (!uri_dir.is_azure()) { - auto st = LOG_STATUS(Status_AzureError( - std::string("URI is not an Azure URI: " + uri_dir.to_string()))); - return {st, nullopt}; + throw AzureException("URI is not an Azure URI: " + uri_dir.to_string()); } std::string container_name; std::string blob_path; - RETURN_NOT_OK_TUPLE( - parse_azure_uri(uri_dir, &container_name, &blob_path), nullopt); + throw_if_not_ok(parse_azure_uri(uri_dir, &container_name, &blob_path)); auto container_client = c.GetBlobContainerClient(container_name); @@ -624,9 +618,8 @@ tuple>> Azure::ls_with_sizes( try { response = container_client.ListBlobsByHierarchy(delimiter, options); } catch (const ::Azure::Storage::StorageException& e) { - auto st = LOG_STATUS(Status_AzureError( - "List blobs failed on: " + uri_dir.to_string() + "; " + e.Message)); - return {st, nullopt}; + throw AzureException( + "List blobs failed on: " + uri_dir.to_string() + "; " + e.Message); } for (const auto& blob : response.Blobs) { @@ -648,7 +641,7 @@ tuple>> Azure::ls_with_sizes( options.ContinuationToken = response.NextPageToken; } while (options.ContinuationToken.HasValue()); - return {Status::Ok(), entries}; + return entries; } Status Azure::move_object(const URI& old_uri, const URI& new_uri) { diff --git a/tiledb/sm/filesystem/azure.h b/tiledb/sm/filesystem/azure.h index 985d6d290c5..c1889da60dc 100644 --- a/tiledb/sm/filesystem/azure.h +++ b/tiledb/sm/filesystem/azure.h @@ -392,8 +392,7 @@ class Azure { * @param max_paths The maximum number of paths to be retrieved * @return A list of directory_entry objects */ - tuple>> - ls_with_sizes( + std::vector ls_with_sizes( const URI& uri, const std::string& delimiter = "/", int max_paths = -1) const; diff --git a/tiledb/sm/filesystem/filesystem_base.h b/tiledb/sm/filesystem/filesystem_base.h index ff54707376f..cf8f53dc098 100644 --- a/tiledb/sm/filesystem/filesystem_base.h +++ b/tiledb/sm/filesystem/filesystem_base.h @@ -115,8 +115,8 @@ class FilesystemBase { * @param parent The target directory to list. * @return All entries that are contained in the parent */ - virtual tuple>> - ls_with_sizes(const URI& parent) const = 0; + virtual std::vector ls_with_sizes( + const URI& parent) const = 0; /** * Renames a file. diff --git a/tiledb/sm/filesystem/gcs.cc b/tiledb/sm/filesystem/gcs.cc index 540399f8f0e..e5381ff998a 100644 --- a/tiledb/sm/filesystem/gcs.cc +++ b/tiledb/sm/filesystem/gcs.cc @@ -477,32 +477,27 @@ Status GCS::ls( const std::string& delimiter, const int max_paths) const { assert(paths); - auto&& [st, entries] = ls_with_sizes(uri, delimiter, max_paths); - RETURN_NOT_OK(st); - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(uri, delimiter, max_paths)) { paths->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> GCS::ls_with_sizes( +std::vector GCS::ls_with_sizes( const URI& uri, const std::string& delimiter, int max_paths) const { - RETURN_NOT_OK_TUPLE(init_client(), nullopt); + throw_if_not_ok(init_client()); const URI uri_dir = uri.add_trailing_slash(); if (!uri_dir.is_gcs()) { - auto st = LOG_STATUS(Status_GCSError( - std::string("URI is not a GCS URI: " + uri_dir.to_string()))); - return {st, nullopt}; + throw GCSException("URI is not a GCS URI: " + uri_dir.to_string()); } std::string bucket_name; std::string object_path; - RETURN_NOT_OK_TUPLE( - parse_gcs_uri(uri_dir, &bucket_name, &object_path), nullopt); + throw_if_not_ok(parse_gcs_uri(uri_dir, &bucket_name, &object_path)); std::vector entries; google::cloud::storage::Prefix prefix_option(object_path); @@ -515,10 +510,9 @@ tuple>> GCS::ls_with_sizes( if (!object_metadata.ok()) { const google::cloud::Status status = object_metadata.status(); - auto st = LOG_STATUS(Status_GCSError(std::string( + throw GCSException( "List objects failed on: " + uri.to_string() + " (" + - status.message() + ")"))); - return {st, nullopt}; + status.message() + ")"); } if (entries.size() >= static_cast(max_paths)) { @@ -549,7 +543,7 @@ tuple>> GCS::ls_with_sizes( } } - return {Status::Ok(), entries}; + return entries; } LsObjects GCS::ls_filtered_impl( diff --git a/tiledb/sm/filesystem/gcs.h b/tiledb/sm/filesystem/gcs.h index f3b9ab29435..a89c46a21c9 100644 --- a/tiledb/sm/filesystem/gcs.h +++ b/tiledb/sm/filesystem/gcs.h @@ -268,8 +268,7 @@ class GCS { * @param max_paths The maximum number of paths to be retrieved * @return A list of directory_entry objects */ - tuple>> - ls_with_sizes( + std::vector ls_with_sizes( const URI& uri, const std::string& delimiter = "/", int max_paths = -1) const; diff --git a/tiledb/sm/filesystem/mem_filesystem.cc b/tiledb/sm/filesystem/mem_filesystem.cc index 329f262ed16..2e5f97d4720 100644 --- a/tiledb/sm/filesystem/mem_filesystem.cc +++ b/tiledb/sm/filesystem/mem_filesystem.cc @@ -374,18 +374,16 @@ bool MemFilesystem::is_file(const std::string& path) const { Status MemFilesystem::ls( const std::string& path, std::vector* const paths) const { assert(paths); - auto&& [st, entries] = ls_with_sizes(URI("mem://" + path)); - RETURN_NOT_OK(st); - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(URI("mem://" + path))) { paths->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> -MemFilesystem::ls_with_sizes(const URI& path) const { +std::vector MemFilesystem::ls_with_sizes( + const URI& path) const { auto abspath = path.to_path(); std::vector tokens = tokenize(abspath); @@ -398,9 +396,7 @@ MemFilesystem::ls_with_sizes(const URI& path) const { dir = dir + token + "/"; if (cur->children_.count(token) != 1) { - auto st = LOG_STATUS(Status_MemFSError( - std::string("Unable to list on non-existent path ") + abspath)); - return {st, nullopt}; + throw MemFSException("Unable to list on non-existent path " + abspath); } cur = cur->children_[token].get(); @@ -410,11 +406,9 @@ MemFilesystem::ls_with_sizes(const URI& path) const { assert(cur_lock.owns_lock()); assert(cur_lock.mutex() == &cur->mutex_); auto&& [st, entries] = cur->ls(dir); - if (!st.ok()) { - return {st, nullopt}; - } + throw_if_not_ok(st); - return {Status::Ok(), entries}; + return *entries; } Status MemFilesystem::move( diff --git a/tiledb/sm/filesystem/mem_filesystem.h b/tiledb/sm/filesystem/mem_filesystem.h index dec513176f8..c03e1a2305d 100644 --- a/tiledb/sm/filesystem/mem_filesystem.h +++ b/tiledb/sm/filesystem/mem_filesystem.h @@ -37,6 +37,7 @@ #include #include +#include "tiledb/common/exception/exception.h" #include "tiledb/common/macros.h" #include "tiledb/common/status.h" @@ -52,6 +53,14 @@ namespace sm { class URI; +/** Class for MemFS status exceptions. */ +class MemFSException : public StatusException { + public: + explicit MemFSException(const std::string& msg) + : StatusException("MemFS", msg) { + } +}; + /** * The in-memory filesystem. * @@ -139,10 +148,9 @@ class MemFilesystem { * Lists files and files information under path * * @param path The parent path to list sub-paths - * @return Status + * @return A list of directory_entry objects */ - tuple>> - ls_with_sizes(const URI& path) const; + std::vector ls_with_sizes(const URI& path) const; /** * Move a given filesystem path. diff --git a/tiledb/sm/filesystem/posix.cc b/tiledb/sm/filesystem/posix.cc index 4b8011f2510..6e0b678941b 100644 --- a/tiledb/sm/filesystem/posix.cc +++ b/tiledb/sm/filesystem/posix.cc @@ -58,6 +58,69 @@ using filesystem::directory_entry; namespace tiledb::sm { +/** + * A class that wraps a POSIX DIR* and closes it when it goes out of scope. + */ +class PosixDIR { + public: + DISABLE_COPY_AND_COPY_ASSIGN(PosixDIR); + DISABLE_MOVE_AND_MOVE_ASSIGN(PosixDIR); + + /** Destructor. */ + ~PosixDIR() { + if (dir_.has_value() && dir_.value() != nullptr) { + // The only possible error is EBADF, which should not happen here. + [[maybe_unused]] auto status = closedir(dir_.value()); + assert(status == 0); + } + } + + /** Returns the wrapped directory pointer. */ + DIR* get() const { + return dir_.value(); + } + + /** Returns if the dir is empty. */ + bool empty() { + return !dir_.has_value(); + } + + /** + * Opens a directory and returns a UniqueDIR if it exists. + * + * @param path The path to the directory to open. + * @return A UniqueDIR if the directory was opened successfully, or nullopt if + * the directory does not exist. + * @throws IOError an error occurs while opening the directory. + */ + static PosixDIR open(const std::string& path) { + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) { + auto last_error = errno; + if (last_error == ENOENT) { + return {}; + } + throw IOError( + std::string("Cannot open directory; ") + strerror(last_error)); + } + return {dir}; + } + + private: + /** + * Internal constructor that gets called from the `open` method. + * + * @param dir The directory pointer to wrap. + */ + PosixDIR(optional dir = nullopt) + : dir_(dir) { + assert(dir != nullptr); + } + + /** The wrapped directory pointer. */ + optional dir_; +}; + Posix::Posix(const Config& config) { // Initialize member variables with posix config parameters. @@ -305,18 +368,17 @@ void Posix::write( } } -tuple>> Posix::ls_with_sizes( - const URI& uri) const { +std::vector Posix::ls_with_sizes(const URI& uri) const { std::string path = uri.to_path(); struct dirent* next_path = nullptr; - DIR* dir = opendir(path.c_str()); - if (dir == nullptr) { - return {Status::Ok(), std::vector{}}; + auto dir = PosixDIR::open(path); + if (dir.empty()) { + return {}; } std::vector entries; - while ((next_path = readdir(dir)) != nullptr) { + while ((next_path = readdir(dir.get())) != nullptr) { if (!strcmp(next_path->d_name, ".") || !strcmp(next_path->d_name, "..")) continue; std::string abspath = path + "/" + next_path->d_name; @@ -333,22 +395,12 @@ tuple>> Posix::ls_with_sizes( entries.emplace_back(abspath, size, false); } } - // close parent directory - if (closedir(dir) != 0) { - auto st = LOG_STATUS(Status_IOError( - std::string("Cannot close parent directory; ") + strerror(errno))); - return {st, nullopt}; - } - return {Status::Ok(), entries}; + return entries; } Status Posix::ls( const std::string& path, std::vector* paths) const { - auto&& [st, entries] = ls_with_sizes(URI(path)); - - RETURN_NOT_OK(st); - - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(URI(path))) { paths->emplace_back(fs.path().native()); } diff --git a/tiledb/sm/filesystem/posix.h b/tiledb/sm/filesystem/posix.h index a97e1a0f678..277735241ad 100644 --- a/tiledb/sm/filesystem/posix.h +++ b/tiledb/sm/filesystem/posix.h @@ -260,8 +260,8 @@ class Posix : public FilesystemBase { * @param uri The parent path to list sub-paths. * @return A list of directory_entry objects */ - tuple>> - ls_with_sizes(const URI& uri) const override; + std::vector ls_with_sizes( + const URI& uri) const override; /** * Lists objects and object information that start with `prefix`, invoking diff --git a/tiledb/sm/filesystem/s3.cc b/tiledb/sm/filesystem/s3.cc index 708716f8891..b7cc0f31d81 100644 --- a/tiledb/sm/filesystem/s3.cc +++ b/tiledb/sm/filesystem/s3.cc @@ -588,8 +588,7 @@ void S3::write( } } -tuple>> S3::ls_with_sizes( - const URI& parent) const { +std::vector S3::ls_with_sizes(const URI& parent) const { return ls_with_sizes(parent, "/", -1); } @@ -868,27 +867,22 @@ Status S3::ls( std::vector* paths, const std::string& delimiter, int max_paths) const { - auto&& [st, entries] = ls_with_sizes(prefix, delimiter, max_paths); - RETURN_NOT_OK(st); - - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(prefix, delimiter, max_paths)) { paths->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> S3::ls_with_sizes( +std::vector S3::ls_with_sizes( const URI& prefix, const std::string& delimiter, int max_paths) const { - RETURN_NOT_OK_TUPLE(init_client(), nullopt); + throw_if_not_ok(init_client()); const auto prefix_dir = prefix.add_trailing_slash(); auto prefix_str = prefix_dir.to_string(); if (!prefix_dir.is_s3()) { - auto st = LOG_STATUS( - Status_S3Error(std::string("URI is not an S3 URI: " + prefix_str))); - return {st, nullopt}; + throw S3Exception("URI is not an S3 URI: " + prefix_str); } Aws::Http::URI aws_uri = prefix_str.c_str(); @@ -914,11 +908,10 @@ tuple>> S3::ls_with_sizes( auto list_objects_outcome = client_->ListObjectsV2(list_objects_request); if (!list_objects_outcome.IsSuccess()) { - auto st = LOG_STATUS(Status_S3Error( + throw S3Exception( std::string("Error while listing with prefix '") + prefix_str + "' and delimiter '" + delimiter + "'" + - outcome_error_message(list_objects_outcome))); - return {st, nullopt}; + outcome_error_message(list_objects_outcome)); } for (const auto& object : list_objects_outcome.GetResult().GetContents()) { @@ -946,16 +939,15 @@ tuple>> S3::ls_with_sizes( Aws::String next_marker = list_objects_outcome.GetResult().GetNextContinuationToken(); if (next_marker.empty()) { - auto st = - LOG_STATUS(Status_S3Error("Failed to retrieve next continuation " - "token for ListObjectsV2 request.")); - return {st, nullopt}; + throw S3Exception( + "Failed to retrieve next continuation " + "token for ListObjectsV2 request."); } list_objects_request.SetContinuationToken(std::move(next_marker)); } } - return {Status::Ok(), entries}; + return entries; } Status S3::move_object(const URI& old_uri, const URI& new_uri) const { diff --git a/tiledb/sm/filesystem/s3.h b/tiledb/sm/filesystem/s3.h index 8c074479aee..fbb2d18c12a 100644 --- a/tiledb/sm/filesystem/s3.h +++ b/tiledb/sm/filesystem/s3.h @@ -791,8 +791,7 @@ class S3 : FilesystemBase { * @param parent The target directory to list. * @return All entries that are contained in the parent */ - tuple>> ls_with_sizes( - const URI& parent) const override; + std::vector ls_with_sizes(const URI& parent) const override; /** * Disconnects a S3 client. @@ -889,9 +888,9 @@ class S3 : FilesystemBase { * @param prefix The parent path to list sub-paths. * @param delimiter The uri is truncated to the first delimiter. * @param max_paths The maximum number of paths to be retrieved. - * @return Status tuple where second is a list of directory_entry objects. + * @return A list of directory_entry objects. */ - tuple>> ls_with_sizes( + std::vector ls_with_sizes( const URI& prefix, const std::string& delimiter, int max_paths) const; /** diff --git a/tiledb/sm/filesystem/vfs.cc b/tiledb/sm/filesystem/vfs.cc index 3b329573192..3b74fcd3a87 100644 --- a/tiledb/sm/filesystem/vfs.cc +++ b/tiledb/sm/filesystem/vfs.cc @@ -222,10 +222,8 @@ Status VFS::dir_size(const URI& dir_name, uint64_t* dir_size) const { do { auto uri = to_ls.front(); to_ls.pop_front(); - auto&& [st, children] = ls_with_sizes(uri); - RETURN_NOT_OK(st); - for (const auto& child : *children) { + for (const auto& child : ls_with_sizes(uri)) { if (!child.is_directory()) { *dir_size += child.file_size(); } else { @@ -703,90 +701,74 @@ Status VFS::is_bucket(const URI& uri, bool* is_bucket) const { Status VFS::ls(const URI& parent, std::vector* uris) const { stats_->add_counter("ls_num", 1); - auto&& [st, entries] = ls_with_sizes(parent); - RETURN_NOT_OK(st); - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(parent)) { uris->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> VFS::ls_with_sizes( - const URI& parent) const { +std::vector VFS::ls_with_sizes(const URI& parent) const { // Noop if `parent` is not a directory, do not error out. // For S3, GCS and Azure, `ls` on a non-directory will just // return an empty `uris` vector. if (!(parent.is_s3() || parent.is_gcs() || parent.is_azure())) { bool flag = false; - RETURN_NOT_OK_TUPLE(this->is_dir(parent, &flag), nullopt); + throw_if_not_ok(this->is_dir(parent, &flag)); if (!flag) { - return {Status::Ok(), std::vector()}; + return {}; } } - optional> entries; + std::vector entries; if (parent.is_file()) { #ifdef _WIN32 - Status st; - std::tie(st, entries) = win_.ls_with_sizes(parent); + entries = win_.ls_with_sizes(parent); #else - Status st; - std::tie(st, entries) = posix_.ls_with_sizes(parent); + entries = posix_.ls_with_sizes(parent); #endif - RETURN_NOT_OK_TUPLE(st, nullopt); } else if (parent.is_s3()) { #ifdef HAVE_S3 - Status st; - std::tie(st, entries) = s3().ls_with_sizes(parent); + entries = s3().ls_with_sizes(parent); #else - auto st = Status_VFSError("TileDB was built without S3 support"); + throw BuiltWithout("S3"); #endif - RETURN_NOT_OK_TUPLE(st, nullopt); } else if (parent.is_azure()) { #ifdef HAVE_AZURE - Status st; - std::tie(st, entries) = azure_.ls_with_sizes(parent); + entries = azure_.ls_with_sizes(parent); #else - auto st = Status_VFSError("TileDB was built without Azure support"); + throw BuiltWithout("Azure"); #endif - RETURN_NOT_OK_TUPLE(st, nullopt); } else if (parent.is_gcs()) { #ifdef HAVE_GCS - Status st; - std::tie(st, entries) = gcs_.ls_with_sizes(parent); + entries = gcs_.ls_with_sizes(parent); #else - auto st = Status_VFSError("TileDB was built without GCS support"); + throw BuiltWithout("GCS"); #endif - RETURN_NOT_OK_TUPLE(st, nullopt); } else if (parent.is_hdfs()) { #ifdef HAVE_HDFS - Status st; - std::tie(st, entries) = hdfs_->ls_with_sizes(parent); + auto&& [st, entries_optional] = hdfs_->ls_with_sizes(parent); + throw_if_not_ok(st); + entries = *std::move(entries_optional); #else - auto st = Status_VFSError("TileDB was built without HDFS support"); + throw BuiltWithout("HDFS"); #endif - RETURN_NOT_OK_TUPLE(st, nullopt); } else if (parent.is_memfs()) { - Status st; - std::tie(st, entries) = - memfs_.ls_with_sizes(URI("mem://" + parent.to_path())); - RETURN_NOT_OK_TUPLE(st, nullopt); + entries = memfs_.ls_with_sizes(URI("mem://" + parent.to_path())); } else { - auto st = Status_VFSError("Unsupported URI scheme: " + parent.to_string()); - return {st, std::nullopt}; + throw UnsupportedURI(parent.to_string()); } parallel_sort( compute_tp_, - entries->begin(), - entries->end(), + entries.begin(), + entries.end(), [](const directory_entry& l, const directory_entry& r) { return l.path().native() < r.path().native(); }); - return {Status::Ok(), entries}; + return entries; } Status VFS::move_file(const URI& old_uri, const URI& new_uri) { diff --git a/tiledb/sm/filesystem/vfs.h b/tiledb/sm/filesystem/vfs.h index a327b77bc01..b3af46c6050 100644 --- a/tiledb/sm/filesystem/vfs.h +++ b/tiledb/sm/filesystem/vfs.h @@ -506,8 +506,7 @@ class VFS : private VFSBase, protected S3_within_VFS { * @param parent The target directory to list. * @return All entries that are contained in the parent */ - tuple>> ls_with_sizes( - const URI& parent) const; + std::vector ls_with_sizes(const URI& parent) const; /** * Lists objects and object information that start with `prefix`, invoking diff --git a/tiledb/sm/filesystem/win.cc b/tiledb/sm/filesystem/win.cc index 265768f5fd6..f3ead73063a 100644 --- a/tiledb/sm/filesystem/win.cc +++ b/tiledb/sm/filesystem/win.cc @@ -45,6 +45,7 @@ #include #include +#include "filesystem_base.h" #include "path_win.h" #include "tiledb/common/common.h" #include "tiledb/common/filesystem/directory_entry.h" @@ -334,18 +335,14 @@ bool Win::is_file(const std::string& path) const { } Status Win::ls(const std::string& path, std::vector* paths) const { - auto&& [st, entries] = ls_with_sizes(URI(path)); - RETURN_NOT_OK(st); - - for (auto& fs : *entries) { + for (auto& fs : ls_with_sizes(URI(path))) { paths->emplace_back(fs.path().native()); } return Status::Ok(); } -tuple>> Win::ls_with_sizes( - const URI& uri) const { +std::vector Win::ls_with_sizes(const URI& uri) const { auto path = uri.to_path(); bool ends_with_slash = path.length() > 0 && path[path.length() - 1] == '\\'; const std::string glob = path + (ends_with_slash ? "*" : "\\*"); @@ -389,7 +386,7 @@ tuple>> Win::ls_with_sizes( } FindClose(find_h); - return {Status::Ok(), entries}; + return entries; err: auto gle = GetLastError(); @@ -401,11 +398,9 @@ tuple>> Win::ls_with_sizes( offender.append(" "); offender.append(what_call); } - std::string errmsg( + throw IOError( "Failed to list directory \"" + path + "\" " + get_last_error_msg(gle, offender.c_str())); - auto st = LOG_STATUS(Status_IOError(errmsg)); - return {st, nullopt}; } Status Win::move_path( diff --git a/tiledb/sm/filesystem/win.h b/tiledb/sm/filesystem/win.h index e5d3ded2206..a53c1c03836 100644 --- a/tiledb/sm/filesystem/win.h +++ b/tiledb/sm/filesystem/win.h @@ -161,8 +161,7 @@ class Win { * @param path The parent path to list sub-paths. * @return A list of directory_entry objects */ - tuple>> - ls_with_sizes(const URI& path) const; + std::vector ls_with_sizes(const URI& path) const; /** * Lists objects and object information that start with `prefix`, invoking