From 8c6bda4b6330aeac6d566c62e690f0a7198e5522 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Thu, 5 Oct 2023 15:30:38 -0400 Subject: [PATCH] Move callback into sm::VFS + Add unit_vfs --- test/src/unit-capi-vfs.cc | 2 +- test/src/unit-cppapi-vfs.cc | 6 +- tiledb/api/c_api/vfs/vfs_api.cc | 17 +-- tiledb/api/c_api/vfs/vfs_api_experimental.h | 3 +- tiledb/api/c_api/vfs/vfs_api_internal.h | 6 +- tiledb/sm/cpp_api/vfs_experimental.h | 74 ++----------- tiledb/sm/filesystem/test/CMakeLists.txt | 7 +- tiledb/sm/filesystem/test/unit_vfs.cc | 114 ++++++++++++++++++++ tiledb/sm/filesystem/vfs.cc | 17 ++- tiledb/sm/filesystem/vfs.h | 84 ++++++++++++++- 10 files changed, 242 insertions(+), 88 deletions(-) create mode 100644 tiledb/sm/filesystem/test/unit_vfs.cc diff --git a/test/src/unit-capi-vfs.cc b/test/src/unit-capi-vfs.cc index 685beb445ad..f9b3d530c36 100644 --- a/test/src/unit-capi-vfs.cc +++ b/test/src/unit-capi-vfs.cc @@ -989,7 +989,7 @@ TEST_CASE_METHOD( return 0; } ls_data->path_data_[ls_data->path_pos_] = (char*)malloc(path_len + 1); - strcpy(ls_data->path_data_[ls_data->path_pos_], path); + strncpy(ls_data->path_data_[ls_data->path_pos_], path, path_len + 1); ls_data->object_sizes_[ls_data->path_pos_] = object_size; ls_data->path_pos_++; } diff --git a/test/src/unit-cppapi-vfs.cc b/test/src/unit-cppapi-vfs.cc index 53cf9890343..2b487899f68 100644 --- a/test/src/unit-cppapi-vfs.cc +++ b/test/src/unit-cppapi-vfs.cc @@ -571,8 +571,10 @@ TEST_CASE_METHOD( } CHECKED_ELSE(expected_results_.empty()) { SECTION("Throwing filter with N objects should throw") { - CHECK_THROWS(tiledb::VFSExperimental::ls_recursive( - ctx_, vfs_, temp_dir_.to_string(), filter)); + CHECK_THROWS_AS( + tiledb::VFSExperimental::ls_recursive( + ctx_, vfs_, temp_dir_.to_string(), filter), + std::runtime_error); } } } diff --git a/tiledb/api/c_api/vfs/vfs_api.cc b/tiledb/api/c_api/vfs/vfs_api.cc index 5f2a442b036..7690a404ca5 100644 --- a/tiledb/api/c_api/vfs/vfs_api.cc +++ b/tiledb/api/c_api/vfs/vfs_api.cc @@ -300,7 +300,7 @@ capi_return_t tiledb_vfs_ls( capi_return_t tiledb_vfs_ls_recursive( tiledb_vfs_t* vfs, const char* path, - int32_t (*callback)(const char*, size_t, uint64_t, void*), + tiledb::sm::VFS::LsCallback callback, void* data) { ensure_vfs_is_valid(vfs); if (path == nullptr) { @@ -310,20 +310,7 @@ capi_return_t tiledb_vfs_ls_recursive( "Invalid TileDB object: Callback function is null."); } ensure_output_pointer_is_valid(data); - auto children = vfs->ls_recursive(tiledb::sm::URI(path)); - - int rc = 1; - for (const auto& entry : children) { - sm::URI uri{entry.path().native()}; - rc = callback(uri.c_str(), uri.to_string().size(), entry.file_size(), data); - if (rc != 1) { - break; - } - } - - if (rc == -1) { - return TILEDB_ERR; - } + auto children = vfs->ls_recursive(tiledb::sm::URI(path), callback, data); return TILEDB_OK; } diff --git a/tiledb/api/c_api/vfs/vfs_api_experimental.h b/tiledb/api/c_api/vfs/vfs_api_experimental.h index d62b7735552..3fca35b5e64 100644 --- a/tiledb/api/c_api/vfs/vfs_api_experimental.h +++ b/tiledb/api/c_api/vfs/vfs_api_experimental.h @@ -36,6 +36,7 @@ #include "tiledb/api/c_api/api_external_common.h" #include "tiledb/api/c_api/context/context_api_external.h" #include "tiledb/api/c_api/vfs/vfs_api_external.h" +#include "tiledb/sm/filesystem/vfs.h" #ifdef __cplusplus extern "C" { @@ -80,7 +81,7 @@ TILEDB_EXPORT capi_return_t tiledb_vfs_ls_recursive( tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* path, - int32_t (*callback)(const char*, size_t, uint64_t, void*), + tiledb::sm::VFS::LsCallback callback, void* data) TILEDB_NOEXCEPT; #ifdef __cplusplus diff --git a/tiledb/api/c_api/vfs/vfs_api_internal.h b/tiledb/api/c_api/vfs/vfs_api_internal.h index 2b084e512e7..7e9e2d42348 100644 --- a/tiledb/api/c_api/vfs/vfs_api_internal.h +++ b/tiledb/api/c_api/vfs/vfs_api_internal.h @@ -144,8 +144,10 @@ struct tiledb_vfs_handle_t } std::vector ls_recursive( - const tiledb::sm::URI& parent) const { - return vfs_.ls_recursive(parent); + const tiledb::sm::URI& parent, + tiledb::sm::VFS::LsCallback cb, + void* data) const { + return vfs_.ls_recursive(parent, cb, data); } Status touch(const tiledb::sm::URI& uri) const { diff --git a/tiledb/sm/cpp_api/vfs_experimental.h b/tiledb/sm/cpp_api/vfs_experimental.h index 40b6fd5127c..05a4c09b4f6 100644 --- a/tiledb/sm/cpp_api/vfs_experimental.h +++ b/tiledb/sm/cpp_api/vfs_experimental.h @@ -36,36 +36,12 @@ #include #include #include "context.h" +#include "tiledb/sm/filesystem/vfs.h" #include "tiledb_experimental.h" #include "vfs.h" namespace tiledb { class VFSExperimental { - /* ********************************* */ - /* TYPE DEFINITIONS */ - /* ********************************* */ - - /** - * Typedef for ls inclusion predicate function used to check if a single - * result should be included in the final results returned from ls or - * ls_recursive functions. - * - * @param path The path of a visited object for the relative filesystem. - * @return True if the result should be included, else false. - * @sa ls_include - */ - typedef std::function LsInclude; - - /** - * Struct to store recursive ls results data. - * 'object_paths_' Stores all paths collected as a vector of strings. - * `file_sizes` stores all file sizes as a vector of uint64_t. - */ - struct LsRecursiveData { - std::vector object_paths_; - std::vector object_sizes_; - }; - public: /* ********************************* */ /* PUBLIC STATIC METHODS */ @@ -77,66 +53,36 @@ class VFSExperimental { * be included in the results. By default all entries are included using the * ls_include predicate, which simply returns true for all results. * + * @param ctx The TileDB context. + * @param vfs The VFS instance to use. * @param uri The base URI to list results recursively. - * @param callback The callback predicate to invoke on each entry collected. + * @param predicate The inclusion predicate to invoke on each entry collected. * @return Vector of pairs storing the path and object size of each result. * @sa ls_recursive_gather - * @sa LsInclude + * @sa ls_include */ static std::vector> ls_recursive( const Context& ctx, const VFS& vfs, const std::string& uri, - const LsInclude& callback = ls_include) { - LsRecursiveData ls_data; + const tiledb::sm::VFS::LsInclude& predicate = + tiledb::sm::VFS::ls_include) { + tiledb::sm::VFS::LsRecursiveData ls_data; ctx.handle_error(tiledb_vfs_ls_recursive( ctx.ptr().get(), vfs.ptr().get(), uri.c_str(), - ls_recursive_gather, + tiledb::sm::VFS::ls_recursive_gather, &ls_data)); std::vector> results; for (size_t i = 0; i < ls_data.object_paths_.size(); i++) { - if (callback(ls_data.object_paths_[i])) { + if (predicate(ls_data.object_paths_[i])) { results.emplace_back( ls_data.object_paths_[i], ls_data.object_sizes_[i]); } } return results; } - - private: - /* ********************************* */ - /* PRIVATE STATIC METHODS */ - /* ********************************* */ - - /** - * Callback function to be used when invoking the C TileDB function - * for recursive ls. The callback will cast 'data' to LsRecursiveData struct. - * The struct stores a vector of strings for each path collected, and a uint64 - * vector of file sizes for the objects at each path. - * - * @param path The path of a visited TileDB object - * @param data Cast to LsRecursiveData struct to store paths and offsets. - * @return If `1` then the walk should continue to the next object. - */ - static int ls_recursive_gather( - const char* path, size_t path_length, uint64_t file_size, void* data) { - auto ls_data = static_cast(data); - ls_data->object_paths_.emplace_back(path, path_length); - ls_data->object_sizes_.push_back(file_size); - return 1; - } - - /** - * Default inclusion predicate for ls functions. Optionally, a user can - * provide their own inclusion predicate to filter results. - * - * @return True if the result should be included, else false. - */ - static bool ls_include(const std::string_view&) { - return true; - } }; } // namespace tiledb diff --git a/tiledb/sm/filesystem/test/CMakeLists.txt b/tiledb/sm/filesystem/test/CMakeLists.txt index 09f962de380..73fd80d654d 100644 --- a/tiledb/sm/filesystem/test/CMakeLists.txt +++ b/tiledb/sm/filesystem/test/CMakeLists.txt @@ -25,7 +25,12 @@ # include(unit_test) -commence(unit_test vfs) +commence(unit_test uri) this_target_object_libraries(vfs) this_target_sources(main.cc unit_uri.cc) conclude(unit_test) + +commence(unit_test vfs) + this_target_object_libraries(vfs) + this_target_sources(main.cc unit_vfs.cc) +conclude(unit_test) diff --git a/tiledb/sm/filesystem/test/unit_vfs.cc b/tiledb/sm/filesystem/test/unit_vfs.cc new file mode 100644 index 00000000000..974849a3d87 --- /dev/null +++ b/tiledb/sm/filesystem/test/unit_vfs.cc @@ -0,0 +1,114 @@ +/** + * @file unit_vfs.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the `VFS` class. + */ + +#include +#include "tiledb/sm/filesystem/vfs.h" + +struct VfsFixture { + VfsFixture() + : stats_("unit_vfs") + , compute_(4) + , io_(4) + , vfs_(&stats_, &compute_, &io_, tiledb::sm::Config()) { + } + + void create_objects( + const tiledb::sm::URI& dir, size_t count, const std::string& prefix) { + for (size_t i = 0; i < count; i++) { + auto uri = dir.join_path(prefix + std::to_string(i)); + vfs_.touch(uri).ok(); + std::string data(i * 10, 'a'); + vfs_.write(uri, data.data(), data.size()).ok(); + expected_results_.emplace_back(uri.to_path(), data.size(), false); + } + } + + protected: + // A dummy `Stats` instance for testing internal VFS functions. + tiledb::sm::stats::Stats stats_; + + ThreadPool compute_, io_; + tiledb::sm::VFS vfs_; + + std::vector expected_results_; +}; + +TEST_CASE_METHOD(VfsFixture, "VFS: Default arguments ls_recursive", "[vfs]") { + tiledb::sm::URI temp_dir("vfs_default_args"); + vfs_.create_dir(temp_dir).ok(); + SECTION("Empty directory") { + auto results = vfs_.ls_recursive(temp_dir); + CHECK(results.empty()); + } + + SECTION("Single file") { + create_objects(temp_dir, 1, "file"); + } + + SECTION("Multiple files in single directory") { + create_objects(temp_dir, 10, "file"); + auto results = vfs_.ls_recursive(temp_dir); + } + + SECTION("Multiple files in nested directories") { + create_objects(temp_dir, 10, "file"); + + auto subdir = temp_dir.join_path("subdir"); + vfs_.create_dir(subdir).ok(); + create_objects(subdir, 10, "file"); + } + + auto results = vfs_.ls_recursive(temp_dir); + REQUIRE(results.size() == expected_results_.size()); + for (size_t i = 0; i < expected_results_.size(); i++) { + CHECK(results[i].path().native() == expected_results_[i].path().native()); + CHECK(results[i].file_size() == expected_results_[i].file_size()); + } + vfs_.remove_dir(temp_dir).ok(); +} + +TEST_CASE_METHOD(VfsFixture, "VFS: Throwing callback ls_recursive", "[vfs]") { + auto cb = [](const char*, size_t, uint64_t, void*) -> int32_t { + throw std::logic_error("Throwing callback"); + }; + tiledb::sm::URI temp_dir("vfs_throwing_callback"); + int data = 1; + SECTION("Throwing callback with 0 objects should not throw") { + CHECK_NOTHROW(vfs_.ls_recursive(temp_dir, cb, &data)); + } + SECTION("Throwing callback with N objects should throw") { + vfs_.create_dir(temp_dir).ok(); + vfs_.touch(temp_dir.join_path("file")).ok(); + CHECK_THROWS_AS(vfs_.ls_recursive(temp_dir, cb, &data), std::logic_error); + vfs_.remove_dir(temp_dir).ok(); + } +} diff --git a/tiledb/sm/filesystem/vfs.cc b/tiledb/sm/filesystem/vfs.cc index 2b86e49d72e..4f6f7255dcc 100644 --- a/tiledb/sm/filesystem/vfs.cc +++ b/tiledb/sm/filesystem/vfs.cc @@ -53,6 +53,8 @@ using tiledb::common::filesystem::directory_entry; namespace tiledb::sm { +VFS::LsRecursiveData VFS::ls_recursive_data_; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -740,7 +742,8 @@ Status VFS::ls(const URI& parent, std::vector* uris) const { return Status::Ok(); } -std::vector VFS::ls_recursive(const URI& parent) const { +std::vector VFS::ls_recursive( + const URI& parent, LsCallback cb, void* data) const { Status st; optional> entries; std::vector results; @@ -798,6 +801,18 @@ std::vector VFS::ls_recursive(const URI& parent) const { } } + int rc = 1; + for (const auto& result : results) { + URI uri{result.path().native()}; + rc = cb(uri.c_str(), uri.to_string().size(), result.file_size(), data); + if (rc != 1) { + break; + } + } + if (rc == -1) { + throw VFSException("Error in user callback"); + } + return results; } diff --git a/tiledb/sm/filesystem/vfs.h b/tiledb/sm/filesystem/vfs.h index 54e8e6d778f..ff2c2b3dbca 100644 --- a/tiledb/sm/filesystem/vfs.h +++ b/tiledb/sm/filesystem/vfs.h @@ -75,6 +75,10 @@ using namespace tiledb::common; +namespace tiledb { +class VFSExperimental; +} + namespace tiledb::sm { class Tile; @@ -149,6 +153,8 @@ struct S3_within_VFS { * function execution to the appropriate backend based on the input URI. */ class VFS : private VFSBase, S3_within_VFS { + friend class tiledb::VFSExperimental; + public: /* ********************************* */ /* TYPE DEFINITIONS */ @@ -187,6 +193,41 @@ class VFS : private VFSBase, S3_within_VFS { Status status; }; + /** + * Typedef for ls inclusion predicate function used to check if a single + * result should be included in the final results returned from ls_recursive. + * + * @param path The path of a visited object for the relative filesystem. + * @return True if the result should be included, else false. + * @sa ls_include + */ + typedef std::function LsInclude; + + /** + * Typedef for ls_recursive callback function used to gather results. + * + * @param path The path of a visited object for the relative filesystem. + * @param path_len The length of the path. + * @param object_size The size of the object at the current path. + * @param data Data passed to the callback used to store collected results. + * @sa ls_recursive_gather + */ + typedef int32_t (*LsCallback)( + const char* path, size_t path_len, uint64_t object_size, void* data); + + /** + * Struct to store recursive ls results data. + * `object_paths_` Stores all paths collected as a vector of strings. + * `object_sizes_` stores all file sizes as a vector of uint64_t. + */ + struct LsRecursiveData { + std::vector object_paths_; + std::vector object_sizes_; + }; + + /// Used as default argument for internal recursive ls. + static LsRecursiveData ls_recursive_data_; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -391,10 +432,14 @@ class VFS : private VFSBase, S3_within_VFS { * Recursively lists objects and object information that start with `prefix`. * * @param parent The parent path to list sub-objects recursively. + * @param cb The callback to invoke on each object collected. + * @param data Data passed to the callback used to store collected results. * @return Vector of directory_entry results from recursive ls on 'parent'. */ std::vector ls_recursive( - const URI& parent) const; + const URI& parent, + LsCallback cb = ls_recursive_gather, + void* data = &ls_recursive_data_) const; /** * Retrieves all the entries contained in the parent. @@ -561,6 +606,43 @@ class VFS : private VFSBase, S3_within_VFS { } private: + /* ********************************* */ + /* PRIVATE STATIC METHODS */ + /* ********************************* */ + + /** + * Default callback function to be used when invoking recursive ls. The + * callback will cast 'data' to LsRecursiveData struct, which stores a + * vector of strings for each path collected and a uint64 vector of file + * sizes for the objects at each path. + * + * @param path The path of a visited object for the relative filesystem. + * @param path_length The length of the path string. + * @param file_size The size of the object at the path. + * @param data Cast to LsRecursiveData struct to store paths and offsets. + * @return `1` if the walk should continue to the next object, `0` if the walk + * should stop, and `-1` on error. + * @sa LsCallback + * @sa LsRecursiveData + */ + static int ls_recursive_gather( + const char* path, size_t path_length, uint64_t file_size, void* data) { + auto ls_data = static_cast(data); + ls_data->object_paths_.emplace_back(path, path_length); + ls_data->object_sizes_.push_back(file_size); + return 1; + } + + /** + * Default inclusion predicate for ls recursive which includes all results. + * Optionally a custom inclusion predicate can be used to filter results. + * + * @return True if the result should be included, else false. + */ + static bool ls_include(const std::string_view&) { + return true; + } + /* ********************************* */ /* PRIVATE DATATYPES */ /* ********************************* */