Skip to content

Commit

Permalink
Rewrite directory indexing algorithm to not use recursion
Browse files Browse the repository at this point in the history
Fixes stack overflow issues when indexing deeply nested directory structures (close issue #223)
  • Loading branch information
rikyoz committed Jul 7, 2024
1 parent 25838d3 commit e8bd812
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 26 deletions.
45 changes: 22 additions & 23 deletions src/internal/fsindexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "internal/fsitem.hpp"
#include "internal/fsutil.hpp"
#include "internal/genericinputitem.hpp"
#include "internal/stringutil.hpp"

#include <memory>
#include <vector>
Expand All @@ -43,44 +44,42 @@ FilesystemIndexer::FilesystemIndexer( FilesystemItem directory,
}

// NOTE: It indexes all the items whose metadata are needed in the archive to be created!
// NOLINTNEXTLINE(misc-no-recursion)
void FilesystemIndexer::listDirectoryItems( std::vector< std::unique_ptr< GenericInputItem > >& result,
bool recursive,
const fs::path& prefix ) {
fs::path path = mDirItem.filesystemPath();
if ( !prefix.empty() ) {
path = path / prefix;
}
bool recursive ) {
const bool includeRootPath = mFilter.empty() ||
!mDirItem.filesystemPath().has_parent_path() ||
mDirItem.inArchivePath().filename() != mDirItem.filesystemName();
const bool shouldIncludeMatchedItems = mPolicy == FilterPolicy::Include;

fs::path basePath = mDirItem.filesystemPath();
std::error_code error;
for ( const auto& currentEntry : fs::directory_iterator( path, error ) ) {
auto searchPath = includeRootPath ? mDirItem.inArchivePath() : fs::path{};
if ( !prefix.empty() ) {
searchPath = searchPath.empty() ? prefix : searchPath / prefix;
}
for ( auto iterator = fs::recursive_directory_iterator{ basePath, error };
iterator != fs::recursive_directory_iterator{};
++iterator ) {
const auto& currentEntry = *iterator;
const auto& itemPath = currentEntry.path();

const auto prefix = fs::relative( itemPath, basePath, error ).remove_filename();
const auto searchPath = includeRootPath ? mDirItem.inArchivePath() / prefix : prefix;

const auto itemIsDir = currentEntry.is_directory( error );
const auto itemName = path_to_tstring( itemPath.filename() );

const FilesystemItem currentItem{ currentEntry, searchPath, mSymlinkPolicy };
/* An item matches if:
* - Its name matches the wildcard pattern, and
* - Either is a file, or we are interested also to include folders in the index.
*
* Note: The boolean expression uses short-circuiting to optimize the evaluation. */
const bool itemMatches = ( !mOnlyFiles || !currentItem.isDir() ) &&
fsutil::wildcard_match( mFilter, currentItem.name() );
const bool itemMatches = ( !mOnlyFiles || !itemIsDir ) && fsutil::wildcard_match( mFilter, itemName );
if ( itemMatches == shouldIncludeMatchedItems ) {
result.emplace_back( std::make_unique< FilesystemItem >( currentItem ) );
result.emplace_back( std::make_unique< FilesystemItem >( currentEntry, searchPath, mSymlinkPolicy ) );
}

if ( currentItem.isDir() && ( recursive || ( itemMatches == shouldIncludeMatchedItems ) ) ) {
//currentItem is a directory, and we must list it only if:
// > indexing is done recursively
// > indexing is not recursive, but the directory name matched the filter.
const fs::path nextDir = prefix.empty() ?
currentItem.filesystemName() : prefix / currentItem.filesystemName();
listDirectoryItems( result, true, nextDir );
/* We don't need to recurse inside the current item if:
* - it is not a directory; or
* - we are not indexing recursively, and the directory's name doesn't match the wildcard filter. */
if ( !itemIsDir || ( !recursive && ( itemMatches != shouldIncludeMatchedItems ) ) ) {
iterator.disable_recursion_pending();
}
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/internal/fsindexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ class FilesystemIndexer final {
SymlinkPolicy symlinkPolicy = SymlinkPolicy::Follow,
bool onlyFiles = false );

void listDirectoryItems( std::vector< std::unique_ptr< GenericInputItem > >& result,
bool recursive,
const fs::path& prefix = fs::path{} );
void listDirectoryItems( std::vector< std::unique_ptr< GenericInputItem > >& result, bool recursive );

private:
FilesystemItem mDirItem;
Expand Down
154 changes: 154 additions & 0 deletions tests/src/test_bititemsvector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (only files)", "[bititems
TEST_CASE( "BitItemsVector: Indexing a valid directory (retaining folder structure)", "[bititemsvector]" ) {
static const TestDirectory testDir{ test_filesystem_dir };

/* NOTE: BitItemsVector uses the retainFolderStructure option only to decide if it must include
* the root folder in the in-archive paths; the actual folder structure is still kept here,
* and will be discarded by the extract callback (e.g., see FileExtractCallback). */
// TODO: Rationalize the handling of retainFolderStructure.
IndexingOptions options{};
options.retainFolderStructure = true;

Expand Down Expand Up @@ -1289,6 +1293,32 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (relative path)", "[bitit
"test_filesystem/folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/..",
{
"italy.svg",
"Lorem Ipsum.pdf",
"noext",
BIT7Z_NATIVE_STRING( "σαράντα δύο.txt" ),
"dot.folder",
"dot.folder/hello.json",
"empty",
"folder",
"folder/clouds.jpg",
"folder/subfolder",
"folder/subfolder2",
"folder/subfolder2/homework.doc",
"folder/subfolder2/The quick brown fox.pdf",
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/../dot.folder",
{
"dot.folder",
"dot.folder/hello.json"
}
},
TestInputPath{ "../test_filesystem/empty", { "empty" } },
TestInputPath{
"../test_filesystem/folder",
Expand All @@ -1302,6 +1332,18 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (relative path)", "[bitit
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/subfolder2/../",
{
"folder",
"folder/clouds.jpg",
"folder/subfolder",
"folder/subfolder2",
"folder/subfolder2/homework.doc",
"folder/subfolder2/The quick brown fox.pdf",
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"../test_filesystem/folder/subfolder2",
{
Expand Down Expand Up @@ -1350,6 +1392,32 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (relative path, non-recur
"test_filesystem/folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/..",
{
"italy.svg",
"Lorem Ipsum.pdf",
"noext",
BIT7Z_NATIVE_STRING( "σαράντα δύο.txt" ),
"dot.folder",
"dot.folder/hello.json",
"empty",
"folder",
"folder/clouds.jpg",
"folder/subfolder",
"folder/subfolder2",
"folder/subfolder2/homework.doc",
"folder/subfolder2/The quick brown fox.pdf",
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/../dot.folder",
{
"dot.folder",
"dot.folder/hello.json"
}
},
TestInputPath{ "../test_filesystem/empty", { "empty" } },
TestInputPath{
"../test_filesystem/folder",
Expand All @@ -1363,6 +1431,18 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (relative path, non-recur
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/subfolder2/../",
{
"folder",
"folder/clouds.jpg",
"folder/subfolder",
"folder/subfolder2",
"folder/subfolder2/homework.doc",
"folder/subfolder2/The quick brown fox.pdf",
"folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"../test_filesystem/folder/subfolder2",
{
Expand Down Expand Up @@ -1409,6 +1489,46 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (custom path mapping)", "
"custom_folder/folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/..",
{
"custom_folder",
"custom_folder/dot.folder",
"custom_folder/dot.folder/hello.json",
"custom_folder/italy.svg",
"custom_folder/Lorem Ipsum.pdf",
"custom_folder/noext",
BIT7Z_NATIVE_STRING( "custom_folder/σαράντα δύο.txt" ),
"custom_folder/empty",
"custom_folder/folder",
"custom_folder/folder/clouds.jpg",
"custom_folder/folder/subfolder",
"custom_folder/folder/subfolder2",
"custom_folder/folder/subfolder2/homework.doc",
"custom_folder/folder/subfolder2/The quick brown fox.pdf",
"custom_folder/folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/../",
{
"custom_folder",
"custom_folder/dot.folder",
"custom_folder/dot.folder/hello.json",
"custom_folder/italy.svg",
"custom_folder/Lorem Ipsum.pdf",
"custom_folder/noext",
BIT7Z_NATIVE_STRING( "custom_folder/σαράντα δύο.txt" ),
"custom_folder/empty",
"custom_folder/folder",
"custom_folder/folder/clouds.jpg",
"custom_folder/folder/subfolder",
"custom_folder/folder/subfolder2",
"custom_folder/folder/subfolder2/homework.doc",
"custom_folder/folder/subfolder2/The quick brown fox.pdf",
"custom_folder/folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{ "empty", { "custom_folder" } },
TestInputPath{ "./empty", { "custom_folder" } },
TestInputPath{
Expand All @@ -1435,6 +1555,30 @@ TEST_CASE( "BitItemsVector: Indexing a valid directory (custom path mapping)", "
"custom_folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"./folder/subfolder2/..",
{
"custom_folder",
"custom_folder/clouds.jpg",
"custom_folder/subfolder",
"custom_folder/subfolder2",
"custom_folder/subfolder2/homework.doc",
"custom_folder/subfolder2/The quick brown fox.pdf",
"custom_folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"./folder/subfolder2/../",
{
"custom_folder",
"custom_folder/clouds.jpg",
"custom_folder/subfolder",
"custom_folder/subfolder2",
"custom_folder/subfolder2/homework.doc",
"custom_folder/subfolder2/The quick brown fox.pdf",
"custom_folder/subfolder2/frequency.xlsx"
}
},
TestInputPath{
"folder/subfolder2",
{
Expand Down Expand Up @@ -1823,9 +1967,11 @@ TEST_CASE( "BitItemsVector: Indexing a directory as a file should fail", "[bitit
BitItemsVector itemsVector;
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "." ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "dot.folder" ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "dot.folder/../" ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "empty" ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "folder" ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "folder/subfolder" ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "folder/subfolder/../.." ) ) );
REQUIRE_THROWS( itemsVector.indexFile( BIT7Z_STRING( "folder/subfolder2" ) ) );
}

Expand All @@ -1849,10 +1995,14 @@ TEST_CASE( "BitItemsVector: Indexing a single file", "[bititemsvector]" ) {
const auto testInput =
GENERATE(
TestFile{ "Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "empty/../Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "folder/subfolder2/../../Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "italy.svg", "italy.svg" },
TestFile{ "noext", "noext" },
TestFile{ BIT7Z_NATIVE_STRING( "σαράντα δύο.txt" ), BIT7Z_NATIVE_STRING( "σαράντα δύο.txt" ) },
TestFile{ "folder/clouds.jpg", "folder/clouds.jpg" },
TestFile{ "dot.folder/../folder/clouds.jpg", "clouds.jpg" },
TestFile{ "folder/subfolder2/../clouds.jpg", "clouds.jpg" },
TestFile{ "folder/subfolder2/homework.doc", "folder/subfolder2/homework.doc" },
TestFile{ "folder/subfolder2/The quick brown fox.pdf", "folder/subfolder2/The quick brown fox.pdf" },
TestFile{ "folder/subfolder2/frequency.xlsx", "folder/subfolder2/frequency.xlsx" },
Expand All @@ -1862,9 +2012,13 @@ TEST_CASE( "BitItemsVector: Indexing a single file", "[bititemsvector]" ) {
const auto testInput =
GENERATE(
TestFile{ "Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "empty/../Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "folder/subfolder2/../../Lorem Ipsum.pdf", "Lorem Ipsum.pdf" },
TestFile{ "italy.svg", "italy.svg" },
TestFile{ "noext", "noext" },
TestFile{ "folder/clouds.jpg", "folder/clouds.jpg" },
TestFile{ "dot.folder/../folder/clouds.jpg", "clouds.jpg" },
TestFile{ "folder/subfolder2/../clouds.jpg", "clouds.jpg" },
TestFile{ "folder/subfolder2/homework.doc", "folder/subfolder2/homework.doc" },
TestFile{ "folder/subfolder2/The quick brown fox.pdf", "folder/subfolder2/The quick brown fox.pdf" },
TestFile{ "folder/subfolder2/frequency.xlsx", "folder/subfolder2/frequency.xlsx" },
Expand Down

0 comments on commit e8bd812

Please sign in to comment.