Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cruft: parallelize globs matching #13

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dpkg_popen.o: dpkg_popen.cc dpkg.h
cruftold: $(SHARED_OBJS) $(CRUFT_OBJS) mlocate.o dpkg_popen.o
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(CPPFLAGS) $(SHARED_OBJS) $(CRUFT_OBJS) mlocate.o dpkg_popen.o -lstdc++fs -pthread -o cruftold
cruft: $(SHARED_OBJS) $(CRUFT_OBJS) plocate.o dpkg_lib.o
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(CPPFLAGS) $(SHARED_OBJS) $(CRUFT_OBJS) plocate.o dpkg_lib.o $(LIBDPKG_LIBS) -pthread -o cruft
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(CPPFLAGS) $(SHARED_OBJS) $(CRUFT_OBJS) plocate.o dpkg_lib.o $(LIBDPKG_LIBS) -pthread -ltbb -o cruft

cpigsold: $(SHARED_OBJS) cpigs.o mlocate.o dpkg_popen.o
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(CPPFLAGS) $(SHARED_OBJS) cpigs.o mlocate.o dpkg_popen.o -lstdc++fs -o cpigsold
Expand Down
80 changes: 55 additions & 25 deletions cruft.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
#include <ctime>
#include <thread>

#ifdef __has_include
# if __has_include(<version>)
# include <version>
# endif
#endif

#if defined __cpp_lib_scoped_lock && defined __cpp_lib_parallel_algorithm
#include <execution>
#include <mutex>
#endif

#include <sys/stat.h>
#include <getopt.h>
#include <cstring>
Expand Down Expand Up @@ -132,6 +143,44 @@ static void one_file(const string& path)
cerr << "no matching package found\n";
}

static vector<string> filter_cruft(const vector<string>& extras, const vector<owner>& globs, const vector<owner>& explain)
{
vector<string> result;

#if defined __cpp_lib_scoped_lock && defined __cpp_lib_parallel_algorithm
mutex m;
#endif

for_each(
#if defined __cpp_lib_scoped_lock && defined __cpp_lib_parallel_algorithm
execution::par,
#endif
extras.begin(), extras.end(), [&](auto&& extra){
bool match = any_of(globs.begin(), globs.end(), [&](auto&& glob) {
return myglob(extra, glob.path);
});

if (!match) {
match = any_of(explain.begin(), explain.end(), [&](auto&& expl){
return extra == expl.path;
});
}

if (!match) {
#if defined __cpp_lib_scoped_lock && defined __cpp_lib_parallel_algorithm
scoped_lock<mutex> lock { m };
#endif
result.push_back(extra);
}
});

#if defined __cpp_lib_scoped_lock && defined __cpp_lib_parallel_algorithm
sort(execution::par, result.begin(), result.end());
#endif

return result;
}

// Processor-time reading (clock()) captured during static initialization.
// NOTE(review): presumably the baseline that elapsed() measures against — confirm.
static clock_t beg = clock();

static void elapsed(const string& action)
Expand Down Expand Up @@ -332,38 +381,19 @@ int main(int argc, char *argv[])
elapsed("missing2");
if (debug) cerr << "count stat():" << count_stat << '\n';

// match the globs against reduced database
vector<owner> globs;
read_filters(filter_dir, ruleset_file, packages, globs);
elapsed("read filters");
vector<string> cruft3;
for (const auto& cr: cruft) {
bool match=false;
for (const auto& gl: globs) {
match=myglob(cr, gl.path);
if (match) break;
}
if (!match) cruft3.push_back(cr);
}
elapsed("extra vs globs");
if (debug) cerr << cruft3.size() << " files in cruft3 database\n\n";

// match the dynamic "explain" filters
vector<owner> explain;
read_explain(explain_dir, packages, explain);
elapsed("read explain");
vector<string> cruft4;
for (const auto& cr: cruft3) {
bool match=false;
for (const auto& ex: explain) {
match=(cr==ex.path);
if (match) break;
}
if (!match) cruft4.push_back(cr);
}
elapsed("extra vs explain");
if (debug) cerr << explain.size() << " explain entries\n";

if (debug) cerr << cruft4.size() << " files in cruft4 database\n";
// match the globs and the "explain" entries against the reduced database
vector<string> cruft3 = filter_cruft(cruft, globs, explain);
elapsed("extra vs globs and explain");
if (debug) cerr << cruft3.size() << " files in cruft3 database\n";

//TODO: some smarter algo when run as non-root
// like checking the R/X bits of parent dir
Expand All @@ -374,7 +404,7 @@ int main(int argc, char *argv[])

//TODO: split by filesystem
cout << "---- unexplained: / ----\n";
for (const auto& cr: cruft4) {
for (const auto& cr: cruft3) {
cout << " " << cr;
auto bug = bugs.find(cr);
if (bug != bugs.end()) {
Expand Down
1 change: 1 addition & 0 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Build-Depends:
Build-Depends-Arch:
pkgconf,
libdpkg-dev,
libtbb-dev,
Standards-Version: 4.6.1.0
Homepage: https://github.com/a-detiste/cruft-ng/
Vcs-Git: https://github.com/a-detiste/cruft-ng.git
Expand Down