From 27866173e0f48f18a9cd8a34fbefb85b7a06ed3c Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Mon, 16 Sep 2024 20:30:44 -0700 Subject: [PATCH] Update run-clang-tidy-cached.cc from upstream. Get a fresh version of the clang-tidy runner from upstream. --- .github/bin/run-clang-tidy-cached.cc | 260 +++++++++++++++++++-------- 1 file changed, 184 insertions(+), 76 deletions(-) diff --git a/.github/bin/run-clang-tidy-cached.cc b/.github/bin/run-clang-tidy-cached.cc index 0318e0e08..8f4e7d6d8 100755 --- a/.github/bin/run-clang-tidy-cached.cc +++ b/.github/bin/run-clang-tidy-cached.cc @@ -15,6 +15,9 @@ B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@"; // See the License for the specific language governing permissions and // limitations under the License. +// Location: https://github.com/hzeller/dev-tools +// Last update: 2024-09-16 cf86e3b6c51dbf620a08fb2a59f6ea71b6bebe3d + // Script to run clang-tidy on files in a bazel project while caching the // results as clang-tidy can be pretty slow. The clang-tidy output messages // are content-addressed in a hash(cc-file-content) cache file. @@ -25,11 +28,11 @@ B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@"; // to clang-tidy as-is. Typical use could be for instance // run-clang-tidy-cached.cc --checks="-*,modernize-use-override" --fix // -// Note: usefule environment variables to configure are +// Note: useful environment variables to configure are // CLANG_TIDY = binary to run; default would just be clang-tidy. // CACHE_DIR = where to put the cached content; default ~/.cache -// This file shall be self-contained, so we don't use any re2 or absl niceties +// This file shall be c++17 self-contained; not using any re2 or absl niceties. #include #include @@ -55,24 +58,72 @@ B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@"; #include #include -// Some configuration for this project. -static const std::string kProjectCachePrefix = "verible_"; -static constexpr std::string_view kWorkspaceFile = "MODULE.bazel"; +// These are the configuration defaults. +// Dont' change anything here, override for your project below in kConfig. +struct ConfigValues { + // Prefix for the cache to write to. If empty, use name of toplevel directory. + std::string_view cache_prefix; + + // Directory to start recurse for sources createing a file list. + // Some projects need e.g. "src/". + std::string_view start_dir; + + // Regular expression matching files that should be included from file list. + std::string_view file_include_re = ".*"; + + // Regular expxression matching files that should be excluded from file list. + // If overriding, make sure to include at least ".git". + std::string_view file_exclude_re = ".git/|.github/"; + + // A file in the toplevel of the project that should exist, typically + // something used to set up the build environment, such as MODULE.bazel, + // CMakeLists.txt or similar. + // It is used two-fold: + // - As validation if this script is started in the correct directory. + // - To take changes there into account as that build file might provide + // additional dependencies which might change clang-tidy outcome. + // (if revisit_brokenfiles_if_build_config_newer is on). + // (Default configuration: just .clang-tidy as this should always be there) + std::string_view toplevel_build_file = ".clang-tidy"; + + // If the compilation DB or toplevel_build_file changed in timestamp, it + // might be worthwhile revisiting sources that previously had issues. + // This flag enables that. + // + // It is good to set once the project is 'clean' and there are only a + // few problematic sources to begin with, otherwise every update of the + // compilation DB will re-trigger revisiting all of them. + bool revisit_brokenfiles_if_build_config_newer = true; +}; -static constexpr std::string_view kSearchDir = "."; -static const std::string kFileExcludeRe = - ".git/|.github/" // stuff we're not interested in - "|vscode/" // some generated code in there - "|tree_operations_test" // very slow - "|symbol_table_test"; +// --------------[ Project-specific configuration ]-------------- +static constexpr ConfigValues kConfig = { + .cache_prefix = "verible_", + .file_exclude_re = ".git/|.github/" // stuff we're not interested in + "|vscode/" // some generated code in there + "|tree_operations_test" // very slow + "|symbol_table_test", + .toplevel_build_file = "MODULE.bazel", +}; +// -------------------------------------------------------------- +// Files to be considered. +inline bool ConsiderExtension(const std::string &extension) { + return extension == ".cc" || extension == ".cpp" || extension == ".h"; +} + +// Configuration of clang-tidy itself. static constexpr std::string_view kClangConfigFile = ".clang-tidy"; -static constexpr std::string_view kExtraArgs[] = {"-Wno-unknown-pragmas"}; +static constexpr std::string_view kExtraArgs[] = { + "-Wno-unknown-pragmas", "-Wno-unknown-warning-option"}; + +namespace { namespace fs = std::filesystem; using file_time = std::filesystem::file_time_type; using ReIt = std::sregex_iterator; -using filepath_contenthash_t = std::pair; +using hash_t = uint64_t; +using filepath_contenthash_t = std::pair; // Some helpers std::string GetContent(FILE *f) { @@ -100,9 +151,8 @@ std::string GetCommandOutput(const std::string &prog) { return GetContent(popen(prog.c_str(), "r")); // NOLINT } -using hash_t = uint64_t; -hash_t hash(const std::string &s) { return std::hash()(s); } -std::string toHex(uint64_t value, int show_lower_nibbles = 16) { +hash_t hashContent(const std::string &s) { return std::hash()(s); } +std::string ToHex(uint64_t value, int show_lower_nibbles = 16) { char out[16 + 1]; snprintf(out, sizeof(out), "%016" PRIx64, value); return out + (16 - show_lower_nibbles); @@ -120,7 +170,7 @@ class ContentAddressedStore { fs::path PathFor(const filepath_contenthash_t &c) const { // Name is human readable, the content hash makes it unique. std::string name_with_contenthash = c.first.filename().string(); - name_with_contenthash.append("-").append(toHex(c.second)); + name_with_contenthash.append("-").append(ToHex(c.second)); return content_dir / name_with_contenthash; } @@ -133,11 +183,15 @@ class ContentAddressedStore { bool NeedsRefresh(const filepath_contenthash_t &c, file_time min_freshness) const { const fs::path content_hash_file = PathFor(c); - // Recreate if we don't have it yet or if it contains messages but is - // older than WORKSPACE or compilation db. Maybe something got fixed. - return (!fs::exists(content_hash_file) || - (fs::file_size(content_hash_file) > 0 && - fs::last_write_time(content_hash_file) < min_freshness)); + if (!fs::exists(content_hash_file)) return true; + + // If file exists but is broken (i.e. has a non-zero size with messages), + // consider recreating if if older than compilation db. + const bool timestamp_trigger = + kConfig.revisit_brokenfiles_if_build_config_newer && + (fs::file_size(content_hash_file) > 0 && + fs::last_write_time(content_hash_file) < min_freshness); + return timestamp_trigger; } private: @@ -146,10 +200,10 @@ class ContentAddressedStore { class ClangTidyRunner { public: - ClangTidyRunner(int argc, char **argv) + ClangTidyRunner(const std::string &cache_prefix, int argc, char **argv) : clang_tidy_(getenv("CLANG_TIDY") ?: "clang-tidy"), clang_tidy_args_(AssembleArgs(argc, argv)) { - project_cache_dir_ = AssembleProjectCacheDir(); + project_cache_dir_ = AssembleProjectCacheDir(cache_prefix); } const fs::path &project_cache_dir() const { return project_cache_dir_; } @@ -230,27 +284,27 @@ class ClangTidyRunner { return result; } - fs::path AssembleProjectCacheDir() const { + fs::path AssembleProjectCacheDir(const std::string &cache_prefix) const { const fs::path cache_dir = GetCacheBaseDir() / "clang-tidy"; // Use major version as part of name of our configuration specific dir. auto version = GetCommandOutput(clang_tidy_ + " --version"); std::smatch version_match; const std::string major_version = - std::regex_search(version, version_match, - std::regex{"version ([0-9]+)"}) - ? version_match[1].str() - : "UNKNOWN"; + std::regex_search(version, version_match, std::regex{"version ([0-9]+)"}) + ? version_match[1].str() + : "UNKNOWN"; + // Make sure directory filename depends on .clang-tidy content. - return cache_dir / - fs::path(kProjectCachePrefix + "v" + major_version + "_" + - toHex(hash(version + clang_tidy_ + clang_tidy_args_) ^ - hash(GetContent(kClangConfigFile)), - 8)); + hash_t cache_unique_id = hashContent(version + clang_tidy_args_); + cache_unique_id ^= hashContent(GetContent(kClangConfigFile)); + return cache_dir / fs::path(cache_prefix + "v" + major_version + "_" + + ToHex(cache_unique_id, 8)); } // Fix filename paths found in logfiles that are not emitted relative to - // project root in the log (bazel has its own) + // project root in the log (bazel has its own, so this fixes bazel-specific + // projects) static void RepairFilenameOccurences(const fs::path &infile, const fs::path &outfile) { static const std::regex sFixPathsRe = []() { @@ -262,7 +316,7 @@ class ClangTidyRunner { } canonicalize_expr += fs::current_path().string() + "/"; // $(pwd)/ canonicalize_expr += - ")?(\\./)?"; // Some start with, or have a trailing ./ + ")?(\\./)?"; // Some start with, or have a trailing ./ return std::regex{canonicalize_expr}; }(); @@ -279,24 +333,41 @@ class ClangTidyRunner { class FileGatherer { public: FileGatherer(ContentAddressedStore &store, std::string_view search_dir) - : store_(store), root_dir_(search_dir) {} + : store_(store), root_dir_(search_dir.empty() ? "." : search_dir) {} // Find all the files we're interested in, and assemble a list of // paths that need refreshing. std::list BuildWorkList(file_time min_freshness) { // Gather all *.cc and *.h files; remember content hashes of includes. - const std::regex exclude_re(kFileExcludeRe); + static const std::regex include_re(std::string{kConfig.file_include_re}); + static const std::regex exclude_re(std::string{kConfig.file_exclude_re}); std::map header_hashes; for (const auto &dir_entry : fs::recursive_directory_iterator(root_dir_)) { const fs::path &p = dir_entry.path().lexically_normal(); - if (!fs::is_regular_file(p)) continue; - if (!kFileExcludeRe.empty() && - std::regex_search(p.string(), exclude_re)) { + if (!fs::is_regular_file(p)) { + continue; + } + const std::string file = p.string(); + if (!kConfig.file_include_re.empty() && + !std::regex_search(file, include_re)) { + continue; + } + if (!kConfig.file_exclude_re.empty() && + std::regex_search(file, exclude_re)) { continue; } - if (auto ext = p.extension(); ext == ".cc" || ext == ".h") { + const auto extension = p.extension(); + if (ConsiderExtension(extension)) { files_of_interest_.emplace_back(p, 0); // <- hash to be filled later. - if (ext == ".h") header_hashes[p.string()] = hash(GetContent(p)); + } + // Remember content hash of header, so that we can make changed headers + // influence the hash of a file including this. + if (extension == ".h") { + // Since the files might be included sloppily without prefix path, + // just keep track of the basename (but since there might be collisions, + // accomodate all of them by xor-ing the hashes). + const std::string just_basename = fs::path(file).filename(); + header_hashes[just_basename] ^= hashContent(GetContent(p)); } } std::cerr << files_of_interest_.size() << " files of interest.\n"; @@ -308,11 +379,12 @@ class FileGatherer { const std::regex inc_re("\"([0-9a-zA-Z_/-]+\\.h)\""); // match include for (filepath_contenthash_t &f : files_of_interest_) { const auto content = GetContent(f.first); - f.second = hash(content); + f.second = hashContent(content); for (ReIt it(content.begin(), content.end(), inc_re); it != ReIt(); ++it) { const std::string &header_path = (*it)[1].str(); - f.second ^= header_hashes[header_path]; + const std::string header_basename = fs::path(header_path).filename(); + f.second ^= header_hashes[header_basename]; } // Recreate if we don't have it yet or if it contains messages but is // older than WORKSPACE or compilation db. Maybe something got fixed. @@ -325,9 +397,12 @@ class FileGatherer { // Tally up findings for files of interest and assemble in one file. // (BuildWorkList() needs to be called first). - std::map CreateReport(const fs::path &project_dir, - std::string_view symlink_to) { + size_t CreateReport(const fs::path &project_dir, + std::string_view symlink_detail, + std::string_view symlink_summary) const { const fs::path tidy_outfile = project_dir / "tidy.out"; + const fs::path tidy_summary = project_dir / "tidy-summary.out"; + // Assemble the separate outputs into a single file. Tally up per-check const std::regex check_re("(\\[[a-zA-Z.-]+\\])\n"); std::map checks_seen; @@ -339,11 +414,39 @@ class FileGatherer { checks_seen[(*it)[1].str()]++; } } - + tidy_collect.close(); std::error_code ignored_error; - fs::remove(symlink_to, ignored_error); - fs::create_symlink(tidy_outfile, symlink_to, ignored_error); - return checks_seen; + fs::remove(symlink_detail, ignored_error); + fs::create_symlink(tidy_outfile, symlink_detail, ignored_error); + + // Report headline. + if (checks_seen.empty()) { + std::cerr << "No clang-tidy complaints. 😎\n"; + } else { + std::cerr << "Details: " << symlink_detail << "\n" + << "Summary: " << symlink_summary << "\n"; + std::cerr << "---- Summary ----\n"; + } + + // Produce a ordered-by-count report. + using check_count_t = std::pair; + std::vector by_count(checks_seen.begin(), checks_seen.end()); + std::stable_sort(by_count.begin(), by_count.end(), + [](const check_count_t &a, const check_count_t &b) { + return b.second < a.second; // reverse count + }); + + FILE *summary_file = fopen(tidy_summary.c_str(), "wb"); + for (const auto &counts : by_count) { + fprintf(stdout, "%5d %s\n", counts.second, counts.first.c_str()); + fprintf(summary_file, "%5d %s\n", counts.second, counts.first.c_str()); + } + fclose(summary_file); + + fs::remove(symlink_summary, ignored_error); + fs::create_symlink(tidy_summary, symlink_summary, ignored_error); + + return checks_seen.size(); } private: @@ -351,47 +454,52 @@ class FileGatherer { const std::string root_dir_; std::vector files_of_interest_; }; +} // namespace int main(int argc, char *argv[]) { - const std::string kTidySymlink = kProjectCachePrefix + "clang-tidy.out"; - // Test that key files exist and remember their last change. + if (!fs::exists(kClangConfigFile)) { + std::cerr << "Need a " << kClangConfigFile << " config file.\n"; + return EXIT_FAILURE; + } + std::error_code ec; - const auto workspace_ts = fs::last_write_time(kWorkspaceFile, ec); + const auto toplevel_build_ts = + fs::last_write_time(kConfig.toplevel_build_file, ec); if (ec.value() != 0) { - std::cerr << "Script needs to be executed in toplevel bazel project dir\n"; + std::cerr << "Script needs to be executed in toplevel project dir.\n"; return EXIT_FAILURE; } + const auto compdb_ts = fs::last_write_time("compile_commands.json", ec); if (ec.value() != 0) { - std::cerr << "No compilation db found. First, run make-compilation-db.sh\n"; + std::cerr << "No compilation db compile_commands.json found; " + << "create that first. For cmake projects, often simply\n" + << "\tln -s build/compile_commands.json .\n"; return EXIT_FAILURE; } - const auto build_env_latest_change = std::max(workspace_ts, compdb_ts); - ClangTidyRunner runner(argc, argv); + const auto build_env_latest_change = std::max(toplevel_build_ts, compdb_ts); + + std::string cache_prefix{kConfig.cache_prefix}; + if (cache_prefix.empty()) { + // Cache prefix not set, choose name of directory + cache_prefix = fs::current_path().filename().string() + "_"; + } + ClangTidyRunner runner(cache_prefix, argc, argv); ContentAddressedStore store(runner.project_cache_dir()); std::cerr << "Cache dir " << runner.project_cache_dir() << "\n"; - FileGatherer cc_file_gatherer(store, kSearchDir); + FileGatherer cc_file_gatherer(store, kConfig.start_dir); auto work_list = cc_file_gatherer.BuildWorkList(build_env_latest_change); + + // Now the expensive part... runner.RunClangTidyOn(store, &work_list); - auto checks_seen = - cc_file_gatherer.CreateReport(runner.project_cache_dir(), kTidySymlink); - if (checks_seen.empty()) { - std::cerr << "No clang-tidy complaints. 😎\n"; - } else { - std::cerr << "--- Summary --- (details in " << kTidySymlink << ")\n"; - using check_count_t = std::pair; - std::vector by_count(checks_seen.begin(), checks_seen.end()); - std::stable_sort(by_count.begin(), by_count.end(), - [](const check_count_t &a, const check_count_t &b) { - return b.second < a.second; // reverse count - }); - for (const auto &counts : by_count) { - fprintf(stdout, "%5d %s\n", counts.second, counts.first.c_str()); - } - } - return checks_seen.empty() ? EXIT_SUCCESS : EXIT_FAILURE; + const std::string detailed_report = cache_prefix + "clang-tidy.out"; + const std::string summary = cache_prefix + "clang-tidy.summary"; + const size_t tidy_count = cc_file_gatherer.CreateReport( + runner.project_cache_dir(), detailed_report, summary); + + return tidy_count == 0 ? EXIT_SUCCESS : EXIT_FAILURE; }