diff --git a/bin/commands/contributors.rb b/bin/commands/contributors.rb
index 59c9b93..79c6e8a 100644
--- a/bin/commands/contributors.rb
+++ b/bin/commands/contributors.rb
@@ -32,4 +32,12 @@
       end
     end
   end
+
+  g.desc 'Create name,email CSV list of all DCO signers'
+  g.command 'dco-csv' do |c|
+    c.action do |_global_options, options, _args|
+      org = GitHub::Organization.new(options)
+      puts org.commits(options).unique_dco_signers_csv
+    end
+  end
 end
diff --git a/lib/github/commit.rb b/lib/github/commit.rb
new file mode 100644
index 0000000..401a54c
--- /dev/null
+++ b/lib/github/commit.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module GitHub
+  class Commit < Item
+    # Association list of all name/email pairs extracted from the DCO sign-off
+    # trailers in the commit message, e.g. [['John Lennon', 'john@beatles.com']].
+    # Returns an empty array when the message has no matching sign-off.
+    def dco_signoff_names_and_mails
+      commit.message.scan(/Signed-off-by: (.+) <([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+)>/)
+    end
+
+    # One-line summary: repository full name, author email and commit SHA.
+    def to_s
+      "#{repository.full_name} - #{commit.author.email} - #{sha}"
+    end
+  end
+end
diff --git a/lib/github/commits.rb b/lib/github/commits.rb
new file mode 100644
index 0000000..609d4fe
--- /dev/null
+++ b/lib/github/commits.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module GitHub
+  # Collection of GitHub::Commit items with DCO sign-off reporting.
+  class Commits < Items
+    def initialize(arr_or_options)
+      super arr_or_options, GitHub::Commit
+    end
+
+    # Builds a "name,email" CSV (one signer per line, no header row) of all
+    # unique DCO signers found in the commit messages.
+    #
+    # Entries are de-duplicated by email address. Addresses containing
+    # "noreply" sort to the bottom (for manual curation); within each group
+    # rows are ordered by name, then email. Fields are quoted when needed, so
+    # a name such as "Lennon, John" stays in a single column.
+    def unique_dco_signers_csv
+      # Association list of all name->email pairs, e.g:
+      # [['John Lennon', 'john@beatles.com'], ['Paul McCartney', 'paul@beatles.com']]
+      signers = each.flat_map(&:dco_signoff_names_and_mails)
+
+      # De-dupe all entries by email address into an email=>[name1,name2] hash
+      names_by_email = signers.each_with_object({}) do |(name, email), acc|
+        (acc[email] ||= []) << name
+      end
+
+      names_by_email
+        # For each email pick the best name and put the name first
+        .map { |email, names| [single_best_name(names), email] }
+        # Sort all "noreply" email addresses to the bottom, then sort by name;
+        # the email is the final tie-breaker so the output is deterministic
+        .sort_by { |name, email| [email.include?('noreply') ? 1 : 0, name.downcase, email] }
+        .map { |row| row.map { |field| csv_field(field) }.join(',') }
+        .join("\n")
+    end
+
+    # Runs the commit search for the given options and returns the matching
+    # items, minus commits whose author email contains "[bot]".
+    #
+    # Raises a RuntimeError when 1000 or more commits come back.
+    def page(options)
+      # NOTE(review): GitHub's REST API documents a per_page maximum of 100;
+      # confirm $github auto-paginates, otherwise the guard below cannot trip.
+      data = $github.search_commits(query(options), per_page: 1000).items
+      raise 'There are 1000+ commits returned from a single query, reduce --page.' if data.size >= 1000
+
+      data.reject do |commit|
+        commit.commit.author.email.include?('[bot]')
+      end
+    end
+
+    # Search query string: the terms from GitHub::Searchables followed by a
+    # committer-date range built from options[:from] and options[:to].
+    def query(options = {})
+      GitHub::Searchables.new(options).to_a.concat(
+        [
+          "committer-date:#{options[:from]}..#{options[:to]}"
+        ]
+      ).compact.join(' ')
+    end
+
+    private
+
+    # This is a simple heuristic for picking the "best" name by choosing the
+    # one with the most words. For example, if we find both "paul" and
+    # "Paul McCartney" then we'll choose "Paul McCartney".
+    def single_best_name(names)
+      names.max_by { |name| name.split.length }
+    end
+
+    # Quotes a CSV field when it contains a comma, double quote or line break,
+    # doubling any embedded quotes; other values are returned unchanged.
+    def csv_field(value)
+      return value unless value.match?(/[",\r\n]/)
+
+      "\"#{value.gsub('"', '""')}\""
+    end
+  end
+end
diff --git a/lib/github/organization.rb b/lib/github/organization.rb
index bdf7270..65eec68 100644
--- a/lib/github/organization.rb
+++ b/lib/github/organization.rb
@@ -28,6 +28,10 @@ def pull_requests(options = {})
       @pull_requests ||= GitHub::PullRequests.new({ org: name, status: :merged }.merge(options))
     end
 
+    def commits(options = {})
+      @commits ||= GitHub::Commits.new({ org: name }.merge(options))
+    end
+
     def issues(options = {})
       @issues ||= GitHub::Issues.new({ org: name }.merge(options))
     end
diff --git a/lib/tools.rb b/lib/tools.rb
index 9d8401c..6b81b30 100644
--- a/lib/tools.rb
+++ b/lib/tools.rb
@@ -19,6 +19,8 @@
 require_relative 'github/repos'
 require_relative 'github/pull_requests'
 require_relative 'github/pull_request'
+require_relative 'github/commits'
+require_relative 'github/commit'
 require_relative 'github/contributors'
 require_relative 'github/contributor'
 require_relative 'github/maintainers'