Skip to content

Commit

Permalink
Create tool for getting DCO sign off emails
Browse files Browse the repository at this point in the history
This tool digs through commit messages, parses names and email addresses
out of the `Signed-off-by:` tags, de-duplicates them by email address,
and outputs a CSV of name/email pairs.

Signed-off-by: Andrew Ross <[email protected]>
  • Loading branch information
andrross committed Dec 2, 2022
1 parent 998fd60 commit 72531b1
Show file tree
Hide file tree
Showing 10 changed files with 889 additions and 0 deletions.
11 changes: 11 additions & 0 deletions bin/commands/contributors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,15 @@
end
end
end

# `dco-signers`: prints every DCO signer found in the organization's commits,
# one signer per line, in the order produced by Signers#sort_for_display.
g.desc 'Create a list of all DCO signers'
g.command 'dco-signers' do |c|
  c.action do |_global_options, options, _args|
    # The same options hash configures both the organization and the commit search.
    signers = GitHub::Organization.new(options).commits(options).dco_signers
    signers.sort_for_display.each { |signer| puts signer.to_s }
  end
end
end
13 changes: 13 additions & 0 deletions lib/github/commit.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

module GitHub
  # A commit item whose message may carry DCO `Signed-off-by:` tags.
  class Commit < Item
    # Builds one Signer per `Signed-off-by: Name <email>` tag found in the
    # commit message, in order of appearance. Duplicates are kept; tags whose
    # address does not look like `local@domain.tld` are ignored.
    def dco_signers
      sign_off = /Signed-off-by: (.+) <([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+)>/
      # Each scan result is a [name, email] pair (the two capture groups).
      commit.message.scan(sign_off).map { |name, email| Signer.new(name, email) }
    end
  end
end
31 changes: 31 additions & 0 deletions lib/github/commits.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module GitHub
  # A collection of GitHub::Commit items backed by the commit search API.
  class Commits < Items
    # Hands the raw array (or search options) to Items, with GitHub::Commit as
    # the item class.
    def initialize(arr_or_options)
      super(arr_or_options, GitHub::Commit)
    end

    # Collects the sign-offs of every commit into a Signers collection, which
    # keeps a single entry per email address.
    def dco_signers
      Signers.new(each.flat_map(&:dco_signers))
    end

    # Runs one commit search for +options+ and returns the results whose author
    # email does not contain '[bot]'.
    #
    # Raises RuntimeError when 1000 or more results come back from the query.
    # NOTE(review): $github is presumably the shared API client; confirm that it
    # honours per_page: 1000 (or auto-paginates) so this limit check can trigger.
    def page(options)
      results = $github.search_commits(query(options), per_page: 1000).items
      if results.size >= 1000
        raise 'There are 1000+ commits returned from a single query, reduce --page.'
      end

      results.reject { |result| result.commit.author.email.include?('[bot]') }
    end

    # Builds the search query string: the terms produced by GitHub::Searchables
    # followed by a committer-date range, with nil terms dropped and the rest
    # joined by single spaces.
    def query(options = {})
      terms = GitHub::Searchables.new(options).to_a
      date_range = "committer-date:#{options[:from]}..#{options[:to]}"
      (terms + [date_range]).compact.join(' ')
    end
  end
end
4 changes: 4 additions & 0 deletions lib/github/organization.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ def pull_requests(options = {})
@pull_requests ||= GitHub::PullRequests.new({ org: name, status: :merged }.merge(options))
end

# Commits for this organization. The collection is built on first use and
# memoized, so options passed on later calls are ignored. Caller-supplied
# options win over the default :org key.
def commits(options = {})
  @commits ||= begin
    search_options = { org: name }.merge(options)
    GitHub::Commits.new(search_options)
  end
end

# Issues for this organization. The collection is built on first use and
# memoized, so options passed on later calls are ignored. Caller-supplied
# options win over the default :org key.
def issues(options = {})
  @issues ||= begin
    search_options = { org: name }.merge(options)
    GitHub::Issues.new(search_options)
  end
end
Expand Down
16 changes: 16 additions & 0 deletions lib/github/signer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

module GitHub
  # A single DCO signer: the name and email address taken from one
  # `Signed-off-by:` tag.
  class Signer
    attr_reader :email, :name

    # name  - the signer's name as written in the tag
    # email - the signer's email address
    def initialize(name, email)
      @name = name
      @email = email
    end

    # Renders the signer as one CSV row, `name,email`.
    #
    # Names are free text and may contain commas or double quotes, so each
    # field is quoted when needed; otherwise a name such as "Doe, Jane" would
    # split into two columns. Plain values are emitted unchanged.
    def to_s
      [name, email].map { |field| csv_field(field) }.join(',')
    end

    private

    # Quotes +value+ per RFC 4180 when it contains a comma, a double quote or a
    # line break; embedded double quotes are doubled.
    def csv_field(value)
      text = value.to_s
      return text unless text.match?(/[",\r\n]/)

      %("#{text.gsub('"', '""')}")
    end
  end
end
38 changes: 38 additions & 0 deletions lib/github/signers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module GitHub
  # An Array of signers holding at most one entry per email address.
  class Signers < Array
    # Builds the collection from +arr+, collapsing entries that share an email
    # address. The surviving entry keeps the position of the first occurrence
    # of that address and carries the "best" name (see #best_signer).
    def initialize(arr)
      chosen = {}
      arr.each do |candidate|
        key = candidate.email
        chosen[key] = best_signer(chosen[key], candidate)
      end
      super(chosen.values)
    end

    # Returns a new Signers ordered for output: addresses containing "noreply"
    # go last (they need manual curation), and within each group entries are
    # ordered by case-insensitive name.
    def sort_for_display
      ordered = sort_by do |signer|
        noreply_rank = signer.email.include?('noreply') ? 1 : 0
        [noreply_rank, signer.name.downcase]
      end
      Signers.new(ordered)
    end

    private

    # Picks which of two entries for the same address to keep. A missing side
    # loses; otherwise the name with more whitespace-separated words wins, so
    # "Daniel (dB.) Doubrovkine" beats "dblock". On a tie the right-hand
    # (later) entry is kept.
    def best_signer(left, right)
      return right if left.nil?
      return left if right.nil?

      left.name.split.length > right.name.split.length ? left : right
    end
  end
end
4 changes: 4 additions & 0 deletions lib/tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
require_relative 'github/repos'
require_relative 'github/pull_requests'
require_relative 'github/pull_request'
require_relative 'github/commits'
require_relative 'github/commit'
require_relative 'github/contributors'
require_relative 'github/contributor'
require_relative 'github/maintainers'
require_relative 'github/signers'
require_relative 'github/signer'
require_relative 'github/users'
require_relative 'github/user'
require_relative 'github/issues'
Expand Down

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions spec/github/commit_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# frozen_string_literal: true

describe GitHub::Commit do
  # A squash-merge style message with several `Signed-off-by:` tags (including
  # repeats) followed by `Co-authored-by:` tags, which must not be counted.
  #
  # The addresses of the human signers are example.com placeholders: the
  # parser only needs syntactically valid addresses, and the assertions below
  # look at names only.
  subject do
    message = <<~MESSAGE
      Bump opencensus-contrib-http-util from 0.18.0 to 0.31.1 in /plugins/repository-gcs (#3633)
      * Bump opencensus-contrib-http-util in /plugins/repository-gcs
      Bumps [opencensus-contrib-http-util](https://github.com/census-instrumentation/opencensus-java) from 0.18.0 to 0.31.1.
      - [Release notes](https://github.com/census-instrumentation/opencensus-java/releases)
      - [Changelog](https://github.com/census-instrumentation/opencensus-java/blob/master/CHANGELOG.md)
      - [Commits](census-instrumentation/opencensus-java@v0.18.0...v0.31.1)
      ---
      updated-dependencies:
      - dependency-name: io.opencensus:opencensus-contrib-http-util
      dependency-type: direct:production
      update-type: version-update:semver-minor
      ...
      Signed-off-by: dependabot[bot] <support@github.com>
      * Updating SHAs
      Signed-off-by: dependabot[bot] <support@github.com>
      * Adding missing classes
      Signed-off-by: Vacha Shah <vacha.shah@example.com>
      * changelog change
      Signed-off-by: Poojita Raj <poojita.raj@example.com>
      Signed-off-by: dependabot[bot] <support@github.com>
      Signed-off-by: Vacha Shah <vacha.shah@example.com>
      Signed-off-by: Poojita Raj <poojita.raj@example.com>
      Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
      Co-authored-by: dependabot[bot] <dependabot[bot]@users.noreply.github.com>
      Co-authored-by: Vacha Shah <vacha.shah@example.com>
      Co-authored-by: Poojita Raj <poojita.raj@example.com>
    MESSAGE
    resource = Sawyer::Resource.new(Sawyer::Agent.new('fake'), { commit: { message: message } })
    GitHub::Commit.new(resource)
  end

  it 'parses signers from a commit message' do
    # Every Signed-off-by tag yields a signer, in order; de-duplication is the
    # job of GitHub::Signers, not of the commit itself.
    expect(subject.dco_signers.count).to eq 7
    expect(subject.dco_signers.map(&:name)).to eq [
      'dependabot[bot]',
      'dependabot[bot]',
      'Vacha Shah',
      'Poojita Raj',
      'dependabot[bot]',
      'Vacha Shah',
      'Poojita Raj'
    ]
  end
end
25 changes: 25 additions & 0 deletions spec/github/commits_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

# Exercises GitHub::Commits against a recorded search of opensearch-project/OpenSearch
# commits for January 2022 (replayed from the VCR cassette named below).
describe GitHub::Commits do
  context 'contributors' do
    context 'org' do
      context 'january 2022' do
        context 'OpenSearch commits', vcr: { cassette_name: 'search/opensearch-project/commits_2022-01-01_2022-01-31' } do
          subject do
            described_class.new(
              org: 'opensearch-project',
              repo: 'OpenSearch',
              from: Date.new(2022, 1, 1),
              to: Date.new(2022, 1, 31),
              page: 7
            )
          end

          it 'fetches commits between two dates' do
            expect(subject.count).to eq 62
            expect(subject.first['sha']).to eq 'db23f72a2a5da1f21d674bde3a9d1cbe4fb74b19'
          end

          it 'collects DCO signers from commits' do
            # Signers are unique by email address, so this is lower than the commit count.
            expect(subject.dco_signers.count).to eq 25
            expect(subject.dco_signers.first.name).to eq 'Tianli Feng'
          end
        end
      end
    end
  end
end

0 comments on commit 72531b1

Please sign in to comment.