From 4a7a5d231e850bcc953a508362d82879d9ed68db Mon Sep 17 00:00:00 2001 From: dblock Date: Mon, 25 Mar 2024 17:54:05 -0400 Subject: [PATCH] Consider contractors and students as external in metrics. Signed-off-by: dblock --- bin/commands/maintainers.rb | 44 ++++++++++++++++--------------- bin/commands/members.rb | 2 +- bin/commands/org.rb | 4 +-- lib/github/buckets.rb | 20 +++++++++----- lib/github/contributors.rb | 14 +++++----- lib/github/data.rb | 52 ++++++++++++++++++++----------------- lib/github/maintainers.rb | 18 ++++++++++--- lib/github/pull_requests.rb | 2 +- lib/github/repos.rb | 24 ++++++++++++++--- 9 files changed, 109 insertions(+), 71 deletions(-) diff --git a/bin/commands/maintainers.rb b/bin/commands/maintainers.rb index 2084cd0..a1967eb 100644 --- a/bin/commands/maintainers.rb +++ b/bin/commands/maintainers.rb @@ -17,32 +17,34 @@ class Commands GitHub::Organization.new(options.merge(org: options['org'] || 'opensearch-project')).repos end maintainers = repos.maintainers(dt) - puts "As of #{dt || Date.today}, #{repos.count} repos have #{maintainers.unique_count} maintainers, where #{maintainers.external_unique_percent}% (#{maintainers.external_unique_count}/#{maintainers.unique_count}) are external." - puts "A total of #{repos.external_maintainers_percent}% (#{repos.external_maintained_size}/#{repos.count}) of repos have at least one of #{maintainers.external_unique_count} external maintainers." - puts "\n# Maintainers\n" - puts "unique: #{maintainers.unique_count}" - maintainers.each_pair do |bucket, logins| - puts "#{bucket}: #{logins.size} (#{logins.map(&:to_s).join(', ')})" - end + puts "As of #{dt || Date.today}, #{repos.count} repos have #{maintainers.unique_count} maintainers, where #{maintainers.all_external_unique_percent}% (#{maintainers.all_external_unique_count}/#{maintainers.unique_count}) are external." + puts "A total of #{repos.all_external_maintainers_percent}% (#{repos.all_external_maintained_size}/#{repos.count}) of repos have at least one of #{maintainers.all_external_unique_count} external maintainers." + puts "\n# External Maintainers\n" - repos.maintained[:external]&.sort_by(&:name)&.each do |repo| - puts "#{repo.html_url}: #{repo.maintainers[:external]} (#{repo.maintainers.external_unique_percent}%, #{repo.maintainers.external_unique_count}/#{repo.maintainers.unique_count})" + repos.externally_maintained.each do |repo| + puts "#{repo.html_url}: #{repo.maintainers.all_external} (#{repo.maintainers.all_external_unique_percent}%, #{repo.maintainers.all_external_unique_count}/#{repo.maintainers.unique_count})" end - puts "\n# Student Maintainers\n" - repos.maintained[:students]&.sort_by(&:name)&.each do |repo| - puts "#{repo.html_url}: #{repo.maintainers[:students]}" - end + # GitHub::Maintainers::ALL_EXTERNAL.each do |bucket| + # repos.maintained[bucket]&.sort_by(&:name)&.each do |repo| + # puts "#{repo.html_url}: #{repo.maintainers.all_external} (#{repo.maintainers.all_external_unique_percent}%, #{repo.maintainers.all_external_unique_count}/#{repo.maintainers.unique_count})" + # end + # end - puts "\n# Contractor Maintainers\n" - repos.maintained[:contractors]&.sort_by(&:name)&.each do |repo| - puts "#{repo.html_url}: #{repo.maintainers[:contractors]}" - end + # puts "\n# All Maintainers\n" + # puts "unique: #{maintainers.unique_count}" + # maintainers.each_pair do |bucket, logins| + # puts "#{bucket}: #{logins.size} (#{logins.map(&:to_s).join(', ')})" + # end - puts "\n# Unknown Maintainers\n" - repos.maintained[:unknown]&.sort_by(&:name)&.each do |repo| - puts "#{repo.html_url}: #{repo.maintainers[:unknown]}" - end + # %i[external students contractors unknown].each do |bucket| + # next unless repos.maintained[bucket]&.any? + + # puts "\n# #{bucket.capitalize} Maintainers\n" + # repos.maintained[bucket]&.sort_by(&:name)&.each do |repo| + # puts "#{repo.html_url}: #{repo.maintainers[bucket]}" + # end + # end end end diff --git a/bin/commands/members.rb b/bin/commands/members.rb index 25a6886..f4729f7 100644 --- a/bin/commands/members.rb +++ b/bin/commands/members.rb @@ -7,7 +7,7 @@ class Commands g.desc 'Check GitHub affiliation information for contributors.' g.command 'check' do |c| c.action do |_global_options, _options, _args| - GitHub::User.wrap(GitHub::Data.members).each do |contributor| + GitHub::User.wrap(GitHub::Data.members_data).each do |contributor| unless contributor.member? puts "#{contributor.login}: #{[contributor.company, contributor.bio].compact.join(' ')}" diff --git a/bin/commands/org.rb b/bin/commands/org.rb index da10646..34f06a8 100644 --- a/bin/commands/org.rb +++ b/bin/commands/org.rb @@ -20,8 +20,8 @@ class Commands org = GitHub::Organization.new(options) puts "org: #{org.name}" puts "members: #{org.members.count}" - puts "missing in data/users/members.txt: #{(org.members.logins - GitHub::Data.members).join(' ')}" - puts "no longer members: #{(GitHub::Data.members - org.members.logins).join(' ')}" + puts "missing in data/users/members.txt: #{(org.members.logins - GitHub::Data.members_data).join(' ')}" + puts "no longer members: #{(GitHub::Data.members_data - org.members.logins).join(' ')}" end end diff --git a/lib/github/buckets.rb b/lib/github/buckets.rb index 1c39bd9..30f3f6e 100644 --- a/lib/github/buckets.rb +++ b/lib/github/buckets.rb @@ -35,11 +35,17 @@ def all_humans end def all_members - members.to_a + contractors.to_a + members.to_a end def all_external - external.to_a + students.to_a + all = [] + Maintainers::ALL_EXTERNAL.each do |bucket| + next unless buckets[bucket] + + all.concat(buckets[bucket].to_a) + end + all end def all_external_percent @@ -56,15 +62,15 @@ def percent end def self.bucket(username) - if GitHub::Data.members.include?(username.to_s) + if GitHub::Data.members_data.include?(username.to_s) :members - elsif GitHub::Data.contractors.include?(username.to_s) + elsif GitHub::Data.contractors_data.include?(username.to_s) :contractors - elsif GitHub::Data.college_contributors.include?(username.to_s) + elsif GitHub::Data.students_data.include?(username.to_s) :students - elsif GitHub::Data.external_users.include?(username.to_s) + elsif GitHub::Data.external_data.include?(username.to_s) :external - elsif GitHub::Data.bots.include?(username.to_s) + elsif GitHub::Data.bots_data.include?(username.to_s) :bots else :unknown diff --git a/lib/github/contributors.rb b/lib/github/contributors.rb index 0af6aa3..f54f7f9 100644 --- a/lib/github/contributors.rb +++ b/lib/github/contributors.rb @@ -10,7 +10,7 @@ def initialize(arr) end def humans - reject { |item| item.type == 'Bot' || GitHub::Data.bots.include?(item.to_s) } + reject { |item| item.type == 'Bot' || GitHub::Data.bots_data.include?(item.to_s) } end def all @@ -18,15 +18,15 @@ def all end def self.bucket(username) - if GitHub::Data.members.include?(username.to_s) + if GitHub::Data.members_data.include?(username.to_s) :members - elsif GitHub::Data.contractors.include?(username.to_s) + elsif GitHub::Data.contractors_data.include?(username.to_s) :contractors - elsif GitHub::Data.students.include?(username.to_s) - :external # :students - elsif GitHub::Data.external_users.include?(username.to_s) + elsif GitHub::Data.students_data.include?(username.to_s) + :students + elsif GitHub::Data.external_data.include?(username.to_s) :external - elsif GitHub::Data.bots.include?(username.to_s) + elsif GitHub::Data.bots_data.include?(username.to_s) :bots else :unknown diff --git a/lib/github/data.rb b/lib/github/data.rb index a6ac322..8608267 100644 --- a/lib/github/data.rb +++ b/lib/github/data.rb @@ -26,48 +26,52 @@ def data DATA end - def backports - @backports ||= load_list(BACKPORTS) + def backports_data + @backports_data ||= load_list(BACKPORTS) end - def members - @members ||= load_list(MEMBERS) + def members_data + @members_data ||= load_list(MEMBERS) end - def contractors - @contractors ||= load_list(CONTRACTORS) + def contractors_data + @contractors_data ||= load_list(CONTRACTORS) end - def external_users - @external_users ||= load_list(EXTERNAL) + def external_data + @external_data ||= load_list(EXTERNAL) end - def students - @students ||= load_list(STUDENTS) + def students_data + @students_data ||= load_list(STUDENTS) end - def check_dups! - %i[members contractors students external_users].combination(2).each do |l, r| - send(l).intersection(send(r)).each do |user| - warn "WARNING: #{user} is found in both #{l} and #{r}" - end - end + def bots_data + @bots_data ||= load_list(BOTS) end - def bots - @bots ||= load_list(BOTS) + def companies_data + @companies_data ||= load_list(COMPANIES) end - def companies - @companies ||= load_list(COMPANIES) + def all_members_data + members_data end - def all_members - members + contractors + def check_dups! + %i[members_data contractors_data students_data external_data].combination(2).each do |l, r| + send(l).intersection(send(r)).each do |user| + warn "WARNING: #{user} is found in both #{l} and #{r}" + end + end end - def all_external - external_users + students + def all_external_data + all = [] + Maintainers::ALL_EXTERNAL.each do |bucket| + all.concat(send("#{bucket}_data")) + end + all end end end diff --git a/lib/github/maintainers.rb b/lib/github/maintainers.rb index b03a0f3..2fc18ce 100644 --- a/lib/github/maintainers.rb +++ b/lib/github/maintainers.rb @@ -4,6 +4,8 @@ module GitHub class Maintainers < Array include GitHub::Buckets + ALL_EXTERNAL = %i[contractors external students].freeze + def buckets @buckets ||= begin buckets = {} @@ -16,18 +18,26 @@ def buckets end end - def external_unique_percent + def all_external + ALL_EXTERNAL.map do |bucket| + buckets[bucket] + end.flatten.compact.uniq + end + + def all_external_unique_percent return 0 unless unique_count - ((external_unique_count.to_f / unique_count) * 100).to_i + ((all_external_unique_count.to_f / unique_count) * 100).to_i end def unique_count buckets.values.map(&:size).sum end - def external_unique_count - (buckets[:external]&.size || 0) + (buckets[:students]&.size || 0) + def all_external_unique_count + ALL_EXTERNAL.map do |bucket| + buckets[bucket]&.size || 0 + end.sum end def each_pair(&_block) diff --git a/lib/github/pull_requests.rb b/lib/github/pull_requests.rb index 913fac8..31ef3a9 100644 --- a/lib/github/pull_requests.rb +++ b/lib/github/pull_requests.rb @@ -29,7 +29,7 @@ def page(options) raise 'There are 1000+ PRs returned from a single query, reduce --page.' if data.size >= 1000 data.reject do |pr| - pr.user.type == 'Bot' || GitHub::Data.backports.any? { |b| pr.title&.downcase&.include?(b) } || project_website_authors?(pr) + pr.user.type == 'Bot' || GitHub::Data.backports_data.any? { |b| pr.title&.downcase&.include?(b) } || project_website_authors?(pr) end end diff --git a/lib/github/repos.rb b/lib/github/repos.rb index 9a17277..fd8c027 100644 --- a/lib/github/repos.rb +++ b/lib/github/repos.rb @@ -43,14 +43,30 @@ def maintained end end - def external_maintained_size - (maintained[:external]&.size || 0) + (maintained[:students]&.size || 0) + def externally_maintained + @externally_maintained ||= begin + all = Set.new + Maintainers::ALL_EXTERNAL.each do |bucket| + each do |repo| + next unless ((repo.maintainers & maintainers[bucket]) || []).any? + + all.add(repo) + end + end + all + end + end + + def all_external_maintained_size + Maintainers::ALL_EXTERNAL.map do |bucket| + maintained[bucket]&.size || 0 + end.sum end - def external_maintainers_percent + def all_external_maintainers_percent return 0 unless any? - (((maintained[:external]&.size.to_f + maintained[:students]&.size.to_f) / size) * 100).to_i + (all_external_maintained_size.to_f * 100 / size).to_i end end end