diff --git a/bin/commands/pr.rb b/bin/commands/pr.rb
index bf771a0..825d531 100644
--- a/bin/commands/pr.rb
+++ b/bin/commands/pr.rb
@@ -29,7 +29,7 @@ class Commands
 
       if !options['ignore-unknown'] && prs.contributors[:unknown]&.any?
         puts 'Add the following users to either data/users/members.txt, external.txt, students.txt or contractors.txt and re-run.'
-        prs.contributors[:unknown].keys.take(10).each do |user|
+        prs.contributors[:unknown].keys.each do |user|
           puts user
           system "open https://github.com/#{user}"
         end
diff --git a/lib/github/pull_requests.rb b/lib/github/pull_requests.rb
index ba10f27..5ab8562 100644
--- a/lib/github/pull_requests.rb
+++ b/lib/github/pull_requests.rb
@@ -29,7 +29,7 @@ def page(options)
       raise 'There are 1000+ PRs returned from a single query, reduce --page.' if data.size >= 1000
 
       data.reject do |pr|
-        pr.user.type == 'Bot' || GitHub::Data.backports.any? { |b| pr.title&.downcase&.include?(b) }
+        pr.user.type == 'Bot' || GitHub::Data.backports.any? { |b| pr.title&.downcase&.include?(b) } || project_website_authors?(pr)
       end
     end
 
@@ -44,5 +44,23 @@ def query(options = {})
         ].compact
       ).compact.join(' ')
     end
+
+    # Tells whether a PR only touches author profiles on the project website.
+    #
+    # Used to exclude a high number of misleading contributions from a student program
+    # modifying https://github.com/opensearch-project/project-website/commits/main/_authors
+    #
+    # @param pr [#repository_url, #number] pull request element of the query results
+    # @return [Boolean] true when the PR is in project-website and every changed file is under _authors/
+    def project_website_authors?(pr)
+      repo = 'opensearch-project/project-website'
+      return false unless pr.repository_url == "https://api.github.com/repos/#{repo}"
+
+      # NOTE(review): pull_request_files may be paginated; unless $github auto-paginates,
+      # only the first page of changed files is inspected here - confirm.
+      files = $github.pull_request_files(repo, pr.number)
+      # all? is true for an empty list, so a PR without changed files must not count as authors-only
+      files.any? && files.all? { |file| file.filename.start_with?('_authors/') }
+    end
   end
 end