diff --git a/lib/queries/github_search_org.graphql b/lib/queries/github_search_org.graphql
new file mode 100644
index 0000000..5da9108
--- /dev/null
+++ b/lib/queries/github_search_org.graphql
@@ -0,0 +1,9 @@
+query($query: String!) {
+  search(type: USER, query: $query, first: 1) {
+    nodes {
+      ...on Organization {
+        login
+      }
+    }
+  }
+}
diff --git a/lib/queries/github_user_info.graphql b/lib/queries/github_user_info.graphql
new file mode 100644
index 0000000..50e065c
--- /dev/null
+++ b/lib/queries/github_user_info.graphql
@@ -0,0 +1,6 @@
+query($user: String!) {
+  user(login: $user) {
+    login
+    updatedAt
+  }
+}
diff --git a/lib/queries/index.js b/lib/queries/index.js
index 4f770c3..41bbe46 100644
--- a/lib/queries/index.js
+++ b/lib/queries/index.js
@@ -1,3 +1,5 @@
 const { loadQuery } = require('../utils')
 
 module.exports.GITHUB_REPO_INFO_QUERY = loadQuery('github_repo_info')
+module.exports.GITHUB_SEARCH_ORG_QUERY = loadQuery('github_search_org')
+module.exports.GITHUB_USER_INFO_QUERY = loadQuery('github_user_info')
diff --git a/lib/scrape.js b/lib/scrape.js
index cbae581..9423fcf 100644
--- a/lib/scrape.js
+++ b/lib/scrape.js
@@ -8,7 +8,11 @@ const validUsername = require('valid-github-username')
 const wdk = require('wikidata-sdk')
 const cheerio = require('cheerio')
 
-const { GITHUB_REPO_INFO_QUERY } = require('./queries')
+const {
+  GITHUB_REPO_INFO_QUERY,
+  GITHUB_SEARCH_ORG_QUERY,
+  GITHUB_USER_INFO_QUERY,
+} = require('./queries')
 const { getLatestCommitMessage } = require('./utils')
 
 const GH_BASE = 'https://github.com'
@@ -137,6 +141,7 @@ async function fetchRepositoryInfo(org) {
     ;({ data, errors } = await client.query(GITHUB_REPO_INFO_QUERY, { org }))
   } catch (error) {
     console.warn(`GitHub query for org ${org} fails, error: ${error}`)
+    return []
   }
 
   if (data && data.organization) {
@@ -209,12 +214,44 @@ async function checkGitHubUserExists(user) {
 }
 
+/**
+ * Search GitHub organizations matching `query` through both the REST and the
+ * GraphQL API and merge the matches.
+ *
+ * @param {string} query - raw search text, may carry qualifiers like `type:org`
+ * @returns {Promise<Object[]>} matches that have a `login`, REST items (which
+ *   carry a `score`) first, one entry per login
+ */
 async function searchGitHubOrgs(query) {
+  // use REST API to fetch
   const res = await fetch(
-    `${GH_API_BASE}/search/users?q=${query}%20type:org`,
+    `${GH_API_BASE}/search/users?q=${encodeURIComponent(query)}`,
     GH_API_OPTIONS
   )
   const { items } = await res.json()
-  return items || []
+  // `items` may be missing from the REST body: fall back to an empty list
+  let results = items || []
+  // use GraphQL API to fetch
+  let data, errors
+  try {
+    ;({ data, errors } = await client.query(GITHUB_SEARCH_ORG_QUERY, { query }))
+  } catch (error) {
+    console.warn(`GitHub query ${query} fails, error: ${error}`)
+  }
+
+  if (data && data.search && data.search.nodes) {
+    results = results.concat(data.search.nodes)
+  } else {
+    const errorMessage =
+      errors && errors.length ? errors[0].message : 'unknown error'
+    console.warn(
+      `Cannot query ${query} from GitHub, error message: ${errorMessage}`
+    )
+  }
+  // nodes that are not organizations carry no `login`: drop them
+  return unique(
+    results.filter((result) => result && result.login),
+    (result) => result.login
+  )
 }
 
 async function getGitHubUserHistory(user, from, to) {
@@ -262,12 +299,42 @@ function findMatches(input, pattern) {
 }
 
+/**
+ * Fetch a GitHub user, trying the GraphQL API first and falling back to the
+ * REST API when GraphQL yields no user.
+ *
+ * @param {string} user - GitHub login
+ * @returns {Promise<Object|undefined>} the GraphQL `user` node, else the REST
+ *   response body, or `undefined` when the REST body carries a `message`
+ */
 async function getGitHubUser(user) {
-  const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
-  let response = await res.json()
-  if (response && response.message) {
-    response = undefined
+  let data, errors
+  try {
+    ;({ data, errors } = await client.query(GITHUB_USER_INFO_QUERY, { user }))
+  } catch (error) {
+    console.warn(`GitHub query for user ${user} fails, error: ${error}`)
+  }
+
+  if (data && data.user) {
+    return data.user
+  } else {
+    const errorMessage =
+      errors && errors.length ? errors[0].message : 'unknown error'
+    console.warn(
+      `Cannot fetch user ${user} via GitHub GraphQL,`,
+      `error message: ${errorMessage}, resorting to GitHub REST API hit`
+    )
+    const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
+    const response = await res.json()
+    if (response && response.message) {
+      console.warn(
+        `Cannot fetch user ${user} via GitHub REST API,`,
+        `error message: ${response.message}`
+      )
+      return undefined
+    } else {
+      return response
+    }
   }
-  return response
 }
 
 async function findOrganization({
@@ -307,10 +374,14 @@ async function findOrganization({
   )
 
   const removePattern = /the|project|\([a-zA-Z]+\)/gi
-  const searchQuery = name.replace(removePattern, '').trim()
+  const searchQuery = `${name.replace(removePattern, '').trim()} type:org`
   const searchResults = await searchGitHubOrgs(searchQuery)
 
-  if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
+  if (searchResults.length > 0) {
+    if (searchResults[0].score && searchResults[0].score <= MIN_SEARCH_SCORE) {
+      // GitHub REST API returns a list of matches with confidence score
+      return null
+    }
     return searchResults[0].login
   }
 
@@ -658,6 +729,26 @@ async function fetchDates() {
   return res.json()
 }
 
+/**
+ * Remove duplicates from an array, keeping the first occurrence of each item.
+ *
+ * @param {Array} arr - items to deduplicate
+ * @param {Function} [keyOf] - maps an item to the value it is compared by;
+ *   defaults to the item itself
+ * @returns {Array} new array without duplicates, in original order
+ */
+function unique(arr, keyOf = (item) => item) {
+  const seen = new Set()
+  return arr.filter((item) => {
+    const key = keyOf(item)
+    if (seen.has(key)) {
+      return false
+    }
+    seen.add(key)
+    return true
+  })
+}
+
 ;(async () => {
   const { competition_open_starts } = await fetchProgram()
   COMPETITION_OPEN = new Date(competition_open_starts)