Skip to content

Commit

Permalink
scrape.js: Use GraphQL API
Browse files Browse the repository at this point in the history
Use GitHub GraphQL API instead of REST API

Closes #111
  • Loading branch information
li-boxuan committed Apr 17, 2018
1 parent 5fe816c commit 3ec6e81
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 25 deletions.
9 changes: 9 additions & 0 deletions lib/queries/github_search_org.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runs a GitHub search with the caller-supplied $query string over the USER
# search type and asks for at most one result (first:1).
query($query: String!) {
search(type:USER, query:$query, first:1) {
nodes {
# `login` is selected only for nodes of type Organization; no fields are
# selected for any other node type returned by the search.
...on Organization {
login
}
}
}
}
6 changes: 6 additions & 0 deletions lib/queries/github_user_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Looks up a single GitHub user by the login passed in $user and selects
# that user's `login` and `updatedAt` fields.
query ($user: String!) {
user(login: $user) {
login
updatedAt
}
}
2 changes: 2 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const { loadQuery } = require('../utils')

// Each exported constant holds the result of loadQuery() for the query file
// of the matching name; entries are loaded in the order listed.
Object.entries({
  GITHUB_REPO_INFO_QUERY: 'github_repo_info',
  GITHUB_SEARCH_ORG_QUERY: 'github_search_org',
  GITHUB_USER_INFO_QUERY: 'github_user_info',
}).forEach(([exportName, queryName]) => {
  module.exports[exportName] = loadQuery(queryName)
})
5 changes: 4 additions & 1 deletion lib/rss.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ module.exports = ({
feed_items: current,
data_updated: dataUpdated,
}) => {
const oldOrgs = createOrgHash(oldData)
const newOrgs = createOrgHash(newData)
let oldOrgs = newOrgs
if (oldData) {
oldOrgs = createOrgHash(oldData)
}

if (oldOrgs && newOrgs) {
const diffs = generateDiff(oldOrgs, newOrgs) || []
Expand Down
40 changes: 16 additions & 24 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,16 @@ const wdk = require('wikidata-sdk')
const cheerio = require('cheerio')

const { GITHUB_REPO_INFO_QUERY } = require('./queries')
const { GITHUB_SEARCH_ORG_QUERY } = require('./queries')
const { GITHUB_USER_INFO_QUERY } = require('./queries')
const { getLatestCommitMessage } = require('./utils')

const GH_BASE = 'https://github.com'
const GH_USER_BASE = `${GH_BASE}/users`
const GH_ORG_BASE = `${GH_BASE}/orgs`
const GH_API_BASE = 'https://api.github.com'
const GH_GQL_BASE = 'https://api.github.com/graphql'
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

const MIN_SEARCH_SCORE = 10

// The time to cache GitHub usernames for in milliseconds
const GITHUB_CACHE_TIME = 2 * 24 * 60 * 60 * 1000

Expand Down Expand Up @@ -53,12 +52,6 @@ const CHAT_IMAGES = {
OTHER: 'images/chat.png',
}

// Request options for GitHub REST calls. An Authorization header is attached
// only when the GITHUB_TOKEN environment variable is set; otherwise the
// headers object stays empty.
const GH_API_OPTIONS = { headers: {} }
if (process.env.GITHUB_TOKEN) {
  GH_API_OPTIONS.headers.Authorization = `token ${process.env.GITHUB_TOKEN}`
}

const GH_GQL_OPTIONS = {
url: GH_GQL_BASE,
headers: process.env.GITHUB_TOKEN
Expand Down Expand Up @@ -118,8 +111,7 @@ async function fetchRepositoryInfo(org) {
if (repositoryInfo[org]) return repositoryInfo[org]

const { data } = await client.query(GITHUB_REPO_INFO_QUERY, { org })

if (data) {
if (data && data.organization) {
const info = data.organization.repositories.nodes.map(node => ({
watchers: node.watchers.nodes,
stargazers: node.stargazers.nodes,
Expand Down Expand Up @@ -184,12 +176,12 @@ async function checkGitHubUserExists(user) {
}

/**
 * Searches GitHub for organizations through the GraphQL API.
 *
 * @param {string} query - Search string, forwarded as the `query` variable
 *   of GITHUB_SEARCH_ORG_QUERY.
 * @returns {Promise<Array<{login: string}>>} The result nodes that carry a
 *   `login`; an empty array when the response contains no usable data.
 */
async function searchGitHubOrgs(query) {
  const res = await client.query(GITHUB_SEARCH_ORG_QUERY, { query })

  // `data` (or `data.search`) can be absent when the request fails, so walk
  // the response defensively instead of dereferencing it directly.
  const search = res && res.data && res.data.search
  const nodes = (search && search.nodes) || []

  // Drop null nodes and nodes without a `login`, so callers can safely read
  // `.login` from every returned entry.
  return nodes.filter(node => node && node.login)
}

async function getGitHubUserHistory(user, from, to) {
Expand Down Expand Up @@ -238,12 +230,12 @@ function findMatches(input, pattern) {
}

/**
 * Fetches information about a GitHub user through the GraphQL API.
 *
 * @param {string} user - Login name, forwarded as the `user` variable of
 *   GITHUB_USER_INFO_QUERY.
 * @returns {Promise<Object|undefined>} The `user` object from the response,
 *   or `undefined` when the response does not contain one.
 */
async function getGitHubUser(user) {
  const res = await client.query(GITHUB_USER_INFO_QUERY, { user })

  // `data` can be missing when the request fails; guard each level so a
  // failed lookup yields `undefined` instead of throwing a TypeError.
  const info = res && res.data && res.data.user
  return info || undefined
}

async function findOrganization({
Expand Down Expand Up @@ -283,10 +275,10 @@ async function findOrganization({
)

const removePattern = /the|project|\([a-zA-Z]+\)/gi
const searchQuery = name.replace(removePattern, '').trim()
const searchQuery = name.replace(removePattern, '').trim() + ' type:org'
const searchResults = await searchGitHubOrgs(searchQuery)

if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
if (searchResults.length > 0) {
return searchResults[0].login
}

Expand Down

0 comments on commit 3ec6e81

Please sign in to comment.