Skip to content

Commit

Permalink
Identify GCI users in repository information
Browse files Browse the repository at this point in the history
This begins to identify users in several meta repository stats,
including stargazers, watchers and forks.

Closes #100
  • Loading branch information
andrewda committed Dec 24, 2017
1 parent ed4f74e commit 43cc2d4
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 914 deletions.
3 changes: 3 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
const utils = require('../utils')

// Text of the `repo_info` GraphQL query, read once when this module is first
// required and exported for use by the scraper.
module.exports.REPO_INFO_QUERY = utils.loadQuery('repo_info')
28 changes: 28 additions & 0 deletions lib/queries/repo_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Repository "meta" information for one GitHub organization: who watches and
# stars each repository, and who has forked it and when.
#
# Variables:
#   $org - login of the organization to inspect (required).
query($org: String!) {
organization(login: $org) {
# No cursor or pageInfo is requested, so only a single page of at most
# 100 repositories is read.
repositories(first: 100) {
nodes {
# Up to 100 watchers per repository, with login and profile name.
watchers(first: 100) {
nodes {
login
name
}
}
# Up to 100 stargazers per repository, taken from the end of the
# connection (`last`), with login and profile name.
stargazers(last: 100) {
nodes {
login
name
}
}
# Up to 100 forks per repository, taken from the end of the connection
# (`last`): the fork owner's login and the fork's creation time.
forks(last: 100) {
nodes {
owner {
login
}
createdAt
}
}
}
}
}
}
106 changes: 99 additions & 7 deletions lib/scrape.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
const fetch = require('node-fetch')
const GraphQL = require('graphql-client')
const chattie = require('chattie')
const fs = require('fs')
const json2yaml = require('json2yaml')
const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')

const { REPO_INFO_QUERY } = require('./queries')

// GitHub website URLs
const GH_BASE = 'https://github.com'
const GH_USER_BASE = `${GH_BASE}/users`
const GH_ORG_BASE = `${GH_BASE}/orgs`

// GitHub REST API root and the GraphQL endpoint that lives beneath it
const GH_API_BASE = 'https://api.github.com'
const GH_GQL_BASE = `${GH_API_BASE}/graphql`

// Google Code-in API root
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

// Minimum score threshold (consumed elsewhere in this file; presumably applied
// to user search results - confirm against the caller)
const MIN_SEARCH_SCORE = 10

// How long a cached GitHub username stays valid, in milliseconds (48 hours)
const GITHUB_CACHE_TIME = 48 * 60 * 60 * 1000

// When true, the GitHub username cache is bypassed and users are looked up again
const BUST_GITHUB_CACHE = true

const CHAT_IMAGES = {
GITTER: 'images/logos/gitter.png',
SLACK: 'images/logos/slack.png',
Expand All @@ -32,6 +38,13 @@ const GH_API_OPTIONS = {
: {},
}

// Options for the GitHub GraphQL client. Requests go out without credentials
// unless GITHUB_TOKEN is set in the environment, in which case it is sent as
// a bearer token.
const GH_GQL_OPTIONS = {
  url: GH_GQL_BASE,
  headers: {},
}
if (process.env.GITHUB_TOKEN) {
  GH_GQL_OPTIONS.headers.Authorization = `bearer ${process.env.GITHUB_TOKEN}`
}

const GH_WEB_OPTIONS = {
headers: {
Accept: 'text/html',
Expand All @@ -43,6 +56,10 @@ const GH_WEB_OPTIONS = {
compress: false,
}

// GraphQL client built from GH_GQL_OPTIONS; used to run queries against the
// GitHub GraphQL endpoint.
const client = GraphQL(GH_GQL_OPTIONS)

// Date at which the competition opens. Left unset here and assigned at
// startup from the program's `competition_open_starts` value before any
// organization data is fetched.
let COMPETITION_OPEN

let existingData = []
try {
existingData = JSON.parse(
Expand All @@ -69,6 +86,71 @@ async function fetchLeaders(id) {
return leaders
}

// Cache of repository info per organization login, filled by
// fetchRepositoryInfo. Created without a prototype so that a login such as
// "constructor" cannot be mistaken for a cache hit on an inherited property.
let repositoryInfo = Object.create(null)

/**
 * Fetches the watchers, stargazers and forks of an organization's
 * repositories through the GitHub GraphQL API, caching successful results
 * per organization for the lifetime of the process.
 *
 * @param {string} org - GitHub login of the organization
 * @returns {Promise<Array<{watchers: Array, stargazers: Array, forks: Array}>>}
 *   one entry per repository; an empty array when no login was given, the
 *   response carried no data, or GitHub could not resolve the organization
 *   (empty results are not cached)
 */
async function fetchRepositoryInfo(org) {
  // The query requires a login, so there is nothing to ask for without one
  if (!org) return []

  if (repositoryInfo[org]) return repositoryInfo[org]

  const { data } = await client.query(REPO_INFO_QUERY, { org })

  // `organization` comes back as null when the login does not resolve to an
  // organization; treat that like a missing response instead of throwing
  if (!data || !data.organization) return []

  const info = data.organization.repositories.nodes.map(node => ({
    watchers: node.watchers.nodes,
    stargazers: node.stargazers.nodes,
    forks: node.forks.nodes,
  }))

  repositoryInfo[org] = info

  return info
}

/**
 * Tries to identify a user's GitHub account from the people who interact
 * with an organization's repositories.
 *
 * A user matches by login when `shortName` (compared case-insensitively) is
 * the login of a watcher, a stargazer, or the owner of a fork created after
 * the competition opened. Otherwise a user matches by name when
 * `displayName` (compared case-insensitively) equals a watcher's profile
 * name.
 *
 * @param {string} org - GitHub login of the organization
 * @param {string} displayName - the user's display name
 * @param {string} shortName - candidate GitHub login derived from the name
 * @returns {Promise<string|undefined>} `shortName` on a login match, the
 *   watcher's login on a name match, otherwise undefined
 */
async function getGitHubUserFromRepoInfo(org, displayName, shortName) {
  let repos = []
  try {
    repos = await fetchRepositoryInfo(org)
  } catch (e) {
    // Best effort: without repository info there is simply nothing to match
    console.error(`Could not fetch repository info for ${org}...`)
  }

  // Set and Map keep lookups constant-time and, unlike a plain object, never
  // report inherited members such as "constructor" or "tostring" as matches
  const logins = new Set()
  const names = new Map()

  for (const repo of repos) {
    for (const watcher of repo.watchers) {
      logins.add(watcher.login.toLowerCase())
      if (watcher.name) {
        // A later watcher with the same name replaces an earlier one
        names.set(watcher.name.toLowerCase(), watcher.login)
      }
    }

    for (const stargazer of repo.stargazers) {
      logins.add(stargazer.login.toLowerCase())
    }

    for (const fork of repo.forks) {
      // Only forks made after the competition opened count as a signal
      const createdAt = new Date(fork.createdAt)
      if (createdAt.getTime() > COMPETITION_OPEN.getTime()) {
        logins.add(fork.owner.login.toLowerCase())
      }
    }
  }

  if (logins.has(shortName.toLowerCase())) {
    return shortName
  }

  return names.get(displayName.toLowerCase())
}

async function checkGitHubUserExists(user) {
const res = await fetch(`${GH_BASE}/${user}`)
return res.status === 200
Expand Down Expand Up @@ -232,8 +314,15 @@ async function findGitHubUser(displayName, org) {

const shortName = validUsername(displayName)

const username = await findGitHubUserInOrg(displayName, org)
if (username) return username
const userFromRepo = await getGitHubUserFromRepoInfo(
org,
displayName,
shortName
)
if (userFromRepo) return userFromRepo

const userInOrg = await findGitHubUserInOrg(displayName, org)
if (userInOrg) return userInOrg

let user
try {
Expand All @@ -245,12 +334,9 @@ async function findGitHubUser(displayName, org) {

const login = user.login

const { competition_open_starts } = await fetchProgram()

const updatedTime = new Date(user.updated_at)
const openTime = new Date(competition_open_starts)

if (updatedTime.getTime() - openTime.getTime() < 0) return
if (updatedTime.getTime() - COMPETITION_OPEN.getTime() < 0) return

let orgs = []
try {
Expand Down Expand Up @@ -291,7 +377,8 @@ async function freshenUserGitHubCache(user, existingUser, organization) {
existingUser &&
existingUser.github_updated &&
existingUser.github_account
)
) ||
BUST_GITHUB_CACHE
) {
return {
login: await findGitHubUser(user.display_name, organization),
Expand Down Expand Up @@ -346,6 +433,8 @@ async function fetchOrgsWithData() {
const orgWiki = await Promise.all(fetchingWiki)

const fetchingAll = orgs.map(async (org, index) => {
await fetchRepositoryInfo(orgGitHub[index])

const existingOrg = existingData.find(existing => existing.id === org.id)
const fetchingUsers = orgLeaders[index].map(async user => {
let existingUser
Expand Down Expand Up @@ -390,6 +479,9 @@ async function fetchDates() {
}

;(async () => {
const { competition_open_starts } = await fetchProgram()
COMPETITION_OPEN = new Date(competition_open_starts)

const data = await fetchOrgsWithData()
const dates = await fetchDates()

Expand Down
4 changes: 4 additions & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
const fs = require('fs')

/**
 * Reads a GraphQL document from the `queries` directory beside this module.
 *
 * @param {string} name - file name without the `.graphql` extension
 * @returns {string} the file's contents as text
 */
module.exports.loadQuery = name => {
  const queryPath = `${__dirname}/queries/${name}.graphql`
  return fs.readFileSync(queryPath, 'utf8')
}
Loading

0 comments on commit 43cc2d4

Please sign in to comment.