Skip to content

Commit

Permalink
Implement loading & storing of metadata for a single repo
Browse files Browse the repository at this point in the history
  • Loading branch information
ad-si committed Nov 19, 2023
1 parent 1c75aa3 commit 4781a84
Show file tree
Hide file tree
Showing 9 changed files with 406 additions and 97 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# haskell-repos
# Haskell Repos

[![GitHub CI](https://github.com/ad-si/haskell-repos/workflows/CI/badge.svg)](https://github.com/ad-si/haskell-repos/actions)
[![Hackage](https://img.shields.io/hackage/v/haskell-repos.svg?logo=haskell)](https://hackage.haskell.org/package/haskell-repos)
[![Stackage Lts](http://stackage.org/package/haskell-repos/badge/lts)](http://stackage.org/lts/package/haskell-repos)
[![Stackage Nightly](http://stackage.org/package/haskell-repos/badge/nightly)](http://stackage.org/nightly/package/haskell-repos)
Load metadata for all Haskell repositories on GitHub and store it in Airsequel.

See README for more info

## Related

- [GrimoireLab] - Open source tools for software development analytics.

[GrimoireLab]: http://chaoss.github.io/grimoirelab/
2 changes: 2 additions & 0 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
256 changes: 253 additions & 3 deletions app/Main.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,257 @@
module Main (main) where
{-# LANGUAGE QuasiQuotes #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}

import HaskellRepos (someFunc)
{-# HLINT ignore "Use maybe" #-}

module Main where

import Protolude as P (
Bool,
Either (Left, Right),
IO,
Integer,
Maybe (..),
Text,
elem,
encodeUtf8,
find,
fromMaybe,
lastMay,
print,
pure,
putErrText,
putLByteString,
putText,
readMaybe,
show,
($),
(&),
(.),
(<&>),
(<>),
(>>=),
)

import Data.Aeson (encode, object, (.=))
import Data.Text qualified as T
import Data.Time.Format.ISO8601 (iso8601Show)
import Network.HTTP.Client (
RequestBody (RequestBodyLBS),
Response (responseHeaders),
httpLbs,
method,
newManager,
parseRequest,
requestBody,
requestHeaders,
)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Link.Types (Link, LinkParam (..), linkParams)
import Network.URI (URI)
import Text.RawString.QQ (r)

import Control.Arrow ((>>>))
import Data.List (lookup)
import GitHub qualified as GH
import GitHub.Endpoints.Activity.Starring as GH (Repo, untagName)
import Network.HTTP.Link (href, parseLinkHeaderBS)
import System.Environment (lookupEnv)

data ExtendedRepo = ExtendedRepo
{ core :: GH.Repo
, commitsCount :: Integer
}

-- | The ID of the Airsequel database loaded from the environment
loadDbId :: IO Text
loadDbId =
lookupEnv "AIRSEQUEL_DB_ID" <&> (fromMaybe "" >>> T.pack)

loadDbEndpoint :: IO Text
loadDbEndpoint = do
dbId <- loadDbId
pure $ "https://www.airsequel.com/dbs/" <> dbId <> "/graphql"

loadWriteToken :: IO Text
loadWriteToken =
lookupEnv "AIRSEQUEL_API_TOKEN" <&> (fromMaybe "" >>> T.pack)

formatRepo :: ExtendedRepo -> Text
formatRepo extendedRepo =
let
repo = core extendedRepo
in
"\n\n"
<> ("repo_url: " <> show (GH.repoHtmlUrl repo) <> "\n")
<> ( "description: "
<> (repo & GH.repoDescription & fromMaybe "")
<> "\n"
)
<> ("homepage: " <> (repo & GH.repoHomepage & fromMaybe "") <> "\n")
<> ( "language: "
<> (repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "")
<> "\n"
)
<> ("stargazers_count: " <> show (GH.repoStargazersCount repo) <> "\n")
<> ("commits_count: " <> show (commitsCount extendedRepo) <> "\n")
<> ("open_issues_count: " <> show (GH.repoOpenIssuesCount repo) <> "\n")
<> ( "created_at: "
<> (repo & GH.repoCreatedAt <&> show & fromMaybe "")
<> "\n"
)
<> ( "updated_at: "
<> (repo & GH.repoUpdatedAt <&> show & fromMaybe "")
<> "\n"
)

-- queryRepos :: Text
-- queryRepos =
-- [r|
-- query reposQuery {
-- repos( limit: 100 ) {
-- rowid
-- id
-- name
-- language
-- url
-- stars
-- updated_utc
-- }
-- }
-- |]

-- | Query @Link@ header with @rel=last@ from the request headers
getLastUrl :: Response a -> Maybe URI
getLastUrl req = do
let
isRelNext :: Link uri -> Bool
isRelNext = elem relNextLinkParam . linkParams

relNextLinkParam :: (LinkParam, Text)
relNextLinkParam = (Rel, "last")

linkHeader <- lookup "Link" (responseHeaders req)
links <- parseLinkHeaderBS linkHeader
nextURI <- find isRelNext links
pure $ href nextURI

{- | Workaround to get the number of commits for a repo
| https://stackoverflow.com/a/70610670
-}
getNumberOfCommits :: Repo -> IO Integer
getNumberOfCommits repo = do
let apiEndpoint =
"https://api.github.com/repos/"
<> (repo & GH.repoOwner & GH.simpleOwnerLogin & untagName)
<> "/"
<> (repo & GH.repoName & untagName)
<> "/commits?per_page=1"

manager <- newManager tlsManagerSettings
initialRequest <- parseRequest $ T.unpack apiEndpoint
let request =
initialRequest
{ method = "HEAD"
, requestHeaders = [("User-Agent", "haskell-repos")]
}

response <- httpLbs request manager

getLastUrl response
<&> (show >>> T.pack >>> T.splitOn "&page=")
>>= lastMay
>>= readMaybe
& fromMaybe 0
& pure

insertRepoQuery :: ExtendedRepo -> Text
insertRepoQuery extendedRepo =
let
repo = extendedRepo.core
commitsCount = extendedRepo.commitsCount
getTimestamp field =
repo
& field
<&> iso8601Show
& fromMaybe ""
& T.pack
in
[r|
mutation {
insert_repos(objects: [
{
owner: "{{name}}"
name: "{{name}}"
description: "{{description}}"
homepage: "{{homepage}}"
language: "{{language}}"
stargazers_count: {{stargazers_count}}
open_issues_count: {{open_issues_count}}
commits_count: {{commits_count}}
created_utc: "{{created_utc}}"
updated_utc: "{{updated_utc}}"
}
]) {
affected_rows
}
}
|]
& T.replace
"{{owner}}"
(repo & GH.repoOwner & GH.simpleOwnerLogin & untagName)
& T.replace "{{name}}" (repo & GH.repoName & untagName)
& T.replace
"{{description}}"
(repo & GH.repoDescription & fromMaybe "")
& T.replace "{{homepage}}" (repo & GH.repoHomepage & fromMaybe "")
& T.replace
"{{language}}"
(repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "")
& T.replace "{{stargazers_count}}" (repo & GH.repoWatchersCount & show)
& T.replace
"{{open_issues_count}}"
(repo & GH.repoOpenIssuesCount & show)
& T.replace "{{commits_count}}" (show commitsCount)
& T.replace "{{created_utc}}" (getTimestamp GH.repoCreatedAt)
& T.replace "{{updated_utc}}" (getTimestamp GH.repoUpdatedAt)

-- | Save the repo in Airsequel via a POST request executed by http-client
saveRepoInAirsequel :: ExtendedRepo -> IO ()
saveRepoInAirsequel extendedRepo = do
dbEndpoint <- loadDbEndpoint
writeToken <- loadWriteToken

manager <- newManager tlsManagerSettings

let requestObject = object ["query" .= insertRepoQuery extendedRepo]
initialRequest <- parseRequest $ T.unpack dbEndpoint
let request =
initialRequest
{ method = "POST"
, requestHeaders =
[ ("Content-Type", "application/json")
, ("Authorization", "Bearer " <> writeToken & encodeUtf8)
]
, requestBody = RequestBodyLBS $ encode requestObject
}

putLByteString $ encode requestObject

response <- httpLbs request manager
print response

main :: IO ()
main = someFunc
main = do
possibleRepo <- GH.github () GH.repositoryR "ad-si" "haskell-repos"
case possibleRepo of
Left error ->
putErrText $ "Error: " <> show error
Right repo -> do
commitsCount <- getNumberOfCommits repo
let extendedRepo =
ExtendedRepo
{ core = repo
, commitsCount = commitsCount
}
putText $ formatRepo extendedRepo
saveRepoInAirsequel extendedRepo
Loading

0 comments on commit 4781a84

Please sign in to comment.