From 4781a848565bcda3644fcd5c31d293fdc59527dd Mon Sep 17 00:00:00 2001 From: Adrian Sieber Date: Sun, 19 Nov 2023 21:36:18 +0000 Subject: [PATCH] Implement loading & storing of metadata for a single repo --- README.md | 14 +-- Setup.hs | 2 + app/Main.hs | 256 +++++++++++++++++++++++++++++++++++++++++++- haskell-repos.cabal | 139 ++++++++++++------------ package.yaml | 57 ++++++++++ source/Lib.hs | 9 ++ src/HaskellRepos.hs | 15 --- stack.yaml | 6 +- test/Spec.hs | 5 - 9 files changed, 406 insertions(+), 97 deletions(-) create mode 100644 Setup.hs create mode 100644 package.yaml create mode 100644 source/Lib.hs delete mode 100644 src/HaskellRepos.hs delete mode 100644 test/Spec.hs diff --git a/README.md b/README.md index 3238cf3..4391a5c 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ -# haskell-repos +# Haskell Repos -[![GitHub CI](https://github.com/ad-si/haskell-repos/workflows/CI/badge.svg)](https://github.com/ad-si/haskell-repos/actions) -[![Hackage](https://img.shields.io/hackage/v/haskell-repos.svg?logo=haskell)](https://hackage.haskell.org/package/haskell-repos) -[![Stackage Lts](http://stackage.org/package/haskell-repos/badge/lts)](http://stackage.org/lts/package/haskell-repos) -[![Stackage Nightly](http://stackage.org/package/haskell-repos/badge/nightly)](http://stackage.org/nightly/package/haskell-repos) +Load metadata for all Haskell repositories on GitHub and store it in Airsequel. -See README for more info + +## Related + +- [GrimoireLab] - Open source tools for software development analytics. + +[GrimoireLab]: http://chaoss.github.io/grimoirelab/ diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/app/Main.hs b/app/Main.hs index 1f11bd3..0727c77 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,7 +1,257 @@ -module Main (main) where +{-# LANGUAGE QuasiQuotes #-} +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} -import HaskellRepos (someFunc) +{-# HLINT ignore "Use maybe" #-} +module Main where + +import Protolude as P ( + Bool, + Either (Left, Right), + IO, + Integer, + Maybe (..), + Text, + elem, + encodeUtf8, + find, + fromMaybe, + lastMay, + print, + pure, + putErrText, + putLByteString, + putText, + readMaybe, + show, + ($), + (&), + (.), + (<&>), + (<>), + (>>=), + ) + +import Data.Aeson (encode, object, (.=)) +import Data.Text qualified as T +import Data.Time.Format.ISO8601 (iso8601Show) +import Network.HTTP.Client ( + RequestBody (RequestBodyLBS), + Response (responseHeaders), + httpLbs, + method, + newManager, + parseRequest, + requestBody, + requestHeaders, + ) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import Network.HTTP.Link.Types (Link, LinkParam (..), linkParams) +import Network.URI (URI) +import Text.RawString.QQ (r) + +import Control.Arrow ((>>>)) +import Data.List (lookup) +import GitHub qualified as GH +import GitHub.Endpoints.Activity.Starring as GH (Repo, untagName) +import Network.HTTP.Link (href, parseLinkHeaderBS) +import System.Environment (lookupEnv) + +data ExtendedRepo = ExtendedRepo + { core :: GH.Repo + , commitsCount :: Integer + } + +-- | The ID of the Airsequel database loaded from the environment +loadDbId :: IO Text +loadDbId = + lookupEnv "AIRSEQUEL_DB_ID" <&> (fromMaybe "" >>> T.pack) + +loadDbEndpoint :: IO Text +loadDbEndpoint = do + dbId <- loadDbId + pure $ "https://www.airsequel.com/dbs/" <> dbId <> "/graphql" + +loadWriteToken :: IO Text +loadWriteToken = + lookupEnv "AIRSEQUEL_API_TOKEN" <&> (fromMaybe "" >>> T.pack) + +formatRepo :: ExtendedRepo -> Text +formatRepo extendedRepo = + let + repo = core extendedRepo + in + "\n\n" + <> ("repo_url: " <> show (GH.repoHtmlUrl repo) <> "\n") + <> ( "description: " + <> (repo & GH.repoDescription & fromMaybe "") + <> "\n" + ) + <> ("homepage: " <> (repo & GH.repoHomepage & fromMaybe "") <> "\n") + <> ( "language: " + <> (repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "") + <> "\n" + ) + <> ("stargazers_count: " <> show (GH.repoStargazersCount repo) <> "\n") + <> ("commits_count: " <> show (commitsCount extendedRepo) <> "\n") + <> ("open_issues_count: " <> show (GH.repoOpenIssuesCount repo) <> "\n") + <> ( "created_at: " + <> (repo & GH.repoCreatedAt <&> show & fromMaybe "") + <> "\n" + ) + <> ( "updated_at: " + <> (repo & GH.repoUpdatedAt <&> show & fromMaybe "") + <> "\n" + ) + +-- queryRepos :: Text +-- queryRepos = +-- [r| +-- query reposQuery { +-- repos( limit: 100 ) { +-- rowid +-- id +-- name +-- language +-- url +-- stars +-- updated_utc +-- } +-- } +-- |] + +-- | Query @Link@ header with @rel=last@ from the request headers +getLastUrl :: Response a -> Maybe URI +getLastUrl req = do + let + isRelNext :: Link uri -> Bool + isRelNext = elem relNextLinkParam . linkParams + + relNextLinkParam :: (LinkParam, Text) + relNextLinkParam = (Rel, "last") + + linkHeader <- lookup "Link" (responseHeaders req) + links <- parseLinkHeaderBS linkHeader + nextURI <- find isRelNext links + pure $ href nextURI + +{- | Workaround to get the number of commits for a repo +| https://stackoverflow.com/a/70610670 +-} +getNumberOfCommits :: Repo -> IO Integer +getNumberOfCommits repo = do + let apiEndpoint = + "https://api.github.com/repos/" + <> (repo & GH.repoOwner & GH.simpleOwnerLogin & untagName) + <> "/" + <> (repo & GH.repoName & untagName) + <> "/commits?per_page=1" + + manager <- newManager tlsManagerSettings + initialRequest <- parseRequest $ T.unpack apiEndpoint + let request = + initialRequest + { method = "HEAD" + , requestHeaders = [("User-Agent", "haskell-repos")] + } + + response <- httpLbs request manager + + getLastUrl response + <&> (show >>> T.pack >>> T.splitOn "&page=") + >>= lastMay + >>= readMaybe + & fromMaybe 0 + & pure + +insertRepoQuery :: ExtendedRepo -> Text +insertRepoQuery extendedRepo = + let + repo = extendedRepo.core + commitsCount = extendedRepo.commitsCount + getTimestamp field = + repo + & field + <&> iso8601Show + & fromMaybe "" + & T.pack + in + [r| + mutation { + insert_repos(objects: [ + { + owner: "{{name}}" + name: "{{name}}" + description: "{{description}}" + homepage: "{{homepage}}" + language: "{{language}}" + stargazers_count: {{stargazers_count}} + open_issues_count: {{open_issues_count}} + commits_count: {{commits_count}} + created_utc: "{{created_utc}}" + updated_utc: "{{updated_utc}}" + } + ]) { + affected_rows + } + } + |] + & T.replace + "{{owner}}" + (repo & GH.repoOwner & GH.simpleOwnerLogin & untagName) + & T.replace "{{name}}" (repo & GH.repoName & untagName) + & T.replace + "{{description}}" + (repo & GH.repoDescription & fromMaybe "") + & T.replace "{{homepage}}" (repo & GH.repoHomepage & fromMaybe "") + & T.replace + "{{language}}" + (repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "") + & T.replace "{{stargazers_count}}" (repo & GH.repoWatchersCount & show) + & T.replace + "{{open_issues_count}}" + (repo & GH.repoOpenIssuesCount & show) + & T.replace "{{commits_count}}" (show commitsCount) + & T.replace "{{created_utc}}" (getTimestamp GH.repoCreatedAt) + & T.replace "{{updated_utc}}" (getTimestamp GH.repoUpdatedAt) + +-- | Save the repo in Airsequel via a POST request executed by http-client +saveRepoInAirsequel :: ExtendedRepo -> IO () +saveRepoInAirsequel extendedRepo = do + dbEndpoint <- loadDbEndpoint + writeToken <- loadWriteToken + + manager <- newManager tlsManagerSettings + + let requestObject = object ["query" .= insertRepoQuery extendedRepo] + initialRequest <- parseRequest $ T.unpack dbEndpoint + let request = + initialRequest + { method = "POST" + , requestHeaders = + [ ("Content-Type", "application/json") + , ("Authorization", "Bearer " <> writeToken & encodeUtf8) + ] + , requestBody = RequestBodyLBS $ encode requestObject + } + + putLByteString $ encode requestObject + + response <- httpLbs request manager + print response main :: IO () -main = someFunc +main = do + possibleRepo <- GH.github () GH.repositoryR "ad-si" "haskell-repos" + case possibleRepo of + Left error -> + putErrText $ "Error: " <> show error + Right repo -> do + commitsCount <- getNumberOfCommits repo + let extendedRepo = + ExtendedRepo + { core = repo + , commitsCount = commitsCount + } + putText $ formatRepo extendedRepo + saveRepoInAirsequel extendedRepo diff --git a/haskell-repos.cabal b/haskell-repos.cabal index bbbb84c..cca6394 100644 --- a/haskell-repos.cabal +++ b/haskell-repos.cabal @@ -1,74 +1,79 @@ -cabal-version: 2.4 -name: haskell-repos -version: 0.0.0.0 -synopsis: See README for more info -description: See README for more info -homepage: https://github.com/ad-si/haskell-repos -bug-reports: https://github.com/ad-si/haskell-repos/issues -license: NONE -author: Adrian Sieber -maintainer: Adrian Sieber -copyright: 2020 Adrian Sieber -category: Utility -build-type: Simple -extra-doc-files: README.md - CHANGELOG.md -tested-with: GHC == 8.8.3 +cabal-version: 2.2 -source-repository head - type: git - location: https://github.com/ad-si/haskell-repos.git +-- This file has been generated from package.yaml by hpack version 0.36.0. +-- +-- see: https://github.com/sol/hpack +-- +-- hash: a056babab5004ab1b5703fca4e303777b0644be275bee04bb4ef61ac2cfb7ffb -common common-options - build-depends: base ^>= 4.13.0.0 - , protolude - - mixins: base hiding (Prelude) - , protolude (Protolude as Prelude) - - ghc-options: -Wall - -Wcompat - -Widentities - -Wincomplete-uni-patterns - -Wincomplete-record-updates - if impl(ghc >= 8.0) - ghc-options: -Wredundant-constraints - if impl(ghc >= 8.2) - ghc-options: -fhide-source-paths - if impl(ghc >= 8.4) - ghc-options: -Wmissing-export-lists - -Wpartial-fields - if impl(ghc >= 8.8) - ghc-options: -Wmissing-deriving-strategies +name: haskell-repos +version: 0.1.0.0 +synopsis: Download all Haskell repositories from GitHub and save them to Airsequel - default-language: Haskell2010 - default-extensions: LambdaCase - OverloadedStrings - RecordWildCards - ScopedTypeVariables - StandaloneDeriving - TupleSections +description: For more information check out the readme. +category: Web +homepage: https://github.com/ad-si/haskell-repos#readme +author: Adrian Sieber +maintainer: mail@adriansieber.com +copyright: Adrian Sieber +license: AGPL-3.0-or-later +build-type: Simple +extra-source-files: + readme.md library - import: common-options - hs-source-dirs: src - exposed-modules: HaskellRepos + exposed-modules: + Lib + other-modules: + Paths_haskell_repos + autogen-modules: + Paths_haskell_repos + hs-source-dirs: + source + default-extensions: + ImportQualifiedPost + NoImplicitPrelude + OverloadedRecordDot + OverloadedStrings + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans + build-depends: + aeson + , base + , github + , http-client + , http-client-tls + , http-link-header + , network-uri + , protolude + , raw-strings-qq + , text + , time + default-language: GHC2021 executable haskell-repos - import: common-options - hs-source-dirs: app - main-is: Main.hs - build-depends: haskell-repos - ghc-options: -threaded - -rtsopts - -with-rtsopts=-N - -test-suite haskell-repos-test - import: common-options - type: exitcode-stdio-1.0 - hs-source-dirs: test - main-is: Spec.hs - build-depends: haskell-repos - ghc-options: -threaded - -rtsopts - -with-rtsopts=-N + main-is: Main.hs + other-modules: + Paths_haskell_repos + autogen-modules: + Paths_haskell_repos + hs-source-dirs: + app + default-extensions: + ImportQualifiedPost + NoImplicitPrelude + OverloadedRecordDot + OverloadedStrings + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans + build-depends: + aeson + , base + , github + , http-client + , http-client-tls + , http-link-header + , network-uri + , protolude + , raw-strings-qq + , text + , time + default-language: GHC2021 diff --git a/package.yaml b/package.yaml new file mode 100644 index 0000000..74356de --- /dev/null +++ b/package.yaml @@ -0,0 +1,57 @@ +name: haskell-repos +version: 0.1.0.0 +synopsis: | + Download all Haskell repositories from GitHub and save them to Airsequel +description: | + For more information check out the readme. +homepage: https://github.com/ad-si/haskell-repos#readme +license: AGPL-3.0-or-later +author: Adrian Sieber +maintainer: mail@adriansieber.com +copyright: Adrian Sieber +category: Web + +extra-source-files: + - readme.md + +dependencies: + - aeson + - base + - protolude + - raw-strings-qq + - text + - http-link-header + - network-uri + - time + +default-extensions: + - ImportQualifiedPost + - NoImplicitPrelude + - OverloadedRecordDot + - OverloadedStrings + +ghc-options: + - -Wall + - -Wcompat + - -Wincomplete-record-updates + - -Wincomplete-uni-patterns + - -Wredundant-constraints + - -fno-warn-orphans + +library: + language: GHC2021 + source-dirs: source + dependencies: + - http-client-tls + - http-client + - github + +executables: + haskell-repos: + language: GHC2021 + source-dirs: app + main: Main.hs + dependencies: + - http-client-tls + - http-client + - github diff --git a/source/Lib.hs b/source/Lib.hs new file mode 100644 index 0000000..01c188a --- /dev/null +++ b/source/Lib.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Lib where + +import Protolude (Text, (<>)) + +hello :: Text -> Text +hello name = + "Hello " <> name <> "!" diff --git a/src/HaskellRepos.hs b/src/HaskellRepos.hs deleted file mode 100644 index 22ba542..0000000 --- a/src/HaskellRepos.hs +++ /dev/null @@ -1,15 +0,0 @@ -{- | -Copyright: (c) 2020 Adrian Sieber -SPDX-License-Identifier: NONE -Maintainer: Adrian Sieber - -See README for more info --} - -module HaskellRepos - ( someFunc - ) where - - -someFunc :: IO () -someFunc = putStrLn ("someFunc" :: String) diff --git a/stack.yaml b/stack.yaml index c1800cc..7f7722a 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1 +1,5 @@ -resolver: lts-15.5 +resolver: lts-21.21 +packages: + - . +extra-deps: + - github-0.29 diff --git a/test/Spec.hs b/test/Spec.hs deleted file mode 100644 index 1a44846..0000000 --- a/test/Spec.hs +++ /dev/null @@ -1,5 +0,0 @@ -module Main (main) where - - -main :: IO () -main = putStrLn ("Test suite is not implemented" :: String)