From 2a94f292c21f44dc3fdc8b7439df1a00786b5b1a Mon Sep 17 00:00:00 2001 From: Andrew Nova Date: Thu, 1 Feb 2024 16:37:56 -0500 Subject: [PATCH 1/3] Support determining activity date from default branch rather than most recent push activity_method defaults to the current behavior: repo.pushed_at however, I think the most recent commit to the default branch is another valid way of determining staleness/activity. Our organization uses dependabot, which continues to open PRs and push commits to repositories which have not had any human contribution in more than a year. With this change, we will easily be able to surface repositories that otherwise would be missed due to bot activity keeping them "fresh". --- README.md | 16 ++++++----- stale_repos.py | 40 ++++++++++++++++++++++----- test_stale_repos.py | 66 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index c634a15..376fc69 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ This project identifies and reports repositories with no activity for configurable amount of time, in order to surface inactive repos to be considered for archival. The current approach assumes that the repos that you want to evaluate are available in a single GitHub organization. -For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable amount of days. +For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable amount of days (can also be configured to determine activity based on default branch). This action was developed by GitHub so that we can keep our open source projects well maintained, and it was made open source in the hopes that it would help you too! We are actively using and are archiving things in batches since there are many repositories on our report. 
@@ -30,12 +30,13 @@ Below are the allowed configuration options: | field | required | default | description | |-----------------------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `GH_TOKEN` | true | | The GitHub Token used to scan repositories. Must have read access to all repositories you are interested in scanning | -| `ORGANIZATION` | false | | The organization to scan for stale repositories. If no organization is provided, this tool will search through repositories owned by the GH_TOKEN owner | -| `INACTIVE_DAYS` | true | | The number of days used to determine if repository is stale, based on `push` events | -| `EXEMPT_TOPICS` | false | | Comma separated list of topics to exempt from being flagged as stale | -| `EXEMPT_REPOS` | false | | Comma separated list of repositories to exempt from being flagged as stale. Supports Unix shell-style wildcards. ie. `EXEMPT_REPOS = "stale-repos,test-repo,conf-*"` | -| `GH_ENTERPRISE_URL` | false | `""` | URL of GitHub Enterprise instance to use for auth instead of github.com | +| `GH_TOKEN` | true | | The GitHub Token used to scan repositories. Must have read access to all repositories you are interested in scanning | +| `ORGANIZATION` | false | | The organization to scan for stale repositories. If no organization is provided, this tool will search through repositories owned by the GH_TOKEN owner | +| `INACTIVE_DAYS` | true | | The number of days used to determine if repository is stale, based on `push` events | +| `EXEMPT_TOPICS` | false | | Comma separated list of topics to exempt from being flagged as stale | +| `EXEMPT_REPOS` | false | | Comma separated list of repositories to exempt from being flagged as stale. Supports Unix shell-style wildcards. ie. 
`EXEMPT_REPOS = "stale-repos,test-repo,conf-*"` | +| `GH_ENTERPRISE_URL` | false | `""` | URL of GitHub Enterprise instance to use for auth instead of github.com | +| `ACTIVITY_METHOD` | false | `"pushed"` | How to get the last active date of the repository. Defaults to `pushed`, which is the last time any branch had a push. Can also be set to `default_branch_updated` to instead measure from the latest commit on the default branch (good for filtering out dependabot activity) | ### Example workflow @@ -62,6 +63,7 @@ jobs: ORGANIZATION: ${{ secrets.ORGANIZATION }} EXEMPT_TOPICS: "keep,template" INACTIVE_DAYS: 365 + ACTIVITY_METHOD: "pushed" # This next step updates an existing issue. If you want a new issue every time, remove this step and remove the `issue-number: ${{ env.issue_number }}` line below. - name: Check for the stale report issue diff --git a/stale_repos.py b/stale_repos.py index 7f863ea..8a856f4 100755 --- a/stale_repos.py +++ b/stale_repos.py @@ -125,16 +125,15 @@ def get_inactive_repos(github_connection, inactive_days_threshold, organization) if is_repo_exempt(repo, exempt_repos, exempt_topics): continue - # Get last push date - last_push_str = repo.pushed_at # type: ignore - if last_push_str is None: + # Get last active date + active_date = get_active_date(repo) + if active_date is None: continue - last_push = parse(last_push_str) - last_push_disp_date = last_push.date().isoformat() - days_inactive = (datetime.now(timezone.utc) - last_push).days + active_date_disp = active_date.date().isoformat() + days_inactive = (datetime.now(timezone.utc) - active_date).days if days_inactive > int(inactive_days_threshold) and not repo.archived: - inactive_repos.append((repo.html_url, days_inactive, last_push_disp_date)) + inactive_repos.append((repo.html_url, days_inactive, active_date_disp)) print(f"{repo.html_url}: {days_inactive} days inactive") # type: ignore if organization: print(f"Found {len(inactive_repos)} stale repos in {organization}") @@ -143,6 +142,33 @@ def
get_inactive_repos(github_connection, inactive_days_threshold, organization) return inactive_repos +def get_active_date(repo): + """Get the last activity date of the repository. + + Args: + repo: A Github repository object. + + Returns: + A date object representing the last activity date of the repository. + """ + activity_method = os.getenv("ACTIVITY_METHOD", "pushed") + if activity_method == "default_branch_updated": + commit = repo.branch(repo.default_branch).commit + active_date = parse(commit.commit.as_dict()['committer']['date']) + elif activity_method == "pushed": + last_push_str = repo.pushed_at # type: ignored + if last_push_str is None: + return None + active_date = parse(last_push_str) + else: + raise ValueError(f""" + ACTIVITY_METHOD environment variable has unsupported value: '{activity_method}'. + Allowed values are: 'pushed' and 'default_branch_updated' + """ + ) + return active_date + + def write_to_markdown(inactive_repos, inactive_days_threshold, file=None): """Write the list of inactive repos to a markdown file. diff --git a/test_stale_repos.py b/test_stale_repos.py index b96fcd5..266eb01 100644 --- a/test_stale_repos.py +++ b/test_stale_repos.py @@ -349,6 +349,72 @@ def test_get_inactive_repos_with_no_organization_set(self): ] assert inactive_repos == expected_inactive_repos + @patch.dict(os.environ, {"ACTIVITY_METHOD": "default_branch_updated"}) + def test_get_inactive_repos_with_default_branch_updated(self): + """Test that get_inactive_repos works with alternative method. + + This test uses a MagicMock object to simulate a GitHub API connection with a list + of repositories with varying levels of inactivity. It then calls the get_inactive_repos + function with the mock GitHub API connection, a threshold of 30 days, and the + default_branch_updated setting. 
It mocks the branch method on the repo object to return + the necessary data for the active_date determination Finally, it checks that the function + returns the expected list of inactive repos. + + """ + # Create a MagicMock object to simulate a GitHub API connection + mock_github = MagicMock() + + # Create a MagicMock object to simulate the organization object returned by the + # GitHub API connection + mock_org = MagicMock() + + # Create MagicMock objects to simulate the repositories returned by the organization object + forty_days_ago = datetime.now(timezone.utc) - timedelta(days=40) + twenty_days_ago = datetime.now(timezone.utc) - timedelta(days=20) + mock_repo1 = MagicMock( + html_url="https://github.com/example/repo1", + default_branch="master", + archived=False, + ) + mock_repo1.topics().names = [] + mock_repo1.branch().commit.commit.as_dict = MagicMock( + return_value={"committer": {"date": twenty_days_ago.isoformat()}} + ) + mock_repo2 = MagicMock( + html_url="https://github.com/example/repo2", + archived=False, + ) + mock_repo2.topics().names = [] + mock_repo2.branch().commit.commit.as_dict = MagicMock( + return_value={"committer": {"date": forty_days_ago.isoformat()}} + ) + mock_repo3 = MagicMock( + html_url="https://github.com/example/repo3", + archived=True, + ) + mock_repo3.topics().names = [] + mock_repo3.branch().commit.commit.as_dict = MagicMock( + return_value={"committer": {"date": forty_days_ago.isoformat()}} + ) + + # Set up the MagicMock objects to return the expected values when called + mock_github.organization.return_value = mock_org + mock_org.repositories.return_value = [ + mock_repo1, + mock_repo2, + mock_repo3, + ] + + # Call the get_inactive_repos function with the mock GitHub API + # connection and a threshold of 30 days + inactive_repos = get_inactive_repos(mock_github, 30, "example") + + # Check that the function returns the expected list of inactive repos + expected_inactive_repos = [ + ("https://github.com/example/repo2", 40, 
forty_days_ago.date().isoformat()), + ] + assert inactive_repos == expected_inactive_repos + class WriteToMarkdownTestCase(unittest.TestCase): """ From c26012bfe04b01a14ccafc050c5f0204e59bb88b Mon Sep 17 00:00:00 2001 From: Andrew Nova <33638031+anovadox@users.noreply.github.com> Date: Thu, 1 Feb 2024 23:40:50 -0500 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Zack Koppert --- README.md | 2 +- stale_repos.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 376fc69..e33add1 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ This project identifies and reports repositories with no activity for configurable amount of time, in order to surface inactive repos to be considered for archival. The current approach assumes that the repos that you want to evaluate are available in a single GitHub organization. -For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable amount of days (can also be configured to determine activity based on default branch). +For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable amount of days (can also be configured to determine activity based on default branch; see `ACTIVITY_METHOD` for more details). This action was developed by GitHub so that we can keep our open source projects well maintained, and it was made open source in the hopes that it would help you too! We are actively using and are archiving things in batches since there are many repositories on our report. diff --git a/stale_repos.py b/stale_repos.py index 8a856f4..2414315 100755 --- a/stale_repos.py +++ b/stale_repos.py @@ -151,7 +151,7 @@ def get_active_date(repo): Returns: A date object representing the last activity date of the repository.
""" - activity_method = os.getenv("ACTIVITY_METHOD", "pushed") + activity_method = os.getenv("ACTIVITY_METHOD", "pushed").lower() if activity_method == "default_branch_updated": commit = repo.branch(repo.default_branch).commit active_date = parse(commit.commit.as_dict()['committer']['date']) From c260ab2e2924f571f0773c27469c8b6ec2e63290 Mon Sep 17 00:00:00 2001 From: Zack Koppert Date: Fri, 2 Feb 2024 09:41:48 -0800 Subject: [PATCH 3/3] run black formatter Signed-off-by: Zack Koppert --- stale_repos.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stale_repos.py b/stale_repos.py index 2414315..fd2c597 100755 --- a/stale_repos.py +++ b/stale_repos.py @@ -154,18 +154,19 @@ def get_active_date(repo): activity_method = os.getenv("ACTIVITY_METHOD", "pushed").lower() if activity_method == "default_branch_updated": commit = repo.branch(repo.default_branch).commit - active_date = parse(commit.commit.as_dict()['committer']['date']) + active_date = parse(commit.commit.as_dict()["committer"]["date"]) elif activity_method == "pushed": last_push_str = repo.pushed_at # type: ignored if last_push_str is None: return None active_date = parse(last_push_str) else: - raise ValueError(f""" + raise ValueError( + f""" ACTIVITY_METHOD environment variable has unsupported value: '{activity_method}'. Allowed values are: 'pushed' and 'default_branch_updated' """ - ) + ) return active_date