diff --git a/.github/actions/clean_up_package_registry/action.yml b/.github/actions/clean_up_package_registry/action.yml
new file mode 100644
index 0000000000000..a201adf9238c6
--- /dev/null
+++ b/.github/actions/clean_up_package_registry/action.yml
@@ -0,0 +1,65 @@
+# Copyright (C) 2023 The ORT Project Authors (see )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+# License-Filename: LICENSE
+
+name: "Delete old non-release packages from the GitHub package registry"
+description: "Delete older package versions, keeping only the configured number of most recent ones"
+author: "The ORT Project Authors"
+
+inputs:
+  registry:
+    description: "GitHub container registry"
+    default: "ghcr.io"
+  token:
+    description: "GitHub token"
+    required: true
+  keep:
+    description: "Number of non-release package versions to keep"
+    required: false
+    default: "3"
+  packages:
+    description: "Names of the packages to be cleaned up"
+    required: true
+  dry-run:
+    description: "Perform a dry run that only reports what would be deleted"
+    default: "true"
+  ignore-skip-tagged:
+    description: "DANGEROUS: Also delete package versions that would normally be skipped (release and 'latest' tags)"
+    default: "false"
+
+runs:
+  using: "composite"
+
+  steps:
+    - name: Install Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.10"
+        cache: "pip"
+
+    - name: Execute the operation
+      id: check_image
+      shell: bash
+      env:
+        INPUT_REGISTRY: ${{ inputs.registry }}
+        INPUT_TOKEN: ${{ inputs.token }}
+        INPUT_KEEP: ${{ inputs.keep }}
+        INPUT_PACKAGES: ${{ inputs.packages }}
+        INPUT_DRY_RUN: ${{ inputs.dry-run }}
+        INPUT_IGNORE_SKIP_TAGGED: ${{ inputs.ignore-skip-tagged }}
+      run: |
+        pip install -q -U pip requests rich
+        python ./.github/actions/clean_up_package_registry/clean_up_package_registry.py
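Note (commentary, not part of the patch): the composite action above passes each inputs.* value to the Python script as an INPUT_* environment variable; the script has no command-line flags of its own. The minimal sketch below shows that contract, assuming the variable names from the env: block above; the read_bool helper is purely illustrative and does not exist in the patch.

    import os

    def read_bool(name: str, default: str = "false") -> bool:
        # Interpret an INPUT_* environment variable as a boolean flag.
        return os.getenv(name, default).strip().lower() == "true"

    dry_run = read_bool("INPUT_DRY_RUN", default="true")   # action default is "true"
    ignore_skip_tagged = read_bool("INPUT_IGNORE_SKIP_TAGGED")
    keep = int(os.getenv("INPUT_KEEP", "3"))
    packages = [p for p in os.getenv("INPUT_PACKAGES", "").split(",") if p]

The script itself, which follows, parses these variables slightly more leniently: anything other than the literal string "false" counts as a dry run.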
diff --git a/.github/actions/clean_up_package_registry/clean_up_package_registry.py b/.github/actions/clean_up_package_registry/clean_up_package_registry.py
new file mode 100644
index 0000000000000..dd82f15fd4e11
--- /dev/null
+++ b/.github/actions/clean_up_package_registry/clean_up_package_registry.py
@@ -0,0 +1,264 @@
+# Copyright (C) 2023 The ORT Project Authors (see )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+# License-Filename: LICENSE
+
+
+import os
+import re
+import sys
+from time import sleep
+from typing import Any
+from urllib.parse import parse_qs, urlparse
+
+import requests
+from requests.structures import CaseInsensitiveDict
+from rich import print
+
+"""Use the current GitHub API to list the packages in the registry and remove
+all but the last 3 (or a custom number of) versions of each package.
+
+Reference: https://docs.github.com/en/rest/packages/packages?apiVersion=2022-11-28#about-github-packages
+"""
+
+dry_run: bool = os.getenv("INPUT_DRY_RUN") != "false"
+input_keep: str | None = os.getenv("INPUT_KEEP")
+org = os.getenv("GITHUB_REPOSITORY_OWNER")
+input_packages: str | None = os.getenv("INPUT_PACKAGES")
+token = os.getenv("INPUT_TOKEN")
+ignore_skip: bool = os.getenv("INPUT_IGNORE_SKIP_TAGGED") == "true"
+
+if not input_packages:
+    print(":cross_mark: No packages input.")
+    sys.exit(1)
+
+packages = input_packages.split(",")
+keep: int = int(input_keep) if input_keep else 0
+
+headers = {
+    "Accept": "application/vnd.github+json",
+    "Authorization": f"Bearer {token}",
+    "X-GitHub-Api-Version": "2022-11-28",
+}
+
+# Assemble the organization packages URL.
+pkg_url: str = f"https://api.github.com/orgs/{org}/packages"
+
+# Package version URLs that will be deleted.
+urls_to_be_deleted: list = []
+
+# Layer digests excluded from deletion.
+exclusion_list: list = []
+
+
+def get_last_page(headers: CaseInsensitiveDict[str]) -> int:
+    """
+    Get the last page number from the headers.
+
+    Args:
+        headers (CaseInsensitiveDict[str]): The headers containing the link information.
+
+    Returns:
+        int: The last page number.
+    """
+    if "link" not in headers:
+        return 1
+
+    links = headers["link"].split(", ")
+
+    last_page = None
+    for link in links:
+        if 'rel="last"' in link:
+            last_page = link
+            break
+
+    if last_page:
+        parsed_url = urlparse(
+            last_page[last_page.index("<") + 1 : last_page.index(">")]
+        )
+        return int(parse_qs(parsed_url.query)["page"][0])
+
+    return 1
+
+
+def get_package_layers(package: str, tag: str) -> None:
+    """Add the layer digests of a tagged multi-arch image to the exclusion list."""
+    # Get a temporary ghcr.io pull token for the package.
+    ghcr_headers = {"Authorization": f"Bearer {token}"}
+    auth_response = requests.get(
+        f"https://ghcr.io/token?service=ghcr.io&scope=repository:{org}/{package}:pull",
+        headers=ghcr_headers,
+    )
+    if auth_response.status_code != 200:
+        print(f":cross_mark: Failed to get a ghcr.io token: {auth_response.status_code}, {auth_response.text}")
+        return
+
+    access_token = auth_response.json()["token"]
+
+    ghcr_headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Accept": "application/vnd.oci.image.index.v1+json",
+    }
+    url = f"https://ghcr.io/v2/{org}/{package}/manifests/{tag}"
+    if "DEBUG" in os.environ:
+        print(url)
+    response = requests.get(url, headers=ghcr_headers)
+
+    if response.status_code != 200:
+        print(f":cross_mark: Failed to get manifest: {response.status_code}, {response.text}")
+        return
+
+    main_manifest: dict[str, Any] = response.json()
+
+    for manifest in main_manifest.get("manifests", []):
+        if "platform" in manifest and manifest["platform"]["architecture"] in [
+            "amd64",
+            "arm64",
+        ]:
+            ghcr_headers["Accept"] = "application/vnd.oci.image.manifest.v1+json"
+            url = (
+                f"https://ghcr.io/v2/{org}/{package}/manifests/{manifest['digest']}"
+            )
+            response = requests.get(url, headers=ghcr_headers)
+            if "DEBUG" in os.environ:
+                from rich.pretty import pprint
+
+                pprint(response.json())
+            layer_manifest = response.json()
+            if "layers" in layer_manifest:
+                for layer in layer_manifest["layers"]:
+                    exclusion_list.append(layer["digest"])
+                    print(f":locked: Added digest to exclusion list {layer['digest']}")
+
+
+def delete_packages():
+    """
+    Deletes packages from the package registry.
+
+    This function iterates over the packages and deletes them from the package registry.
+    It retrieves the versions of each package, sorts them by ID, and deletes the excess versions
+    based on the specified 'keep' value. Versions tagged as 'latest' or as a release are skipped,
+    unless 'ignore-skip-tagged' is set.
+    The function prints the status of each deletion operation and the total number of versions deleted.
+    """
+    # Number of package versions deleted.
+    packages_deleted: int = 0
+
+    for package in packages:
+        # The start page is 1, as stated by the documentation.
+        url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions?page=1&per_page=50"
+
+        # Fetch only the headers to determine the number of pages.
+        response = requests.head(url, headers=headers)
+        pages: int = get_last_page(response.headers)
+
+        for page in range(pages, 0, -1):
+            print(f"Page: {page}")
+            url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions?page={page}&per_page=50"
+
+            try:
+                response = requests.get(url, headers=headers)
+            except requests.exceptions.RequestException as e:
+                print(f":cross_mark: Connection Error. {e}")
+                sys.exit(1)
+
+            if response.status_code == 404:
+                print(f":cross_mark: Not found - {url}")
+                continue
+            elif response.status_code == 401:
+                print(f":cross_mark: Requires authentication - {url}")
+                sys.exit(1)
+            elif response.status_code == 403:
+                print(f":cross_mark: Forbidden - {url}")
+                sys.exit(1)
+
+            # Sort all images by id, newest first.
+            images = sorted(response.json(), key=lambda x: x["id"], reverse=True)
+
+            # Keep only the newest 'keep' versions; mark everything else for deletion.
+            if len(images) > keep:
+                for image in images if page != 1 else images[keep:]:
+                    url = f"{pkg_url}/container/{package.replace('/', '%2F')}/versions/{image['id']}"
+
+                    # Never remove 'latest' or release tagged images, unless explicitly requested.
+                    if not ignore_skip and restrict_delete_tags(image["metadata"]["container"]["tags"]):
+                        print(
+                            f":package: Skip tagged {package} id {image['id']} tags {image['metadata']['container']['tags']}"
+                        )
+                        # Mark sublayers to not be deleted.
+                        get_package_layers(
+                            package, image["metadata"]["container"]["tags"][0]
+                        )
+                        continue
+
+                    urls_to_be_deleted.append(url)
+                    tags = image["metadata"]["container"]["tags"]
+
+                    if tags:
+                        print(
+                            f":white_heavy_check_mark: Marked tagged package {package} version id {image['id']}"
+                            f" with tags {tags} for deletion."
+                        )
+                    else:
+                        print(
+                            f":white_heavy_check_mark: Marked untagged package {package} version id {image['id']}"
+                            " for deletion."
+                        )
+
+            # Slow down to avoid hitting the API rate limit.
+            sleep(1)
+
+    # Effectively delete the packages.
+    if not dry_run:
+        for url in urls_to_be_deleted:
+            response = requests.delete(url, headers=headers)
+            if response.status_code == 404:
+                print(f":cross_mark: Failed to delete package {url}.")
+                continue
+            elif response.status_code == 401:
+                print(f":cross_mark: Requires authentication - {url}")
+                sys.exit(1)
+            elif response.status_code == 403:
+                print(f":cross_mark: Forbidden - {url}")
+                sys.exit(1)
+
+            packages_deleted += 1
+            # Slow down to avoid hitting the API rate limit.
+            sleep(1)
+
+    print(f":package: Deleted {packages_deleted} package versions in the organization.")
+
+
+def restrict_delete_tags(tags: list) -> bool:
+    """Return True if the tags mark a version that must not be deleted ('latest' or a release)."""
+    if not tags:
+        return False
+    for tag in tags:
+        if tag == "latest":
+            return True
+        elif ".sha." in tag:
+            return False
+        elif "SNAPSHOT" in tag:
+            return False
+        else:
+            pattern = re.compile(r"^\d+\.\d+\.\d+$")
+            if pattern.match(tag):
+                return True
+    return False
+
+
+if __name__ == "__main__":
+    delete_packages()
diff --git a/.github/actions/clean_up_package_registry/requirements.txt b/.github/actions/clean_up_package_registry/requirements.txt
new file mode 100644
index 0000000000000..65f11586b3d18
--- /dev/null
+++ b/.github/actions/clean_up_package_registry/requirements.txt
@@ -0,0 +1,9 @@
+certifi==2023.7.22
+charset-normalizer==3.3.2
+idna==3.4
+markdown-it-py==3.0.0
+mdurl==0.1.2
+Pygments==2.16.1
+requests==2.31.0
+rich==13.6.0
+urllib3==2.1.0
diff --git a/.github/workflows/clean_up_package_registry.yml b/.github/workflows/clean_up_package_registry.yml
new file mode 100644
index 0000000000000..888fd89fc365d
--- /dev/null
+++ b/.github/workflows/clean_up_package_registry.yml
@@ -0,0 +1,50 @@
+# Copyright (C) 2023 The ORT Project Authors (see )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+# License-Filename: LICENSE
+
+name: Clean up packages in the GitHub package registry
+
+on:
+  workflow_dispatch:
+  # Runs every Sunday at midnight.
+  # schedule:
+  #   - cron: "0 0 * * 0"
+
+jobs:
+  clean_all:
+    name: Clean up package registry
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/clean_up_package_registry
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          dry-run: 'false'
+          ignore-skip-tagged: 'true'
+          packages: 'ort-extended'
+          # ort-minimal
+          # ort/base
+          # ort/rust
+          # ort/golang
+          # ort/nodejs
+          # ort/python
+          # ort/ruby
+          # ort/dotnet
+          # ort/dart
+          # ort/android
+          # ort/haskell
+          # ort/swift
+          # ort/scala
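Note (commentary, not part of the patch): before enabling the schedule or flipping dry-run to 'false', the clean-up script can be exercised locally with the same INPUT_* variables the action sets. A minimal dry-run sketch follows; the organization name and the GITHUB_TOKEN environment variable are assumptions to adapt, and nothing is deleted while INPUT_DRY_RUN is "true".

    import os
    import subprocess

    env = {
        **os.environ,
        "GITHUB_REPOSITORY_OWNER": "oss-review-toolkit",  # assumption: the ORT organization
        "INPUT_TOKEN": os.environ["GITHUB_TOKEN"],        # assumption: a token exported beforehand
        "INPUT_PACKAGES": "ort-extended",
        "INPUT_KEEP": "3",
        "INPUT_DRY_RUN": "true",                          # report only, delete nothing
        "INPUT_IGNORE_SKIP_TAGGED": "false",
    }

    subprocess.run(
        ["python", ".github/actions/clean_up_package_registry/clean_up_package_registry.py"],
        env=env,
        check=True,
    )

Listing versions only needs read access to packages; actually deleting them additionally requires delete rights on the packages (for a classic personal access token, the delete:packages scope).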