From 19aa56501d3317dcb1bb8f4346ea3535f3d446dd Mon Sep 17 00:00:00 2001 From: Javier Maestro Date: Fri, 13 Sep 2024 00:15:04 +0100 Subject: [PATCH] feat: support Debian flat repos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes issue #56 Follow-up and credit to @alexconrey (PR #55), @ericlchen1 (PR #64) and @benmccown (PR #67) for their work on similar PRs that I've reviewed and drawn some inspiration from to create "one 💍 PR to merge them all" 😅 Problem: Debian has two types of repos: "canonical" and "flat". Each has a different sources.list syntax: "canonical": ``` deb uri distribution [component1] [component2] [...] ``` (see https://wiki.debian.org/DebianRepository/Format#Overview) "flat": ``` deb uri directory/ ``` (see https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format) A flat repository does not use the dists hierarchy of directories, and instead places meta index and indices directly into the archive root (or some part below it). Thus, the URL logic in _fetch_package_index() is incorrect for these repos and it always fails to fetch the Packages index. Solution: Just use the Debian sources.list convention in the 'sources' section of the manifest to add canonical and flat repos. Depending on whether the channel has one directory that ends in '/' or a (dist, component, ...) structure, _fetch_package_index() and other internal logic will know whether the source is a canonical or a flat repo. 
For example: ``` version: 1 sources: # canonical repo - channel: bullseye main contrib url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z # flat repos, note the trailing '/' and the lack of distribution or components - channel: bullseye-cran40/ url: https://cloud.r-project.org/bin/linux/debian - channel: ubuntu2404/x86_64/ url: https://developer.download.nvidia.com/compute/cuda/repos archs: - amd64 packages: - bash - r-mathlib - nvidia-container-toolkit-base ``` Disregarding the "mixing" of Ubuntu and Debian repos for the purpose of the example, this manifest shows that you can mix canonical and flat repos, and that you can mix multi-arch and single-arch repos, whether flat or canonical. You will still have the same problems as before with packages that only exist for one architecture and/or repos that only support one architecture. In those cases, simply separate the repos and packages into their own manifests. NOTE: The NVIDIA CUDA repos don't follow Debian specs and have issues with the package filenames. This is addressed in a separate commit. 
--- WORKSPACE.bazel | 11 +++ apt/private/package_index.bzl | 92 +++++++++++++++---- apt/private/resolve.bzl | 28 ++++-- examples/debian_flat_repo/BUILD.bazel | 48 ++++++++++ .../bullseye_rproject.lock.json | 14 +++ .../debian_flat_repo/bullseye_rproject.yaml | 20 ++++ .../debian_flat_repo/test_linux_amd64.yaml | 9 ++ 7 files changed, 196 insertions(+), 26 deletions(-) create mode 100644 examples/debian_flat_repo/BUILD.bazel create mode 100644 examples/debian_flat_repo/bullseye_rproject.lock.json create mode 100644 examples/debian_flat_repo/bullseye_rproject.yaml create mode 100644 examples/debian_flat_repo/test_linux_amd64.yaml diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index ac68fbb..f717469 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -36,6 +36,17 @@ load("@bullseye//:packages.bzl", "bullseye_packages") bullseye_packages() +# bazel run @bullseye_rproject//:lock +deb_index( + name = "bullseye_rproject", + lock = "//examples/debian_flat_repo:bullseye_rproject.lock.json", + manifest = "//examples/debian_flat_repo:bullseye_rproject.yaml", +) + +load("@bullseye_rproject//:packages.bzl", "bullseye_rproject_packages") + +bullseye_rproject_packages() + # bazel run @apt_security//:lock deb_index( name = "apt_security", diff --git a/apt/private/package_index.bzl b/apt/private/package_index.bzl index d498767..0ca36b8 100644 --- a/apt/private/package_index.bzl +++ b/apt/private/package_index.bzl @@ -2,8 +2,8 @@ load(":util.bzl", "util") -def _fetch_package_index(rctx, url, dist, comp, arch, integrity): - target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch) +def _fetch_package_index(rctx, url, arch, dist = None, comp = None, directory = None): + # TODO: validate mutually exclusive args (dist, comp) VS directory # See https://linux.die.net/man/1/xz and https://linux.die.net/man/1/gzip # --keep -> keep the original file (Bazel might be still committing the output to the cache) @@ -16,23 +16,53 @@ def _fetch_package_index(rctx, url, 
dist, comp, arch, integrity): failed_attempts = [] - for (ext, cmd) in supported_extensions.items(): - output = "{}/Packages.{}".format(target_triple, ext) - dist_url = "{}/dists/{}/{}/binary-{}/Packages.{}".format(url, dist, comp, arch, ext) + for ext, cmd in supported_extensions.items(): + index = "Packages" + index_full = "{}.{}".format(index, ext) + + if directory != None: # flat repo + output = "{directory}/{arch}/{index}".format( + directory = directory, + arch = arch, + index = index, + ) + + index_url = "{url}/{directory}/{index_full}".format( + url = url, + directory = directory, + index_full = index_full, + ) + else: + output = "{dist}/{comp}/{arch}/{index}".format( + dist = dist, + comp = comp, + arch = arch, + index = index, + ) + + index_url = "{url}/dists/{dist}/{comp}/binary-{arch}/{index_full}".format( + url = url, + dist = dist, + comp = comp, + arch = arch, + index_full = index_full, + ) + + output_full = "{}.{}".format(output, ext) + download = rctx.download( - url = dist_url, - output = output, - integrity = integrity, + url = index_url, + output = output_full, allow_fail = True, ) decompress_r = None if download.success: - decompress_r = rctx.execute(cmd + [output]) + decompress_r = rctx.execute(cmd + [output_full]) if decompress_r.return_code == 0: integrity = download.integrity break - failed_attempts.append((dist_url, download, decompress_r)) + failed_attempts.append((index_url, download, decompress_r)) if len(failed_attempts) == len(supported_extensions): attempt_messages = [] @@ -51,11 +81,14 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity): {} """.format(len(failed_attempts), "\n".join(attempt_messages))) - return ("{}/Packages".format(target_triple), integrity) + return (output, integrity) -def _parse_package_index(state, contents, arch, root): +def _parse_package_index(state, contents, arch, root_url, directory = None): last_key = "" pkg = {} + total_pkgs = 0 + out_of_spec = [] + for group in contents.split("\n\n"): 
for line in group.split("\n"): if line.strip() == "": @@ -82,8 +115,12 @@ def _parse_package_index(state, contents, arch, root): pkg[key] = value if len(pkg.keys()) != 0: - pkg["Root"] = root - util.set_dict(state.packages, value = pkg, keys = (arch, pkg["Package"], pkg["Version"])) + pkg["Root"] = root_url + + # NOTE: this fixes the arch for multi-arch flat repos + arch_ = arch if pkg["Architecture"] == "all" else pkg["Architecture"] + + util.set_dict(state.packages, value = pkg, keys = (arch_, pkg["Package"], pkg["Version"])) last_key = "" pkg = {} @@ -105,7 +142,16 @@ def _create(rctx, sources, archs): ) for arch in archs: - for (url, dist, comp) in sources: + for source in sources: + if len(source) == 2: # flat repo + url, directory = source + index = directory + dist, comp = None, None + else: + url, dist, comp = source + index = "%s/%s" % (dist, comp) + directory = None + # We assume that `url` does not contain a trailing forward slash when passing to # functions below. If one is present, remove it. Some HTTP servers do not handle # redirects properly when a path contains "//" @@ -113,12 +159,20 @@ def _create(rctx, sources, archs): # on misconfigured HTTP servers) url = url.rstrip("/") - rctx.report_progress("Fetching package index: {}/{} for {}".format(dist, comp, arch)) - (output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "") + rctx.report_progress("Fetching %s package index: %s" % (arch, index)) + output, _ = _fetch_package_index( + rctx, + url, + arch, + dist = dist, + comp = comp, + directory = directory, + ) + + rctx.report_progress("Parsing %s package index: %s" % (arch, index)) # TODO: this is expensive to perform. 
- rctx.report_progress("Parsing package index: {}/{} for {}".format(dist, comp, arch)) - _parse_package_index(state, rctx.read(output), arch, url) + _parse_package_index(state, rctx.read(output), arch, url, directory) return struct( package_versions = lambda **kwargs: _package_versions(state, **kwargs), diff --git a/apt/private/resolve.bzl b/apt/private/resolve.bzl index 7fa93b1..1bd4b39 100644 --- a/apt/private/resolve.bzl +++ b/apt/private/resolve.bzl @@ -40,13 +40,27 @@ def internal_resolve(rctx, yq_toolchain_prefix, manifest, include_transitive): sources = [] for src in manifest["sources"]: - distr, components = src["channel"].split(" ", 1) - for comp in components.split(" "): - sources.append(( - src["url"], - distr, - comp, - )) + channel_chunks = src["channel"].split(" ") + + if len(channel_chunks) == 1: + # it's a flat repo, see: + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + # vs the "canonical" repo: + # https://wiki.debian.org/DebianRepository/Format#Overview + directory = channel_chunks[0] + + if not directory.endswith("/"): + fail("Debian flat repo directory must end in '/'") + + sources.append((src["url"], directory.rstrip("/"))) + else: + distr, components = channel_chunks[0], channel_chunks[1:] + + if distr.endswith("/"): + fail("Debian distribution ends in '/' but this is not a flat repo") + + for comp in components: + sources.append((src["url"], distr, comp)) pkgindex = package_index.new(rctx, sources = sources, archs = manifest["archs"]) pkgresolution = package_resolution.new(index = pkgindex) diff --git a/examples/debian_flat_repo/BUILD.bazel b/examples/debian_flat_repo/BUILD.bazel new file mode 100644 index 0000000..d491029 --- /dev/null +++ b/examples/debian_flat_repo/BUILD.bazel @@ -0,0 +1,48 @@ +load("@container_structure_test//:defs.bzl", "container_structure_test") +load("@rules_distroless//apt:defs.bzl", "dpkg_status") +load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") + +PACKAGES = [ + 
"@bullseye//dpkg", + "@bullseye//apt", + "@bullseye_rproject//r-mathlib", +] + +# Creates /var/lib/dpkg/status with installed package information. +dpkg_status( + name = "dpkg_status", + controls = [ + "%s/amd64:control" % package + for package in PACKAGES + ], +) + +oci_image( + name = "apt", + architecture = "amd64", + os = "linux", + tars = [ + ":dpkg_status", + ] + [ + "%s/amd64" % package + for package in PACKAGES + ], +) + +oci_load( + name = "tarball", + image = ":apt", + repo_tags = [ + "distroless/test:latest", + ], +) + +container_structure_test( + name = "test", + configs = ["test_linux_amd64.yaml"], + image = ":apt", + target_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) diff --git a/examples/debian_flat_repo/bullseye_rproject.lock.json b/examples/debian_flat_repo/bullseye_rproject.lock.json new file mode 100644 index 0000000..90eb9d8 --- /dev/null +++ b/examples/debian_flat_repo/bullseye_rproject.lock.json @@ -0,0 +1,14 @@ +{ + "packages": [ + { + "arch": "amd64", + "dependencies": [], + "key": "r-mathlib_4.4.1-1_bullseyecran.0_amd64", + "name": "r-mathlib", + "sha256": "cbe3abbcc74261f2ad84159b423b856c1a0b4ebe6fef2de763d8783ff00245d5", + "url": "https://cloud.r-project.org/bin/linux/debian/bullseye-cran40/r-mathlib_4.4.1-1~bullseyecran.0_amd64.deb", + "version": "4.4.1-1~bullseyecran.0" + } + ], + "version": 1 +} \ No newline at end of file diff --git a/examples/debian_flat_repo/bullseye_rproject.yaml b/examples/debian_flat_repo/bullseye_rproject.yaml new file mode 100644 index 0000000..6c1cd90 --- /dev/null +++ b/examples/debian_flat_repo/bullseye_rproject.yaml @@ -0,0 +1,20 @@ +# Packages for examples/debian_flat_repo. +# +# Anytime this file is changed, the lockfile needs to be regenerated. 
+# +# To generate the bullseye_rproject.lock.json run the following command +# +# bazel run @bullseye_rproject//:lock +# +# See debian_package_index at WORKSPACE.bazel +version: 1 + +sources: + - channel: bullseye-cran40/ + url: https://cloud.r-project.org/bin/linux/debian + +archs: + - amd64 + +packages: + - r-mathlib diff --git a/examples/debian_flat_repo/test_linux_amd64.yaml b/examples/debian_flat_repo/test_linux_amd64.yaml new file mode 100644 index 0000000..4e9d1d8 --- /dev/null +++ b/examples/debian_flat_repo/test_linux_amd64.yaml @@ -0,0 +1,9 @@ +schemaVersion: "2.0.0" + +commandTests: + - name: "apt list --installed" + command: "apt" + args: ["list", "--installed"] + expectedOutput: + - Listing\.\.\. + - r-mathlib/now 4.4.1-1~bullseyecran.0 amd64 \[installed,local\]