Skip to content

Commit

Permalink
fix: NVIDIA CUDA flat repos don't follow Debian repo spec
Browse files Browse the repository at this point in the history
Although the Debian repo spec for 'Filename' (see
https://wiki.debian.org/DebianRepository/Format#Filename) clearly says
that 'Filename' should be relative to the base directory of the repo and
should be in canonical form (i.e. without '.' or '..') there are cases
where this is not honored.

In those cases we try to work around this by assuming 'Filename' is
relative to the sources.list directory/ so we combine them and normalize
the new 'Filename' path.

Note that, so far, only the NVIDIA CUDA repos needed this workaround so
maybe this heuristic will break for other repos that don't conform to
the Debian repo spec.
  • Loading branch information
jjmaestro committed Sep 20, 2024
1 parent 7b56ecb commit 88daaa6
Show file tree
Hide file tree
Showing 15 changed files with 230 additions and 26 deletions.
4 changes: 2 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module(
compatibility_level = 1,
)

bazel_dep(name = "bazel_skylib", version = "1.5.0")
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "aspect_bazel_lib", version = "2.7.9")

bazel_lib_toolchains = use_extension("@aspect_bazel_lib//lib:extensions.bzl", "toolchains")
Expand All @@ -21,7 +21,7 @@ use_repo(bazel_lib_toolchains, "yq_linux_s390x")
use_repo(bazel_lib_toolchains, "yq_windows_amd64")

bazel_dep(name = "gazelle", version = "0.34.0", dev_dependency = True, repo_name = "bazel_gazelle")
bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.5.0", dev_dependency = True)
bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.7.1", dev_dependency = True)
bazel_dep(name = "buildifier_prebuilt", version = "6.1.2", dev_dependency = True)
bazel_dep(name = "platforms", version = "0.0.10", dev_dependency = True)
bazel_dep(name = "rules_oci", version = "2.0.0-rc0", dev_dependency = True)
Expand Down
12 changes: 6 additions & 6 deletions MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions WORKSPACE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ load("@bullseye_rproject//:packages.bzl", "bullseye_rproject_packages")

bullseye_rproject_packages()

# bazel run @nvidia_ubuntu2404_cuda//:lock
deb_index(
name = "nvidia_ubuntu2404_cuda",
lock = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.lock.json",
manifest = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.yaml",
)

load("@nvidia_ubuntu2404_cuda//:packages.bzl", "nvidia_ubuntu2404_cuda_packages")

nvidia_ubuntu2404_cuda_packages()

deb_index(
name = "apt_security",
manifest = "//examples/debian_snapshot_security:security.yaml",
Expand Down
3 changes: 3 additions & 0 deletions apt/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ bzl_library(
name = "pkg",
srcs = ["pkg.bzl"],
visibility = ["//apt:__subpackages__"],
deps = [
"@bazel_skylib//lib:paths",
],
)

bzl_library(
Expand Down
56 changes: 54 additions & 2 deletions apt/private/package_index.bzl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"package index"

load("@bazel_skylib//lib:paths.bzl", "paths")
load(":util.bzl", "util")
load(":version.bzl", version_lib = "version")

def _fetch_package_index(rctx, source):
Expand Down Expand Up @@ -56,6 +58,40 @@ def _fetch_package_index(rctx, source):

return rctx.read(source.output)

def _make_file_url(pkg, source):
filename = pkg["Filename"]

invalid_filename = not paths.is_normalized(
filename,
look_for_same_level_references = True,
)

if invalid_filename:
# NOTE:
# Although the Debian repo spec for 'Filename' (see
# https://wiki.debian.org/DebianRepository/Format#Filename) clearly
# says that 'Filename' should be relative to the base directory of the
# repo and should be in canonical form (i.e. without '.' or '..') there
# are cases where this is not honored.
#
# In those cases we try to work around this by assuming 'Filename' is
# relative to the sources.list "index path" (e.g. directory/ for flat
# repos) so we combine them and normalize the new 'Filename' path.
#
# Note that, so far, only the NVIDIA CUDA repos needed this workaround
# so maybe this heuristic will break for other repos that don't conform
# to the Debian repo spec.
filename = paths.normalize(paths.join(source.index_path, filename))

base_url = util.parse_url(source.base_url)
file_url = "{}://{}{}".format(
base_url.scheme,
base_url.host,
paths.join(base_url.path, filename),
)

return file_url, invalid_filename

def _package_set(packages, keys, package):
for key in keys[:-1]:
if key not in packages:
Expand All @@ -66,6 +102,9 @@ def _package_set(packages, keys, package):
def _parse_package_index(packages, contents, source):
last_key = ""
pkg = {}
total_pkgs = 0
out_of_spec = []

for group in contents.split("\n\n"):
for line in group.split("\n"):
if line.strip() == "":
Expand All @@ -92,7 +131,10 @@ def _parse_package_index(packages, contents, source):
pkg[key] = value

if len(pkg.keys()) != 0:
pkg["Root"] = source.base_url
pkg["FileUrl"], invalid_filename = _make_file_url(pkg, source)

if invalid_filename:
out_of_spec.append(pkg["Package"])

# NOTE: workaround for multi-arch flat repos
arch = source.arch if pkg["Architecture"] == "all" else pkg["Architecture"]
Expand All @@ -104,6 +146,9 @@ def _parse_package_index(packages, contents, source):
)
last_key = ""
pkg = {}
total_pkgs += 1

return out_of_spec, total_pkgs

def _package_get(packages, arch, name, version = None):
versions = packages.get(arch, {}).get(name, {})
Expand All @@ -123,7 +168,13 @@ def _index(rctx, manifest):
output = _fetch_package_index(rctx, source)

rctx.report_progress("Parsing package index: %s" % index)
_parse_package_index(packages, output, source)
out_of_spec, total_pkgs = _parse_package_index(packages, output, source)

if out_of_spec:
count = len(out_of_spec)
pct = int(100.0 * count / total_pkgs)
msg = "Warning: index {} has {} packages ({}%) with invalid 'Filename' fields"
print(msg.format(index, count, pct))

return struct(
packages = packages,
Expand Down Expand Up @@ -287,6 +338,7 @@ package_index = struct(
parse_depends = _parse_depends,
# NOTE: these are exposed here for testing purposes, DO NOT USE OTHERWISE
_fetch_package_index = _fetch_package_index,
_make_file_url = _make_file_url,
_parse_package_index = _parse_package_index,
_package_set = _package_set,
_package_get = _package_get,
Expand Down
2 changes: 1 addition & 1 deletion apt/private/pkg.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ def _pkg_from_index(package, arch):
return struct(
name = package["Package"],
version = package["Version"],
url = "%s/%s" % (package["Root"], package["Filename"]),
url = package["FileUrl"],
sha256 = package["SHA256"],
arch = arch,
dependencies = [],
Expand Down
17 changes: 17 additions & 0 deletions apt/private/util.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,27 @@ def _get_dupes(list_):

return dupes

def _parse_url(url):
if "://" not in url:
fail("Invalid URL: %s" % url)

scheme, url_ = url.split("://", 1)

path = "/"

if "/" in url_:
host, path_ = url_.split("/", 1)
path += path_
else:
host = url_

return struct(scheme = scheme, host = host, path = path)

def _sanitize(str):
return str.replace("+", "-p-").replace(":", "-").replace("~", "_")

util = struct(
get_dupes = _get_dupes,
parse_url = _parse_url,
sanitize = _sanitize,
)
3 changes: 3 additions & 0 deletions apt/tests/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
load(":manifest_test.bzl", "manifest_tests")
load(":package_index_test.bzl", "package_index_tests")
load(":util_test.bzl", "util_tests")
load(":version_test.bzl", "version_tests")

manifest_tests()

package_index_tests()

util_tests()

version_tests()
15 changes: 12 additions & 3 deletions apt/tests/package_index_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _parse_package_index_test(ctx):
package_index._parse_package_index(actual, output, source)

asserts.equals(env, "foo", actual[arch][name][version]["Package"])
asserts.equals(env, url, actual[arch][name][version]["Root"])
asserts.true(env, actual[arch][name][version]["FileUrl"].startswith(url))

return unittest.end(env)

Expand Down Expand Up @@ -107,6 +107,8 @@ def _index_test(ctx):

url = "http://mirror.com"

source = mock.manifest(url, arch, name).sources[0]

mock_rctx = mock.rctx(
read = mock.read(output),
download = mock.download(success = True),
Expand All @@ -116,10 +118,17 @@ def _index_test(ctx):
actual = package_index._index(mock_rctx, mock.manifest(url, arch, name))

expected_pkg = mock.pkg(arch, name, version)
expected_pkg["Root"] = url
file_url, _ = package_index._make_file_url(expected_pkg, source)
expected_pkg["FileUrl"] = file_url

actual_pkg = actual.package_get(arch, name, version)
asserts.equals(env, expected_pkg, actual_pkg)

# NOTE: we compare key-by-key because the error output of
# asserts.equals(env, expected_pkg, actual_pkg) is quite
# hard to read
asserts.equals(env, expected_pkg.keys(), actual_pkg.keys())
for key in expected_pkg.keys():
asserts.equals(env, expected_pkg[key], actual_pkg[key])

expected_packages = {arch: {name: {version: expected_pkg}}}
asserts.equals(env, expected_packages, actual.packages)
Expand Down
25 changes: 25 additions & 0 deletions apt/tests/util_test.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"unit tests for util"

load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest")
load("//apt/private:util.bzl", "util")

_TEST_SUITE_PREFIX = "util/"

def _parse_url_test(ctx):
env = unittest.begin(ctx)

parameters = {
"https://mirror.com": struct(scheme = "https", host = "mirror.com", path = "/"),
"http://mirror.com/foo/bar": struct(scheme = "http", host = "mirror.com", path = "/foo/bar"),
}

for url, expected in parameters.items():
actual = util.parse_url(url)
asserts.equals(env, expected, actual)

return unittest.end(env)

parse_url_test = unittest.make(_parse_url_test)

def util_tests():
parse_url_test(name = _TEST_SUITE_PREFIX + "parse_url")
52 changes: 40 additions & 12 deletions examples/debian_flat_repo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,51 @@ load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
PACKAGES = [
"@bullseye//dpkg",
"@bullseye//apt",
]

PACKAGES_AMD64 = PACKAGES + [
"@bullseye_rproject//r-mathlib",
"@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base",
]

PACKAGES_ARM64 = PACKAGES + [
"@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base",
]

# Creates /var/lib/dpkg/status with installed package information.
dpkg_status(
name = "dpkg_status",
controls = [
"%s/amd64:control" % package
for package in PACKAGES
],
controls = select({
"@platforms//cpu:x86_64": [
"%s/amd64:control" % package
for package in PACKAGES_AMD64
],
"@platforms//cpu:arm64": [
"%s/arm64:control" % package
for package in PACKAGES_ARM64
],
}),
)

oci_image(
name = "apt",
architecture = "amd64",
architecture = select({
"@platforms//cpu:x86_64": "amd64",
"@platforms//cpu:arm64": "arm64",
}),
os = "linux",
tars = [
":dpkg_status",
] + [
"%s/amd64" % package
for package in PACKAGES
],
] + select({
"@platforms//cpu:x86_64": [
"%s/amd64" % package
for package in PACKAGES_AMD64
],
"@platforms//cpu:arm64": [
"%s/arm64" % package
for package in PACKAGES_ARM64
],
}),
)

oci_load(
Expand All @@ -39,10 +62,15 @@ oci_load(

container_structure_test(
name = "test",
configs = ["test_linux_amd64.yaml"],
configs = select({
"@platforms//cpu:x86_64": ["test_linux_amd64.yaml"],
"@platforms//cpu:arm64": ["test_linux_arm64.yaml"],
}),
image = ":apt",
target_compatible_with = [
"@platforms//cpu:x86_64",
target_compatible_with = select({
"@platforms//cpu:x86_64": ["@platforms//cpu:x86_64"],
"@platforms//cpu:arm64": ["@platforms//cpu:arm64"],
}) + [
"@platforms//os:linux",
],
)
Loading

0 comments on commit 88daaa6

Please sign in to comment.