Skip to content

Commit

Permalink
Merge pull request #3502 from lexming/sync-20241110
Browse files Browse the repository at this point in the history
sync with develop (2024-11-10)
  • Loading branch information
Micket authored Nov 12, 2024
2 parents dec719f + 92a5007 commit cce4c2d
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 58 deletions.
44 changes: 37 additions & 7 deletions easybuild/easyblocks/generic/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
@author: Pieter De Baets (Ghent University)
@author: Jens Timmerman (Ghent University)
@author: Jasper Grimm (University of York)
@author: Jan Andre Reuter (Juelich Supercomputing Centre)
"""
import copy
import os
Expand Down Expand Up @@ -70,8 +71,8 @@ def __init__(self, *args, **kwargs):
self.altroot = None
self.altversion = None

# list of EasyConfig instances for components
self.comp_cfgs = []
# list of EasyConfig instances and their EasyBlocks for components
self.comp_instances = []

# list of EasyConfig instances of components for which to run sanity checks
self.comp_cfgs_sanity_check = []
Expand Down Expand Up @@ -197,7 +198,7 @@ def __init__(self, *args, **kwargs):
if comp_cfg['patches']:
self.cfg.update('patches', comp_cfg['patches'])

self.comp_cfgs.append(comp_cfg)
self.comp_instances.append((comp_cfg, comp_cfg.easyblock(comp_cfg)))

self.cfg.update('checksums', checksums_patches)

Expand All @@ -216,7 +217,7 @@ def check_checksums(self):
"""
checksum_issues = super(Bundle, self).check_checksums()

for comp in self.comp_cfgs:
for comp, _ in self.comp_instances:
checksum_issues.extend(self.check_checksums_for(comp, sub="of component %s" % comp['name']))

return checksum_issues
Expand Down Expand Up @@ -246,14 +247,12 @@ def build_step(self):
def install_step(self):
"""Install components, if specified."""
comp_cnt = len(self.cfg['components'])
for idx, cfg in enumerate(self.comp_cfgs):
for idx, (cfg, comp) in enumerate(self.comp_instances):

print_msg("installing bundle component %s v%s (%d/%d)..." %
(cfg['name'], cfg['version'], idx + 1, comp_cnt))
self.log.info("Installing component %s v%s using easyblock %s", cfg['name'], cfg['version'], cfg.easyblock)

comp = cfg.easyblock(cfg)

# correct build/install dirs
comp.builddir = self.builddir
comp.install_subdir, comp.installdir = self.install_subdir, self.installdir
Expand Down Expand Up @@ -323,6 +322,37 @@ def install_step(self):
# close log for this component
comp.close_log()

def make_module_req_guess(self):
    """
    Set module requirements from all components, e.g. $PATH, etc.

    During the install step, we only set these requirements temporarily.
    Later on when building the module, those paths are not considered.
    Therefore, iterate through all the components again and gather
    the requirements.

    Do not remove duplicates or check for existence of folders,
    as this is done while creating the modulefile already.

    :return: the requirements of the generic EasyBlock, extended per key with
             the values reported by every component in self.comp_instances
    :raise EasyBuildError: if the requirements of a component cannot be processed
                           (e.g. they do not provide an items() method)
    """
    # Start with the paths from the generic EasyBlock.
    # If not added here, they might be missing entirely and fail sanity checks.
    final_reqs = super(Bundle, self).make_module_req_guess()

    for cfg, comp in self.comp_instances:
        self.log.info("Gathering module paths for component %s v%s", cfg['name'], cfg['version'])
        reqs = comp.make_module_req_guess()

        try:
            for key, value in sorted(reqs.items()):
                # A single path may be given as a plain string: wrap it into a list,
                # as extending a list with a string would add every character separately.
                # The check must be on the value, not on the requirements mapping itself.
                if isinstance(value, str):
                    value = [value]
                final_reqs.setdefault(key, [])
                final_reqs[key] += value
        except AttributeError:
            raise EasyBuildError("Cannot process module requirements of bundle component %s v%s",
                                 cfg['name'], cfg['version'])

    return final_reqs

def make_module_extra(self, *args, **kwargs):
"""Set extra stuff in module file, e.g. $EBROOT*, $EBVERSION*, etc."""
if not self.altroot and not self.altversion:
Expand Down
217 changes: 166 additions & 51 deletions easybuild/easyblocks/generic/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
@author: Mikael Oehman (Chalmers University of Technology)
@author: Alex Domingo (Vrije Universiteit Brussel)
@author: Alexander Grund (TU Dresden)
"""

import os
Expand All @@ -37,10 +38,10 @@
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.framework.easyconfig import CUSTOM
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
from easybuild.tools.filetools import extract_file, change_dir
from easybuild.tools.filetools import extract_file
from easybuild.tools.run import run_shell_cmd
from easybuild.tools.config import build_option
from easybuild.tools.filetools import compute_checksum, mkdir, write_file
from easybuild.tools.filetools import compute_checksum, mkdir, move_file, read_file, write_file, CHECKSUM_TYPE_SHA256
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC

CRATESIO_SOURCE = "https://crates.io/api/v1/crates"
Expand All @@ -54,14 +55,74 @@
"""

CONFIG_TOML_PATCH_GIT = """
[patch."{repo}"]
{crates}
CONFIG_TOML_SOURCE_GIT = """
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "vendored-sources"
"""
CONFIG_TOML_PATCH_GIT_CRATES = """{0} = {{ path = "{1}" }}

CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
[source."real-{url}?rev={rev}"]
directory = "{workspace_dir}"
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "real-{url}?rev={rev}"
"""

CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{chksum}"}}'
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'


def get_workspace_members(crate_dir):
    """Find all members of a cargo workspace in crate_dir.

    (Minimally) parse the Cargo.toml file.
    If it is a workspace return the paths of all members (crate_dir joined with each member name).
    Otherwise, i.e. if there is no '[workspace]' line, return None.

    :param crate_dir: directory containing the Cargo.toml file to inspect
    :raise EasyBuildError: if there is a '[workspace]' section but no member list could be found in it,
                           or if a parsed member name looks invalid
    """
    cargo_toml = os.path.join(crate_dir, 'Cargo.toml')

    # We are looking for this:
    # [workspace]
    # members = [
    # "reqwest-middleware",
    # "reqwest-tracing",
    # "reqwest-retry",
    # ]
    # or the single-line form:
    # [workspace]
    # members = ["reqwest-middleware", "reqwest-tracing"]

    lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
    try:
        start_idx = lines.index('[workspace]')
    except ValueError:
        return None
    # Find "members = [" and concatenate the value, stop at end of list, end of section or end of file
    member_str = None
    for line in lines[start_idx + 1:]:
        if line.startswith('#'):
            continue  # Skip comments
        if re.match(r'\[\w+\]', line):
            break
        if member_str is None:
            # Whitespace around '=' is optional
            m = re.match(r'members\s*=\s*\[', line)
            if not m:
                continue
            member_str = line[m.end():]
        else:
            member_str += line
        # Check for the closing bracket on every line that contributed to the value,
        # including the one with the opening bracket, to support single-line lists
        if member_str.endswith(']'):
            member_str = member_str[:-1].strip()
            break
    if member_str is None:
        # Fail with a clear message instead of an AttributeError on the None value below
        raise EasyBuildError('Failed to parse %s: Found no members in the workspace section', cargo_toml)
    # Split at commas after removing possibly trailing ones and remove the quotes
    members = [member.strip().strip('"') for member in member_str.rstrip(',').split(',')]
    # Sanity check that we didn't pick up anything unexpected
    # (only the start of each name is verified, which also rejects empty names)
    invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)]
    if invalid_members:
        raise EasyBuildError('Failed to parse %s: Found seemingly invalid members: %s',
                             cargo_toml, ', '.join(invalid_members))
    return [os.path.join(crate_dir, m) for m in members]


class Cargo(ExtensionEasyBlock):
Expand All @@ -81,12 +142,15 @@ def extra_options(extra_vars=None):
return extra_vars

@staticmethod
def crate_src_filename(pkg_name, pkg_version, *args):
"""Crate tarball filename based on package name and version"""
return "{0}-{1}.tar.gz".format(pkg_name, pkg_version)
def crate_src_filename(pkg_name, pkg_version, _=None, rev=None):
"""Crate tarball filename based on package name, version and optionally git revision"""
parts = [pkg_name, pkg_version]
if rev is not None:
parts.append(rev)
return '-'.join(parts) + ".tar.gz"

@staticmethod
def crate_download_filename(pkg_name, pkg_version, *args):
def crate_download_filename(pkg_name, pkg_version):
"""Crate download filename based on package name and version"""
return "{0}/{1}/download".format(pkg_name, pkg_version)

Expand Down Expand Up @@ -122,7 +186,6 @@ def __init__(self, *args, **kwargs):
"""Constructor for Cargo easyblock."""
super(Cargo, self).__init__(*args, **kwargs)
self.cargo_home = os.path.join(self.builddir, '.cargo')
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
env.setvar('CARGO_HOME', self.cargo_home)
env.setvar('RUSTC', 'rustc')
env.setvar('RUSTDOC', 'rustdoc')
Expand All @@ -148,7 +211,7 @@ def __init__(self, *args, **kwargs):
repo_name = repo_name[:-4]
sources.append({
'git_config': {'url': url, 'repo_name': repo_name, 'commit': rev},
'filename': self.crate_src_filename(crate, version),
'filename': self.crate_src_filename(crate, version, rev=rev),
})

# copy EasyConfig instance before we make changes to it
Expand All @@ -165,67 +228,119 @@ def extract_step(self):
"""
Unpack the source files and populate them with required .cargo-checksum.json if offline
"""
mkdir(self.vendor_dir)
vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
mkdir(vendor_dir)
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
# If we put such a workspace into the vendor folder, cargo fails with
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
mkdir(git_vendor_dir)

vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
git_sources = {crate[2]: [] for crate in self.crates if len(crate) == 4}
# Track git sources for building the cargo config and avoiding duplicated folders
git_sources = {}

for src in self.src:
extraction_dir = self.builddir
# Check for git crates, `git_key` will be set to a true-ish value for those
try:
crate_name, _, git_repo, rev = vendor_crates[src['name']]
except (ValueError, KeyError):
git_key = None
else:
git_key = (git_repo, rev)
self.log.debug("Sources of %s(%s) belong to git repo: %s rev %s",
crate_name, src['name'], git_repo, rev)
# Do a sanity check that sources for the same repo and revision are the same
try:
previous_source = git_sources[git_key]
except KeyError:
git_sources[git_key] = src
else:
previous_checksum = previous_source['checksum']
current_checksum = src['checksum']
if previous_checksum and current_checksum and previous_checksum != current_checksum:
raise EasyBuildError("Sources for the same git repository need to be identical."
"Mismatch found for %s rev %s in %s vs %s",
git_repo, rev, previous_source['name'], src['name'])
self.log.info("Source %s already extracted to %s by %s. Skipping extraction.",
src['name'], previous_source['finalpath'], previous_source['name'])
src['finalpath'] = previous_source['finalpath']
continue

is_vendor_crate = src['name'] in vendor_crates
# Extract dependency crates into vendor subdirectory, separate from sources of main package
if src['name'] in vendor_crates:
extraction_dir = self.vendor_dir
if is_vendor_crate:
extraction_dir = git_vendor_dir if git_key else vendor_dir
else:
extraction_dir = self.builddir

self.log.info("Unpacking source of %s", src['name'])
existing_dirs = set(os.listdir(extraction_dir))
crate_dir = None
src_dir = extract_file(src['path'], extraction_dir, cmd=src['cmd'],
extra_options=self.cfg['unpack_options'], change_into_dir=False)
new_extracted_dirs = set(os.listdir(extraction_dir)) - existing_dirs

if len(new_extracted_dirs) == 1:
# Expected crate tarball with 1 folder
crate_dir = new_extracted_dirs.pop()
src_dir = os.path.join(extraction_dir, crate_dir)
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
elif len(new_extracted_dirs) == 0:
if len(new_extracted_dirs) == 0:
# Extraction went wrong
raise EasyBuildError("Unpacking sources of '%s' failed", src['name'])
# Expected crate tarball with 1 folder
# TODO: properly handle case with multiple extracted folders
# this is currently in a grey area, might still be used by cargo
if len(new_extracted_dirs) == 1:
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)

change_dir(src_dir)
self.src[self.src.index(src)]['finalpath'] = src_dir

if self.cfg['offline'] and crate_dir:
# Create checksum file for extracted sources required by vendored crates.io sources
self.log.info('creating .cargo-checksums.json file for : %s', crate_dir)
chksum = compute_checksum(src['path'], checksum_type='sha256')
chkfile = os.path.join(extraction_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(chksum=chksum))
# Add path to extracted sources for any crate from a git repo
try:
crate_name, _, crate_repo, _ = vendor_crates[src['name']]
except (ValueError, KeyError):
pass
else:
self.log.debug("Sources of %s belong to git repo: %s", src['name'], crate_repo)
git_src_dir = (crate_name, src_dir)
git_sources[crate_repo].append(git_src_dir)
if is_vendor_crate and self.cfg['offline']:
# Create checksum file for extracted sources required by vendored crates

# By default there is only a single crate
crate_dirs = [src_dir]
# For git sources determine the folders that contain crates by taking workspaces into account
if git_key:
member_dirs = get_workspace_members(src_dir)
if member_dirs:
crate_dirs = member_dirs

try:
checksum = src[CHECKSUM_TYPE_SHA256]
except KeyError:
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
for crate_dir in crate_dirs:
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
chkfile = os.path.join(src_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
# Move non-workspace git crates to the vendor folder
if git_key and member_dirs is None:
src_dir = os.path.join(vendor_dir, os.path.basename(crate_dirs[0]))
move_file(crate_dirs[0], src_dir)

src['finalpath'] = src_dir

if self.cfg['offline']:
self.log.info("Setting vendored crates dir for offline operation")
config_toml = os.path.join(self.cargo_home, 'config.toml')
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
# because the rust source subdirectories might differ with python packages
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=self.vendor_dir), append=True)

# also vendor sources from other git sources (could be many crates for one git source)
for git_repo, repo_crates in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s", git_repo)
config_crates = ''.join([CONFIG_TOML_PATCH_GIT_CRATES.format(*crate) for crate in repo_crates])
write_file(config_toml, CONFIG_TOML_PATCH_GIT.format(repo=git_repo, crates=config_crates), append=True)
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=vendor_dir), append=True)

# Tell cargo about the vendored git sources to avoid it failing with:
# Unable to update https://github.com/[...]
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
for (git_repo, rev), src in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s rev %s", git_repo, rev)
src_dir = src['finalpath']
if os.path.dirname(src_dir) == vendor_dir:
# Non-workspace sources are in vendor_dir
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT.format(url=git_repo, rev=rev),
append=True)
else:
# Workspace sources stay in their own separate folder.
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
append=True)

# Use environment variable since it would also be passed along to builds triggered via python packages
env.setvar('CARGO_NET_OFFLINE', 'true')
Expand Down

0 comments on commit cce4c2d

Please sign in to comment.