diff --git a/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py b/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py index 0982a796ed..a7d857868d 100644 --- a/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py +++ b/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py @@ -345,12 +345,80 @@ def _get_files_owned_by_rpms(context, dirpath, pkgs=None, recursive=False): return files_owned_by_rpms +def _copy_decouple(srcdir, dstdir): + """ + Copy `srcdir` to `dstdir` while decoupling symlinks. + + What we mean by decoupling the `srcdir` is that any symlinks pointing + outside the directory will be copied as regular files. This means that the + directory will become independent from its surroundings with respect to + symlinks. Any symlink (or symlink chains) within the directory will be + preserved. + + """ + + for root, dummy_dirs, files in os.walk(srcdir): + for filename in files: + relpath = os.path.relpath(root, srcdir) + source_filepath = os.path.join(root, filename) + target_filepath = os.path.join(dstdir, relpath, filename) + + # Skip and report broken symlinks + if not os.path.exists(source_filepath): + api.current_logger().warning( + 'File {} is a broken symlink! Will not copy the file.'.format(source_filepath)) + continue + + # Copy symlinks to the target userspace + source_is_symlink = os.path.islink(source_filepath) + pointee = None + if source_is_symlink: + pointee = os.readlink(source_filepath) + + # If source file is a symlink within `srcdir` then preserve it, + # otherwise resolve and copy it as a file it points to + if pointee is not None and not pointee.startswith(srcdir): + # Follow the path until we hit a file or get back to /etc/pki + while not pointee.startswith(srcdir) and os.path.islink(pointee): + pointee = os.readlink(pointee) + + # Pointee points to a _regular file_ outside /etc/pki so we + # copy it instead + if not pointee.startswith(srcdir) and not os.path.islink(pointee): + source_is_symlink = False + source_filepath = pointee + else: + # pointee points back to /etc/pki + pass + + # Ensure parent directory exists + parent_dir = os.path.dirname(target_filepath) + # Note: This is secure because we know that parent_dir is located + # inside of `$target_userspace/etc/pki` which is a directory that + # is not writable by unprivileged users. If this function is used + # elsewhere we may need to be more careful before running `mkdir -p`. + run(['mkdir', '-p', parent_dir]) + + if source_is_symlink: + # Preserve the owner and permissions of the original symlink + run(['ln', '-s', pointee, target_filepath]) + run(['chmod', '--reference={}'.format(source_filepath), target_filepath]) + continue + + run(['cp', '-a', source_filepath, target_filepath]) + + def _copy_certificates(context, target_userspace): """ - Copy the needed certificates into the container, but preserve original ones + Copy certificates from source system into the container, but preserve + original ones Some certificates are already installed in the container and those are default certificates for the target OS, so we preserve these. + + We respect the symlink hierarchy of the source system within the /etc/pki + folder. Dangling symlinks will be ignored. + """ target_pki = os.path.join(target_userspace, 'etc', 'pki') @@ -360,36 +428,56 @@ def _copy_certificates(context, target_userspace): files_owned_by_rpms = _get_files_owned_by_rpms(target_context, '/etc/pki', recursive=True) api.current_logger().debug('Files owned by rpms: {}'.format(' '.join(files_owned_by_rpms))) + # Backup container /etc/pki run(['mv', target_pki, backup_pki]) - context.copytree_from('/etc/pki', target_pki) + # Copy source /etc/pki to the container + _copy_decouple('/etc/pki', target_pki) + + # Assertion: after running _copy_decouple(), no broken symlinks exist in /etc/pki in the container + # So any broken symlinks created will be by the installed packages. + + # Recover installed packages as they always get precedence for filepath in files_owned_by_rpms: src_path = os.path.join(backup_pki, filepath) dst_path = os.path.join(target_pki, filepath) # Resolve and skip any broken symlinks is_broken_symlink = False - while os.path.islink(src_path): - # The symlink points to a path relative to the target userspace so - # we need to readjust it - next_path = os.path.join(target_userspace, os.readlink(src_path)[1:]) - if not os.path.exists(next_path): - is_broken_symlink = True - - # The path original path of the broken symlink in the container - report_path = os.path.join(target_pki, os.path.relpath(src_path, backup_pki)) - api.current_logger().warning('File {} is a broken symlink!'.format(report_path)) - break - - src_path = next_path + pointee = None + if os.path.islink(src_path): + pointee = os.path.join(target_userspace, os.readlink(src_path)[1:]) + + seen = set() + while os.path.islink(pointee): + # The symlink points to a path relative to the target userspace so + # we need to readjust it + pointee = os.path.join(target_userspace, os.readlink(src_path)[1:]) + if not os.path.exists(pointee) or pointee in seen: + is_broken_symlink = True + + # The path original path of the broken symlink in the container + report_path = os.path.join(target_pki, os.path.relpath(src_path, backup_pki)) + api.current_logger().warning( + 'File {} is a broken symlink! Will not copy!'.format(report_path)) + break + + seen.add(pointee) if is_broken_symlink: continue + # Cleanup conflicting files run(['rm', '-rf', dst_path]) + + # Ensure destination exists parent_dir = os.path.dirname(dst_path) run(['mkdir', '-p', parent_dir]) - run(['cp', '-a', src_path, dst_path]) + + # Copy the new file + run(['cp', '-R', '--preserve=all', src_path, dst_path]) + + run(['rm', '-rf', backup_pki]) def _prep_repository_access(context, target_userspace): @@ -401,6 +489,10 @@ def _prep_repository_access(context, target_userspace): backup_yum_repos_d = os.path.join(target_etc, 'yum.repos.d.backup') _copy_certificates(context, target_userspace) + # NOTE(dkubek): context.call(['update-ca-trust']) seems to not be working. + # I am not really sure why. The changes to files are not + # being written to disk. + run(["chroot", target_userspace, "/bin/bash", "-c", "su - -c update-ca-trust"]) if not rhsm.skip_rhsm(): run(['rm', '-rf', os.path.join(target_etc, 'rhsm')]) diff --git a/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py b/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py index a519275e46..a7bb090bce 100644 --- a/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py +++ b/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py @@ -1,4 +1,8 @@ +from __future__ import division + import os +import subprocess +import sys from collections import namedtuple import pytest @@ -11,6 +15,12 @@ from leapp.libraries.common.testutils import CurrentActorMocked, logger_mocked, produce_mocked from leapp.utils.deprecation import suppress_deprecation +if sys.version_info < (2, 8): + from pathlib2 import Path +else: + from pathlib import Path + + CUR_DIR = os.path.dirname(os.path.abspath(__file__)) _CERTS_PATH = os.path.join(CUR_DIR, '../../../files', userspacegen.PROD_CERTS_FOLDER) _DEFAULT_CERT_PATH = os.path.join(_CERTS_PATH, '8.1', '479.pem') @@ -48,6 +58,220 @@ def __exit__(self, exception_type, exception_value, traceback): pass +def traverse_structure(structure, root=Path('/')): + for filename, links_to in structure.items(): + filepath = root / filename + + if isinstance(links_to, dict): + for pair in traverse_structure(links_to, filepath): + yield pair + else: + yield (filepath, links_to) + + +def assert_directory_structure_matches(root, initial, expected): + # Assert every file that is supposed to be present is present + for filepath, links_to in traverse_structure(expected, root=root / 'expected'): + assert filepath.exists() + + if links_to is None: + assert filepath.is_file() + continue + + assert filepath.is_symlink() + assert os.readlink(str(filepath)) == str(root / 'initial' / links_to.lstrip('/')) + + # Assert there are no extra files + result_dir = str(root / 'expected') + for fileroot, dummy_dirs, files in os.walk(result_dir): + for filename in files: + dir_path = os.path.relpath(fileroot, result_dir).split('/') + + cwd = expected + for dir in dir_path: + cwd = cwd[dir] + + assert filename in cwd + + filepath = os.path.join(fileroot, filename) + if os.path.islink(filepath): + links_to = '/' + os.path.relpath(os.readlink(filepath), str(root / 'initial')) + assert cwd[filename] == links_to + + +@pytest.fixture +def temp_directory_layout(tmp_path, initial_structure): + for filepath, links_to in traverse_structure(initial_structure, root=tmp_path / 'initial'): + file_path = tmp_path / filepath + file_path.parent.mkdir(parents=True, exist_ok=True) + + if links_to is None: + file_path.touch() + continue + + file_path.symlink_to(tmp_path / 'initial' / links_to.lstrip('/')) + + (tmp_path / 'expected').mkdir() + assert (tmp_path / 'expected').exists() + + return tmp_path + + +# The semantics of initial_structure and expected_structure are as follows: +# +# 1. The outermost dictionary encodes the root of a directory structure +# +# 2. Depending on the value for a key in a dict, each key in the dictionary +# denotes the name of either a: +# a) directory -- if value is dict +# b) regular file -- if value is None +# c) symlink -- if a value is str +# +# 3. The value of a symlink entry is a absolute path to a file in the context of +# the structure. +# +@pytest.mark.parametrize('initial_structure,expected_structure', [ + ({ # Copy a regular file + 'dir': { + 'fileA': None + } + }, { + 'dir': { + 'fileA': None + } + }), + ({ # Do not copy a broken symlink + 'dir': { + 'fileA': 'nonexistent' + } + }, { + 'dir': {} + }), + ({ # Copy a regular symlink + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': None + } + }, { + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': None + } + }), + ({ # Do not copy a chain of broken symlinks + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': 'nonexistent' + } + }, { + 'dir': {} + }), + ({ # Copy a chain of symlinks + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': None + } + }, { + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': None + } + }), + ({ # Circular symlinks + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': '/dir/fileC', + } + }, { + 'dir': {} + }), + ({ # Copy a link to a file outside the considered directory as file + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': '/outside/fileOut', + 'fileE': None + }, + 'outside': { + 'fileOut': '/outside/fileD', + 'fileD': '/dir/fileE' + } + }, { + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': '/dir/fileE', + 'fileE': None, + } + }), + ({ # Same test with a nested structure within the source dir + 'dir': { + 'nested': { + 'fileA': '/dir/nested/fileB', + 'fileB': '/dir/nested/fileC', + 'fileC': '/outside/fileOut', + 'fileE': None + } + }, + 'outside': { + 'fileOut': '/outside/fileD', + 'fileD': '/dir/nested/fileE' + } + }, { + 'dir': { + 'nested': { + 'fileA': '/dir/nested/fileB', + 'fileB': '/dir/nested/fileC', + 'fileC': '/dir/nested/fileE', + 'fileE': None + } + } + }), + ({ # Same test with a nested structure in the outside dir + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': '/outside/nested/fileOut', + 'fileE': None + }, + 'outside': { + 'nested': { + 'fileOut': '/outside/nested/fileD', + 'fileD': '/dir/fileE' + } + } + }, { + 'dir': { + 'fileA': '/dir/fileB', + 'fileB': '/dir/fileC', + 'fileC': '/dir/fileE', + 'fileE': None, + } + }), +] +) +def test_copy_decouple(monkeypatch, temp_directory_layout, initial_structure, expected_structure): + + def run_mocked(command): + subprocess.Popen( + ' '.join(command), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ).wait() + + monkeypatch.setattr(userspacegen, 'run', run_mocked) + userspacegen._copy_decouple( + str(temp_directory_layout / 'initial' / 'dir'), + str(temp_directory_layout / 'expected' / 'dir'), + ) + + assert_directory_structure_matches(temp_directory_layout, initial_structure, expected_structure) + + @pytest.mark.parametrize('result,dst_ver,arch,prod_type', [ (os.path.join(_CERTS_PATH, '8.1', '479.pem'), '8.1', architecture.ARCH_X86_64, 'ga'), (os.path.join(_CERTS_PATH, '8.1', '419.pem'), '8.1', architecture.ARCH_ARM64, 'ga'),