From bc23ec0eec4df740e8057ebee24909df8cc01bda Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Mon, 22 Jan 2024 03:38:11 -0800 Subject: [PATCH] Fix a cornercase in choosing whether to copy or link a file. In the present code, when we have two separate links to the same file outside of /etc/pki, the real file is copied to the location of both links. The problem with this is when the user makes changes to the file in the future, they will have to edit both files whereas before they would only have had to edit one of the files (since they linked to the same underlying file). Example: Host: lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileA -> /etc/sourceFile lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/sourceFile Becomes: -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileA -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileB fileA and fileB are separate copies of /etc/sourceFile. This change makes it so anytime two links eventually point to the same exterior file, the first link will be copied and the second link will becone a symlinks to that copy like this: -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileA lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/pki/fileA This is the behaviour even if the second link went through several other exterior links to reach the same sourceFile (but not if there is another interior link in between). Example: Host: lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileA -> /etc/sourceFile lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/sourceFile lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileC -> /etc/external-to-same-file lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileD -> /etc/external-to-interior-link lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 /etc/external-to-same-file -> /etc/sourceFile lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 /etc/external-to-interior-link -> pki/fileB Becomes: -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileA lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/pki/fileA lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileC -> /etc/pki/fileA lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileD -> /etc/pki/fileB The drawback to the end user is that, unless you trace all the way through the chain of symlinks, it is confusing that fileA ends up being a real file and fileC ends up pointing to it. It's not immediately obvious how the two are related. The advantage is if the end user makes any changes to either fileA or fileC, then the changes will be visible in both files. This is the behaviour that existed on the host system. --- .../libraries/userspacegen.py | 139 ++++++++++++------ .../tests/unit_test_targetuserspacecreator.py | 82 +++++------ 2 files changed, 128 insertions(+), 93 deletions(-) diff --git a/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py b/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py index d917bfd511..9febe072f8 100644 --- a/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py +++ b/repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py @@ -348,6 +348,62 @@ def _mkdir_with_copied_mode(path, mode_from): run(['chmod', '--reference={}'.format(mode_from), path]) +def _canonize(pointee_as_abspath, srcdir): + """ + Helper function to return canonical forms of directory paths. + + This is an ugly function that just avoids having to duplicate this code in two places. + """ + # If srcdir is a symlink, then we need a name for it that we can compare + # with other paths. + canonical_srcdir = os.path.realpath(srcdir) + + # To make comparisons, we need to resolve all symlinks in the directory + # structure leading up to pointee. However, we can't include pointee + # itself otherwise it will resolve to the file that it points to in the + # end (which would be wrong if pointee_filename is a symlink). + canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath) + canonical_pointee_dir = os.path.realpath(canonical_pointee_dir) + + return (pointee_filename, canonical_pointee_dir, canonical_srcdir) + + +def _format_source_link(pointee_as_abspath, symlink, srcdir): + """ + Format the source for a symlink as either an absolute path or a path relative to srcdir. + + :param pointee_as_abspath: Sbsolute path to the source of the symlink. + :param symlink: Absolute path to the symlink that started us looking + through the chain of links. If the symlink points to a relative path, this function will + return a relative path. If it is an absolute path, it will return an absolute path. + :param srcdir: The directory in the source tree that we want everything to live within. + """ + pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir) + link_to = os.readlink(symlink) + + if link_to.startswith('/'): + # The original symlink was an absolute path so we will set this + # one to absolute too + # Note: Because absolute paths are constructed inside of + # srcdir, the relative path that we need to join with has to be + # relative to srcdir, not the directory that the symlink is + # being created in. + relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir) + corrected_path = os.path.normpath(os.path.join(srcdir, + relative_to_srcdir, + pointee_filename) + ) + else: + # If the original link is a relative link, then we want the new + # link to be relative (to the path that we will place the symlink in) as well + canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink)) + relative_to_symlink_location = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir) + corrected_path = os.path.normpath(os.path.join(relative_to_symlink_location, + pointee_filename) + ) + return corrected_path + + def _choose_copy_or_link(symlink, srcdir): """ Determine whether to copy file contents or create a symlink depending on where the pointee resides. @@ -378,10 +434,6 @@ def _choose_copy_or_link(symlink, srcdir): if not os.path.exists(symlink): raise BrokenSymlinkError('File {} is a broken symlink!'.format(symlink)) - # If srcdir is a symlink, then we need a name for it that we can compare - # with other paths. - canonical_srcdir = os.path.realpath(srcdir) - pointee_as_abspath = symlink seen = set([pointee_as_abspath]) @@ -412,43 +464,17 @@ def _choose_copy_or_link(symlink, srcdir): seen.add(pointee_as_abspath) - # To make comparisons, we need to resolve all symlinks in the directory - # structure leading up to pointee. However, we can't include pointee - # itself otherwise it will resolve to the file that it points to in the - # end (which would be wrong if pointee_filename is a symlink). - canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath) - canonical_pointee_dir = os.path.realpath(canonical_pointee_dir) - - if canonical_pointee_dir.startswith(canonical_srcdir): - # Absolute path inside of the correct dir so we need to link to it - # But we need to determine what the link path should be before - # returning. - - # Construct a relative path that points from the symlinks directory - # to the pointee. - link_to = os.readlink(symlink) - canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink)) - relative_path = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir) - - if link_to.startswith('/'): - # The original symlink was an absolute path so we will set this - # one to absolute too - # Note: Because absolute paths are constructed inside of - # srcdir, the relative path that we need to join here has to be - # relative to srcdir, not the directory that the symlink is - # being created in. - relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir) - corrected_path = os.path.normpath(os.path.join(srcdir, relative_to_srcdir, pointee_filename)) - - else: - # If the original link is a relative link, then we want the new - # link to be relative as well - corrected_path = os.path.normpath(os.path.join(relative_path, pointee_filename)) - - return ("link", corrected_path) + dummy_pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir) + if not canonical_pointee_dir.startswith(canonical_srcdir): + # pointee is a symlink that points outside of the srcdir so continue to + # the next symlink in the chain. + continue - # pointee is a symlink that points outside of the srcdir so continue to - # the next symlink in the chain. + # Absolute path inside of the correct dir so we need to link to it + # Note: We have to make the source of the link relative or absolute + # depending on what the original link was + corrected_path = _format_source_link(pointee_as_abspath, symlink, srcdir) + return ("link", corrected_path) # The file is not a link so copy it return ('copy', pointee_as_abspath) @@ -464,6 +490,7 @@ def _copy_symlinks(symlinks_to_process, srcdir): :param srcdir: The root directory that every piece of content must be present in. :raises ValueError: if the arguments are not correct """ + copied_links = {} for source_linkpath, target_linkpath in symlinks_to_process: try: action, source_path = _choose_copy_or_link(source_linkpath, srcdir) @@ -473,14 +500,32 @@ def _copy_symlinks(symlinks_to_process, srcdir): continue if action == "copy": - # Note: source_path could be a directory, so '-a' or '-r' must be - # given to cp. - run(['cp', '-a', source_path, target_linkpath]) - elif action == 'link': + # Need the canonical path so that we can check if the pointed to + # file has been copied before + canonical_source_path = os.path.realpath(source_path) + if canonical_source_path in copied_links: + # The link was copied before so we need to change this to a link action + # with the previous copied link location as the source for this link + action = "link" + source_abspath = os.path.abspath(os.path.normpath(copied_links[canonical_source_path])) + source_path = _format_source_link(source_abspath, source_linkpath, srcdir) + else: + # All other links to the same target will need to link to the path + # that we're processing now since they all pointed to the same + # target before. + copied_links[canonical_source_path] = source_linkpath + + # Note: source_path could be a directory, so '-a' or '-r' must be + # given to cp. + run(['cp', '-a', source_path, target_linkpath]) + continue + + if action == 'link': run(["ln", "-s", source_path, target_linkpath]) - else: - # This will not happen unless _copy_or_link() has a bug. - raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action)) + continue + + # This will not happen unless _copy_or_link() has a bug. + raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action)) def _copy_decouple(srcdir, dstdir): diff --git a/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py b/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py index 19b760a184..8c9b0436f4 100644 --- a/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py +++ b/repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py @@ -381,29 +381,24 @@ def temp_directory_layout(tmp_path, initial_structure): }, id="Absolute_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir" )), - # This should be fixed but not necessarily for this release. - # It makes sure that when we have two separate links to the - # same file outside of /etc/pki, one of the links is copied - # as a real file and the other is made a link to the copy. - # (Right now, the real file is copied in place of both links.) - # (pytest.param( - # { - # 'dir': { - # 'fileA': '/outside/fileC', - # 'fileB': '/outside/fileC', - # }, - # 'outside': { - # 'fileC': None, - # }, - # }, - # { - # 'dir': { - # 'fileA': None, - # 'fileB': '/dir/fileA', - # }, - # }, - # id="Absolute_two_symlinks_to_the_same_copied_file" - # )), + (pytest.param( + { + 'dir': { + 'fileA': '/outside/fileC', + 'fileB': '/outside/fileC', + }, + 'outside': { + 'fileC': None, + }, + }, + { + 'dir': { + 'fileA': None, + 'fileB': '/dir/fileA', + }, + }, + id="Absolute_two_symlinks_to_the_same_copied_file" + )), (pytest.param( { 'dir': { @@ -735,29 +730,24 @@ def temp_directory_layout(tmp_path, initial_structure): }, id="Relative_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir" )), - # This should be fixed but not necessarily for this release. - # It makes sure that when we have two separate links to the - # same file outside of /etc/pki, one of the links is copied - # as a real file and the other is made a link to the copy. - # (Right now, the real file is copied in place of both links.) - # (pytest.param( - # { - # 'dir': { - # 'fileA': '../outside/fileC', - # 'fileB': '../outside/fileC', - # }, - # 'outside': { - # 'fileC': None, - # }, - # }, - # { - # 'dir': { - # 'fileA': None, - # 'fileB': 'fileA', - # }, - # }, - # id="Relative_two_symlinks_to_the_same_copied_file" - # )), + (pytest.param( + { + 'dir': { + 'fileA': '../outside/fileC', + 'fileB': '../outside/fileC', + }, + 'outside': { + 'fileC': None, + }, + }, + { + 'dir': { + 'fileA': None, + 'fileB': 'fileA', + }, + }, + id="Relative_two_symlinks_to_the_same_copied_file" + )), (pytest.param( { 'dir': {