Skip to content

Commit

Permalink
Fix a cornercase in choosing whether to copy or link a file.
Browse files Browse the repository at this point in the history
In the present code, when we have two separate links to the same file outside of /etc/pki, the real
file is copied to the location of both links.  The problem with this is when the user makes changes
to the file in the future, they will have to edit both files whereas before they would only have had
to edit one of the files (since they linked to the same underlying file). Example:

  Host:
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileA -> /etc/sourceFile
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileB -> /etc/sourceFile

  Becomes:

  -rw-r--r--.  1 badger badger  12 Jan 22 03:43 fileA
  -rw-r--r--.  1 badger badger  12 Jan 22 03:43 fileB

fileA and fileB are separate copies of /etc/sourceFile.

This change makes it so anytime two links eventually point to the same exterior file, the first link will be copied and the second link will becone a symlinks to that copy like this:

  -rw-r--r--.  1 badger badger  12 Jan 22 03:43 fileA
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileB -> /etc/pki/fileA

This is the behaviour even if the second link went through several other exterior links to reach the
same sourceFile (but not if there is another interior link in between).  Example:

  Host:
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileA -> /etc/sourceFile
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileB -> /etc/sourceFile
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileC -> /etc/external-to-same-file
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileD -> /etc/external-to-interior-link

  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 /etc/external-to-same-file -> /etc/sourceFile
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 /etc/external-to-interior-link -> pki/fileB

  Becomes:
  -rw-r--r--.  1 badger badger  12 Jan 22 03:43 fileA
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileB -> /etc/pki/fileA
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileC -> /etc/pki/fileA
  lrwxrwxrwx.  1 badger badger  13 Jan 22 03:42 fileD -> /etc/pki/fileB

The drawback to the end user is that, unless you trace all the way through the chain of symlinks, it
is confusing that fileA ends up being a real file and fileC ends up pointing to it.  It's not
immediately obvious how the two are related.

The advantage is if the end user makes any changes to either fileA or fileC, then the changes will
be visible in both files.  This is the behaviour that existed on the host system.
  • Loading branch information
abadger committed Jan 22, 2024
1 parent 41321a1 commit bc23ec0
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,62 @@ def _mkdir_with_copied_mode(path, mode_from):
run(['chmod', '--reference={}'.format(mode_from), path])


def _canonize(pointee_as_abspath, srcdir):
"""
Helper function to return canonical forms of directory paths.
This is an ugly function that just avoids having to duplicate this code in two places.
"""
# If srcdir is a symlink, then we need a name for it that we can compare
# with other paths.
canonical_srcdir = os.path.realpath(srcdir)

# To make comparisons, we need to resolve all symlinks in the directory
# structure leading up to pointee. However, we can't include pointee
# itself otherwise it will resolve to the file that it points to in the
# end (which would be wrong if pointee_filename is a symlink).
canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath)
canonical_pointee_dir = os.path.realpath(canonical_pointee_dir)

return (pointee_filename, canonical_pointee_dir, canonical_srcdir)


def _format_source_link(pointee_as_abspath, symlink, srcdir):
"""
Format the source for a symlink as either an absolute path or a path relative to srcdir.
:param pointee_as_abspath: Sbsolute path to the source of the symlink.
:param symlink: Absolute path to the symlink that started us looking
through the chain of links. If the symlink points to a relative path, this function will
return a relative path. If it is an absolute path, it will return an absolute path.
:param srcdir: The directory in the source tree that we want everything to live within.
"""
pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir)
link_to = os.readlink(symlink)

if link_to.startswith('/'):
# The original symlink was an absolute path so we will set this
# one to absolute too
# Note: Because absolute paths are constructed inside of
# srcdir, the relative path that we need to join with has to be
# relative to srcdir, not the directory that the symlink is
# being created in.
relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir)
corrected_path = os.path.normpath(os.path.join(srcdir,
relative_to_srcdir,
pointee_filename)
)
else:
# If the original link is a relative link, then we want the new
# link to be relative (to the path that we will place the symlink in) as well
canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink))
relative_to_symlink_location = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir)
corrected_path = os.path.normpath(os.path.join(relative_to_symlink_location,
pointee_filename)
)
return corrected_path


def _choose_copy_or_link(symlink, srcdir):
"""
Determine whether to copy file contents or create a symlink depending on where the pointee resides.
Expand Down Expand Up @@ -378,10 +434,6 @@ def _choose_copy_or_link(symlink, srcdir):
if not os.path.exists(symlink):
raise BrokenSymlinkError('File {} is a broken symlink!'.format(symlink))

# If srcdir is a symlink, then we need a name for it that we can compare
# with other paths.
canonical_srcdir = os.path.realpath(srcdir)

pointee_as_abspath = symlink
seen = set([pointee_as_abspath])

Expand Down Expand Up @@ -412,43 +464,17 @@ def _choose_copy_or_link(symlink, srcdir):

seen.add(pointee_as_abspath)

# To make comparisons, we need to resolve all symlinks in the directory
# structure leading up to pointee. However, we can't include pointee
# itself otherwise it will resolve to the file that it points to in the
# end (which would be wrong if pointee_filename is a symlink).
canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath)
canonical_pointee_dir = os.path.realpath(canonical_pointee_dir)

if canonical_pointee_dir.startswith(canonical_srcdir):
# Absolute path inside of the correct dir so we need to link to it
# But we need to determine what the link path should be before
# returning.

# Construct a relative path that points from the symlinks directory
# to the pointee.
link_to = os.readlink(symlink)
canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink))
relative_path = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir)

if link_to.startswith('/'):
# The original symlink was an absolute path so we will set this
# one to absolute too
# Note: Because absolute paths are constructed inside of
# srcdir, the relative path that we need to join here has to be
# relative to srcdir, not the directory that the symlink is
# being created in.
relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir)
corrected_path = os.path.normpath(os.path.join(srcdir, relative_to_srcdir, pointee_filename))

else:
# If the original link is a relative link, then we want the new
# link to be relative as well
corrected_path = os.path.normpath(os.path.join(relative_path, pointee_filename))

return ("link", corrected_path)
dummy_pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir)
if not canonical_pointee_dir.startswith(canonical_srcdir):
# pointee is a symlink that points outside of the srcdir so continue to
# the next symlink in the chain.
continue

# pointee is a symlink that points outside of the srcdir so continue to
# the next symlink in the chain.
# Absolute path inside of the correct dir so we need to link to it
# Note: We have to make the source of the link relative or absolute
# depending on what the original link was
corrected_path = _format_source_link(pointee_as_abspath, symlink, srcdir)
return ("link", corrected_path)

# The file is not a link so copy it
return ('copy', pointee_as_abspath)
Expand All @@ -464,6 +490,7 @@ def _copy_symlinks(symlinks_to_process, srcdir):
:param srcdir: The root directory that every piece of content must be present in.
:raises ValueError: if the arguments are not correct
"""
copied_links = {}
for source_linkpath, target_linkpath in symlinks_to_process:
try:
action, source_path = _choose_copy_or_link(source_linkpath, srcdir)
Expand All @@ -473,14 +500,32 @@ def _copy_symlinks(symlinks_to_process, srcdir):
continue

if action == "copy":
# Note: source_path could be a directory, so '-a' or '-r' must be
# given to cp.
run(['cp', '-a', source_path, target_linkpath])
elif action == 'link':
# Need the canonical path so that we can check if the pointed to
# file has been copied before
canonical_source_path = os.path.realpath(source_path)
if canonical_source_path in copied_links:
# The link was copied before so we need to change this to a link action
# with the previous copied link location as the source for this link
action = "link"
source_abspath = os.path.abspath(os.path.normpath(copied_links[canonical_source_path]))
source_path = _format_source_link(source_abspath, source_linkpath, srcdir)
else:
# All other links to the same target will need to link to the path
# that we're processing now since they all pointed to the same
# target before.
copied_links[canonical_source_path] = source_linkpath

# Note: source_path could be a directory, so '-a' or '-r' must be
# given to cp.
run(['cp', '-a', source_path, target_linkpath])
continue

if action == 'link':
run(["ln", "-s", source_path, target_linkpath])
else:
# This will not happen unless _copy_or_link() has a bug.
raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action))
continue

# This will not happen unless _copy_or_link() has a bug.
raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action))


def _copy_decouple(srcdir, dstdir):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,29 +381,24 @@ def temp_directory_layout(tmp_path, initial_structure):
},
id="Absolute_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir"
)),
# This should be fixed but not necessarily for this release.
# It makes sure that when we have two separate links to the
# same file outside of /etc/pki, one of the links is copied
# as a real file and the other is made a link to the copy.
# (Right now, the real file is copied in place of both links.)
# (pytest.param(
# {
# 'dir': {
# 'fileA': '/outside/fileC',
# 'fileB': '/outside/fileC',
# },
# 'outside': {
# 'fileC': None,
# },
# },
# {
# 'dir': {
# 'fileA': None,
# 'fileB': '/dir/fileA',
# },
# },
# id="Absolute_two_symlinks_to_the_same_copied_file"
# )),
(pytest.param(
{
'dir': {
'fileA': '/outside/fileC',
'fileB': '/outside/fileC',
},
'outside': {
'fileC': None,
},
},
{
'dir': {
'fileA': None,
'fileB': '/dir/fileA',
},
},
id="Absolute_two_symlinks_to_the_same_copied_file"
)),
(pytest.param(
{
'dir': {
Expand Down Expand Up @@ -735,29 +730,24 @@ def temp_directory_layout(tmp_path, initial_structure):
},
id="Relative_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir"
)),
# This should be fixed but not necessarily for this release.
# It makes sure that when we have two separate links to the
# same file outside of /etc/pki, one of the links is copied
# as a real file and the other is made a link to the copy.
# (Right now, the real file is copied in place of both links.)
# (pytest.param(
# {
# 'dir': {
# 'fileA': '../outside/fileC',
# 'fileB': '../outside/fileC',
# },
# 'outside': {
# 'fileC': None,
# },
# },
# {
# 'dir': {
# 'fileA': None,
# 'fileB': 'fileA',
# },
# },
# id="Relative_two_symlinks_to_the_same_copied_file"
# )),
(pytest.param(
{
'dir': {
'fileA': '../outside/fileC',
'fileB': '../outside/fileC',
},
'outside': {
'fileC': None,
},
},
{
'dir': {
'fileA': None,
'fileB': 'fileA',
},
},
id="Relative_two_symlinks_to_the_same_copied_file"
)),
(pytest.param(
{
'dir': {
Expand Down

0 comments on commit bc23ec0

Please sign in to comment.