Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure get_contents() method of Node objects return 'bytes' and fix decoding errors in Value.get_csig() #3738

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
9 changes: 9 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER
- Fix minor bug affecting SCons.Node.FS.File.get_csig()'s usage of the MD5 chunksize.
User-facing behavior does not change with this fix (GH Issue #3726).

From Eugene Huang:
- Ensure all Node subclasses return 'bytes' in their get_contents()
methods.
- Make Value.get_text_contents() calculate the contents using child node
csigs instead of child contents (similar to Alias node contents). The
new implementation is not prone to decoding errors so Value.get_csig()
should always work, whereas previously, Value nodes sometimes could not
be used as target nodes.

From Joachim Kuebart:
- Suppress missing SConscript deprecation warning if `must_exist=False`
is used.
Expand Down
2 changes: 1 addition & 1 deletion SCons/Action.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ def get_contents(self, target, source, env):
except AttributeError:
# No __call__() method, so it might be a builtin
# or something like that. Do the best we can.
contents = repr(actfunc)
contents = repr(actfunc).encode()

return contents

Expand Down
6 changes: 3 additions & 3 deletions SCons/ActionTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2048,9 +2048,9 @@ def __call__(self):
af = SCons.Action.ActionFactory(str, strfunc)
ac = SCons.Action.ActionCaller(af, [], {})
c = ac.get_contents([], [], Environment())
assert c == "<built-in function str>" or \
c == "<type 'str'>" or \
c == "<class 'str'>", repr(c)
assert c == b"<built-in function str>" or \
c == b"<type 'str'>" or \
c == b"<class 'str'>", repr(c)
# ^^ class str for python3

def test___call__(self):
Expand Down
2 changes: 1 addition & 1 deletion SCons/Executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def get_action_side_effects(self):
def __call__(self, *args, **kw):
return 0
def get_contents(self):
return ''
return b''
def _morph(self):
"""Morph this Null executor to a real Executor object."""
batches = self.batches
Expand Down
2 changes: 1 addition & 1 deletion SCons/Node/Alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def get_contents(self):
"""The contents of an alias is the concatenation
of the content signatures of all its sources."""
childsigs = [n.get_csig() for n in self.children()]
return ''.join(childsigs)
return ''.join(childsigs).encode('utf-8')

def sconsign(self):
"""An Alias is not recorded in .sconsign files"""
Expand Down
4 changes: 2 additions & 2 deletions SCons/Node/AliasTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(self, contents):
def get_csig(self):
return self.contents
def get_contents(self):
return self.contents
return self.contents.encode()

ans = SCons.Node.Alias.AliasNameSpace()

Expand All @@ -66,7 +66,7 @@ def get_contents(self):
a.sources = [ DummyNode('one'), DummyNode('two'), DummyNode('three') ]

c = a.get_contents()
assert c == 'onetwothree', c
assert c == 'onetwothree'.encode('utf-8'), c

def test_lookup(self):
"""Test the lookup() method
Expand Down
3 changes: 2 additions & 1 deletion SCons/Node/FS.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,8 @@ def scanner_key(self):
def get_text_contents(self):
"""We already emit things in text, so just return the binary
version."""
return self.get_contents()
# Function get_contents_dir() returns utf-8 encoded text.
return self.get_contents().decode('utf-8')

def get_contents(self):
"""Return content signatures and names of all our children
Expand Down
20 changes: 10 additions & 10 deletions SCons/Node/FSTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1411,7 +1411,7 @@ def nonexistent(method, s):
# test Entry.get_contents()
e = fs.Entry('does_not_exist')
c = e.get_contents()
assert c == "", c
assert c == b"", c
assert e.__class__ == SCons.Node.FS.Entry

test.write("file", "file\n")
Expand Down Expand Up @@ -1441,7 +1441,7 @@ def nonexistent(method, s):
test.subdir("dir")
e = fs.Entry('dir')
c = e.get_contents()
assert c == "", c
assert c == b"", c
assert e.__class__ == SCons.Node.FS.Dir

c = e.get_text_contents()
Expand All @@ -1455,7 +1455,7 @@ def nonexistent(method, s):
e = fs.Entry('dangling_symlink')
c = e.get_contents()
assert e.__class__ == SCons.Node.FS.Entry, e.__class__
assert c == "", c
assert c == b"", c
c = e.get_text_contents()
assert c == "", c

Expand Down Expand Up @@ -2048,14 +2048,14 @@ def test_get_contents(self):
e = self.fs.Dir(os.path.join('d', 'empty'))
s = self.fs.Dir(os.path.join('d', 'sub'))

files = d.get_contents().split('\n')
files = d.get_contents().split('\n'.encode('utf-8'))

assert e.get_contents() == '', e.get_contents()
assert e.get_contents() == b'', e.get_contents()
assert e.get_text_contents() == '', e.get_text_contents()
assert e.get_csig() + " empty" == files[0], files
assert f.get_csig() + " f" == files[1], files
assert g.get_csig() + " g" == files[2], files
assert s.get_csig() + " sub" == files[3], files
assert (e.get_csig() + " empty").encode('utf-8') == files[0], files
assert (f.get_csig() + " f").encode('utf-8') == files[1], files
assert (g.get_csig() + " g").encode('utf-8') == files[2], files
assert (s.get_csig() + " sub").encode('utf-8') == files[3], files

def test_md5_chunksize(self):
"""
Expand Down Expand Up @@ -2590,7 +2590,7 @@ def get_timestamp(self):
return self.timestamp

def get_contents(self):
return self.name
return self.name.encode()

def get_ninfo(self):
""" mocked to ensure csig will equal the filename"""
Expand Down
23 changes: 8 additions & 15 deletions SCons/Node/Python.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,26 +138,20 @@ def read(self):
def get_text_contents(self):
"""By the assumption that the node.built_value is a
deterministic product of the sources, the contents of a Value
are the concatenation of all the contents of its sources. As
the value need not be built when get_contents() is called, we
are the concatenation of all the content signatures of its sources.
As the value need not be built when get_contents() is called, we
cannot use the actual node.built_value."""
###TODO: something reasonable about universal newlines
contents = str(self.value)
for kid in self.children(None):
contents = contents + kid.get_contents().decode()
contents += ''.join(n.get_csig() for n in self.children())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I've said before I'm not sure concatenating the contents of the child was correct, and this seems even less correct.

return contents

def get_contents(self):
"""
Get contents for signature calculations.
Same as get_text_contents() but encode as utf-8 in case other
functions rely on get_contents() existing.
:return: bytes
"""
text_contents = self.get_text_contents()
try:
return text_contents.encode()
except UnicodeDecodeError:
# Already encoded as python2 str are bytes
return text_contents
return self.get_text_contents().encode('utf-8')

def changed_since_last_build(self, target, prev_ni):
cur_csig = self.get_csig()
Expand All @@ -178,10 +172,9 @@ def get_csig(self, calc=None):
except AttributeError:
pass

contents = self.get_text_contents()
csig = self.get_text_contents()

self.get_ninfo().csig = contents
return contents
return csig


def ValueWithMemo(value, built_value=None, name=None):
Expand Down
52 changes: 51 additions & 1 deletion SCons/Node/PythonTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,58 @@ def test_get_csig(self):
csig = v3.get_csig(None)
assert csig == 'None', csig

def test_get_csig_with_children(self):
"""Test calculating the content signature of a Value() object with child
nodes.
"""
class DummyNode:
def __init__(self, csig):
self.csig = csig
def get_csig(self, calc=None):
return self.csig

v = SCons.Node.Python.Value('aaa')

c1 = DummyNode(csig='csig1')
c2 = DummyNode(csig='csig2')

v.add_dependency([c1, c2])
csig = v.get_csig(None)
assert csig == 'aaacsig1csig2', csig

def test_get_text_contents_with_children(self):
"""Test calculating text contents with child nodes
"""
class DummyNode:
def __init__(self, csig):
self.csig = csig
def get_csig(self):
return self.csig

v = SCons.Node.Python.Value('aaa')
c1 = DummyNode(csig='csig1')
c2 = DummyNode(csig='csig2')

v.add_dependency([c1, c2])
text_contents = v.get_text_contents()
assert text_contents == 'aaacsig1csig2', text_contents

def test_get_content_with_children(self):
"""Test calculating contents with child nodes
"""
class DummyNode:
def __init__(self, csig):
self.csig = csig
def get_csig(self):
return self.csig

v = SCons.Node.Python.Value('aaa')
c1 = DummyNode(csig='csig1')
c2 = DummyNode(csig='csig2')

v.add_dependency([c1, c2])
contents = v.get_contents()
assert contents == v.get_text_contents().encode('utf-8'), contents


class ValueNodeInfoTestCase(unittest.TestCase):
Expand All @@ -129,7 +179,7 @@ def test___init__(self):
node._func_get_contents = 2 # Pretend to be a Dir.
node.add_to_implicit([value])
contents = node.get_contents()
expected_contents = '%s %s\n' % (value.get_csig(), value.name)
expected_contents = ('%s %s\n' % (value.get_csig(), value.name)).encode('utf-8')
assert contents == expected_contents


Expand Down
6 changes: 3 additions & 3 deletions SCons/Node/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def get_contents_entry(node):
# string so calls to get_contents() in emitters and the
# like (e.g. in qt.py) don't have to disambiguate by hand
# or catch the exception.
return ''
return b''
else:
return _get_contents_map[node._func_get_contents](node)

Expand All @@ -214,8 +214,8 @@ def get_contents_dir(node):
separated by new-lines. Ensure that the nodes are sorted."""
contents = []
for n in sorted(node.children(), key=lambda t: t.name):
contents.append('%s %s\n' % (n.get_csig(), n.name))
return ''.join(contents)
contents.append(('%s %s\n' % (n.get_csig(), n.name)).encode('utf-8'))
return b''.join(contents)

def get_contents_file(node):
if not node.rexists():
Expand Down
2 changes: 1 addition & 1 deletion SCons/SConfTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_TryBuild(self):

class MyAction:
def get_contents(self, target, source, env):
return 'MyBuilder-MyAction $SOURCE $TARGET'
return b'MyBuilder-MyAction $SOURCE $TARGET'

class Attrs:
__slots__ = ('shared', '__dict__')
Expand Down
2 changes: 1 addition & 1 deletion SCons/Scanner/ScannerTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def rfile(self):
def exists(self):
return self._exists
def get_contents(self):
return self._contents
return self._contents.encode()
def get_text_contents(self):
return self._contents
def get_dir(self):
Expand Down