From d6ea1ba856b3186255f12829523c0ce18e2e1900 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 7 Jul 2020 13:55:00 +1000 Subject: [PATCH 1/8] Make sure get_contents() method of Nodes return 'bytes' Dir, Alias, ActionCaller compute their contents. The get_contents() of Dir and Alias now return a utf-8 formatted string. The content of ActionCaller is usually the 'co_code' attribute of the code object of the function it wraps, when this is unavailable, return the 'repr' of the wrapped function encoded with default options (currently 'utf-8'). --- SCons/Action.py | 2 +- SCons/Executor.py | 2 +- SCons/Node/Alias.py | 2 +- SCons/Node/FS.py | 3 ++- SCons/Node/__init__.py | 6 +++--- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/SCons/Action.py b/SCons/Action.py index 1f499eee5a..abe7172124 100644 --- a/SCons/Action.py +++ b/SCons/Action.py @@ -1405,7 +1405,7 @@ def get_contents(self, target, source, env): except AttributeError: # No __call__() method, so it might be a builtin # or something like that. Do the best we can. - contents = repr(actfunc) + contents = repr(actfunc).encode() return contents diff --git a/SCons/Executor.py b/SCons/Executor.py index f6582298c6..c317e230c4 100644 --- a/SCons/Executor.py +++ b/SCons/Executor.py @@ -643,7 +643,7 @@ def get_action_side_effects(self): def __call__(self, *args, **kw): return 0 def get_contents(self): - return '' + return b'' def _morph(self): """Morph this Null executor to a real Executor object.""" batches = self.batches diff --git a/SCons/Node/Alias.py b/SCons/Node/Alias.py index 55d94f9808..64186106e4 100644 --- a/SCons/Node/Alias.py +++ b/SCons/Node/Alias.py @@ -132,7 +132,7 @@ def get_contents(self): """The contents of an alias is the concatenation of the content signatures of all its sources.""" childsigs = [n.get_csig() for n in self.children()] - return ''.join(childsigs) + return ''.join(childsigs).encode('utf-8') def sconsign(self): """An Alias is not recorded in .sconsign files""" diff --git a/SCons/Node/FS.py b/SCons/Node/FS.py index 9e5e5692d9..c884092464 100644 --- a/SCons/Node/FS.py +++ b/SCons/Node/FS.py @@ -1856,7 +1856,8 @@ def scanner_key(self): def get_text_contents(self): """We already emit things in text, so just return the binary version.""" - return self.get_contents() + # Function get_contents_dir() returns utf-8 encoded text. + return self.get_contents().decode('utf-8') def get_contents(self): """Return content signatures and names of all our children diff --git a/SCons/Node/__init__.py b/SCons/Node/__init__.py index 82bb715fbd..5e9860356e 100644 --- a/SCons/Node/__init__.py +++ b/SCons/Node/__init__.py @@ -206,7 +206,7 @@ def get_contents_entry(node): # string so calls to get_contents() in emitters and the # like (e.g. in qt.py) don't have to disambiguate by hand # or catch the exception. - return '' + return b'' else: return _get_contents_map[node._func_get_contents](node) @@ -215,8 +215,8 @@ def get_contents_dir(node): separated by new-lines. Ensure that the nodes are sorted.""" contents = [] for n in sorted(node.children(), key=lambda t: t.name): - contents.append('%s %s\n' % (n.get_csig(), n.name)) - return ''.join(contents) + contents.append(('%s %s\n' % (n.get_csig(), n.name)).encode('utf-8')) + return b''.join(contents) def get_contents_file(node): if not node.rexists(): From 479caf96e89595dc34d04746290d6af6b345cc79 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 7 Jul 2020 14:08:06 +1000 Subject: [PATCH 2/8] Update tests that were expecting get_contents() to be str Some mock objects' get_contents() were returning strings, modify them to return bytes. Some tests were asserting get_contents() equals a string, modify expected values to be bytes. For tests that check the value of Dir.get_contents() and Alias.get_contents(), make the tests explicitly expect 'utf-8' encoded content. --- SCons/ActionTests.py | 6 +++--- SCons/Node/AliasTests.py | 4 ++-- SCons/Node/FSTests.py | 20 ++++++++++---------- SCons/Node/PythonTests.py | 2 +- SCons/SConfTests.py | 2 +- SCons/Scanner/ScannerTests.py | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/SCons/ActionTests.py b/SCons/ActionTests.py index 81dacfaf15..09fa118262 100644 --- a/SCons/ActionTests.py +++ b/SCons/ActionTests.py @@ -2048,9 +2048,9 @@ def __call__(self): af = SCons.Action.ActionFactory(str, strfunc) ac = SCons.Action.ActionCaller(af, [], {}) c = ac.get_contents([], [], Environment()) - assert c == "" or \ - c == "" or \ - c == "", repr(c) + assert c == b"" or \ + c == b"" or \ + c == b"", repr(c) # ^^ class str for python3 def test___call__(self): diff --git a/SCons/Node/AliasTests.py b/SCons/Node/AliasTests.py index 613a5c02d2..3c366df31f 100644 --- a/SCons/Node/AliasTests.py +++ b/SCons/Node/AliasTests.py @@ -57,7 +57,7 @@ def __init__(self, contents): def get_csig(self): return self.contents def get_contents(self): - return self.contents + return self.contents.encode() ans = SCons.Node.Alias.AliasNameSpace() @@ -67,7 +67,7 @@ def get_contents(self): a.sources = [ DummyNode('one'), DummyNode('two'), DummyNode('three') ] c = a.get_contents() - assert c == 'onetwothree', c + assert c == 'onetwothree'.encode('utf-8'), c def test_lookup(self): """Test the lookup() method diff --git a/SCons/Node/FSTests.py b/SCons/Node/FSTests.py index bfe2a77d0e..20a471f0f3 100644 --- a/SCons/Node/FSTests.py +++ b/SCons/Node/FSTests.py @@ -1411,7 +1411,7 @@ def nonexistent(method, s): # test Entry.get_contents() e = fs.Entry('does_not_exist') c = e.get_contents() - assert c == "", c + assert c == b"", c assert e.__class__ == SCons.Node.FS.Entry test.write("file", "file\n") @@ -1441,7 +1441,7 @@ def nonexistent(method, s): test.subdir("dir") e = fs.Entry('dir') c = e.get_contents() - assert c == "", c + assert c == b"", c assert e.__class__ == SCons.Node.FS.Dir c = e.get_text_contents() @@ -1455,7 +1455,7 @@ def nonexistent(method, s): e = fs.Entry('dangling_symlink') c = e.get_contents() assert e.__class__ == SCons.Node.FS.Entry, e.__class__ - assert c == "", c + assert c == b"", c c = e.get_text_contents() assert c == "", c @@ -2049,14 +2049,14 @@ def test_get_contents(self): e = self.fs.Dir(os.path.join('d', 'empty')) s = self.fs.Dir(os.path.join('d', 'sub')) - files = d.get_contents().split('\n') + files = d.get_contents().split('\n'.encode('utf-8')) - assert e.get_contents() == '', e.get_contents() + assert e.get_contents() == b'', e.get_contents() assert e.get_text_contents() == '', e.get_text_contents() - assert e.get_csig() + " empty" == files[0], files - assert f.get_csig() + " f" == files[1], files - assert g.get_csig() + " g" == files[2], files - assert s.get_csig() + " sub" == files[3], files + assert (e.get_csig() + " empty").encode('utf-8') == files[0], files + assert (f.get_csig() + " f").encode('utf-8') == files[1], files + assert (g.get_csig() + " g").encode('utf-8') == files[2], files + assert (s.get_csig() + " sub").encode('utf-8') == files[3], files def test_implicit_re_scans(self): """Test that adding entries causes a directory to be re-scanned @@ -2532,7 +2532,7 @@ def get_timestamp(self): return self.timestamp def get_contents(self): - return self.name + return self.name.encode() def get_ninfo(self): """ mocked to ensure csig will equal the filename""" diff --git a/SCons/Node/PythonTests.py b/SCons/Node/PythonTests.py index b6fa859d2d..3fe7ec7316 100644 --- a/SCons/Node/PythonTests.py +++ b/SCons/Node/PythonTests.py @@ -129,7 +129,7 @@ def test___init__(self): node._func_get_contents = 2 # Pretend to be a Dir. node.add_to_implicit([value]) contents = node.get_contents() - expected_contents = '%s %s\n' % (value.get_csig(), value.name) + expected_contents = ('%s %s\n' % (value.get_csig(), value.name)).encode('utf-8') assert contents == expected_contents diff --git a/SCons/SConfTests.py b/SCons/SConfTests.py index 8ce6b3a554..e9d1db8795 100644 --- a/SCons/SConfTests.py +++ b/SCons/SConfTests.py @@ -160,7 +160,7 @@ def test_TryBuild(self): class MyAction: def get_contents(self, target, source, env): - return 'MyBuilder-MyAction $SOURCE $TARGET' + return b'MyBuilder-MyAction $SOURCE $TARGET' class MyBuilder(SCons.Builder.BuilderBase): def __init__(self): diff --git a/SCons/Scanner/ScannerTests.py b/SCons/Scanner/ScannerTests.py index bf9879eb61..7569d6d5d7 100644 --- a/SCons/Scanner/ScannerTests.py +++ b/SCons/Scanner/ScannerTests.py @@ -494,7 +494,7 @@ def rfile(self): def exists(self): return self._exists def get_contents(self): - return self._contents + return self._contents.encode() def get_text_contents(self): return self._contents def get_dir(self): From 18194cf6dee81e6dee400c2efe31042e9a7e4e36 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 7 Jul 2020 14:13:25 +1000 Subject: [PATCH 3/8] Fix Value.get_contents() and Value.get_csig() decoding errors Value.get_contents() is currently calculating its binary contents by getting its text contents via Value.get_text_contents() and encoding the returned text content. This can be a problem if get_text_contents() fails. The current implementation of File.get_text_contents() never fails, as a last resort, undecodable bytes are handled by 'backslashreplace', but Value.get_text_contents() calculates its text content by decoding child node binary contents manually using bytes.decode() with default options (encoding='utf-8' and errors='strict') so has the potential to fail (e.g. a child File node contains non-utf8 binary data). Reimplement get_contents() to mirror get_text_contents() implementation as a potential solution. The current get_csig() also uses the return of get_text_contents() as the content signature. Because get_text_contents() might fail, make get_csig() return get_contents() decoded with default encoding and errors='backslashreplace'. --- SCons/Node/Python.py | 12 +++++------- SCons/Node/PythonTests.py | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/SCons/Node/Python.py b/SCons/Node/Python.py index 68c6ee88be..d44f803ddb 100644 --- a/SCons/Node/Python.py +++ b/SCons/Node/Python.py @@ -152,12 +152,10 @@ def get_contents(self): Get contents for signature calculations. :return: bytes """ - text_contents = self.get_text_contents() - try: - return text_contents.encode() - except UnicodeDecodeError: - # Already encoded as python2 str are bytes - return text_contents + contents = str(self.value).encode('utf-8') + for kid in self.children(None): + contents = contents + kid.get_contents() + return contents def changed_since_last_build(self, target, prev_ni): cur_csig = self.get_csig() @@ -178,7 +176,7 @@ def get_csig(self, calc=None): except AttributeError: pass - contents = self.get_text_contents() + contents = self.get_contents().decode(errors='backslashreplace') self.get_ninfo().csig = contents return contents diff --git a/SCons/Node/PythonTests.py b/SCons/Node/PythonTests.py index 3fe7ec7316..4ca3f32f07 100644 --- a/SCons/Node/PythonTests.py +++ b/SCons/Node/PythonTests.py @@ -103,8 +103,39 @@ def test_get_csig(self): csig = v3.get_csig(None) assert csig == 'None', csig - - + def test_get_content_with_child_binary_content(self): + class DummyNode: + def __init__(self, contents): + self.contents = contents + def get_contents(self): + return self.contents + + # Node with binary content that is not valid utf-8. + node_with_binary = DummyNode(b'\xff') + + v = SCons.Node.Python.Value('v') + v.add_dependency([node_with_binary]) + + # Just make sure this call doesn't fail. Not sure what to check the + # return value against. + v.get_contents() + + def test_get_csig_with_child_binary_content(self): + class DummyNode: + def __init__(self, contents): + self.contents = contents + def get_contents(self): + return self.contents + + # Node with binary content that is not valid utf-8. + node_with_binary = DummyNode(b'\xff') + + v = SCons.Node.Python.Value('v') + v.add_dependency([node_with_binary]) + + # Just make sure this call doesn't fail. Not sure what to check the + # return value against. + v.get_csig() class ValueNodeInfoTestCase(unittest.TestCase): From 4e79f0708d6ea262cfa66ace6d2e6e140689a882 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Wed, 8 Jul 2020 12:15:06 +1000 Subject: [PATCH 4/8] Update CHANGES.txt --- CHANGES.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.txt b/CHANGES.txt index a3db40dec8..3eb1646a14 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -11,6 +11,16 @@ RELEASE 4.1.0.devyyyymmdd - Mon, 04 Jul 2020 16:06:40 -0700 From John Doe: - Whatever John Doe did. + + From Eugene Huang: + - Ensure all Node subclasses return 'bytes' in their get_contents() + methods. This was causing problems in Value.get_text_contents() as + 'str' types do not have a decode() method. + - Make sure Value.get_contents() never fails because of decoding errors + from trying to decode child nodes binary contents. Modify + Value.get_csig() to use get_contents() as basis for csig instead since + get_text_contents() is prone to decoding errors, which prevented Value + nodes from being used as targets in some cases. RELEASE 4.0.0 - Sat, 04 Jul 2020 12:00:27 +0000 From 5e65d1417bbb001f3625803f4892a8e0bcb53ef7 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Wed, 8 Jul 2020 11:23:19 +1000 Subject: [PATCH 5/8] Make Value.get_csig() not include the contents of its children Current implementation might lead to high memory usage. New implementation depends on children csigs instead. Remove test that makes sure Value.get_csig() works even if child binary contents are invalid utf-8 since get_csig() no longer depends on children content. --- SCons/Node/Python.py | 7 ++++--- SCons/Node/PythonTests.py | 30 ++++++++++++++++-------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/SCons/Node/Python.py b/SCons/Node/Python.py index d44f803ddb..491b836982 100644 --- a/SCons/Node/Python.py +++ b/SCons/Node/Python.py @@ -176,10 +176,11 @@ def get_csig(self, calc=None): except AttributeError: pass - contents = self.get_contents().decode(errors='backslashreplace') + csig = str(self.value) + csig += ''.join(child.get_csig() for child in self.children(None)) - self.get_ninfo().csig = contents - return contents + self.get_ninfo().csig = csig + return csig def ValueWithMemo(value, built_value=None, name=None): diff --git a/SCons/Node/PythonTests.py b/SCons/Node/PythonTests.py index 4ca3f32f07..23f3aeef55 100644 --- a/SCons/Node/PythonTests.py +++ b/SCons/Node/PythonTests.py @@ -103,24 +103,26 @@ def test_get_csig(self): csig = v3.get_csig(None) assert csig == 'None', csig - def test_get_content_with_child_binary_content(self): + def test_get_csig_with_children(self): + """Test calculating the content signature of a Value() object with child + nodes. + """ class DummyNode: - def __init__(self, contents): - self.contents = contents - def get_contents(self): - return self.contents + def __init__(self, csig): + self.csig = csig + def get_csig(self, calc=None): + return self.csig - # Node with binary content that is not valid utf-8. - node_with_binary = DummyNode(b'\xff') + v = SCons.Node.Python.Value('aaa') - v = SCons.Node.Python.Value('v') - v.add_dependency([node_with_binary]) + c1 = DummyNode(csig='csig1') + c2 = DummyNode(csig='csig2') - # Just make sure this call doesn't fail. Not sure what to check the - # return value against. - v.get_contents() + v.add_dependency([c1, c2]) + csig = v.get_csig(None) + assert csig == 'aaacsig1csig2', csig - def test_get_csig_with_child_binary_content(self): + def test_get_content_with_child_binary_content(self): class DummyNode: def __init__(self, contents): self.contents = contents @@ -135,7 +137,7 @@ def get_contents(self): # Just make sure this call doesn't fail. Not sure what to check the # return value against. - v.get_csig() + v.get_contents() class ValueNodeInfoTestCase(unittest.TestCase): From 8530029814b6223721d07a51c20f6f1433716df8 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Wed, 8 Jul 2020 12:48:19 +1000 Subject: [PATCH 6/8] Update CHANGES.txt --- CHANGES.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 3eb1646a14..c786381d94 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,11 +17,11 @@ RELEASE 4.1.0.devyyyymmdd - Mon, 04 Jul 2020 16:06:40 -0700 methods. This was causing problems in Value.get_text_contents() as 'str' types do not have a decode() method. - Make sure Value.get_contents() never fails because of decoding errors - from trying to decode child nodes binary contents. Modify - Value.get_csig() to use get_contents() as basis for csig instead since - get_text_contents() is prone to decoding errors, which prevented Value - nodes from being used as targets in some cases. - + from trying to decode child nodes binary contents. + - Make Value.get_csig() use children csigs instead of content for + efficiency. Using get_text_contents() is also bad because it is prone + to decoding errors which will prevent Value nodes from being used as + targets in some cases. RELEASE 4.0.0 - Sat, 04 Jul 2020 12:00:27 +0000 From 428e225d3a91468cc80999ebcc65a10c7e6af25b Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Wed, 8 Jul 2020 12:00:39 +1000 Subject: [PATCH 7/8] Make Value.get_text_contents() use child csigs instead of contents Current implementation of Value.get_text_contents() returns a concatenation of all child contents. Instead, make the contents of a Value similar to an Alias, except prepend the stringified 'value' attribute as well. Since Value.get_text_contents() will no longer fail (as long as child nodes implement a working get_csig() which they should), make get_contents() return get_text_contents() but utf-8 encoded. Also make get_csig() just return get_text_contents() Add new tests for testing get_contents() and get_text_contents() only rely on child node csigs, and not directly on child content. --- SCons/Node/Python.py | 20 +++++++------------ SCons/Node/PythonTests.py | 41 +++++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/SCons/Node/Python.py b/SCons/Node/Python.py index 491b836982..63ddb8496c 100644 --- a/SCons/Node/Python.py +++ b/SCons/Node/Python.py @@ -138,24 +138,20 @@ def read(self): def get_text_contents(self): """By the assumption that the node.built_value is a deterministic product of the sources, the contents of a Value - are the concatenation of all the contents of its sources. As - the value need not be built when get_contents() is called, we + are the concatenation of all the content signatures of its sources. + As the value need not be built when get_contents() is called, we cannot use the actual node.built_value.""" - ###TODO: something reasonable about universal newlines contents = str(self.value) - for kid in self.children(None): - contents = contents + kid.get_contents().decode() + contents += ''.join(n.get_csig() for n in self.children()) return contents def get_contents(self): """ - Get contents for signature calculations. + Same as get_text_contents() but encode as utf-8 in case other + functions rely on get_contents() existing. :return: bytes """ - contents = str(self.value).encode('utf-8') - for kid in self.children(None): - contents = contents + kid.get_contents() - return contents + return self.get_text_contents().encode('utf-8') def changed_since_last_build(self, target, prev_ni): cur_csig = self.get_csig() @@ -176,10 +172,8 @@ def get_csig(self, calc=None): except AttributeError: pass - csig = str(self.value) - csig += ''.join(child.get_csig() for child in self.children(None)) + csig = self.get_text_contents() - self.get_ninfo().csig = csig return csig diff --git a/SCons/Node/PythonTests.py b/SCons/Node/PythonTests.py index 23f3aeef55..2acd747926 100644 --- a/SCons/Node/PythonTests.py +++ b/SCons/Node/PythonTests.py @@ -122,22 +122,39 @@ def get_csig(self, calc=None): csig = v.get_csig(None) assert csig == 'aaacsig1csig2', csig - def test_get_content_with_child_binary_content(self): + def test_get_text_contents_with_children(self): + """Test calculating text contents with child nodes + """ class DummyNode: - def __init__(self, contents): - self.contents = contents - def get_contents(self): - return self.contents + def __init__(self, csig): + self.csig = csig + def get_csig(self): + return self.csig - # Node with binary content that is not valid utf-8. - node_with_binary = DummyNode(b'\xff') + v = SCons.Node.Python.Value('aaa') + c1 = DummyNode(csig='csig1') + c2 = DummyNode(csig='csig2') - v = SCons.Node.Python.Value('v') - v.add_dependency([node_with_binary]) + v.add_dependency([c1, c2]) + text_contents = v.get_text_contents() + assert text_contents == 'aaacsig1csig2', text_contents - # Just make sure this call doesn't fail. Not sure what to check the - # return value against. - v.get_contents() + def test_get_content_with_children(self): + """Test calculating contents with child nodes + """ + class DummyNode: + def __init__(self, csig): + self.csig = csig + def get_csig(self): + return self.csig + + v = SCons.Node.Python.Value('aaa') + c1 = DummyNode(csig='csig1') + c2 = DummyNode(csig='csig2') + + v.add_dependency([c1, c2]) + contents = v.get_contents() + assert contents == v.get_text_contents().encode('utf-8'), contents class ValueNodeInfoTestCase(unittest.TestCase): From 9861322b2306f17fc9a9037791fa689427c7275e Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Wed, 8 Jul 2020 12:41:21 +1000 Subject: [PATCH 8/8] Update CHANGES.txt --- CHANGES.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index c786381d94..5a83ae4800 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -14,14 +14,13 @@ RELEASE 4.1.0.devyyyymmdd - Mon, 04 Jul 2020 16:06:40 -0700 From Eugene Huang: - Ensure all Node subclasses return 'bytes' in their get_contents() - methods. This was causing problems in Value.get_text_contents() as - 'str' types do not have a decode() method. - - Make sure Value.get_contents() never fails because of decoding errors - from trying to decode child nodes binary contents. - - Make Value.get_csig() use children csigs instead of content for - efficiency. Using get_text_contents() is also bad because it is prone - to decoding errors which will prevent Value nodes from being used as - targets in some cases. + methods. + - Make Value.get_text_contents() calculate the contents using child node + csigs instead of child contents (similar to Alias node contents). The + new implementation is not prone to decoding errors so Value.get_csig() + should always work, whereas previously, Value nodes sometimes could not + be used as target nodes. + RELEASE 4.0.0 - Sat, 04 Jul 2020 12:00:27 +0000