Skip to content

Commit

Permalink
Do not fail with --replace-originals
Browse files Browse the repository at this point in the history
These archives should not crash extraction when using --replace-originals

Reported-by: Smascer @Smascer
Reported-by: Bryan Sutula @sutula
Reference: #31
Reference: aboutcode-org/scancode-toolkit#2723
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Oct 8, 2021
1 parent 7f007db commit 8c86536
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 6 deletions.
8 changes: 8 additions & 0 deletions src/extractcode/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,17 @@ def extract(
processed_events_append = processed_events.append
for event in extract_events:
yield event
if event.warnings or event.errors:
if TRACE:
logger.debug(
f'extract:replace_originals: {event} has errors. '
'not replacing originals'
)
continue
if replace_originals:
processed_events_append(event)


# move files around when done
if replace_originals:
for xevent in reversed(processed_events):
Expand Down
Binary file added tests/data/cli/replace-originals/issue6550.gz
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/cli/replace-originals/issue6550.gz.ABOUT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
download_url: https://golang.org/src/compress/gzip/testdata/issue6550.gz.base64
Binary file added tests/data/cli/replace-originals/rake.1.gz
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/cli/replace-originals/rake.1.gz.ABOUT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
download_url: https://github.com/ruby/rake/blob/v0.9.2.2/doc/rake.1.gz?raw=true
14 changes: 8 additions & 6 deletions tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,19 +274,21 @@ def test_extract_tree_recursive_replace_originals(self):
check_files(test_dir, expected)

def test_extract_with_replace_originals_does_not_fail_with_gz_with_trailing(self):
expected = (
)
expected = ('rake.1.gz',)
test_dir = self.get_test_loc('extract/replace-originals/rake.1.gz', copy=True)
result = list(extract.extract(test_dir, recurse=True, replace_originals=True))
check_no_error(result)
r = result[-1]
assert r.errors and all(e.startswith('Not a gzipped file') for e in r.errors)
assert not r.warnings
check_files(test_dir, expected)

def test_extract_with_replace_originals_does_not_fail_with_corrupted_archive(self):
expected = (
)
expected = ('issue6550.gz',)
test_dir = self.get_test_loc('extract/replace-originals/issue6550.gz', copy=True)
result = list(extract.extract(test_dir, recurse=True, replace_originals=True))
check_no_error(result)
r = result[-1]
assert r.errors and all(e.startswith('Error') for e in r.errors)
assert not r.warnings
check_files(test_dir, expected)

def test_extract_tree_shallow_then_recursive(self):
Expand Down
16 changes: 16 additions & 0 deletions tests/test_extractcode_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,22 @@ def test_extractcode_command_can_ignore():
assert sorted(expected) == sorted(file_result)


def test_extractcode_command_does_not_crash_with_replace_originals_and_corrupted_archives():
    """
    Run the extract command with --replace-originals and --verbose on a copy
    of the 'cli/replace-originals' test data and check that the run reports
    the extraction errors and completes instead of crashing.
    """
    location = test_env.get_test_loc('cli/replace-originals', copy=True)
    cli_args = ['--replace-originals', '--verbose', location]
    # NOTE(review): expected_rc=1 presumably tells run_extract to expect a
    # failing exit status -- confirm against the helper.
    completed = run_extract(cli_args, expected_rc=1)

    # No "-extract" directory must be left next to the rake.1.gz archive.
    leftover_dir = os.path.join(location, 'rake.1.gz-extract')
    assert not os.path.exists(leftover_dir)
    assert 'rake.1.gz' in completed.stdout

    # Every fragment below must show up in stderr: progress markers, both
    # archive names and their respective error messages.
    expected_stderr_fragments = (
        'Extracting archives...',
        'ERROR extracting',
        'rake.1.gz',
        'Not a gzipped file ',
        'issue6550.gz',
        ' too many length or distance symbols',
        'Extracting done.',
    )
    for fragment in expected_stderr_fragments:
        assert fragment in completed.stderr


@pytest.mark.skipif(on_windows, reason='FIXME: this test fails on Windows until we have support for long file names.')
def test_extractcode_command_can_extract_nuget():
test_dir = test_env.get_test_loc('cli/extract_nuget', copy=True)
Expand Down

0 comments on commit 8c86536

Please sign in to comment.