Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add detection for recursive decompression bombs #52

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/extractcode/extract.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@

from commoncode import fileutils
from commoncode import ignore
from commoncode import hash

import extractcode # NOQA
import extractcode.archive
Expand Down Expand Up @@ -86,6 +87,7 @@ def extract(
recurse=False,
replace_originals=False,
ignore_pattern=(),
known_archive_hashes=set()
):
"""
Walk and extract any archives found at ``location`` (either a file or
Expand Down Expand Up @@ -121,6 +123,7 @@ def extract(
kinds=kinds,
recurse=recurse,
ignore_pattern=ignore_pattern,
known_archive_hashes=known_archive_hashes
)

processed_events = []
Expand Down Expand Up @@ -151,6 +154,7 @@ def extract_files(
kinds=extractcode.default_kinds,
recurse=False,
ignore_pattern=(),
known_archive_hashes=set()
):
"""
Extract the files found at `location`.
Expand Down Expand Up @@ -190,7 +194,7 @@ def extract_files(
if not recurse and extractcode.is_extraction_path(loc):
if TRACE:
logger.debug(
'extract:walk not recurse: skipped file: %(loc)r' % locals())
'extract:walk: not recurse: skipped file: %(loc)r' % locals())
continue

if not extractcode.archive.should_extract(
Expand All @@ -203,6 +207,14 @@ def extract_files(
'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
continue

file_hash = hash.sha256(loc)

if known_archive_hashes and file_hash in known_archive_hashes:
if TRACE:
logger.debug(
'extract:walk: skipped file: decompression bomb detected: %(loc)r' % locals())
continue

target = join(abspath(top), extractcode.get_extraction_path(loc))
if TRACE:
logger.debug('extract:target: %(target)r' % locals())
Expand All @@ -220,11 +232,16 @@ def extract_files(
if recurse:
if TRACE:
logger.debug('extract:walk: recursing on target: %(target)r' % locals())

kah = set(known_archive_hashes)
kah.add(file_hash)

for xevent in extract(
location=target,
kinds=kinds,
recurse=recurse,
ignore_pattern=ignore_pattern,
known_archive_hashes=kah
):
if TRACE:
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
Expand Down
Loading