From d9db5a2b32b2dbfba5fb1a2bb630c008c6ee333f Mon Sep 17 00:00:00 2001 From: Ahmed TAHRI Date: Thu, 23 May 2024 06:09:57 +0200 Subject: [PATCH] fix downloader with compressed content #1554 #423 --- CHANGELOG.md | 1 + httpie/downloads.py | 34 ++++++++++++++++++++++++++++++---- tests/test_downloads.py | 4 ++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd0368dca2..47f2248ee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). - Fixed multipart form data having filename not rfc2231 compliant when name contain non-ascii characters. ([#1401](https://github.com/httpie/cli/issues/1401)) - Fixed issue where the configuration directory was not created at runtime that made the update fetcher run everytime. ([#1527](https://github.com/httpie/cli/issues/1527)) - Fixed cookie persistence in HTTPie session when targeting localhost. They were dropped due to the standard library. ([#1527](https://github.com/httpie/cli/issues/1527)) +- Fixed downloader when trying to fetch compressed content. The process will no longer exit with the "Incomplete download" error. ([#1554](https://github.com/httpie/cli/issues/1554)) ([#423](https://github.com/httpie/cli/issues/423)) ([#1527](https://github.com/httpie/cli/issues/1527)) Existing plugins are expected to work without any changes. The only caveat would be that certain plugin explicitly require `requests`. Future contributions may be made in order to relax the constraints where applicable. 
diff --git a/httpie/downloads.py b/httpie/downloads.py index d987b0c989..d9c711c5fe 100644 --- a/httpie/downloads.py +++ b/httpie/downloads.py @@ -217,11 +217,37 @@ def start( """ assert not self.status.time_started - # FIXME: some servers still might sent Content-Encoding: gzip - # try: - total_size = int(final_response.headers['Content-Length']) - except (KeyError, ValueError, TypeError): + supported_decoders = final_response.raw.CONTENT_DECODERS + except AttributeError: + supported_decoders = ["gzip", "deflate"] + + use_content_length = True + + # If the content is actually compressed, the HTTP client will automatically + # stream decompressed content. This means the Content-Length sent by the server + # refers to the compressed body, which can fool the downloader. + # There is a catch, though: not every response is decompressed. It depends on the + # Content-Encoding. + if 'Content-Encoding' in final_response.headers: + will_decompress = True + + encoding_list = final_response.headers['Content-Encoding'].replace(' ', '').lower().split(',') + + for encoding in encoding_list: + if encoding not in supported_decoders: + will_decompress = False + break + + if will_decompress: + use_content_length = False + + if use_content_length: + try: + total_size = int(final_response.headers['Content-Length']) + except (KeyError, ValueError, TypeError): + total_size = None + else: total_size = None if not self._output_file: diff --git a/tests/test_downloads.py b/tests/test_downloads.py index 6bd8dcc609..f63273aebf 100644 --- a/tests/test_downloads.py +++ b/tests/test_downloads.py @@ -259,3 +259,7 @@ def test_download_with_redirect_original_url_used_for_filename(self, httpbin): assert os.listdir('.') == [expected_filename] finally: os.chdir(orig_cwd) + + def test_download_gzip_content_encoding(self, httpbin): + r = http('--download', httpbin + '/gzip') + assert r.exit_status == 0