fix downloader with compressed content #1554 #423

httpie · May 23, 2024 · d9db5a2 · d9db5a2
1 parent 71ab43e
commit d9db5a2
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).
 - Fixed multipart form data having filename not rfc2231 compliant when name contain non-ascii characters. ([#1401](https://github.com/httpie/cli/issues/1401))
 - Fixed issue where the configuration directory was not created at runtime that made the update fetcher run everytime. ([#1527](https://github.com/httpie/cli/issues/1527))
 - Fixed cookie persistence in HTTPie session when targeting localhost. They were dropped due to the standard library. ([#1527](https://github.com/httpie/cli/issues/1527))
+- Fixed downloader when trying to fetch compressed content. The process will no longer exit with the "Incomplete download" error. ([#1554](https://github.com/httpie/cli/issues/1554)) ([#423](https://github.com/httpie/cli/issues/423)) ([#1527](https://github.com/httpie/cli/issues/1527))
 
 Existing plugins are expected to work without any changes. The only caveat would be that certain plugin explicitly require `requests`.
 Future contributions may be made in order to relax the constraints where applicable.

diff --git a/httpie/downloads.py b/httpie/downloads.py
@@ -217,11 +217,37 @@ def start(
         """
         assert not self.status.time_started
 
-        # FIXME: some servers still might sent Content-Encoding: gzip
-        # <https://github.com/httpie/cli/issues/423>
         try:
-            total_size = int(final_response.headers['Content-Length'])
-        except (KeyError, ValueError, TypeError):
+            supported_decoders = final_response.raw.CONTENT_DECODERS
+        except AttributeError:
+            supported_decoders = ["gzip", "deflate"]
+
+        use_content_length = True
+
+        # If the content is actually compressed, the HTTP client will automatically
+        # stream the decompressed content. This means the Content-Length sent by the
+        # server refers to the compressed body, which might fool the downloader.
+        # But there's a catch: we don't decompress everything, every time. It depends
+        # on the Content-Encoding.
+        if 'Content-Encoding' in final_response.headers:
+            will_decompress = True
+
+            encoding_list = final_response.headers['Content-Encoding'].replace(' ', '').lower().split(',')
+
+            for encoding in encoding_list:
+                if encoding not in supported_decoders:
+                    will_decompress = False
+                    break
+
+            if will_decompress:
+                use_content_length = False
+
+        if use_content_length:
+            try:
+                total_size = int(final_response.headers['Content-Length'])
+            except (KeyError, ValueError, TypeError):
+                total_size = None
+        else:
             total_size = None
 
         if not self._output_file:

diff --git a/tests/test_downloads.py b/tests/test_downloads.py
@@ -259,3 +259,7 @@ def test_download_with_redirect_original_url_used_for_filename(self, httpbin):
                 assert os.listdir('.') == [expected_filename]
             finally:
                 os.chdir(orig_cwd)
+
+    def test_download_gzip_content_encoding(self, httpbin):
+        r = http('--download', httpbin + '/gzip')
+        assert r.exit_status == 0