From 9f47c805d6ee93641b8ea85f1b7ad129550575bd Mon Sep 17 00:00:00 2001 From: Danish Amjad Date: Wed, 25 Sep 2024 16:39:43 +0200 Subject: [PATCH] Fix md5 hash logic. Make it order agnostic. (#640) Co-authored-by: Martin Durant --- gcsfs/checkers.py | 2 +- gcsfs/tests/test_checkers.py | 49 ++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/gcsfs/checkers.py b/gcsfs/checkers.py index 01091413..abff6351 100644 --- a/gcsfs/checkers.py +++ b/gcsfs/checkers.py @@ -46,7 +46,7 @@ def validate_headers(self, headers): dig = [ bit.split("=")[1] for bit in headers["X-Goog-Hash"].split(",") - if bit.split("=")[0] == "md5" + if bit and bit.strip().startswith("md5=") ] if dig: if b64encode(self.md.digest()).decode().rstrip("=") != dig[0]: diff --git a/gcsfs/tests/test_checkers.py b/gcsfs/tests/test_checkers.py index 47d74226..30b84a62 100644 --- a/gcsfs/tests/test_checkers.py +++ b/gcsfs/tests/test_checkers.py @@ -8,7 +8,6 @@ def google_response_from_data(expected_data: bytes, actual_data=None): - actual_data = actual_data or expected_data checksum = md5(actual_data) checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") @@ -21,7 +20,29 @@ class response: content_length = len(actual_data) headers = {"X-Goog-Hash": f"md5={checksum_b64}"} if crcmod is not None: - headers["X-Goog-Hash"] += f",crc32c={crc}" + headers["X-Goog-Hash"] += f", crc32c={crc}" + + return response + + +def google_response_from_data_with_reverse_header_order( + expected_data: bytes, actual_data=None +): + actual_data = actual_data or expected_data + checksum = md5(actual_data) + checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") + if crcmod is not None: + checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) + checksum.update(actual_data) + crc = base64.b64encode(checksum.digest()).decode() + + class response: + content_length = len(actual_data) + headers = {} + if crcmod is not None: + headers["X-Goog-Hash"] = f"crc32c={crc}, md5={checksum_b64}" + else: + headers["X-Goog-Hash"] = f"md5={checksum_b64}" return response @@ -73,6 +94,30 @@ def test_validate_headers(checker, data, actual_data, raises): checker.validate_headers(response.headers) +params = [ + (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), + (MD5Checker(), b"hello world", b"hello world", ()), +] + +if crcmod is not None: + params.append( + (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)) + ) + params.append((Crc32cChecker(), b"hello world", b"hello world", ())) + + +@pytest.mark.parametrize("checker, data, actual_data, raises", params) +def test_validate_headers_with_reverse_order(checker, data, actual_data, raises): + response = google_response_from_data_with_reverse_header_order(actual_data) + checker.update(data) + + if raises: + with pytest.raises(raises): + checker.validate_headers(response.headers) + else: + checker.validate_headers(response.headers) + + params = [ (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), (MD5Checker(), b"hello world", b"hello world", ()),