Skip to content

Commit

Permalink
Fix md5 hash logic. Make it order agnostic. (#640)
Browse files Browse the repository at this point in the history
Co-authored-by: Martin Durant <[email protected]>
  • Loading branch information
damjad and martindurant authored Sep 25, 2024
1 parent db2c28c commit 9f47c80
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 3 deletions.
2 changes: 1 addition & 1 deletion gcsfs/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def validate_headers(self, headers):
dig = [
bit.split("=")[1]
for bit in headers["X-Goog-Hash"].split(",")
if bit.split("=")[0] == "md5"
if bit and bit.strip().startswith("md5=")
]
if dig:
if b64encode(self.md.digest()).decode().rstrip("=") != dig[0]:
Expand Down
49 changes: 47 additions & 2 deletions gcsfs/tests/test_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@


def google_response_from_data(expected_data: bytes, actual_data=None):

actual_data = actual_data or expected_data
checksum = md5(actual_data)
checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8")
Expand All @@ -21,7 +20,29 @@ class response:
content_length = len(actual_data)
headers = {"X-Goog-Hash": f"md5={checksum_b64}"}
if crcmod is not None:
headers["X-Goog-Hash"] += f",crc32c={crc}"
headers["X-Goog-Hash"] += f", crc32c={crc}"

return response


def google_response_from_data_with_reverse_header_order(
expected_data: bytes, actual_data=None
):
actual_data = actual_data or expected_data
checksum = md5(actual_data)
checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8")
if crcmod is not None:
checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF)
checksum.update(actual_data)
crc = base64.b64encode(checksum.digest()).decode()

class response:
content_length = len(actual_data)
headers = {}
if crcmod is not None:
headers["X-Goog-Hash"] = f"crc32c={crc}, md5={checksum_b64}"
else:
headers["X-Goog-Hash"] = f"md5={checksum_b64}"

return response

Expand Down Expand Up @@ -73,6 +94,30 @@ def test_validate_headers(checker, data, actual_data, raises):
checker.validate_headers(response.headers)


params = [
(MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
(MD5Checker(), b"hello world", b"hello world", ()),
]

if crcmod is not None:
params.append(
(Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,))
)
params.append((Crc32cChecker(), b"hello world", b"hello world", ()))


@pytest.mark.parametrize("checker, data, actual_data, raises", params)
def test_validate_headers_with_reverse_order(checker, data, actual_data, raises):
response = google_response_from_data_with_reverse_header_order(actual_data)
checker.update(data)

if raises:
with pytest.raises(raises):
checker.validate_headers(response.headers)
else:
checker.validate_headers(response.headers)


params = [
(MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
(MD5Checker(), b"hello world", b"hello world", ()),
Expand Down

0 comments on commit 9f47c80

Please sign in to comment.