Skip to content

Commit

Permalink
Switch startup bundle from zip to lz4 (#1499)
Browse files Browse the repository at this point in the history
* Switch from startup.zip to startup.json.mzlz4

* Add lz4 to dependencies
  • Loading branch information
leplatrem authored Nov 22, 2024
1 parent 5459adf commit 6565685
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 22 deletions.
37 changes: 27 additions & 10 deletions commands/build_bundles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import zipfile
from email.utils import parsedate_to_datetime

import lz4.block
import requests
from google.cloud import storage

Expand Down Expand Up @@ -89,6 +90,23 @@ def write_zip(output_path: str, content: list[tuple[str, bytes]]):
print("Wrote %r" % output_path)


def write_json_mozlz4(output_path: str, changesets):
    """
    Serialize *changesets* as JSON and write it LZ4-compressed with the
    Mozilla ``mozLz40\\x00`` magic header.

    The goal is to allow clients like Firefox to read and decompress the
    data off the main thread using ``IOUtils.readUTF8(data, {compress: true})``.

    There is an open bug to use standard LZ4 (without magic number)
    https://bugzilla.mozilla.org/show_bug.cgi?id=1209390
    """
    # JSON payload first, then prefix the compressed bytes with the magic number.
    payload = json.dumps(changesets).encode("utf-8")
    blob = b"mozLz40\x00" + lz4.block.compress(payload)
    with open(output_path, "wb") as f:
        f.write(blob)
    print("Wrote %r" % output_path)


def sync_cloud_storage(
storage_bucket: str, remote_folder: str, to_upload: list[str], to_delete: list[str]
):
Expand Down Expand Up @@ -119,6 +137,7 @@ def build_bundles(event, context):
Main command entry point that:
- fetches all collections changesets
- builds a `changesets.zip`
- builds a `startup.json.mozlz4`
- fetches attachments of all collections with bundle flag
- builds `{bid}--{cid}.zip` for each of them
- send the bundles to the Cloud storage bucket
Expand Down Expand Up @@ -212,26 +231,24 @@ def build_bundles(event, context):
print("Existing 'changesets.zip' bundle up-to-date. Nothing to do.")

# Build a bundle for collections that are marked with "startup" flag.
startup_file = "startup.json.mozlz4"
existing_bundle_timestamp = get_modified_timestamp(
f"{base_url}{DESTINATION_FOLDER}/startup.zip"
f"{base_url}{DESTINATION_FOLDER}/{startup_file}"
)
print(f"'startup.zip' was published at {existing_bundle_timestamp}")
print(f"{startup_file!r} was published at {existing_bundle_timestamp}")
if BUILD_ALL or existing_bundle_timestamp < highest_timestamp:
write_zip(
"startup.zip",
write_json_mozlz4(
startup_file,
[
(
"{metadata[bucket]}--{metadata[id]}.json".format(**changeset),
json.dumps(changeset),
)
changeset
for changeset in all_changesets
if "startup" in changeset["metadata"].get("flags", [])
and "preview" not in changeset["metadata"]["bucket"]
],
)
bundles_to_upload.append("startup.zip")
bundles_to_upload.append(startup_file)
else:
print("Existing 'startup.zip' bundle up-to-date. Nothing to do.")
print(f"Existing {startup_file!r} bundle up-to-date. Nothing to do.")

if not SKIP_UPLOAD:
sync_cloud_storage(
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ kinto-http
requests
sentry_sdk
google-cloud-storage
lz4
38 changes: 38 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,44 @@ kinto-http==11.4.0 \
--hash=sha256:75264374a15980676e7628874f291f75223151ebc8182437031f0f98459e3085 \
--hash=sha256:cda7e350b57ff2c838de8f687653f0acc8b4df3c87d41da1c835d1d978d11959
# via -r requirements.in
lz4==4.3.3 \
--hash=sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e \
--hash=sha256:054b4631a355606e99a42396f5db4d22046a3397ffc3269a348ec41eaebd69d2 \
--hash=sha256:0a136e44a16fc98b1abc404fbabf7f1fada2bdab6a7e970974fb81cf55b636d0 \
--hash=sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563 \
--hash=sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f \
--hash=sha256:24b3206de56b7a537eda3a8123c644a2b7bf111f0af53bc14bed90ce5562d1aa \
--hash=sha256:2b901c7784caac9a1ded4555258207d9e9697e746cc8532129f150ffe1f6ba0d \
--hash=sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61 \
--hash=sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6 \
--hash=sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2 \
--hash=sha256:337cb94488a1b060ef1685187d6ad4ba8bc61d26d631d7ba909ee984ea736be1 \
--hash=sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809 \
--hash=sha256:363ab65bf31338eb364062a15f302fc0fab0a49426051429866d71c793c23394 \
--hash=sha256:43cf03059c0f941b772c8aeb42a0813d68d7081c009542301637e5782f8a33e2 \
--hash=sha256:56f4fe9c6327adb97406f27a66420b22ce02d71a5c365c48d6b656b4aaeb7775 \
--hash=sha256:5d35533bf2cee56f38ced91f766cd0038b6abf46f438a80d50c52750088be93f \
--hash=sha256:6756212507405f270b66b3ff7f564618de0606395c0fe10a7ae2ffcbbe0b1fba \
--hash=sha256:6cdc60e21ec70266947a48839b437d46025076eb4b12c76bd47f8e5eb8a75dcc \
--hash=sha256:abc197e4aca8b63f5ae200af03eb95fb4b5055a8f990079b5bdf042f568469dd \
--hash=sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c \
--hash=sha256:b47839b53956e2737229d70714f1d75f33e8ac26e52c267f0197b3189ca6de24 \
--hash=sha256:b6d9ec061b9eca86e4dcc003d93334b95d53909afd5a32c6e4f222157b50c071 \
--hash=sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201 \
--hash=sha256:bca8fccc15e3add173da91be8f34121578dc777711ffd98d399be35487c934bf \
--hash=sha256:c81703b12475da73a5d66618856d04b1307e43428a7e59d98cfe5a5d608a74c6 \
--hash=sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21 \
--hash=sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d \
--hash=sha256:e7d84b479ddf39fe3ea05387f10b779155fc0990125f4fb35d636114e1c63a2e \
--hash=sha256:eac9af361e0d98335a02ff12fb56caeb7ea1196cf1a49dbf6f17828a131da807 \
--hash=sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7 \
--hash=sha256:ee9ff50557a942d187ec85462bb0960207e7ec5b19b3b48949263993771c6205 \
--hash=sha256:f0e822cd7644995d9ba248cb4b67859701748a93e2ab7fc9bc18c599a52e4604 \
--hash=sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d \
--hash=sha256:f1d18718f9d78182c6b60f568c9a9cec8a7204d7cb6fad4e511a2ef279e4cb05 \
--hash=sha256:f4c7bf687303ca47d69f9f0133274958fd672efaa33fb5bcde467862d6c621f0 \
--hash=sha256:f76176492ff082657ada0d0f10c794b6da5800249ef1692b35cf49b1e93e8ef7
# via -r requirements.in
proto-plus==1.24.0 \
--hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \
--hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12
Expand Down
36 changes: 24 additions & 12 deletions tests/test_build_bundles.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ def mock_write_zip():
yield mock_write


@pytest.fixture
def mock_write_json_mozlz4():
    # Patch the mozlz4 writer so tests can inspect its calls without touching disk.
    with patch("commands.build_bundles.write_json_mozlz4") as mocked:
        yield mocked


@pytest.fixture
def mock_sync_cloud_storage():
with patch("commands.build_bundles.sync_cloud_storage") as mock_sync_cloud_storage:
Expand Down Expand Up @@ -137,7 +143,9 @@ def test_write_zip(tmpdir):


@responses.activate
def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
def test_build_bundles(
mock_fetch_all_changesets, mock_write_zip, mock_write_json_mozlz4, mock_sync_cloud_storage
):
server_url = "http://testserver"
event = {"server": server_url}

Expand All @@ -148,10 +156,12 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
)
responses.add(responses.GET, f"{server_url}/attachments/file.jpg", body=b"jpeg_content")

for bundle in ["changesets", "startup"] + [f"bucket{i}--collection{i}" for i in range(5)]:
for bundle in ["changesets.zip", "startup.json.mozlz4"] + [
f"bucket{i}--collection{i}.zip" for i in range(5)
]:
responses.add(
responses.GET,
f"{server_url}/attachments/bundles/{bundle}.zip",
f"{server_url}/attachments/bundles/{bundle}",
headers={
"Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT" # 1720004688000
},
Expand Down Expand Up @@ -220,9 +230,7 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou

build_bundles(event, context={})

assert (
mock_write_zip.call_count == 3
) # changesets.zip, startup.zip, and only one for the attachments
assert mock_write_zip.call_count == 2 # changesets.zip, and only one for the attachments
calls = mock_write_zip.call_args_list

# Assert the first call (attachments zip)
Expand All @@ -245,19 +253,23 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
assert changesets_zip_files[5][0] == "bucket5--collection5.json"
assert changesets_zip_files[6][0] == "preview-bucket5--collection5.json"

# Assert the third call (startup.zip)
startup_zip_path, startup_zip_files = calls[2][0]
assert startup_zip_path == "startup.zip"
assert len(startup_zip_files) == 1
assert startup_zip_files[0][0] == "bucket5--collection5.json"
# Assert the mozlz4 call
assert mock_write_json_mozlz4.call_count == 1 # startup.json.mozlz4
calls = mock_write_json_mozlz4.call_args_list

startup_mozlz4_path, startup_changesets = calls[0][0]
assert startup_mozlz4_path == "startup.json.mozlz4"
assert len(startup_changesets) == 1
assert startup_changesets[0]["metadata"]["bucket"] == "bucket5"
assert startup_changesets[0]["metadata"]["id"] == "collection5"

mock_sync_cloud_storage.assert_called_once_with(
"remote-settings-test-local-attachments",
"bundles",
[
"bucket1--collection1.zip",
"changesets.zip",
"startup.zip",
"startup.json.mozlz4",
],
[
"bucket2--collection2.zip",
Expand Down

0 comments on commit 6565685

Please sign in to comment.