From 6565685ce23b3864372b9976fd8ee2acebca16c7 Mon Sep 17 00:00:00 2001
From: Mathieu Leplatre
Date: Fri, 22 Nov 2024 17:49:54 +0100
Subject: [PATCH] Switch startup bundle from zip to lz4 (#1499)

* Switch from startup.zip to startup.json.mozlz4

* Add lz4 to dependencies
---
 commands/build_bundles.py   | 37 ++++++++++++++++++++++++++----------
 requirements.in             |  1 +
 requirements.txt            | 38 +++++++++++++++++++++++++++++++++++++
 tests/test_build_bundles.py | 36 +++++++++++++++++++++++------------
 4 files changed, 90 insertions(+), 22 deletions(-)

diff --git a/commands/build_bundles.py b/commands/build_bundles.py
index 90a8515b..d4f4503e 100644
--- a/commands/build_bundles.py
+++ b/commands/build_bundles.py
@@ -13,6 +13,7 @@ import zipfile
 
 from email.utils import parsedate_to_datetime
 
+import lz4.block
 import requests
 from google.cloud import storage
 
@@ -89,6 +90,23 @@ def write_zip(output_path: str, content: list[tuple[str, bytes]]):
     print("Wrote %r" % output_path)
 
 
+def write_json_mozlz4(output_path: str, changesets):
+    """
+    Write the changesets as a UTF-8 JSON file compressed as LZ4.
+    The goal of this is to allow clients like Firefox to read and decompress the data off
+    the main thread using ``IOUtils.readUTF8(data, {compress: true})``.
+
+    There is an open bug to use standard LZ4 (without magic number):
+    https://bugzilla.mozilla.org/show_bug.cgi?id=1209390
+    """
+    header_magic_number = b"mozLz40\x00"
+    json_str = json.dumps(changesets).encode("utf-8")
+    compressed = lz4.block.compress(json_str)
+    with open(output_path, "wb") as f:
+        f.write(header_magic_number + compressed)
+    print("Wrote %r" % output_path)
+
+
 def sync_cloud_storage(
     storage_bucket: str, remote_folder: str, to_upload: list[str], to_delete: list[str]
 ):
@@ -119,6 +137,7 @@ def build_bundles(event, context):
     Main command entry point that:
     - fetches all collections changesets
     - builds a `changesets.zip`
+    - builds a `startup.json.mozlz4`
     - fetches attachments of all collections with bundle flag
     - builds `{bid}--{cid}.zip` for each of them
     - send the bundles to the Cloud storage bucket
@@ -212,26 +231,24 @@ def build_bundles(event, context):
         print("Existing 'changesets.zip' bundle up-to-date. Nothing to do.")
 
     # Build a bundle for collections that are marked with "startup" flag.
+    startup_file = "startup.json.mozlz4"
     existing_bundle_timestamp = get_modified_timestamp(
-        f"{base_url}{DESTINATION_FOLDER}/startup.zip"
+        f"{base_url}{DESTINATION_FOLDER}/{startup_file}"
     )
-    print(f"'startup.zip' was published at {existing_bundle_timestamp}")
+    print(f"{startup_file!r} was published at {existing_bundle_timestamp}")
     if BUILD_ALL or existing_bundle_timestamp < highest_timestamp:
-        write_zip(
-            "startup.zip",
+        write_json_mozlz4(
+            startup_file,
             [
-                (
-                    "{metadata[bucket]}--{metadata[id]}.json".format(**changeset),
-                    json.dumps(changeset),
-                )
+                changeset
                 for changeset in all_changesets
                 if "startup" in changeset["metadata"].get("flags", [])
                 and "preview" not in changeset["metadata"]["bucket"]
             ],
         )
-        bundles_to_upload.append("startup.zip")
+        bundles_to_upload.append(startup_file)
     else:
-        print("Existing 'startup.zip' bundle up-to-date. Nothing to do.")
+        print(f"Existing {startup_file!r} bundle up-to-date. Nothing to do.")
Nothing to do.") if not SKIP_UPLOAD: sync_cloud_storage( diff --git a/requirements.in b/requirements.in index 2570ba0f..597b1ad5 100644 --- a/requirements.in +++ b/requirements.in @@ -4,3 +4,4 @@ kinto-http requests sentry_sdk google-cloud-storage +lz4 diff --git a/requirements.txt b/requirements.txt index d9ce8f9d..cbf012f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -221,6 +221,44 @@ kinto-http==11.4.0 \ --hash=sha256:75264374a15980676e7628874f291f75223151ebc8182437031f0f98459e3085 \ --hash=sha256:cda7e350b57ff2c838de8f687653f0acc8b4df3c87d41da1c835d1d978d11959 # via -r requirements.in +lz4==4.3.3 \ + --hash=sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e \ + --hash=sha256:054b4631a355606e99a42396f5db4d22046a3397ffc3269a348ec41eaebd69d2 \ + --hash=sha256:0a136e44a16fc98b1abc404fbabf7f1fada2bdab6a7e970974fb81cf55b636d0 \ + --hash=sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563 \ + --hash=sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f \ + --hash=sha256:24b3206de56b7a537eda3a8123c644a2b7bf111f0af53bc14bed90ce5562d1aa \ + --hash=sha256:2b901c7784caac9a1ded4555258207d9e9697e746cc8532129f150ffe1f6ba0d \ + --hash=sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61 \ + --hash=sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6 \ + --hash=sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2 \ + --hash=sha256:337cb94488a1b060ef1685187d6ad4ba8bc61d26d631d7ba909ee984ea736be1 \ + --hash=sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809 \ + --hash=sha256:363ab65bf31338eb364062a15f302fc0fab0a49426051429866d71c793c23394 \ + --hash=sha256:43cf03059c0f941b772c8aeb42a0813d68d7081c009542301637e5782f8a33e2 \ + --hash=sha256:56f4fe9c6327adb97406f27a66420b22ce02d71a5c365c48d6b656b4aaeb7775 \ + --hash=sha256:5d35533bf2cee56f38ced91f766cd0038b6abf46f438a80d50c52750088be93f \ + --hash=sha256:6756212507405f270b66b3ff7f564618de0606395c0fe10a7ae2ffcbbe0b1fba \ + --hash=sha256:6cdc60e21ec70266947a48839b437d46025076eb4b12c76bd47f8e5eb8a75dcc \ + --hash=sha256:abc197e4aca8b63f5ae200af03eb95fb4b5055a8f990079b5bdf042f568469dd \ + --hash=sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c \ + --hash=sha256:b47839b53956e2737229d70714f1d75f33e8ac26e52c267f0197b3189ca6de24 \ + --hash=sha256:b6d9ec061b9eca86e4dcc003d93334b95d53909afd5a32c6e4f222157b50c071 \ + --hash=sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201 \ + --hash=sha256:bca8fccc15e3add173da91be8f34121578dc777711ffd98d399be35487c934bf \ + --hash=sha256:c81703b12475da73a5d66618856d04b1307e43428a7e59d98cfe5a5d608a74c6 \ + --hash=sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21 \ + --hash=sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d \ + --hash=sha256:e7d84b479ddf39fe3ea05387f10b779155fc0990125f4fb35d636114e1c63a2e \ + --hash=sha256:eac9af361e0d98335a02ff12fb56caeb7ea1196cf1a49dbf6f17828a131da807 \ + --hash=sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7 \ + --hash=sha256:ee9ff50557a942d187ec85462bb0960207e7ec5b19b3b48949263993771c6205 \ + --hash=sha256:f0e822cd7644995d9ba248cb4b67859701748a93e2ab7fc9bc18c599a52e4604 \ + --hash=sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d \ + --hash=sha256:f1d18718f9d78182c6b60f568c9a9cec8a7204d7cb6fad4e511a2ef279e4cb05 \ + --hash=sha256:f4c7bf687303ca47d69f9f0133274958fd672efaa33fb5bcde467862d6c621f0 \ + 
+    --hash=sha256:f76176492ff082657ada0d0f10c794b6da5800249ef1692b35cf49b1e93e8ef7
+    # via -r requirements.in
 proto-plus==1.24.0 \
     --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \
     --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12
diff --git a/tests/test_build_bundles.py b/tests/test_build_bundles.py
index 6ffe41a6..aa83e176 100644
--- a/tests/test_build_bundles.py
+++ b/tests/test_build_bundles.py
@@ -29,6 +29,12 @@ def mock_write_zip():
         yield mock_write
 
 
+@pytest.fixture
+def mock_write_json_mozlz4():
+    with patch("commands.build_bundles.write_json_mozlz4") as mock_write:
+        yield mock_write
+
+
 @pytest.fixture
 def mock_sync_cloud_storage():
     with patch("commands.build_bundles.sync_cloud_storage") as mock_sync_cloud_storage:
@@ -137,7 +143,9 @@ def test_write_zip(tmpdir):
 
 
 @responses.activate
-def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
+def test_build_bundles(
+    mock_fetch_all_changesets, mock_write_zip, mock_write_json_mozlz4, mock_sync_cloud_storage
+):
     server_url = "http://testserver"
     event = {"server": server_url}
 
@@ -148,10 +156,12 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
     )
     responses.add(responses.GET, f"{server_url}/attachments/file.jpg", body=b"jpeg_content")
 
-    for bundle in ["changesets", "startup"] + [f"bucket{i}--collection{i}" for i in range(5)]:
+    for bundle in ["changesets.zip", "startup.json.mozlz4"] + [
+        f"bucket{i}--collection{i}.zip" for i in range(5)
+    ]:
         responses.add(
             responses.GET,
-            f"{server_url}/attachments/bundles/{bundle}.zip",
+            f"{server_url}/attachments/bundles/{bundle}",
             headers={
                 "Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT"  # 1720004688000
             },
@@ -220,9 +230,7 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
 
     build_bundles(event, context={})
 
-    assert (
-        mock_write_zip.call_count == 3
-    )  # changesets.zip, startup.zip, and only one for the attachments
+    assert mock_write_zip.call_count == 2  # changesets.zip, and only one for the attachments
     calls = mock_write_zip.call_args_list
 
     # Assert the first call (attachments zip)
@@ -245,11 +253,15 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_clou
     assert changesets_zip_files[5][0] == "bucket5--collection5.json"
     assert changesets_zip_files[6][0] == "preview-bucket5--collection5.json"
 
-    # Assert the third call (startup.zip)
-    startup_zip_path, startup_zip_files = calls[2][0]
-    assert startup_zip_path == "startup.zip"
-    assert len(startup_zip_files) == 1
-    assert startup_zip_files[0][0] == "bucket5--collection5.json"
+    # Assert the mozlz4 call
+    assert mock_write_json_mozlz4.call_count == 1  # startup.json.mozlz4
+    calls = mock_write_json_mozlz4.call_args_list
+
+    startup_mozlz4_path, startup_changesets = calls[0][0]
+    assert startup_mozlz4_path == "startup.json.mozlz4"
+    assert len(startup_changesets) == 1
+    assert startup_changesets[0]["metadata"]["bucket"] == "bucket5"
+    assert startup_changesets[0]["metadata"]["id"] == "collection5"
 
     mock_sync_cloud_storage.assert_called_once_with(
         "remote-settings-test-local-attachments",
         [
             "bucket1--collection1.zip",
             "changesets.zip",
-            "startup.zip",
+            "startup.json.mozlz4",
         ],
         [
            "bucket2--collection2.zip",
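
A minimal round-trip sketch (not part of the patch) for inspecting the generated
bundle locally. It assumes the `lz4` package pinned above and a
`startup.json.mozlz4` file produced by `write_json_mozlz4` in the current directory:

    import json

    import lz4.block

    # Same mozLz4 magic number that write_json_mozlz4 prepends.
    MAGIC = b"mozLz40\x00"

    with open("startup.json.mozlz4", "rb") as f:
        blob = f.read()

    assert blob.startswith(MAGIC), "unexpected file header"
    # lz4.block.compress() stores the uncompressed size by default,
    # so decompress() needs no explicit size hint.
    changesets = json.loads(lz4.block.decompress(blob[len(MAGIC):]))
    print(f"{len(changesets)} startup changesets")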