From 3a0e05d0939f3475fbd0eaa6a07b278a6a737169 Mon Sep 17 00:00:00 2001 From: nacho Date: Tue, 28 Nov 2023 11:44:43 +0100 Subject: [PATCH 1/3] set id to target cache mount --- rust/Earthfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/Earthfile b/rust/Earthfile index f419598..e386458 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -80,6 +80,7 @@ CARGO: RUN_WITH_CACHE: COMMAND DO +CHECK_INITED + ARG EARTHLY_TARGET_NAME ARG --required command ARG cache_id = $(cat /tmp/earthly/cfg/cache_id) # Save to restore at the end. @@ -92,7 +93,7 @@ RUN_WITH_CACHE: # ($CARGO_HOME/.package-cache has to be in the cache so Cargo can properly synchronize parallel access to $CARGO_HOME resources). ENV CARGO_HOME="/tmp/earthly/.cargo" RUN --mount=type=cache,mode=0777,id=$cache_id,sharing=shared,target=$CARGO_HOME \ - --mount=type=cache,mode=0777,target=target \ + --mount=type=cache,mode=0777,id="${cache_id}#${EARTHLY_TARGET_NAME}",target=target \ set -e; \ mkdir -p $CARGO_HOME; \ printf "Running:\n $command\n"; \ From 49b7fec81be299177c0b5cfc0b4ff81bff5fb07a Mon Sep 17 00:00:00 2001 From: nacho Date: Wed, 29 Nov 2023 22:50:43 +0100 Subject: [PATCH 2/3] - Store INIT config as ENV entries - Let setting cache ids for +RUN_WITH_CACHE --- rust/Earthfile | 52 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/rust/Earthfile b/rust/Earthfile index e386458..ef77d23 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -1,12 +1,12 @@ VERSION --global-cache 0.7 -# INIT stores the configuration required for the other UDCs in the filesystem, and installs required dependencies. -# - cache_id: Overrides default ID of the global $CARGO_HOME cache. Its value is exported to the build environment under the entry: $CARGO_HOME_CACHE_ID +# INIT stores the configuration required for the other functions as ENV entries in the build environment, and installs required dependencies. 
+# - cache_id: Overrides default ID of the global $CARGO_HOME cache. Its value is exported to the build environment under the entry: $EARTHLY_CARGO_HOME_CACHE_ID # - keep_fingerprints (false): Instructs the following +CARGO calls to don't remove the Cargo fingerprints of the source packages. Use only when source packages have been COPYed with --keep-ts option. # - sweep_days (4): +CARGO uses cargo-sweep to clean build artifacts that haven't been accessed for this number of days. INIT: COMMAND - RUN if [ -f /tmp/earthly/cfg/cache_id ]; then \ + RUN if [ -n "$EARTHLY_CARGO_HOME_CACHE_ID" ]; then \ echo "+INIT has already been called in this build environment" ; \ exit 1; \ fi @@ -19,38 +19,34 @@ INIT: DO +INSTALL_CARGO_SWEEP RUN mkdir -p /tmp/earthly/cfg - # cache_id + # EARTHLY_CARGO_HOME_CACHE_ID ARG EARTHLY_TARGET_PROJECT_NO_TAG ARG OS_RELEASE=$(md5sum /etc/os-release | cut -d ' ' -f 1) - ARG cache_id="${EARTHLY_TARGET_PROJECT_NO_TAG}#${OS_RELEASE}#earthly-cargo-cache" - RUN echo "$cache_id">/tmp/earthly/cfg/cache_id - ENV CARGO_HOME_CACHE_ID=$cache_id + ENV EARTHLY_CARGO_HOME_CACHE_ID="${EARTHLY_TARGET_PROJECT_NO_TAG}#${OS_RELEASE}#earthly-cargo-cache" - #keep_fingerprints + # $EARTHLY_KEEP_FINGERPRINTS ARG keep_fingerprints=false - RUN echo "$keep_fingerprints">/tmp/earthly/cfg/keep_fingerprints + ENV EARTHLY_KEEP_FINGERPRINTS=$keep_fingerprints - #sweep_days + # $EARTHLY_SWEEP_DAYS ARG sweep_days=4 - RUN echo "$sweep_days">/tmp/earthly/cfg/sweep_days + ENV EARTHLY_SWEEP_DAYS=$sweep_days # CARGO runs the cargo command "cargo $args". -# This UDC is thread safe. Parallel builds of targets calling this UDC should be free of race conditions. -# Notice that in order to run this UDC, +INIT must be called first. +# This function is thread safe. Parallel builds of targets calling this function should be free of race conditions. +# Notice that in order to run this function, +INIT must be called first. # Arguments: # - args: Cargo subcommand and its arguments. Required. 
-# - output: Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers). +# - output: Regex matching output artifacts files to be copied to ./target folder in the caller filesystem (image layers). # Use this argument when you want to SAVE an ARTIFACT from the target folder (mounted cache), always trying to minimize the total size of the copied fileset. # For example --output="release/[^\./]+" would keep all the files in /target/release that don't have any extension. CARGO: COMMAND DO +CHECK_INITED ARG --required args - ARG keep_fingerprints=$(cat /tmp/earthly/cfg/keep_fingerprints) - ARG sweep_days=$(cat /tmp/earthly/cfg/sweep_days) ARG output ARG TMP_FOLDER="/tmp/earthly/lib/rust" - IF [ "$keep_fingerprints" = "false" ] + IF [ "$EARTHLY_KEEP_FINGERPRINTS" = "false" ] DO +REMOVE_SOURCE_FINGERPRINTS END DO +RUN_WITH_CACHE --command="set -e; @@ -63,8 +59,8 @@ CARGO: find . -type f -regextype posix-egrep -regex \"./$output\" -exec cp --parents \{\} $TMP_FOLDER \; ; cd ..; fi; - echo \"Running cargo sweep -r -t $sweep_days\" ; - cargo sweep -r -t $sweep_days; + echo \"Running cargo sweep -r -t $EARTHLY_SWEEP_DAYS\" ; + cargo sweep -r -t $EARTHLY_SWEEP_DAYS; echo \"Running cargo sweep -r -i\" ; cargo sweep -r -i;" IF [ "$output" != "" ] @@ -73,33 +69,37 @@ CARGO: END # RUN_WITH_CACHE runs the passed command with the CARGO caches mounted. -# Notice that in order to run this UDC, +INIT must be called first. +# Notice that in order to run this function, +INIT must be called first. This function exports the target cache mount ID under the env entry: $TARGET_CACHE_ID. # Arguments: # - command (required): Command to run, can be any expression. +# - cargo_home_cache_id: ID of the cargo home cache mount. By default: $EARTHLY_CARGO_HOME_CACHE_ID as exported by +INIT +# - target_cache_id: ID of the target cache mount. 
By default: ${EARTHLY_CARGO_HOME_CACHE_ID}#${EARTHLY_TARGET_NAME} # RUN_WITH_CACHE: COMMAND DO +CHECK_INITED - ARG EARTHLY_TARGET_NAME ARG --required command - ARG cache_id = $(cat /tmp/earthly/cfg/cache_id) + ARG EARTHLY_TARGET_NAME + ARG cargo_home_cache_id = $EARTHLY_CARGO_HOME_CACHE_ID + ARG target_cache_id="${EARTHLY_CARGO_HOME_CACHE_ID}#${EARTHLY_TARGET_NAME}" # Save to restore at the end. ARG ORIGINAL_CARGO_HOME=$CARGO_HOME ARG ORIGINAL_CARGO_INSTALL_ROOT=$CARGO_INSTALL_ROOT - # Make sure that crates installed though this UDC are stored in the original cargo home, and not in the cargo home within the mount cache. + # Make sure that crates installed through this function are stored in the original cargo home, and not in the cargo home within the mount cache. # This way, if BK garbage-collects them, the build is not broken. ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_HOME # We change $CARGO_HOME while keeping $ORIGINAL_CARGO_HOME/bin directory in the path. This way, the Cargo binary is still accessible and the whole $CARGO_HOME is within the global cache # ($CARGO_HOME/.package-cache has to be in the cache so Cargo can properly synchronize parallel access to $CARGO_HOME resources). ENV CARGO_HOME="/tmp/earthly/.cargo" - RUN --mount=type=cache,mode=0777,id=$cache_id,sharing=shared,target=$CARGO_HOME \ - --mount=type=cache,mode=0777,id="${cache_id}#${EARTHLY_TARGET_NAME}",target=target \ + RUN --mount=type=cache,mode=0777,id=$cargo_home_cache_id,sharing=shared,target=$CARGO_HOME \ + --mount=type=cache,mode=0777,id=$target_cache_id,sharing=locked,target=target \ set -e; \ mkdir -p $CARGO_HOME; \ printf "Running:\n $command\n"; \ eval $command ENV CARGO_HOME=$ORIGINAL_CARGO_HOME ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_INSTALL_ROOT + ENV TARGET_CACHE_ID=$target_cache_id get-tomljson: FROM alpine:3.18.3 @@ -143,7 +143,7 @@ REMOVE_SOURCE_FINGERPRINTS: CHECK_INITED: COMMAND - RUN if [ ! -f /tmp/earthly/cfg/cache_id ]; then \ + RUN if [ ! 
-n "$EARTHLY_CARGO_HOME_CACHE_ID" ]; then \ echo "+INIT has not been called yet in this build environment" ; \ exit 1; \ fi; \ No newline at end of file From 89d332a566d37706f608ddd125ce854f46c7cadc Mon Sep 17 00:00:00 2001 From: nacho Date: Wed, 29 Nov 2023 22:51:09 +0100 Subject: [PATCH 3/3] - Add "Mount caches and parallelization" --- rust/README.md | 59 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/rust/README.md b/rust/README.md index 346bd21..b4c5708 100644 --- a/rust/README.md +++ b/rust/README.md @@ -1,8 +1,8 @@ # lib/rust -Earthly's official collection of rust [UDCs](https://docs.earthly.dev/docs/guides/udc). +Earthly's official collection of Rust [functions](https://docs.earthly.dev/docs/guides/functions). -First, import the UDC up in your Earthfile: +First, import the library in your Earthfile: ```earthfile VERSION --global-cache 0.7 IMPORT github.com/earthly/lib/rust: AS rust @@ -11,9 +11,9 @@ IMPORT github.com/earthly/lib/rust: AS rust ## +INIT -This UDC stores the configuration required by the other UDCs in the build environment filesystem, and installs required dependencies. +This function stores the configuration required by the other functions in the build environment (as `ENV` entries), and installs required dependencies. -It must be called once per build environment, to avoid passing repetitive arguments to the UDCs called after it, and to install required dependencies before the source files are copied from the build context. +It must be called once per build environment, to avoid passing repetitive arguments to the functions called after it, and to install required dependencies before the source files are copied from the build context. ### Usage @@ -24,21 +24,21 @@ DO rust+INIT ... ### Arguments #### `cache_id` -Overrides default ID of the global `$CARGO_HOME` cache. Its value is exported to the build environment under the entry: `$CARGO_HOME_CACHE_ID`. 
+Overrides default ID of the global `$CARGO_HOME` cache. Its value is exported to the build environment under the entry: `$EARTHLY_CARGO_HOME_CACHE_ID`. #### `keep_fingerprints (false)` Instructs the following `+CARGO` calls to don't remove the Cargo fingerprints of the source packages. Use only when source packages have been COPYed with `--keep-ts `option. Cargo caches compilations of packages in `target` folder based on their last modification timestamps. -By default, this UDC removes the fingerprints of the packages found in the source code, to force their recompilation and work even when the Earthly `COPY` commands used overwrote the timestamps. +By default, this function removes the fingerprints of the packages found in the source code, to force their recompilation and work even when the Earthly `COPY` commands used overwrote the timestamps. #### `sweep_days (4)` `+CARGO` calls use cargo-sweep to clean build artifacts that haven't been accessed for this number of days. ## +CARGO -This UDC runs the cargo command `cargo $args` caching the contents of `$CARGO_HOME` and `target` for future builds of the same calling target. +This function runs the cargo command `cargo $args` caching the contents of `$CARGO_HOME` and `target` for future builds of the same calling target. See [Mount caches and parallelization](#mount-caches-and-parallelization) below for more details. -Notice that in order to run this UDC, [+INIT](#init) must be called first. +Notice that in order to run this function, [+INIT](#init) must be called first. ### Usage @@ -53,25 +53,31 @@ DO rust+CARGO ... Cargo subcommand and its arguments. Required. #### `output` -Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers). +Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers). 
-Use this argument when you want to `SAVE ARTIFACT` from the target folder (mounted cache), always trying to minimize the total size of the copied fileset. +Use this argument when you want to `SAVE ARTIFACT` from the target folder (mounted cache), always trying to minimize the total size of the copied fileset. For example `--output="release/[^\./]+"` would keep all the files in `/target/release` that don't have any extension. ### Thread safety -This UDC is thread safe. Parallel builds of targets calling this UDC should be free of race conditions. +This function is thread safe. Parallel builds of targets calling this function should be free of race conditions. ## +RUN_WITH_CACHE `+RUN_WITH_CACHE` runs the passed command with the CARGO caches mounted. -Notice that in order to run this UDC, [+INIT](#init) must be called first. +Notice that in order to run this function, [+INIT](#init) must be called first. This function exports the target cache mount ID under the env entry: `$TARGET_CACHE_ID`. ### Arguments -#### `command (required)` +#### `command (required)` Command to run, can be any expression. +#### `cargo_home_cache_id` +ID of the cargo home cache mount. By default: `$EARTHLY_CARGO_HOME_CACHE_ID` as exported by `+INIT` + +#### `target_cache_id` +ID of the target cache mount. By default: `${EARTHLY_CARGO_HOME_CACHE_ID}#${EARTHLY_TARGET_NAME}` + ### Example Show `$CARGO_HOME` cached-entries size: @@ -103,7 +109,7 @@ The Earthfile would look like: ```earthfile VERSION --global-cache 0.7 -# Importing UDC definition from default branch (in a real case, specify version or commit to guarantee immutability) +# Imports the library definition from default branch (in a real case, specify version or commit to guarantee immutability) IMPORT github.com/earthly/lib/rust AS rust install: @@ -114,7 +120,7 @@ install: RUN rustup component add clippy RUN rustup component add rustfmt # Call +INIT before copying the source file to avoid installing depencies every time source code changes. 
- # This parametrization will be used in future calls to UDCs of the library + # This parametrization will be used in future calls to functions of the library DO rust+INIT --keep_fingerprints=true source: @@ -148,4 +154,27 @@ lint: check-dependencies: FROM +source DO rust+CARGO --args="deny --all-features check --deny warnings bans license sources" + +# all runs all other targets in parallel +all: + BUILD +lint + BUILD +build + BUILD +test + BUILD +fmt + BUILD +check-dependencies ``` + +## Mount caches and parallelization + +This library uses several mount caches per tuple of `{project, os_release}`: +- One cache mount for `$CARGO_HOME`, shared across all target builds without any locking involved. +- A family of locked cache mounts for `$CARGO_TARGET_DIR`. One per target. + +Notice that: +- the target builds mentioned above might belong to one or multiple Earthly builds. +- builds will only be blocked by concurrent ones of the same target + +For example, running `earthly +all` in the previous example will: +- run all targets (`+lint,+build,+test,+fmt,+check-dependencies`) in parallel without any blocking involved +- use a common cache mount for `$CARGO_HOME` +- use one individual `$CARGO_TARGET_DIR` cache mount per target \ No newline at end of file