From f8906cea5748b3e3597288b61e10baea53d8571f Mon Sep 17 00:00:00 2001 From: Ignacio del Valle Alles Date: Fri, 3 Nov 2023 21:08:25 +0100 Subject: [PATCH] global caches & cargo-sweep (#20) - Runs cargo sweep after the user cargo sub-command - Introduces @ijc RUN_WITH_CACHE UDC - Caches $CARGO_HOME is a non-locking global cache (shared across all Earthfiles). This significantly reduces cache size --- rust/Earthfile | 118 ++++++++++++++++++++++++++++++++++++------------- rust/README.md | 4 ++ 2 files changed, 92 insertions(+), 30 deletions(-) diff --git a/rust/Earthfile b/rust/Earthfile index c6fc2ea..4e30b22 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -1,54 +1,112 @@ -VERSION 0.7 +VERSION --global-cache 0.7 # CARGO runs the cargo command "cargo $args". # This UDC should be thread safe. Parallel builds of targets using it should be free of race conditions. # Arguments: -# - args: Cargo subcommand and its arguments. Required -# - keep_fingerprints (false): Do not remove source packages fingerprints. Use only when source packages have been COPYed with --keep-ts option +# - args: Cargo subcommand and its arguments. Required. +# - keep_fingerprints (false): Do not remove source packages fingerprints. Use only when source packages have been COPYed with --keep-ts option. +# - sweep_days (4): The UDC uses cargo-sweep to clean build artifacts that haven't been accessed for this number of days. # - output: Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers). # Use this argument when you want to SAVE an ARTIFACT from the target folder (mounted cache), always trying to minimize the total size of the copied fileset. -# For example --output="release/[^\./]+" would keep all the files in /target/release that don't have any extension +# For example --output="release/[^\./]+" would keep all the files in /target/release that don't have any extension. CARGO: COMMAND ARG --required args ARG keep_fingerprints=false + ARG sweep_days=4 ARG output IF [ "$keep_fingerprints" = "false" ] DO +REMOVE_SOURCE_FINGERPRINTS END - RUN --mount=type=cache,mode=0777,sharing=shared,target=$CARGO_HOME/registry \ - --mount=type=cache,mode=0777,sharing=shared,target=$CARGO_HOME/git \ - --mount=type=cache,mode=0777,target=target \ - cargo $args; \ - if [ -n "$output" ]; then \ - mkdir /earthly_lib_rust_temp; \ - cd target; \ - find . -type f -regextype posix-egrep -regex "./$output" -exec cp --parents \{\} /earthly_lib_rust_temp \; ; \ - fi + DO +INSTALL_CARGO_SWEEP + DO +RUN_WITH_CACHE --command="set -e; + echo \"Running cargo $args\" ; + cargo $args; + if [ -n \"$output\" ]; then + echo \"Copying output files\" ; + mkdir -p /earthly_lib_rust_temp; + cd target; + find . -type f -regextype posix-egrep -regex \"./$output\" -exec cp --parents \{\} /earthly_lib_rust_temp \; ; + cd ..; + fi; + echo \"Running cargo sweep -r -t $sweep_days\" ; + cargo sweep -r -t $sweep_days; + echo \"Running cargo sweep -r -i\" ; + cargo sweep -r -i;" IF [ "$output" != "" ] RUN mkdir -p target; \ mv /earthly_lib_rust_temp/* target 2>/dev/null || echo "no files found within ./target matching the provided output regexp" ; END # REMOVE_SOURCE_FINGERPRINTS removes the Cargo fingerprint folders of the source packages. -# This guarantees Cargo compiles the packages when COPY commands of the source folders have a static timestamp (see --keep-ts) +# This guarantees Cargo compiles the packages when COPY commands of the source folders have a static timestamp (see --keep-ts). REMOVE_SOURCE_FINGERPRINTS: COMMAND - COPY +get-stoml/stoml /tmp/stoml - ARG source_libs=$(find . -name Cargo.toml -exec bash -c '/tmp/stoml {} package.name; printf "\n"' \\;) - RUN --mount=type=cache,mode=0777,sharing=shared,target=target \ - find target -name .fingerprint; \ - fingerprint_folders=$(find target -name .fingerprint) ; \ - for fingerprint_folder in $fingerprint_folders; do \ - cd $fingerprint_folder; \ - for source_lib in $source_libs; do \ - find . -maxdepth 1 -regex "\./$source_lib-[^-]+" -exec rm -rf {} \; ;\ - done \ - done + COPY +get-tomljson/tomljson /tmp/tomljson + COPY +get-jq/jq /tmp/jq + DO +RUN_WITH_CACHE --command="set -e; + source_libs=\$(find . -name Cargo.toml -exec bash -c '/tmp/tomljson {} | /tmp/jq -r .package.name; printf \"\\n\"' \\;) ; + fingerprint_folders=\$(find target -name .fingerprint) ; + echo \"deleting fingerprints:\"; + for fingerprint_folder in \$fingerprint_folders; do + cd \$fingerprint_folder; + for source_lib in \$source_libs; do + find . -maxdepth 1 -regex \"\./\$source_lib-[^-]+\" -exec bash -c 'readlink -f {}; rm -rf {}' \; ; + done + done" + +# get-tomljson gets the portable tomljson binary. +get-tomljson: + FROM alpine:3.18.3 + ARG USERARCH + ARG version=2.1.0 + RUN wget -O tomljson.tar.xz https://github.com/pelletier/go-toml/releases/download/v${version}/tomljson_${version}_linux_${USERARCH}.tar.xz; \ + tar -xf tomljson.tar.xz; \ + chmod +x tomljson + SAVE ARTIFACT tomljson -# get-stoml gets the portable stoml binary -get-stoml: +# get-jq gets the portable jq binary. +get-jq: FROM alpine:3.18.3 - RUN wget -O stoml https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64; \ - chmod +x stoml - SAVE ARTIFACT ./stoml stoml + ARG USERARCH + ARG version=1.7 + RUN wget -O jq https://github.com/jqlang/jq/releases/download/jq-${version}/jq-linux-${USERARCH}; \ + chmod +x jq + SAVE ARTIFACT jq + +# INSTALL_CARGO_SWEEP installs cargo-sweep if it doesn't exist already. +INSTALL_CARGO_SWEEP: + COMMAND + DO +RUN_WITH_CACHE --command="set -e; + if [ ! -f \$CARGO_HOME/bin/cargo-sweep ]; then + cargo install cargo-sweep --root \$CARGO_HOME; + fi;" + +# RUN_WITH_CACHE runs the passed command with the CARGO caches mounted. +# Arguments: +# - command (required): Command to run, can be any expression. +# +# This implementation is not expected to significantly change. Prefer using the `CARGO` UDC if you can, so you can get future improvements transparently. +RUN_WITH_CACHE: + COMMAND + ARG --required command + ARG EARTHLY_TARGET_PROJECT_NO_TAG + ARG CACHE_ID="${EARTHLY_TARGET_PROJECT_NO_TAG}#earthly-cargo-cache" + # $ORIGINAL_CARGO_HOME/bin will contain the cargo and rust binaries, as well as the installed crates. + # We save it to reset it back at the end. This location is presumed to be in the calling target layers filesystem rather than in other mount cache. + ARG ORIGINAL_CARGO_HOME=$CARGO_HOME + ARG ORIGINAL_CARGO_INSTALL_ROOT=$CARGO_INSTALL_ROOT + # Make sure that crates installed though this UDC are stored in the original cargo home, and not in the cargo home within the mount cache. + # This way, if BK garbage-collects them, the build is not broken + ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_HOME + # We need $CARGO_HOME/.package-cache within the earthly shared-mount-cache, so cargo can properly synchronize parallel access to $CARGO_HOME resources. + ENV CARGO_HOME="/earthly/.cargo" + RUN echo $CACHE_ID + RUN --mount=type=cache,mode=0777,id=$CACHE_ID,sharing=shared,target=/earthly \ + --mount=type=cache,mode=0777,target=target \ + set -e; \ + mkdir -p $CARGO_HOME; \ + printf "Running:\n $command\n"; \ + eval $command + ENV CARGO_HOME=$ORIGINAL_CARGO_HOME + ENV CARGO_INSTALL_ROOT=$ORIGINAL_CARGO_INSTALL_ROOT diff --git a/rust/README.md b/rust/README.md index 844becd..34dd9ae 100644 --- a/rust/README.md +++ b/rust/README.md @@ -10,6 +10,7 @@ This UDC runs the cargo command `cargo $args` caching the contents of `$CARGO_HO First, import the UDC up in your Earthfile: ```earthfile +VERSION 0.7 IMPORT github.com/earthly/lib/rust: AS rust ``` @@ -33,6 +34,9 @@ Cargo caches compilations of packages in `target` folder based on their last mod By default, this UDC removes the fingerprints of the packages found in the source code, to force their recompilation and work even when the Earthly `COPY` commands used overwrote the timestamps. +#### `sweep_days (4)` +The UDC uses [cargo-sweep](https://github.com/holmgr/cargo-sweep) to clean build artifacts that haven't been accessed for this number of days. + #### `output` Regex to match the files within the target folder to be copied from the cache to the caller filesystem (image layers).