forked from jjlin/vaultwarden-shared-hosting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
docker-image-extract
executable file
·116 lines (106 loc) · 4.56 KB
/
docker-image-extract
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/sh
#
# This script pulls and extracts all files from an image in Docker Hub.
#
# Copyright (c) 2020, Jeremy Lin
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# Exactly one argument (the image spec) is required. With any other argument
# count, print the usage text on stdout and exit with status 1.
[ "$#" -eq 1 ] || {
  cat <<EOF
$0 IMAGE[:TAG]

This script pulls and extracts all files from an image in Docker Hub.

Examples:

# Pull and extract all files in the 'hello-world' image tagged 'latest'.
\$ $0 hello-world:latest

# Same as above; tag defaults to 'latest'.
\$ $0 hello-world
EOF
  exit 1
}
image_spec="$1"

# Split IMAGE[:TAG] into its two parts. When a colon is present, the image is
# everything before the last colon and the tag is everything after the first
# one; without a colon the whole spec is the image and the tag is 'latest'.
case "${image_spec}" in
  *:*)
    image="${image_spec%:*}"
    tag="${image_spec#*:}"
    ;;
  *)
    image="${image_spec}"
    tag=latest
    ;;
esac

# An image name without a '/' has no namespace; Docker official images are
# in the 'library' namespace.
case "${image}" in
  */*) ;;
  *) image="library/${image}" ;;
esac
# Read JSON on stdin and print, one per line, every string value stored under
# the given key. This is a pattern match on "<key>": "<value>" pairs rather
# than a real JSON parser, which avoids an extra dependency on a tool like
# `jq`.
#
# Arguments: $1 - the key name (inserted verbatim into a regular expression).
# Outputs:   each matching non-empty value on its own line, in input order.
extract() {
  # Match "<key>":"<val>" (assumes key/val won't contain double quotes); the
  # colon may have whitespace on either side. The interval \{1,\} is the
  # POSIX spelling of "one or more" (`\+` is a GNU extension), and $1 is used
  # directly because POSIX sh does not define `local`.
  grep -o "\"${1}\"[[:space:]]*:[[:space:]]*\"[^\"]\{1,\}\"" |
    # Keep just <val>: delete the closing '"', then greedily delete
    # everything up to and including the last remaining '"'.
    sed -e 's/"$//' -e 's/.*"//'
}
# Endpoints used below, all derived from the image name and tag.
# https://docs.docker.com/registry/spec/auth/token/#how-to-authenticate
api_token_url="https://auth.docker.io/token?service=registry.docker.io&scope=repository:$image:pull"
# https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-an-image-manifest
manifest_url="https://registry-1.docker.io/v2/${image}/manifests/$tag"
# https://github.com/docker/distribution/blob/master/docs/spec/api.md#pulling-a-layer
blobs_base_url="https://registry-1.docker.io/v2/${image}/blobs"

echo "Getting API token..."
# curl issues a GET request by default, so no method option is needed here.
token=$(curl -sS "${api_token_url}" | extract 'token')
# Stop early when no token came back (network error or unexpected response)
# instead of sending an empty bearer token with every later request.
if [ -z "${token}" ]; then
  echo "Error: could not obtain an API token for ${image}." >&2
  exit 1
fi
auth_header="Authorization: Bearer $token"
echo "Getting image manifest for $image:$tag..."
# Build the list of layer digests from the "blobSum" values in the manifest.
# NOTE(review): this relies on the registry returning a manifest that carries
# "blobSum" fields -- confirm the registry still serves that format.
layers=$(curl -sS -H "${auth_header}" "${manifest_url}" |
  extract 'blobSum' |
  # Ref: https://github.com/docker/distribution/issues/1527
  # There can be duplicate layers. Reduce duplicate downloads a bit
  # by removing adjacent duplicates.
  uniq |
  # Layers are listed in newest-to-oldest order (i.e., the reverse
  # of the order in which they were generated in the Dockerfile).
  # Reverse the layer order to ensure that a file from a newer
  # layer shadows/overwrites the same file from an older layer.
  tac)
# An empty list means the request failed or the response held no "blobSum"
# entries; report that instead of finishing with nothing extracted.
if [ -z "${layers}" ]; then
  echo "Error: no layers found in the manifest for $image:$tag." >&2
  exit 1
fi
# Download every layer in $layers (oldest first), unpack it on top of the
# output directory, and apply the layer's whiteout markers.
output_dir="./output"
mkdir -p "${output_dir}" || exit 1

# Stage downloaded layers in a freshly created, uniquely named directory so
# that a pre-existing ./layers directory is never written to or deleted.
layers_dir=$(mktemp -d ./layers.XXXXXX) || {
  echo "Error: could not create a temporary directory for layers." >&2
  exit 1
}
# Remove the staging directory on every exit path; route common signals
# through a normal exit so the EXIT trap also runs when interrupted.
trap 'rm -rf -- "${layers_dir}"' EXIT
trap 'exit 1' HUP INT TERM

# $layers is deliberately unquoted: it holds one digest per line and is
# split into words here.
for layer in $layers; do
  hash="${layer#sha256:}"
  tar_file="${layers_dir}/${hash}"

  echo "Downloading layer ${hash}..."
  # Only gunzip's exit status is visible for this pipeline; a failed
  # download leaves gunzip without valid gzip input, so it is the stage
  # whose status is checked.
  if ! curl -sSL -H "${auth_header}" "${blobs_base_url}/${layer}" |
    gunzip -c > "${tar_file}"; then
    echo "Error: failed to download or decompress layer ${hash}." >&2
    exit 1
  fi

  echo "Extracting layer..."
  # Extraction problems are reported but not fatal, so that a layer with a
  # few entries that cannot be created still yields a best-effort result.
  if ! tar -xf "${tar_file}" -C "${output_dir}"; then
    echo "Warning: errors occurred while extracting layer ${hash}." >&2
  fi
  # The tarball is no longer needed once unpacked; free the disk space now.
  rm -f -- "${tar_file}"

  # Ref: https://github.com/moby/moby/blob/master/image/spec/v1.2.md#creating-an-image-filesystem-changeset
  #      https://github.com/moby/moby/blob/master/pkg/archive/whiteouts.go
  # Search for "whiteout" files to indicate files deleted in this layer.
  # `IFS=` applies to `read` only, so the shell's IFS needs no save/restore.
  find "${output_dir}" -name '.wh.*' | while IFS= read -r f; do
    dir="${f%/*}"
    wh_file="${f##*/}"
    file="${wh_file#.wh.}"
    # Delete both the whiteout file and the whited-out file.
    rm -rf -- "${dir}/${wh_file}" "${dir}/${file}"
  done
done

echo "Image contents extracted into $output_dir."