diff --git a/Makefile.am b/Makefile.am index 9a98213c..47414b06 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,9 +2,14 @@ SUBDIRS=libcomposefs tools tests CLEANFILES= -MANPAGES=\ +MAN1PAGES=\ man/mount.composefs.md \ - man/mkcomposefs.md + man/mkcomposefs.md \ + man/composefs-info.md +MAN5PAGES=\ + man/composefs-dump.md + +MANPAGES=${MAN1PAGES} ${MAN5PAGES} EXTRA_DIST=\ composefs.pc.in \ @@ -20,8 +25,13 @@ man/%.1: man/%.md mkdir -p man ${PANDOC} $+ -s -t man > $@ -man1_MANS = $(MANPAGES:.md=.1) +man/%.5: man/%.md + mkdir -p man + ${PANDOC} $+ -s -t man > $@ + +man1_MANS = $(MAN1PAGES:.md=.1) +man5_MANS = $(MAN5PAGES:.md=.5) -CLEANFILES += ${man1_MANS} +CLEANFILES += ${man1_MANS} ${man5_MANS} endif diff --git a/libcomposefs/lcfs-mount.c b/libcomposefs/lcfs-mount.c index cf7155e9..ce7c6e1b 100644 --- a/libcomposefs/lcfs-mount.c +++ b/libcomposefs/lcfs-mount.c @@ -170,44 +170,6 @@ static char *escape_mount_option(const char *str) return res; } -static int hexdigit(char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return 10 + (c - 'a'); - if (c >= 'A' && c <= 'F') - return 10 + (c - 'A'); - return -1; -} - -static int digest_to_raw(const char *digest, uint8_t *raw, int max_size) -{ - int size = 0; - - while (*digest) { - char c1, c2; - int n1, n2; - - if (size >= max_size) - return -1; - - c1 = *digest++; - n1 = hexdigit(c1); - if (n1 < 0) - return -1; - - c2 = *digest++; - n2 = hexdigit(c2); - if (n2 < 0) - return -1; - - raw[size++] = (n1 & 0xf) << 4 | (n2 & 0xf); - } - - return size; -} - static int lcfs_validate_mount_options(struct lcfs_mount_state_s *state) { struct lcfs_mount_options_s *options = state->options; diff --git a/libcomposefs/lcfs-utils.h b/libcomposefs/lcfs-utils.h index e493d6d7..c53073d7 100644 --- a/libcomposefs/lcfs-utils.h +++ b/libcomposefs/lcfs-utils.h @@ -42,6 +42,44 @@ static inline char *memdup(const char *s, size_t len) return s2; } +static inline int hexdigit(char c) +{ + if (c >= '0' && c <= '9') 
+ return c - '0'; + if (c >= 'a' && c <= 'f') + return 10 + (c - 'a'); + if (c >= 'A' && c <= 'F') + return 10 + (c - 'A'); + return -1; +} + +static inline int digest_to_raw(const char *digest, uint8_t *raw, int max_size) +{ + int size = 0; + + while (*digest) { + char c1, c2; + int n1, n2; + + if (size >= max_size) + return -1; + + c1 = *digest++; + n1 = hexdigit(c1); + if (n1 < 0) + return -1; + + c2 = *digest++; + n2 = hexdigit(c2); + if (n2 < 0) + return -1; + + raw[size++] = (n1 & 0xf) << 4 | (n2 & 0xf); + } + + return size; +} + static inline char *str_join(const char *a, const char *b) { size_t a_len = strlen(a); diff --git a/libcomposefs/lcfs-writer.c b/libcomposefs/lcfs-writer.c index 9dcab379..2abdb03c 100644 --- a/libcomposefs/lcfs-writer.c +++ b/libcomposefs/lcfs-writer.c @@ -243,7 +243,8 @@ int lcfs_compute_tree(struct lcfs_ctx_s *ctx, struct lcfs_node_s *root) for (node = root; node != NULL; node = node->next) { for (size_t i = 0; i < node->children_size; i++) { struct lcfs_node_s *child = node->children[i]; - if (child->link_to != NULL && !child->link_to->in_tree) { + struct lcfs_node_s *link_to = follow_links(child); + if (child->link_to != NULL && !link_to->in_tree) { /* Link to inode outside tree */ errno = EINVAL; return -1; @@ -720,10 +721,13 @@ struct lcfs_node_s *lcfs_load_node_from_fd(int fd) int lcfs_node_set_payload(struct lcfs_node_s *node, const char *payload) { - char *dup = strdup(payload); - if (dup == NULL) { - errno = ENOMEM; - return -1; + char *dup = NULL; + if (payload) { + dup = strdup(payload); + if (dup == NULL) { + errno = ENOMEM; + return -1; + } } free(node->payload); node->payload = dup; diff --git a/man/.gitignore b/man/.gitignore index f7e585b8..8c7bbbe7 100644 --- a/man/.gitignore +++ b/man/.gitignore @@ -1 +1,2 @@ *.1 +*.5 diff --git a/man/composefs-dump.md b/man/composefs-dump.md new file mode 100644 index 00000000..416a0d79 --- /dev/null +++ b/man/composefs-dump.md @@ -0,0 +1,121 @@ +% composefs-dump(5) composefs | 
File Formats + +# NAME + +composefs-dump - textual file format for composefs content + +# DESCRIPTION + +Both the *composefs-info* and the *mkcomposefs* commands support +generation/consumption of a textual description of the contents of a +composefs image. This can be used to inspect or modify an image, or to +generate an image without having to have a local directory with the +files in it. + +The file format is very simple, with one file per line, first with +11 fixed fields, followed by a variable number of extended attributes +for the file. + +Fields are separated by a single space, and lines by a single +newline. Extended attributes further use '=' to separate key from +value. Therefore all these characters, as well as non-printable +characters, are escaped in the fields ('=' only in xattr fields). +Also, back-slashes have to be escaped as they are used as the +escape mechanism. + +Escapes are of the form \xXY which escapes a single byte using two hex +digits. For example \x00 is the zero byte and \xff is the 255 byte. +Optionally, these custom escapes are supported: + + **\\\\** + : backslash. + + **\\n** + : newline. + + **\\r** + : carriage return. + + **\\t** + : tab + + **\\=** + : equal + +Optional fields that are not set contain '-', and if a field actually +has that particular value it is escaped. + +The fixed fields on a line are (all numbers in base 10 unless +otherwise specified): + +**PATH** +: The full, absolute path of the file in the image. Any directories + used as prefix in the path must have been in the file before this + line. + +**SIZE** +: The size of the file. This is ignored for directories. + +**MODE** +: The st_mode stat field of the file in octal, which includes both the + permissions and the file type. + + Additionally, if the file is a hardlink, then this field will + start with a single '@' character, and the payload field points + to the target file. 
Note that all other fields are typically + filled out for a hardlink with the values of the target, but for generation + of a new file we ignore all the fields except the payload. + +**NLINK** +: The st_nlink stat field. + +**UID** +: The owner uid. + +**GID** +: The owner gid. + +**RDEV** +: The st_rdev stat field. + +**MTIME** +: The modification time in seconds and nanoseconds since the Unix + epoch, separated by '.'. Note this is not a float: "1.1" means + one second and one nanosecond. + +**PAYLOAD** +: The payload of the file. For symbolic links this means the symlink + target. For regular files this is the relative pathname of the + backing file. For hardlinks (see **MODE**), this is the path of + another file in this dump that this entry is a hardlink of. + +**CONTENT** +: Small files can inline the actual content in the composefs + image. This contains an escaped version of the content. + This must match the size specified in **SIZE**. + +**DIGEST** +: A fs-verity digest for the file (only used for regular files, and + not if **CONTENT** is set) that will be validated against backing + files when used. + +After the fixed fields come the xattrs, escaped and space-separated in the form +**KEY**=**VALUE**. Note that '=' must be escaped in **KEY**. 
+ + +# EXAMPLE + +``` +/ 4096 40755 4 1000 1000 0 1695372970.944925700 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a\x20dir\x20w\x20space 27 40755 2 1000 1000 0 1694598852.869646118 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-dir 45 40755 2 1000 1000 0 1674041780.601887980 - - - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-dir/a-file 259 100644 1 1000 1000 0 1695368732.385062094 35/d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/a-hardlink 259 @100644 1 1000 1000 0 1695368732.385062094 /a-dir/a-file - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +/inline.txt 10 100644 1 1000 1000 0 1697019909.446146440 - some-text\n - security.selinux=unconfined_u:object_r:unlabeled_t:s0\x00 +``` + +# SEE ALSO + +**composefs-info(1)**, **mkcomposefs(1)** + +[composefs upstream](https://github.com/containers/composefs) diff --git a/man/composefs-info.md b/man/composefs-info.md new file mode 100644 index 00000000..7b62f0f7 --- /dev/null +++ b/man/composefs-info.md @@ -0,0 +1,49 @@ +% composefs-info(1) composefs | User Commands + +# NAME + +composefs-info - print information about a composefs image + +# SYNOPSIS +**composefs-info** [ls|objects|missing-objects|dump] *IMAGE* [*IMAGE2* *IMAGE3* ...] + +# DESCRIPTION + +The composefs-info command lets you inspect a composefs image. It has +several sub-commands: + +**ls** +: Prints a simple list of the files and directories in the images as + well as their backing file or symlink target. + +**objects** +: Prints a list of all the backing files referenced by the images, + in sorted order. 
+ +**missing-objects** +: Prints a list of all the missing backing files referenced by the + images, in sorted order, given a backing file store passed in + using the **\-\-basedir** option. + +**dump** +: Prints a full dump of the images in a line-based textual format. + See **composefs-dump(5)** for more details. This format is also + accepted as input to mkcomposefs if the **\-\-from-file** + option is used. + +# OPTIONS + +The provided *IMAGE* argument must be a composefs file. Multiple images +can be specified. + +**composefs-info** accepts the following options: + + +**\-\-basedir**=*PATH* +: This should point to a directory of backing files, and will be used + by the **missing-objects** command to know what files are available. + +# SEE ALSO +**mkcomposefs(1)**, **composefs-dump(5)** + +[composefs upstream](https://github.com/containers/composefs) diff --git a/man/mkcomposefs.md b/man/mkcomposefs.md index 51fd6c7a..fd307a65 100644 --- a/man/mkcomposefs.md +++ b/man/mkcomposefs.md @@ -5,7 +5,7 @@ mkcomposefs - create a composefs filesystem image # SYNOPSIS -**mkcomposefs** *SOURCEDIR* *IMAGE* +**mkcomposefs** *SOURCE* *IMAGE* # DESCRIPTION @@ -14,8 +14,9 @@ or more separate directories containing content-addressed backing data for regular files. **mkcomposefs** constructs the mountable "composefs image" using the -source directory as input. It can also create the backing store -directory. +source as input. It can also create the backing store directory. +Typically the source is a directory, but with **\-\-from-file** it can +also be a file. # OPTIONS @@ -55,7 +56,12 @@ will be a mountable composefs image. **\-\-user-xattrs** : Only add xattrs with the "user." prefix to files in the image. +**\-\-from-file** +: The source is a file in the **composefs-dump(5)** format. If + the specified file is "-", the data is read from stdin. 
+ # SEE ALSO +**composefs-info(1)**, **mount.composefs(1)**, **composefs-dump(5)** -- [composefs upstream](https://github.com/containers/composefs) +[composefs upstream](https://github.com/containers/composefs) diff --git a/man/mount.composefs.md b/man/mount.composefs.md index c5c83ca7..3679c9ac 100644 --- a/man/mount.composefs.md +++ b/man/mount.composefs.md @@ -62,5 +62,6 @@ options when passed via the `-o OPTIONS` argument. : Specifies an overlayfs workdir to go with **upperdir**. # SEE ALSO +**composefs-info(1)**, **mount.composefs(1)** -- [composefs upstream](https://github.com/containers/composefs) +[composefs upstream](https://github.com/containers/composefs) diff --git a/tests/test-checksums.sh b/tests/test-checksums.sh index df8520df..390cbb5a 100755 --- a/tests/test-checksums.sh +++ b/tests/test-checksums.sh @@ -45,5 +45,11 @@ for format in erofs ; do echo Dump is not reproducible exit 1 fi + + ${VALGRIND_PREFIX} ${BINDIR}/composefs-info dump $tmpfile | ${VALGRIND_PREFIX} ${BINDIR}/mkcomposefs --from-file - $tmpfile2 + if ! cmp $tmpfile $tmpfile2; then + echo Dump is not reproducible via composefs-info dump + exit 1 + fi done done diff --git a/tests/test-random-fuse.sh b/tests/test-random-fuse.sh index e3fce354..12c80ecb 100755 --- a/tests/test-random-fuse.sh +++ b/tests/test-random-fuse.sh @@ -42,6 +42,15 @@ test_random() { exit 1 fi + # dump + mkcomposefs should produce the identical results + echo Dumping composefs image + ${VALGRIND_PREFIX} ${BINDIR}/composefs-info dump $workdir/root.cfs | ${VALGRIND_PREFIX} ${BINDIR}/mkcomposefs --from-file - $workdir/dump.cfs + if ! 
cmp $workdir/root.cfs $workdir/dump.cfs; then + echo Dump + mkcomposefs is not reproducible + diff -u <(${BINDIR}/composefs-info dump $workdir/root.cfs) <(${BINDIR}/composefs-info dump $workdir/dump.cfs) + exit 1 + fi + if [ $has_fuse == 'n' ]; then return; fi diff --git a/tools/composefs-info.c b/tools/composefs-info.c index 512bbc9b..a8f9c97a 100644 --- a/tools/composefs-info.c +++ b/tools/composefs-info.c @@ -101,6 +101,15 @@ static void print_escaped(const char *val, ssize_t len, int escape) } } +static void print_escaped_optional(const char *val, ssize_t len, int escape) +{ + if (val == NULL) { + printf("-"); + } else { + print_escaped(val, len, escape); + } +} + static void print_node(struct lcfs_node_s *node, char *parent_path) { for (size_t i = 0; i < lcfs_node_get_n_children(node); i++) { @@ -151,29 +160,67 @@ static void digest_to_string(const uint8_t *csum, char *buf) buf[j] = '\0'; } -static void dump_node(struct lcfs_node_s *node, char *path) +static char *node_build_path(struct lcfs_node_s *node) { - struct lcfs_node_s *target; - struct timespec mtime; - const char *payload; - const uint8_t *digest; + size_t pathlen = 0; + for (struct lcfs_node_s *n = node; n != NULL; n = lcfs_node_get_parent(n)) { + const char *name = lcfs_node_get_name(n); + + /* separator after all but final element */ + if (n != node) + pathlen += 1; + + /* Root has no name */ + if (name) + pathlen += strlen(name); + } + + char *path = malloc(pathlen + 1); + char *p = path + pathlen; + *p = 0; - target = lcfs_node_get_hardlink_target(node); + for (struct lcfs_node_s *n = node; n != NULL; n = lcfs_node_get_parent(n)) { + const char *name = lcfs_node_get_name(n); + if (n != node) { + p--; + *p = '/'; + } + if (name) { + size_t len = strlen(name); + p -= len; + memcpy(p, name, len); + } + } + + return path; +} + +static void dump_node(struct lcfs_node_s *node, char *path) +{ + struct lcfs_node_s *target = lcfs_node_get_hardlink_target(node); + cleanup_free char *hardlink_path = NULL; 
if (target == NULL) target = node; + else + hardlink_path = node_build_path(target); + struct timespec mtime; lcfs_node_get_mtime(target, &mtime); - payload = lcfs_node_get_payload(target); - digest = lcfs_node_get_fsverity_digest(target); + const char *payload = lcfs_node_get_payload(target); + const uint8_t *digest = lcfs_node_get_fsverity_digest(target); + const uint8_t *content = lcfs_node_get_content(target); + uint64_t size = lcfs_node_get_size(target); print_escaped(*path == 0 ? "/" : path, -1, ESCAPE_STANDARD); - printf(" %" PRIu64 " %s%o %u %u %u %u %" PRIi64 ".%u ", - lcfs_node_get_size(target), target == node ? "" : "@", - lcfs_node_get_mode(target), lcfs_node_get_nlink(target), - lcfs_node_get_uid(target), lcfs_node_get_gid(target), - lcfs_node_get_rdev(target), (int64_t)mtime.tv_sec, - (unsigned int)mtime.tv_nsec); - print_escaped(payload ? payload : "-", -1, ESCAPE_LONE_DASH); + printf(" %" PRIu64 " %s%o %u %u %u %u %" PRIi64 ".%u ", size, + hardlink_path != NULL ? "@" : "", lcfs_node_get_mode(target), + lcfs_node_get_nlink(target), lcfs_node_get_uid(target), + lcfs_node_get_gid(target), lcfs_node_get_rdev(target), + (int64_t)mtime.tv_sec, (unsigned int)mtime.tv_nsec); + print_escaped_optional(hardlink_path ? 
hardlink_path : payload, -1, + ESCAPE_LONE_DASH); + printf(" "); + print_escaped_optional((char *)content, size, ESCAPE_LONE_DASH); if (digest) { char digest_str[LCFS_DIGEST_SIZE * 2 + 1] = { 0 }; diff --git a/tools/mkcomposefs.c b/tools/mkcomposefs.c index 0e12f593..bb262186 100644 --- a/tools/mkcomposefs.c +++ b/tools/mkcomposefs.c @@ -21,6 +21,7 @@ #include "libcomposefs/lcfs-writer.h" #include "libcomposefs/lcfs-utils.h" +#include "libcomposefs/lcfs-internal.h" #include #include @@ -36,6 +37,11 @@ #include #include +static void oom(void) +{ + errx(EXIT_FAILURE, "Out of memory"); +} + static void digest_to_string(const uint8_t *csum, char *buf) { static const char hexchars[] = "0123456789abcdef"; @@ -310,14 +316,15 @@ static void usage(const char *argv0) { const char *bin = basename(argv0); fprintf(stderr, - "Usage: %s [OPTIONS] SOURCEDIR IMAGE\n" + "Usage: %s [OPTIONS] SOURCE IMAGE\n" "Options:\n" " --digest-store=PATH Store content files in this directory\n" " --use-epoch Make all mtimes zero\n" " --skip-xattrs Don't store file xattrs\n" " --user-xattrs Only store user.* xattrs\n" " --print-digest Print the digest of the image\n" - " --print-digest-only Print the digest of the image, don't write image\n", + " --print-digest-only Print the digest of the image, don't write image\n" + " --from-file The source is a dump file, not a directory\n", bin); } @@ -328,6 +335,7 @@ static void usage(const char *argv0) #define OPT_PRINT_DIGEST 109 #define OPT_PRINT_DIGEST_ONLY 111 #define OPT_USER_XATTRS 112 +#define OPT_FROM_FILE 113 static ssize_t write_cb(void *_file, void *buf, size_t count) { @@ -336,6 +344,488 @@ static ssize_t write_cb(void *_file, void *buf, size_t count) return fwrite(buf, 1, count, file); } +static size_t split_at(const char **start, size_t *length, char split_char, + bool *partial) +{ + char *end = memchr(*start, split_char, *length); + if (end == NULL) { + size_t part_len = *length; + *start = *start + *length; + ; + *length = 0; + if (partial) 
+ *partial = true; + return part_len; + } + + size_t part_len = end - *start; + *start += part_len + 1; + *length -= part_len + 1; + if (partial) + *partial = false; + + return part_len; +} + +enum { + FIELD_PATH, + FIELD_SIZE, + FIELD_MODE, + FIELD_NLINK, + FIELD_UID, + FIELD_GID, + FIELD_RDEV, + FIELD_MTIME, + FIELD_PAYLOAD, + FIELD_CONTENT, + FIELD_DIGEST, + + FIELD_XATTRS_START, +}; + +const char *names[] = { + "PATH", "SIZE", "MODE", "NLINK", "UID", "GID", + "RDEV", "MTIME", "PAYLOAD", "CONTENT", "DIGEST", + + "XATTRS_START", +}; + +static char *unescape_string(const char *escaped, size_t escaped_size, + size_t *unescaped_size) +{ + const char *escaped_end = escaped + escaped_size; + char *res = malloc(escaped_size + 1); + if (res == NULL) + oom(); + + char *out = res; + + while (escaped < escaped_end) { + char c = *escaped++; + if (c == '\\') { + if (escaped >= escaped_end) + errx(EXIT_FAILURE, "No character after escape"); + c = *escaped++; + switch (c) { + case '\\': + *out++ = '\\'; + break; + case 'n': + *out++ = '\n'; + break; + case 'r': + *out++ = '\r'; + break; + case 't': + *out++ = '\t'; + break; + case 'x': + if (escaped >= escaped_end) + errx(EXIT_FAILURE, + "No hex characters after hex escape"); + int x1 = hexdigit(*escaped++); + if (escaped >= escaped_end) + errx(EXIT_FAILURE, + "No hex characters after hex escape"); + int x2 = hexdigit(*escaped++); + if (x1 < 0 || x2 < 0) + errx(EXIT_FAILURE, + "Invalid hex characters after hex escape"); + + *out++ = x1 << 4 | x2; + break; + default: + errx(EXIT_FAILURE, "Unsupported escape type %c", c); + } + } else { + *out++ = c; + } + } + + if (unescaped_size) + *unescaped_size = out - res; + + *out = 0; /* Null terminate */ + + return res; +} + +static char *unescape_optional_string(const char *escaped, size_t escaped_size, + size_t *unescaped_size) +{ + /* Optional */ + if (escaped_size == 1 && escaped[0] == '-') + return NULL; + + return unescape_string(escaped, escaped_size, unescaped_size); +} + 
+static struct lcfs_node_s *lookup_parent_path(struct lcfs_node_s *node, + const char *path, const char **name_out) +{ + while (*path == '/') + path++; + + const char *start = path; + while (*path != 0 && *path != '/') + path++; + + if (*path == 0) { + *name_out = start; + return node; + } + + cleanup_free char *name = strndup(start, path - start); + + struct lcfs_node_s *child = lcfs_node_lookup_child(node, name); + if (child == NULL) + return NULL; + + return lookup_parent_path(child, path, name_out); +} + +static struct lcfs_node_s *lookup_path(struct lcfs_node_s *node, const char *path) +{ + while (*path == '/') + path++; + + if (*path == 0) + return node; + + const char *start = path; + while (*path != 0 && *path != '/') + path++; + + cleanup_free char *name = strndup(start, path - start); + + struct lcfs_node_s *child = lcfs_node_lookup_child(node, name); + if (child == NULL) + return NULL; + + return lookup_path(child, path); +} + +static uint64_t parse_int_field(const char *str, size_t length, int base) +{ + cleanup_free char *s = strndup(str, length); + if (s == NULL) + oom(); + + char *endptr = NULL; + unsigned long long v = strtoull(s, &endptr, base); + if (*s == 0 || *endptr != 0) + errx(EXIT_FAILURE, "Invalid integer %s\n", s); + + return (uint64_t)v; +} + +static void parse_mtime(const char *str, size_t length, struct timespec *mtime) +{ + const char *mtime_sec_s = str; + size_t mtime_sec_len = split_at(&str, &length, '.', NULL); + uint64_t mtime_sec = parse_int_field(mtime_sec_s, mtime_sec_len, 10); + uint64_t mtime_nsec = parse_int_field(str, length, 10); + mtime->tv_sec = mtime_sec; + mtime->tv_nsec = mtime_nsec; +} + +static void parse_xattr(const char *data, size_t data_len, struct lcfs_node_s *node) +{ + const char *xattr_name = data; + size_t xattr_name_len = split_at(&data, &data_len, '=', NULL); + + cleanup_free char *key = unescape_string(xattr_name, xattr_name_len, NULL); + size_t value_len; + cleanup_free char *value = 
unescape_string(data, data_len, &value_len); + + if (lcfs_node_set_xattr(node, key, value, value_len) != 0) + errx(EXIT_FAILURE, "Can't set xattr"); +} + +typedef struct hardlink_fixup hardlink_fixup; +struct hardlink_fixup { + struct lcfs_node_s *node; + char *target_path; + hardlink_fixup *next; +}; + +typedef struct dump_info dump_info; +struct dump_info { + struct lcfs_node_s *root; + hardlink_fixup *hardlink_fixups; +}; + +typedef struct field_info field_info; +struct field_info { + const char *data; + size_t len; +}; + +static void tree_add_node(dump_info *info, const char *path, struct lcfs_node_s *node) +{ + if (strcmp(path, "/") == 0) { + if (!lcfs_node_dirp(node)) + errx(EXIT_FAILURE, "Root must be a directory"); + + if (info->root == NULL) + info->root = lcfs_node_ref(node); + else + errx(EXIT_FAILURE, "Can't have multiple roots"); + } else { + const char *name; + struct lcfs_node_s *parent = + lookup_parent_path(info->root, path, &name); + + if (parent == NULL) + errx(EXIT_FAILURE, "Parent directory missing for %s", path); + + if (!lcfs_node_dirp(parent)) + errx(EXIT_FAILURE, "Parent must be a directory for %s", path); + + int r = lcfs_node_add_child(parent, node, name); + if (r < 0) { + if (r == -EEXIST) + err(EXIT_FAILURE, "Path %s already exist", path); + err(EXIT_FAILURE, "Can't add child"); + } + /* add_child took ownership, ref again */ + lcfs_node_ref(node); + } +} + +static void tree_add_hardlink_fixup(dump_info *info, char *target_path, + struct lcfs_node_s *node) +{ + hardlink_fixup *fixup = calloc(1, sizeof(hardlink_fixup)); + if (fixup == NULL) + oom(); + + fixup->node = node; + fixup->target_path = target_path; /* Takes ownership */ + + fixup->next = info->hardlink_fixups; + info->hardlink_fixups = fixup; +} + +static void tree_resolve_hardlinks(dump_info *info) +{ + hardlink_fixup *fixup = info->hardlink_fixups; + while (fixup != NULL) { + hardlink_fixup *next = fixup->next; + struct lcfs_node_s *target = + lookup_path(info->root, 
fixup->target_path); + if (target == NULL) + errx(EXIT_FAILURE, "No target at %s for hardlink", + fixup->target_path); + + /* Don't override existing value from image for target nlink */ + uint32_t old_nlink = lcfs_node_get_nlink(target); + + lcfs_node_make_hardlink(fixup->node, target); + + lcfs_node_set_nlink(target, old_nlink); + + free(fixup->target_path); + free(fixup); + + fixup = next; + } +} + +static void tree_from_dump_line(dump_info *info, const char *line, size_t line_len) +{ + /* Split out all fixed fields */ + field_info fields[FIELD_XATTRS_START]; + for (int i = 0; i < FIELD_XATTRS_START; i++) { + fields[i].data = line; + fields[i].len = split_at(&line, &line_len, ' ', NULL); + } + + cleanup_free char *path = unescape_string(fields[FIELD_PATH].data, + fields[FIELD_PATH].len, NULL); + + bool is_hardlink = false; + /* First char in mode is @ if hardlink */ + if (fields[FIELD_MODE].len > 0 && fields[FIELD_MODE].data[0] == '@') { + is_hardlink = true; + fields[FIELD_MODE].len -= 1; + fields[FIELD_MODE].data += 1; + } + uint64_t mode = parse_int_field(fields[FIELD_MODE].data, + fields[FIELD_MODE].len, 8); + + cleanup_node struct lcfs_node_s *node = lcfs_node_new(); + lcfs_node_set_mode(node, mode); + + tree_add_node(info, path, node); + + /* For hardlinks, bail out early and handle in a fixup at the + * end when we can resolve the target path. 
*/ + if (is_hardlink) { + if (lcfs_node_dirp(node)) + errx(EXIT_FAILURE, "Directories can't be hardlinked"); + cleanup_free char *target_path = + unescape_optional_string(fields[FIELD_PAYLOAD].data, + fields[FIELD_PAYLOAD].len, NULL); + tree_add_hardlink_fixup(info, steal_pointer(&target_path), node); + return; + } + + /* Handle regular files/dir data from fixed fields */ + uint64_t size = parse_int_field(fields[FIELD_SIZE].data, + fields[FIELD_SIZE].len, 10); + uint64_t nlink = parse_int_field(fields[FIELD_NLINK].data, + fields[FIELD_NLINK].len, 10); + uint64_t uid = + parse_int_field(fields[FIELD_UID].data, fields[FIELD_UID].len, 10); + uint64_t gid = + parse_int_field(fields[FIELD_GID].data, fields[FIELD_GID].len, 10); + uint64_t rdev = parse_int_field(fields[FIELD_RDEV].data, + fields[FIELD_RDEV].len, 10); + + struct timespec mtime; + parse_mtime(fields[FIELD_MTIME].data, fields[FIELD_MTIME].len, &mtime); + + cleanup_free char *payload = unescape_optional_string( + fields[FIELD_PAYLOAD].data, fields[FIELD_PAYLOAD].len, NULL); + size_t content_len; + cleanup_free char *content = + unescape_optional_string(fields[FIELD_CONTENT].data, + fields[FIELD_CONTENT].len, &content_len); + if (content && content_len != size) + errx(EXIT_FAILURE, "Invalid content size %lld, must match size %lld", + (long long)content_len, (long long)size); + + cleanup_free char *digest = unescape_optional_string( + fields[FIELD_DIGEST].data, fields[FIELD_DIGEST].len, NULL); + + lcfs_node_set_mode(node, mode); + lcfs_node_set_size(node, size); + lcfs_node_set_nlink(node, nlink); + lcfs_node_set_uid(node, uid); + lcfs_node_set_gid(node, gid); + lcfs_node_set_rdev(node, rdev); + lcfs_node_set_mtime(node, &mtime); + lcfs_node_set_payload(node, payload); + if (content) + lcfs_node_set_content(node, (uint8_t *)content, size); + + if (digest) { + uint8_t raw[LCFS_DIGEST_SIZE]; + digest_to_raw(digest, raw, LCFS_DIGEST_SIZE); + lcfs_node_set_fsverity_digest(node, raw); + } + + /* Handle trailing 
xattrs */ + while (line_len > 0) { + const char *xattr = line; + size_t xattr_len = split_at(&line, &line_len, ' ', NULL); + + parse_xattr(xattr, xattr_len, node); + } +} + +struct buffer { + char *buf; + size_t size; + size_t capacity; +}; + +static void buffer_ensure_space(struct buffer *buf, size_t free_size_needed) +{ + size_t min_capacity = buf->size + free_size_needed; + if (buf->capacity >= min_capacity) + return; + + /* No space, grow */ + if (buf->capacity == 0) + buf->capacity = 64 * 1024; + else + buf->capacity = buf->capacity * 2; + + if (buf->capacity < min_capacity) + buf->capacity = min_capacity; + + buf->buf = realloc(buf->buf, buf->capacity); + if (buf->buf == NULL) + oom(); +} + +/* Fills buffer and returns the amount read. 0 on file end */ +static size_t buffer_fill(struct buffer *buf, FILE *input) +{ + /* Grow buffer if needed */ + buffer_ensure_space(buf, 1); + + size_t bytes_read = + fread(buf->buf + buf->size, 1, buf->capacity - buf->size, input); + if (bytes_read == 0 && ferror(input)) + errx(EXIT_FAILURE, "Error reading from file"); + buf->size += bytes_read; + + return bytes_read; +} + +static void buffer_reset(struct buffer *buf) +{ + /* NOTE: Leaves buffer data as is, just modified size */ + buf->size = 0; +} + +static void buffer_add(struct buffer *buf, const char *src, size_t len) +{ + buffer_ensure_space(buf, len); + + /* memmove, as src may be in the buf */ + memmove(buf->buf + buf->size, src, len); + buf->size += len; +} + +static void buffer_free(struct buffer *buf) +{ + free(buf->buf); +} + +static struct lcfs_node_s *tree_from_dump(FILE *input) +{ + dump_info info = { NULL }; + + struct buffer buf = { NULL }; + + while (!feof(input)) { + size_t bytes_read = buffer_fill(&buf, input); + bool short_read = bytes_read == 0; + + const char *data = buf.buf; + size_t remaining_data = buf.size; + buffer_reset(&buf); + + while (remaining_data > 0) { + const char *line = data; + bool partial; + size_t line_len = + split_at(&data, 
&remaining_data, '\n', &partial); + + if (!partial || short_read) { + tree_from_dump_line(&info, line, line_len); + } else { + /* Last line didn't have a newline and + * this wasn't a short read, so keep + * this for next read. + */ + buffer_add(&buf, line, line_len); + } + } + } + + buffer_free(&buf); + + /* Fixup hardlinks now that we have all other files */ + tree_resolve_hardlinks(&info); + + return info.root; +} + int main(int argc, char **argv) { const struct option longopts[] = { @@ -381,6 +871,12 @@ int main(int argc, char **argv) flag: NULL, val: OPT_PRINT_DIGEST_ONLY }, + { + name: "from-file", + has_arg: no_argument, + flag: NULL, + val: OPT_FROM_FILE + }, {}, }; struct lcfs_write_options_s options = { 0 }; @@ -388,9 +884,10 @@ int main(int argc, char **argv) int buildflags = 0; bool print_digest = false; bool print_digest_only = false; + bool from_file = false; struct lcfs_node_s *root; const char *out = NULL; - const char *dir_path = NULL; + const char *src_path = NULL; const char *digest_store_path = NULL; cleanup_free char *pathbuf = NULL; uint8_t digest[LCFS_DIGEST_SIZE]; @@ -424,6 +921,9 @@ int main(int argc, char **argv) case OPT_PRINT_DIGEST_ONLY: print_digest = print_digest_only = true; break; + case OPT_FROM_FILE: + from_file = true; + break; case ':': fprintf(stderr, "option needs a value\n"); exit(EXIT_FAILURE); @@ -441,9 +941,9 @@ int main(int argc, char **argv) usage(bin); exit(1); } - dir_path = argv[0]; + src_path = argv[0]; - if (dir_path[0] == '\0') + if (src_path[0] == '\0') errx(EXIT_FAILURE, "Empty source path specified"); if (argc > 2) { @@ -481,12 +981,33 @@ int main(int argc, char **argv) err(EXIT_FAILURE, "failed to open output file"); } - root = lcfs_build(AT_FDCWD, dir_path, buildflags, &failed_path); - if (root == NULL) - err(EXIT_FAILURE, "error accessing %s", failed_path); + if (from_file) { + FILE *input = NULL; + bool close_input = false; + if (strcmp(src_path, "-") == 0) { + input = stdin; + } else { + input = 
fopen(src_path, "r"); + if (input == NULL) + err(EXIT_FAILURE, "open `%s`", src_path); + close_input = true; + } + + root = tree_from_dump(input); + if (root == NULL) + errx(EXIT_FAILURE, "No files in dump file"); + + if (close_input) + fclose(input); + } else { + root = lcfs_build(AT_FDCWD, src_path, buildflags, &failed_path); + if (root == NULL) + err(EXIT_FAILURE, "error accessing %s", failed_path); - if (digest_store_path && fill_store(root, dir_path, digest_store_path) < 0) - err(EXIT_FAILURE, "cannot fill store"); + if (digest_store_path && + fill_store(root, src_path, digest_store_path) < 0) + err(EXIT_FAILURE, "cannot fill store"); + } if (out_file) { options.file = out_file;