diff --git a/Cargo.toml b/Cargo.toml
index 38ff692..f6f7dc6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,8 +16,10 @@ anyhow = { version = "1.0.89", default-features = false }
 async-compression = { version = "0.4.17", default-features = false, features = ["tokio", "gzip"] }
 clap = { version = "4.5.19", default-features = false, features = ["std", "help", "usage", "derive"] }
 containers-image-proxy = "0.7.0"
+env_logger = "0.11.5"
 hex = "0.4.3"
 indicatif = { version = "0.17.8", features = ["tokio"] }
+log = "0.4.22"
 oci-spec = "0.7.0"
 regex-automata = { version = "0.4.8", default-features = false }
 rustix = { version = "0.38.37", features = ["fs", "mount", "process"] }
@@ -26,7 +28,8 @@ tar = { version = "0.4.42", default-features = false }
 tempfile = "3.13.0"
 thiserror = "2.0.4"
 tokio = "1.41.0"
-zerocopy = "0.8.13"
+xxhash-rust = { version = "0.8.12", features = ["xxh32"] }
+zerocopy = { version = "0.8.13", features = ["derive"] }
 zstd = "0.13.2"
 
 [dev-dependencies]
diff --git a/doc/erofs.md b/doc/erofs.md
new file mode 100644
index 0000000..07b5755
--- /dev/null
+++ b/doc/erofs.md
@@ -0,0 +1,431 @@
+# erofs: the missing manual
+
+## Introduction
+
+This is an attempt to document the format of erofs (or at least the subsets of
+it that we use in composefs).
+
+It probably makes sense to have `erofs_fs.h` open when reading this.
+
+## Overall concepts
+
+All integers (including all offsets) are stored in little-endian byte order.
+
+The file layout is fairly free-form.  You can freely mix inodes, data blocks,
+and shared xattr entries.  inodes are 64-bit values based on file offsets
+rather than integer indexes into a fixed table, so they can be anywhere at all.
+xattrs are 32-bit values based on offsets, so they're a bit more limited (but
+not in filesystems of reasonable size).
+
+## The first 1024 bytes (pre-superblock)
+
+The first 1024 bytes of an erofs have no particular meaning.  You can put
+anything you want there, like partition tables or boot sectors or anything
+else.  composefs puts its own header inside of this area, at the start.
+
+## The superblock (at 1024 bytes, 128 bytes long)
+
+The superblock is defined by `struct erofs_super_block`.
+
+Here's some notes about some of the fields.  Anything not mentioned is left as
+0 by us. There's some pretty wild features in here, but we don't use them all
+(and I don't understand them, either) so they're not all documented.
+
+*   `magic`: set that to `EROFS_SUPER_MAGIC_V1` (`0xE0F5E1E2`)
+*   `checksum`: only meaningful if the `SB_CHKSUM` feature is enabled.  This is
+    a crc32c over a block-sized-chunk of data starting from the superblock,
+    with this field set to 0.  That's pretty weird.  Maybe don't use this.
+*   `feature_compat`: a flags field.  The filesystem will still mount even if
+    the kernel doesn't know about any features which might be present.  The
+    flags:
+    -   `SB_CHKSUM` (`0x0001`): set if the checksum field in the superblock is
+        populated.  Otherwise, the checksum is ignored.
+    -   `MTIME` (`0x0002`): at first, erofs named the timestamp fields `ctime`
+        instead of `mtime`.  That got changed a long time ago, and this flag
+        got added to indicate filesystems that were created with the new
+        semantics. This flag has absolutely zero impact at run time: the kernel
+        ignores it.
+    -   `XATTR_FILTER` (`0x0004`): set if the xattr bloom filter should be
+        used.  Read about this in the inode section.
+*   `blkszbits`: log2 of the block size.  Better set this to 12 (4096).
+*   `root_nid`: the reference to the root inode.  See the inodes section for
+    what that means.  Normally inodes are stored in u64, but this is somewhat
+    randomly a u16, which means that you're gonna need to put the root
+    directory near the start.
+*   `inos`: the total number of inodes defined.  This is only used for
+    `statfs()` purposes.
+*   `build_time`, `build_time_nsec`: this is something like a compression
+    feature if you want all (or many) files in your filesystem to have the same
+    mtime.  Then you can use the "compact" inode layout, which doesn't have its
+    own `mtime` field, and this one will be used instead.  If you don't have
+    compact inodes then this is meaningless.
+*   `blocks`: total size of the filesystem, in blocks.  This is only used for `statfs()`.
+*   `meta_blkaddr`: the start of the "metadata area".  This is where the inodes
+    are.  This is a block address, so it gets multiplied by the block size to
+    determine the actual offset.
+*   `xattr_blkaddr`: the start of the "shared xattr area".  See the "Shared
+    xattr" and "Inodes" sections for more info.
+
+## Extended attributes
+
+There are two options for storing xattr data in an erofs:
+*   inline with the inode itself
+*   in a "shared xattr" struct somewhere
+
+The format of both of these is the same.
+
+The inline thing is nice and simple, but it might be space-inefficient for
+cases where the same (key, value) pair appears over and over again (which might
+be the case for things like security labels and acls and the like).
+
+### Prefix indexes
+
+A rudimentary form of compression is supported on xattr names.  There are a
+number of hardcoded "common prefixes" defined with the `EROFS_XATTR_INDEX_`
+constants in `erofs_fs.h`.  Confusingly, although `LUSTRE` is present, it's not
+wired up in the kernel.  Don't use that one.
+
+The basic idea is that you find the prefix for your xattr from the list (like
+`user.` or `security.`) and then you store only the "suffix" part, along with
+the prefix index.  If you can't find a prefix, you use 0 (which is conceptually
+a prefix of "").  If the prefix matches the entire name then the suffix is `""`.
+
+Note: you really need to do this "compression" step, because it's assumed
+during the lookup phase.  ie: if we're looking for an xattr `"user.xyz"` then
+we'll only consider the entries that have the prefix index for `user.` set on
+them.  If you didn't properly "compress" your xattr names, they won't be found.
+
+There's support in the erofs format for custom prefixes.  That's when the high
+bit of the prefix index is set.  These got added circa kernel version 6.4 with
+a patch series ending with `6a318ccd7e08` ("erofs: enable long extended
+attribute name prefixes") but aren't documented here because we don't use them.
+
+### On-disk format
+
+All extended attributes (both shared and inode-inline) are stored in a
+simple format with a small header.  That's `struct erofs_xattr_entry`.  It's just 4 bytes:
+*   u8: the suffix length (in bytes, no nul)
+*   u8: the prefix index (see above)
+*   u16: the value length (in bytes, no nul)
+
+The header must start at an offset with an alignment of 4.
+
+Immediately following the header is the suffix (name with prefix removed),
+immediately followed by the value.  There's no nul after the name (which is OK,
+since we know the length from the header).
+
+### Shared xattrs
+
+This is basically just an xattr stored somewhere in the filesystem image, using
+the format mentioned above.  It is referred to by a 32-bit identifier:
+*   start at the `xattr_blkaddr` mentioned in the super block.  That's a block
+    address, so remember to multiply that by the block size.
+*   add 4 times the shared xattr identifier (since the header must be 4-aligned)
+*   that's the xattr header (mentioned above)
+
+If your filesystem image is going to be smaller than 16GB then you can probably
+just leave the `xattr_blkaddr` set to 0 to make your life easier.
+
+### Inode-inline xattrs
+
+We talk about those in the Inode section.  Speaking of which, let's talk about...
+
+## Inodes
+
+Here's where things get complicated.
+
+First, the easy part: similar to shared xattrs, inodes are just a structure
+stored somewhere in the filesystem image.  There's no "inode table".  This
+works because the way that you refer to inodes is with an "nid":
+*   start at the `meta_blkaddr` mentioned in the super block.  That's a block
+    address, so remember to multiply that by the block size.
+*   add 32 times the nid (since inodes must be 32-aligned)
+*   that's the inode header
+
+### On-disk formats
+
+The very first thing in the inode is the format field.  This is a mix of two
+things, but the most important thing to talk about first is the low-order bit:
+it's set to 0 if this is a "compact" inode and 1 if it's an "extended" inode.
+
+We don't use compact inodes, so I'm not going to document them, but you can get
+a pretty good idea of what they're capable of by reading the headers.  The rest
+of this section discusses extended inodes.
+
+The extended inode header (`struct erofs_inode_extended`) has a size of 64 and
+needs to be 32-aligned.  It has these interesting fields:
+*   `format`:
+    -   first bit: as mentioned above, for an extended inode the low order bit
+        will always be set
+    -   the rest: the "data layout" (which is complicated enough to get its own
+        section)
+*   `xattr_icount`: this is also complicated enough that we want to talk about
+    it elsewhere.  See the "Extended attributes" section below (not the one
+    above!).  The main thing to know is that this will be 0 if there are none.
+*   `mode`: that's the same as you'd find in `.st_mode` from `stat()`
+*   `size`: ditto, except `.st_size`
+*   `i_u`: you'd better look at the "data layout" section about this one...
+*   `ino`: a compatibility shim for cases where we need to report `st_ino` in
+    32-bits.  For 64-bit userlands, we use the nid directly as the `.st_ino`.
+    You can do what you want with this (as long as it's unique), but for
+    filesystems smaller than 128GB you can probably just use the nid.
+*   `uid`, `gid`: those are fairly obvious, I guess
+*   `mtime`, `mtime_nsec`: those too
+*   `nlink`: try to set this correctly: some things might get upset if it's not
+    right.  For non-directories, that's the number of hardlinks (ie: 1 for
+    non-hardlinked files).  For directories, that's 2 plus the number of
+    subdirectories.
+
+Directly following the inode header is the extended attribute header (if
+`xattr_icount` is non-zero).  Then comes any inline data (as per the "data
+layout" section).
+
+### Extended attributes
+
+If the `xattr_icount` field in the inode header is set to 0 then this section
+is skipped entirely.  Otherwise we write out the inode xattr header (`struct
+erofs_xattr_ibody_header`).  This has:
+*   `name_filter` (`u32`): a bloom filter for which xattrs are present.  This
+    needs its own section.
+*   `shared_count` (`u8`): the number of shared xattrs
+*   some reserved bytes to pad things up to 12
+
+Immediately following the header come the shared xattr references.  They're in
+the format mentioned in the "Shared xattrs" section above, simply encoded as
+little-endian u32s.  So: the first `4 * shared_count` bytes after the header
+are those.
+
+Then the inline xattrs are next.  Those are stored in the format mentioned in
+the "On-disk format" sub-section in the "Extended attributes" section.  They're
+just written here one after another, with padding added so that each header is
+4-aligned.  There is also padding after the last one, which is important if
+inline data is to follow (as per the "data layout" section).
+
+#### About `xattr_icount`
+
+So, if there's no xattrs then this is zero.
+
+Otherwise this is basically the size of the extended attributes area divided by
+4, with the exception that the 12-byte header counts for only 4 bytes.  Put
+another way: you remove the size of the header, divide by 4, then add 1 back
+again.
+
+A value of 1 would be pretty suspicious, since that would indicate the presence
+of a header, but no xattrs (shared or inline), and in that case normally we'd
+omit the header.
+
+The kernel basically uses this to know how many bytes it needs to skip over
+before it can find the inline file data.  It will remove the 1, multiply by 4,
+then add 12 (the header).  See `erofs_xattr_ibody_size()`.
+
+#### About `name_filter`
+
+This is a 32-bit bloom filter used to quickly determine if a given xattr is not present.
+
+The hash algorithm is xxh32.  The thing that gets hashed is not the name, but
+the "suffix" that's left after removing the prefix.  The seed is
+`EROFS_XATTR_FILTER_SEED` plus the prefix index.  The lower 5 bits of the hash
+value (0..31) are used to determine which bit is used.
+
+For some reason a bit value of 1 here indicates the absence of a particular
+xattr, which is opposite to the usual arrangement.  You'd think it was for
+compatibility, but the filter is only engaged if the feature bit is present in
+the superblock.
+
+This feature got added in kernel commits:
+*   `3f339920175c` ("erofs: update on-disk format for xattr name filter")
+*   `fd73a4395d47` ("erofs: boost negative xattr lookup with bloom filter")
+
+### Data layout
+
+erofs has a bunch of different ways to represent the actual content associated
+with an inode (regular file content, directory entries, symlink target).
+
+We describe three of them here:
+*   plain
+*   inline
+*   chunked
+
+The data layout is chosen using some of the bits of the `format` field in the
+inode header.
+
+#### `EROFS_INODE_FLAT_PLAIN`
+
+In this case there's never any inline data.  The inode content is stored
+entirely as a series of contiguous blocks.  The offset of the first block is
+what goes in the `i_u` field (measured in blocks, not bytes).
+
+The number of blocks is determined by the `.size` field (divided by block size,
+rounded up).
+
+If the content is not a multiple of the blocksize then the last block should be
+0-padded.
+
+#### `EROFS_INODE_FLAT_INLINE`
+
+This is similar to `EROFS_INODE_FLAT_PLAIN` except if the content is not a
+multiple of the blocksize.  In that case, instead of 0-padding the last block
+to fill up a block, the content of the last block is stored directly inline
+with the inode, without padding.
+
+So, imagining the content is 2.5 blocks worth of data:
+*   the first block is the one pointed to by `i_u`
+*   the second block is the one immediately following it
+*   the last block is stored at the end of the inode
+
+The number of blocks is determined by the `.size` field, divided by block size,
+rounded down.  The remainder is the number of bytes of inline data.
+
+The inline data must be written in such a way that it does not cross a block
+boundary.  It is theoretically permitted for the inline data to be in a
+separate block (ie: the block directly following the inode data).  It is also
+permitted for the inode data itself to cross block boundaries.  There are a
+couple of caveats to be aware of, however:
+*   the alignment of inodes is 32 bytes, but the size of an extended inode is 64
+    bytes.  `mkfs.erofs` tries to ensure that extended inode headers land
+    entirely within one disk block (for efficiency), but this isn't required by
+    the kernel.
+*   `mkfs.erofs` also tries to ensure that the inline data ends in the same
+    disk block as the last byte of the inode metadata (ie: inode header plus
+    xattrs).  This is theoretically not required by the kernel.
+*   A bug present in the kernel before 6.12 meant that this was required for
+    inline symlink targets. This was fixed by `9ed50b8231e3` ("erofs: fix
+    incorrect symlink detection in fast symlink").
+*   In general, when faced with the task of writing out an inode with inline
+    data present, you may need to add padding bytes before the start of the
+    inode in order to ensure that the inline data falls within a single block.
+    If you allow inlining of large amounts of data (approaching the block size)
+    then you'll almost always need to add padding to get the correct alignment
+    (and often a large amount of it), which is wasteful.  On the other hand, if
+    you only inline very small amounts of data then you are wasting space by
+    padding out filesystem blocks with zeros. There is a balance to be struck,
+    and `mkcomposefs` uses a "heuristic" of half a block size as the inlining
+    limit.  I've performed simulations which show that this value is fairly
+    close to ideal for a random distribution of file sizes, starting inode
+    alignment and xattr content sizes.
+
+#### `EROFS_INODE_FLAT_CHUNK_BASED`
+
+In this case, the `i_u` field isn't a block reference but is instead split into
+sub-fields.  The main gist of it, though, is that this stores the log2 of the
+number of blocks per chunk (maximum of 31).
+
+So if you write 4 here, then there are 16 blocks in each chunk.
+
+The references to the chunks are then written as the inline data, 4 bytes per
+chunk, as block indexes (to the starting block).  I'm not sure if that's
+measured in blocks or in chunks, because the only reason we use this feature is
+for a special purpose: null chunks.
+
+If a chunk index is written as -1 (ie: 0xffffffff) then it refers to a "null"
+chunk of the given size.  This effectively gets you support for sparse files.
+
+For the sparse file use-case there's no benefit to choosing anything other than
+the maximum chunk format of 31 for the `i_u` field.  The number of chunks you
+need to write is determined by the file size, but for a 4096 byte block size
+and a chunk format of 31 all files less than 8TB can be handled with a single
+"chunk".
+
+#### Character and block devices
+
+If the `mode` field of the inode indicates that this is a device, then the data
+layout isn't relevant, and the `i_u` field gets the `rdev` of the device.  Note
+that this is a 32-bit field, so 32-bit rdev.  `size` is zero.
+
+
+#### Fifos and sockets
+
+These have no storage at all.  `i_u` is ignored and there is never inline data.
+`size` should always be 0.
+
+## Directories
+
+The final thing that needs describing is how a directory gets stored.  erofs
+directories are the classical mapping from names to inodes, with the extra
+'file type' field that gets returned via the `d_type` field in `struct dirent`
+(to avoid needing to `stat()` the inode).
+
+The dirent structure has a size of 12 (and an alignment of 4) and looks like:
+*   `nid` (`u64`): the inode referred to by this entry
+*   `nameoff` (`u16`): an offset to the name (inside of this block).  See below.
+*   `file_type` (`u8`): the filetype field for `d_type`, followed by a reserved `u8`
+
+The directory needs to explicitly include the `.` and `..` entries.  All
+entries (including `.` and `..`) are sorted in asciibetical order.  Note: the
+`.` and `..` are not handled specially and are not necessarily at the start:
+they're in asciibetical order too.
+
+The directory entries are taken in their sorted order and split into blocks.
+However many entries will fit into the first block go into the first block, and
+so on.  All blocks except for the last one are padded with zeros.  A directory
+has a specific encoded size (which ends up in the `size` field of the inode).
+It is made from a number of complete blocks, times the blocksize, plus the size
+of the (possible) trailing partial block (which might be inlined, depending on
+the selected data layout).
+
+Each block is a number of dirent structs packed at the start, plus the entry
+names referred to from those structs.  The entry names must immediately follow
+the structs, and each entry name must immediately follow the previous (with no
+nul).  The reason for that will become clear with our example:
+
+Let's consider an example directory with entries `.`, `..`,
+`someverylongfilename`, `subdir`. To keep things interesting, let's further
+imagine that our filesystem block size is 32 bytes.
+
+We segment into blocks by taking entries until no more entries fit.  Each entry
+is the 12 byte dirent struct, plus the name, so:
+*   `.`: (12 + 1) = 13 → 13 total bytes
+*   `..`: (12 + 2) = 14 → 27 total bytes
+*   `someverylongfilename`: (12 + 20) = 32 → too big, won't fit.
+
+So we know that the first directory block will contain `.` and `..`.  It looks like:
+*   offset `0`: the dirent struct for `.`, `nameoff` is `24`.
+*   offset `12`: the dirent struct for `..`, `nameoff` is `25`.
+*   offset `24`: `.`
+*   offset `25`: `..`
+*   offset `27`: padded with `nul`
+
+The `nameoff` fields are more important here than they seem.  If we look at the
+first `nameoff` field, it's `24`.  That tells us that there are two entries in
+this block (since the entry size is 12).  We also know the length of the name
+of the first entry because the name of the second entry starts right after it.
+
+How do we know the length of the name of the last entry?  One of three ways:
+*   if this is the final block of the directory, then the overall size of the
+    directory (in the inode `size` field) will indicate where the final name
+    must surely terminate
+*   if this is a non-final block, it might be that the name fits exactly into
+    the block size.  In that case, the end of the name is the end of the block.
+*   if this is a non-final block, and the name doesn't fit exactly into the
+    block size then it means we'll have added some padding.  In this case the
+    name is `nul`-terminated.  That's the case for our `..` entry here.
+
+Now let's do our next block:
+*   `someverylongfilename` (12 + 20) = 32 → 32 total bytes
+*   `subdir` (12 + 6) = 18 → too big, won't fit.
+
+So we only get one entry in this block.  The layout is:
+*   offset `0`: the dirent struct for `someverylongfilename`, `nameoff` is `12`.
+*   offset `12`: `someverylongfilename`
+*   no padding, since we're already at 32 bytes.
+
+In this case we look at the `nameoff` of the first entry (`12`) and know that
+there must only be one entry in this block.  And in this case, the name fills
+the block exactly, so we won't find a `nul` terminator, and we know the name
+must have a length of `20`.
+
+Finally, `subdir` gets put in the last partial block:
+*   offset `0`: the dirent struct, `nameoff` is `12`
+*   offset `12`: `subdir`
+*   offset `18`: that's the end of the directory
+
+What comes at offset `18`?  Nothing.  The `size` field of the directory is 2
+blocks (`2 * 32` = `64`) plus the `18` bytes from this block, so a total of
+`82`.
+
+Of course, if we're storing the directory as "flat plain" or "chunk based" then
+we need to pad this out to a complete block size (and we'll do that with
+`nul`s), but those padding bytes are not conceptually part of the directory
+content.  But what if we stored it "flat inline"?  We might have the next inode
+directly following.  In that case, we effectively depend on the inode `size` to
+know that the final filename has a length of `6`.
diff --git a/doc/image-format.md b/doc/image-format.md
new file mode 100644
index 0000000..40356c4
--- /dev/null
+++ b/doc/image-format.md
@@ -0,0 +1,276 @@
+# Canonical composefs file format
+
+## Prelude
+
+We expect the process of creating an erofs from a filesystem image to be
+deterministic.  `erofs` is very free-form and there are many ways things could
+be organized.
+
+Here's where we try to document some of the decisions we make.  This documents
+the erofs images produced by the `composefs` rust crate, which are currently
+different from the official `composefs` repository (ie: `libcomposefs`, in C).
+It would be very desirable to try to make this implementation exactly match the
+`libcomposefs` implementation so that we could check them against each other to
+ensure that they produce bitwise identical output.  On the other hand, we've
+been discussing creating a "version 1.1" format, and this might be a good
+jumping-off spot for that.
+
+The goal of this document is to completely and unambiguously document every
+decision we made in such a way that you could use this document as a guide to
+produce a new composefs erofs writer implementation, from scratch, which
+produces exactly the same output.  However, this document is probably currently
+very incomplete, and maybe even incorrect.  We should strive to cover every
+possible detail here, but it's hard.  Hopefully things will improve with time,
+but until then, you might need to check the implementation.
+
+In cases of ambiguity or incorrectness, issues and patches are extremely
+welcome.
+
+## Overall layout concept
+
+The composefs header and superblock are the only things that need to be at
+fixed offsets.  How do we organize everything else?
+
+Generally speaking, we perform these steps:
+*    collect the filesystem into a flat list of inodes
+*    collect and "share" xattrs, as appropriate
+*    write the composefs header and the superblock
+*    write the inodes directly following the superblock
+*    write the shared xattrs directly following the inodes
+*    then the blocks (only for directories)
+
+## Collecting inodes
+
+We collect the inodes into a flat list according to the following algorithm:
+*   our goal is to visit each inode, collecting it into the inode list as we
+    visit it, in the order that we visited it
+*   start at the root directory
+*   for each directory that we visit:
+    -   the directory is stored first, then the children
+    -   we visit the children in asciibetical order, regardless of file type
+        (ie: we interleave directories and regular files)
+    -   when visiting a child directory, we store all content of the child
+        directory before returning to the parent directory (ie: depth first)
+*   in the case of hardlinks, the inode gets added to the list at the spot that
+    the first link was encountered
+
+Consider a filesystem tree
+
+```
+ /
+   bin/
+     cfsctl
+   usr/
+     lib/
+       libcomposefs.so
+       libglib-2.0.so
+     libexec/
+       cfsctl
+```
+
+where `/bin/cfsctl` and `/usr/libexec/cfsctl` are hardlinks.
+
+In that case, we'd collect the inodes in this order:
+1.  `/`
+1.  `/bin/`
+1.  `/bin/cfsctl` (aka `/usr/libexec/cfsctl`)
+1.  `/usr/`
+1.  `/usr/lib/`
+1.  `/usr/lib/libcomposefs.so`
+1.  `/usr/lib/libglib-2.0.so`
+1.  `/usr/libexec/`
+
+(skipping `/usr/libexec/cfsctl` because we already had it by the time we encountered it).
+
+So that's 8 inodes, in that order.
+
+## Special handling for overlayfs
+
+Ultimately, the erofs image that we produce needs to be used as a layer in an
+overlayfs stack.  There are a lot of cases where the thing that we write out
+only makes sense to overlayfs.  There are other cases where we need to avoid
+writing out things that overlayfs would treat as "special".
+
+`libcomposefs` writes 256 files named from `00` to `ff` into the root directory
+as character devices with major/minor of (0, 0).  Those are overlayfs whiteouts
+and they are needed for older versions of overlayfs which don't support "data
+only" layers.  We don't target these versions, so *we don't add these files*.
+We also don't mark the root directory as opaque or do anything else special
+with it.
+
+Conversely, if we encounter a character device with major/minor (0, 0) then we
+need to escape it to make sure that it appears as such in the final composed
+image (and does not get handled by overlayfs as a whiteout).  We do that by:
+TODO (not implemented yet).
+
+We also need to make sure that the only `trusted.overlay.*` attributes which we
+write are ones that came from us.  If we encounter any `trusted.overlay.*`
+attributes in the source, we escape them to `trusted.overlay.overlay.`, causing
+them to lose their special meaning.
+
+## Extended attribute handling
+
+For each inode, we collect and write the extended attributes in asciibetical
+order, by full name.  Note: this is different than the shared xattr table which
+has a more complicated sorting, but maybe we want to unify the two.
+
+We use the hardcoded prefix indexes (which is actually mandatory).
+
+We don't use "long prefixes", but we might start doing that at some point,
+because it would sure be nice to not have to write `"overlay.redirect"`,
+`"overlay.metacopy"` and `"selinux"` over and over again. The feature seems
+complicated, though...
+
+## Collecting shared xattrs
+
+`erofs` has a facility for sharing xattrs where the name and the value are
+identical, and we use it.  After we've collected all of our inodes, we iterate
+the list and take note of all (name, value) pairs.  If any (name, value) pair
+appears more than once, we share it.
+
+The process of "sharing" involves modifying the original inode.  We iterate the
+present xattrs, and for each attribute that we share, we remove it from the
+"inline" list and add it to the "shared" list, in the same order as it appeared
+in the inline list.
+
+NB: this operation is performed on the flattened inode list, not the directory
+tree.  That means that if a particular (name, value) pair appears uniquely on
+an inode with multiple hardlinks, we'll count that as a single occurrence and
+it won't be shared.
+
+Note also: the attributes that we add ourselves are considered candidates for
+sharing.  That means that if we had two external files which were not hardlinks
+but nevertheless contained the same data, we'd end up sharing their
+`trusted.overlay.` attributes.
+
+## The composefs header
+
+`erofs` leaves the first 1024 bytes of the file free to us, and we store a
+32-byte header at offset 0.  The kernel ignores this, and our mount code
+doesn't actually do anything with it at the moment, either.  We try to fill it
+out in the same way as `libcomposefs`:
+
+*   `magic` (`u32`): `0xd078629a`
+*   `version` (`u32`): I think this is something like the overall file format
+    version.  If this changes, then things are possibly incompatible, and maybe
+    this isn't even an `erofs` anymore.  Currently `1`.
+*   `flags`: `0`
+*   `composefs_version`: I think this is something like a statement about the
+    current strategy for layout decisions.  If this changes, the algorithm for
+    building the file has probably decided to put things in different places
+    (and the checksum of the file will have changed), but the result is still
+    understandable as an `erofs`.  Currently `1`.
+
+## The superblock
+
+*   `checksum`: we don't fill that out
+*   `feature_compat`: we set `MTIME` and `XATTR_FILTER`
+*   `blkszbits`: we use 12, for a block size of 4096
+*   `root_nid`: that's going to end up being 36, which follows from the fact
+    that we put the root inode directly following the superblock, at offset
+    `1024 + 128` = `1152`.  `1152 / 32` = `36`.
+*   `inos`: we currently set that to the number of inodes in the filesystem.
+    `libcomposefs` adds some extra file content (the `00`..`ff` whiteouts) so
+    it gets a larger number than we do.
+*   `blocks`: the total filesize, divided by 4096.
+*   `build_time`, `build_time_nsec`: since we only use extended format inodes,
+    these fields are meaningless and we currently set them to 0 (which is
+    different from `libcomposefs`).
+*   `meta_blkaddr`, `xattr_blkaddr`.  We currently set both of these to 0 to
+    keep things simple. `libcomposefs` performs a complicated calculation to
+    set `meta_blkaddr` to zero as well (since the first inode directly follows
+    the superblock, it will always be within the first 4096 byte filesystem
+    block), but its complicated calculation for `xattr_blkaddr` might well land
+    on a non-zero value, so that's different from us.
+
+## The inodes
+
+After the superblock, we write the inodes.  Some notes:
+
+*   we only use extended inodes, because mtime is important to us and we
+    generally expect every file to have a unique mtime.  This is a difference
+    from `libcomposefs`.
+
+*   we use a "chunk based" data layout for non-inline regular files:
+
+    -   the way this works in overlayfs, we want to store a correctly-sized
+        sparse file in the upper layer.  This lets us have the correct `size`
+        field on the inode, so we don't need to interact with the data layer in
+        order to do `stat()`.
+
+    -   we set the chunk format (ie: the `i_u` field) to 31, the maximum
+
+    -   we store a single "null" chunk pointer
+
+    -   this corresponds to a chunk size of 8TB, which is then the upper limit
+        of files we can store
+
+    -   `libcomposefs` tries to take the smallest chunk format value which will
+        get the job done with a single chunk pointer, and will write multiple
+        chunk pointers if necessary (for extremely large files). Maybe we
+        should do that too.
+
+    -   in this case we set the `trusted.overlay.metacopy` and
+        `trusted.overlay.redirect` attributes (in that order) on the file.
+        These attributes are written first, before the other attributes that
+        would be present on the same file (which are otherwise in sorted
+        order).
+
+    -   the `trusted.overlay.metacopy` attribute is 36 bytes long, and is set to:
+        +   the 4-byte header: [0, 36, 0, 1]
+        +   the 32-byte SHA256 fs-verity digest
+
+    -   the `trusted.overlay.redirect` attribute is set to the string
+        `"/xx/yyyy..."` where `xx` is the first two lowercase hexadecimal characters
+        of the fs-verity digest and the `yyyy...` is the rest.  That's just a
+        reference into the `objects/` subdirectory of the repository (which is
+        mounted in the overlayfs stack as the data layer).
+
+*   we use a "flat inline" data layout for all other inodes:
+
+    -   for character and block devices, as well as fifos and sockets this is
+        meaningless, but we need to set something
+
+    -   for inline regular files we store the content inline.  This will break
+        if we try to inline a file larger than 4095 characters, but our current
+        cut-off is 64.
+
+    -   for symlinks this means that the link target gets stored inline.
+        Hopefully we don't have symlinks with targets longer than 4095
+        characters, or we're gonna get in trouble.
+
+    -   directories may well be larger than 4096 bytes, so we might end up
+        needing to store blocks for those.  These follow the "shared xattrs"
+        area.  We could probably set "flat plain" for directories that are an
+        exact multiple of 4096 bytes in size, and `libcomposefs` does that, but
+        we don't bother.
+
+We pad the last inode to the required alignment for inodes, even though it is
+generally followed by a shared xattr (which has a less stringent alignment
+requirement).
+
+## The shared xattrs
+
+There's not much left to be said about these.  We currently write them out in
+the order that `collections::BTreeMap` applies to our `struct XAttr`, which I
+think basically ends up sorting them by prefix index, then by suffix, then by
+value.  We might like to firm that up at some point.  This is notably different
+than the sorting applied to the attributes as they appear in the inodes, and we
+also don't give any special treatment to the `trusted.overlay.` attributes that
+we added: they're sorted here in the usual way.
+
+After we do this, and even if there were no shared xattrs, we always pad up to a
+4096 byte boundary, even if there are no data blocks.  That means that the
+filesystem image will always be a multiple of 4096.
+
+## The blocks
+
+Now come the data blocks.  These are written in sequence for each inode,
+according to the sequence of the inode in the inode list.  Due to our use of
+"flat inline" data layout, only full blocks are stored (although they may have
+included inter-block padding in directories), so we keep 4096-byte alignment
+from here on out.
+
+## The end
+
+That's it.  The file is over now.  We'll have ended on a multiple of 4096.
diff --git a/src/bin/cfsctl.rs b/src/bin/cfsctl.rs
index d62a03e..1e0cc5b 100644
--- a/src/bin/cfsctl.rs
+++ b/src/bin/cfsctl.rs
@@ -97,6 +97,8 @@ enum Command {
 }
 
 fn main() -> Result<()> {
+    env_logger::init();
+
     let args = App::parse();
 
     let repo = (if let Some(path) = args.repo {
diff --git a/src/bin/erofs-debug.rs b/src/bin/erofs-debug.rs
new file mode 100644
index 0000000..e8afea4
--- /dev/null
+++ b/src/bin/erofs-debug.rs
@@ -0,0 +1,25 @@
+use std::{fs::File, io::Read, path::PathBuf};
+
+use clap::Parser;
+
+use composefs::erofs::debug::debug_img;
+
+// NOTE: clap's derive turns the doc comments on this struct and its fields into the `--help`
+// text, so they are user-facing output rather than purely internal documentation.
+/// Produce a detailed dump of an entire erofs image
+///
+/// The output is in a diff-friendly format, such that every distinct image produces a distinct
+/// output (ie: an injective mapping).  This is useful for determining the exact ways in which two
+/// different images are different.
+#[derive(Parser)]
+struct Args {
+    /// The path to the image file to dump
+    image: PathBuf,
+}
+
+// Reads the whole image named on the command line into memory and hands it to `debug_img`.
+// Panics (via `expect`) if the file cannot be opened or read.
+fn main() {
+    let Args { image: path } = Args::parse();
+
+    let mut contents = Vec::new();
+    File::open(path)
+        .expect("Opening file")
+        .read_to_end(&mut contents)
+        .expect("read_to_end() failed");
+
+    debug_img(&contents);
+}
diff --git a/src/erofs/debug.rs b/src/erofs/debug.rs
new file mode 100644
index 0000000..1cc082c
--- /dev/null
+++ b/src/erofs/debug.rs
@@ -0,0 +1,377 @@
+use core::mem::offset_of;
+use std::{
+    collections::BTreeMap,
+    ffi::OsStr,
+    mem::discriminant,
+    os::unix::ffi::OsStrExt,
+    path::{Path, PathBuf},
+};
+
+use zerocopy::{Immutable, KnownLayout, TryFromBytes};
+
+use super::{
+    format::{self, CompactInodeHeader, ComposefsHeader, ExtendedInodeHeader, Superblock},
+    reader::{DirectoryBlock, Image, Inode, InodeHeader, InodeOps, InodeType, InodeXAttrs, XAttr},
+};
+
+// Prints every listed field of `$s` (a value of struct type `$ty`) whose value differs from the
+// `Default` of the field's type, one line per field:
+//
+//     +<hex offset of the field within $ty>    <field name>: <Debug of the value>
+//
+// Fields are passed as a `;`-separated list of identifiers.  The second arm peels off the first
+// identifier and recurses on the remainder.
+macro_rules! print_fields {
+    ($ty: ty, $s: expr, $f: ident) => {{
+        let value = &$s.$f;
+        // The never-taken `if false` branch only exists to give `default` the same type as
+        // `value`, so `Default::default()` can be inferred without naming the field's type.
+        let default = if false { value } else { &Default::default() };
+        if value != default {
+            println!("     +{:02x}    {}: {:?}", offset_of!($ty, $f), stringify!($f), value);
+        }
+    }};
+    ($ty: ty, $s:expr, $head: ident; $($tail: ident);+) => {{
+        print_fields!($ty, $s, $head);
+        print_fields!($ty, $s, $($tail);+);
+    }};
+}
+
+/// Formats `data` for display: wrapped in double quotes if it is valid UTF-8, or as a bare
+/// (unquoted) hex string otherwise.
+fn utf8_or_hex(data: &[u8]) -> String {
+    match std::str::from_utf8(data) {
+        Ok(text) => format!("\"{text}\""),
+        Err(_) => hex::encode(data),
+    }
+}
+
+// This is basically just a fancy fat pointer type
+//
+// Each variant borrows one region of the image and records how that region is to be interpreted
+// when it is printed by `debug_img`.
+enum SegmentType<'img> {
+    // The composefs header (`Image::header`)
+    Header(&'img ComposefsHeader),
+    // The erofs superblock (`Image::sb`)
+    Superblock(&'img Superblock),
+    // An inode with a compact header, including its trailing xattr/inline data
+    CompactInode(&'img Inode<CompactInodeHeader>),
+    // An inode with an extended header, including its trailing xattr/inline data
+    ExtendedInode(&'img Inode<ExtendedInodeHeader>),
+    // An xattr from the shared xattr area (`Image::shared_xattr`)
+    XAttr(&'img XAttr),
+    // One block belonging to a non-directory inode (`Image::data_block`)
+    DataBlock(&'img [u8]),
+    // One block belonging to a directory inode (`Image::directory_block`)
+    DirectoryBlock(&'img DirectoryBlock),
+}
+
+// TODO: Something for `enum_dispatch` would be good here, but I couldn't get it working...
+impl SegmentType<'_> {
+    /// The in-memory address of the first byte of the referenced region.
+    fn addr(&self) -> usize {
+        use std::ptr::from_ref;
+
+        // Several of the referenced types are unsized, so reduce everything to a thin byte
+        // pointer before converting to an integer.
+        let start: *const u8 = match *self {
+            SegmentType::Header(header) => from_ref(header).cast(),
+            SegmentType::Superblock(superblock) => from_ref(superblock).cast(),
+            SegmentType::CompactInode(inode) => from_ref(inode).cast(),
+            SegmentType::ExtendedInode(inode) => from_ref(inode).cast(),
+            SegmentType::XAttr(xattr) => from_ref(xattr).cast(),
+            SegmentType::DataBlock(bytes) => bytes.as_ptr(),
+            SegmentType::DirectoryBlock(block) => from_ref(block).cast(),
+        };
+        start as usize
+    }
+
+    /// The size in bytes of the referenced region, trailing unsized data included.
+    fn size(&self) -> usize {
+        match *self {
+            SegmentType::Header(header) => size_of_val(header),
+            SegmentType::Superblock(superblock) => size_of_val(superblock),
+            SegmentType::CompactInode(inode) => size_of_val(inode),
+            SegmentType::ExtendedInode(inode) => size_of_val(inode),
+            SegmentType::XAttr(xattr) => size_of_val(xattr),
+            SegmentType::DataBlock(bytes) => size_of_val(bytes),
+            SegmentType::DirectoryBlock(block) => size_of_val(block),
+        }
+    }
+}
+
+// NOTE(review): nothing else in this file names this type (`SegmentType::DataBlock` wraps a plain
+// `&[u8]`), so it may be a leftover -- confirm before relying on it.
+#[repr(C)]
+#[derive(TryFromBytes, KnownLayout, Immutable)]
+struct DataBlock([u8]);
+
+// Walks an image starting from its root inode and collects every segment it reaches.
+struct ImageVisitor<'img> {
+    // The image being walked
+    image: &'img Image<'img>,
+    // Every segment seen so far, keyed by its byte offset from the start of the image, together
+    // with the paths through which it was reached
+    visited: BTreeMap<usize, (SegmentType<'img>, Vec<Box<Path>>)>,
+}
+
+impl<'img> ImageVisitor<'img> {
+    /// Records `segment` (and, if given, the `path` through which it was reached) under its byte
+    /// offset from the start of the image.
+    ///
+    /// Returns `true` if a segment had already been recorded at that offset and `false` if this
+    /// is the first time it is seen.  Panics if the previously recorded segment differs in kind,
+    /// address or size.
+    fn note(&mut self, segment: SegmentType<'img>, path: Option<&Path>) -> bool {
+        use std::collections::btree_map::Entry;
+
+        let offset = segment.addr() - self.image.image.as_ptr() as usize;
+        let path = path.map(Box::<Path>::from);
+
+        match self.visited.entry(offset) {
+            Entry::Vacant(slot) => {
+                slot.insert((segment, path.into_iter().collect()));
+                false
+            }
+            Entry::Occupied(mut slot) => {
+                let (existing, paths) = slot.get_mut();
+                // TODO: figure out pointer value equality...
+                assert_eq!(discriminant(existing), discriminant(&segment));
+                assert_eq!(existing.addr(), segment.addr());
+                assert_eq!(existing.size(), segment.size());
+                paths.extend(path);
+                true
+            }
+        }
+    }
+
+    /// Visits the inode of every entry in `block` except "." and "..", extending `path` with the
+    /// entry name.
+    fn visit_directory_block(&mut self, block: &DirectoryBlock, path: &Path) {
+        // TODO: maybe we want to follow "." and ".." and let deduplication happen
+        let children = block
+            .entries()
+            .filter(|entry| entry.name != b"." && entry.name != b"..");
+
+        for child in children {
+            self.visit_inode(child.inode, &path.join(OsStr::from_bytes(child.name)));
+        }
+    }
+
+    /// Records inode `id` as reached via `path` and, the first time it is seen, also records its
+    /// shared xattrs and blocks, recursing into directory contents.
+    fn visit_inode(&mut self, id: u64, path: &Path) {
+        let image = self.image;
+        let inode = image.inode(id);
+        let segment = match inode {
+            InodeType::Compact(compact) => SegmentType::CompactInode(compact),
+            InodeType::Extended(extended) => SegmentType::ExtendedInode(extended),
+        };
+        if self.note(segment, Some(path)) {
+            // TODO: maybe we want to throw an error if we detect loops
+            /* already processed */
+            return;
+        }
+
+        if let Some(xattrs) = inode.xattrs() {
+            for id in xattrs.shared() {
+                self.note(SegmentType::XAttr(image.shared_xattr(id.get())), Some(path));
+            }
+        }
+
+        let is_directory = inode.mode() & format::S_IFMT == format::S_IFDIR;
+
+        if is_directory {
+            // Inline directory data is part of the inode segment: walk it, but don't note it.
+            let inline = inode.inline();
+            if !inline.is_empty() {
+                let inline_block = DirectoryBlock::try_ref_from_bytes(inline).unwrap();
+                self.visit_directory_block(inline_block, path);
+            }
+        }
+
+        for id in inode.blocks(image.blkszbits) {
+            if is_directory {
+                let block = image.directory_block(id);
+                self.visit_directory_block(block, path);
+                self.note(SegmentType::DirectoryBlock(block), Some(path));
+            } else {
+                self.note(SegmentType::DataBlock(image.data_block(id)), Some(path));
+            }
+        }
+    }
+
+    /// Walks `image` from its header, superblock and root inode, returning every segment found,
+    /// keyed by its offset in the image.
+    fn visit_image(
+        image: &'img Image<'img>,
+    ) -> BTreeMap<usize, (SegmentType<'img>, Vec<Box<Path>>)> {
+        let mut visitor = Self {
+            image,
+            visited: BTreeMap::new(),
+        };
+        visitor.note(SegmentType::Header(image.header), None);
+        visitor.note(SegmentType::Superblock(image.sb), None);
+
+        let root = u64::from(image.sb.root_nid.get());
+        visitor.visit_inode(root, &PathBuf::from("/"));
+
+        visitor.visited
+    }
+}
+
+/// Prints the path(s) under which a segment was reached: nothing for an empty list, a single
+/// `filename:` line for exactly one path, or a `links:` header followed by one line per path.
+pub fn print_paths(paths: &[Box<Path>]) {
+    if let [only] = paths {
+        println!("            filename: {only:?}");
+    } else if !paths.is_empty() {
+        println!("            links:");
+        for link in paths {
+            println!("               - {link:?}");
+        }
+    }
+}
+
+impl std::fmt::Debug for XAttr {
+    /// Formats as `(name_index name_len value_size) <prefix><suffix> = <value>`, followed by the
+    /// raw padding bytes if any of them is non-zero.
+    ///
+    /// Injective (ie: accounts for every byte in the input)
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // `name_index` comes straight from the image, so it may not name a known prefix.
+        // Indexing the table directly would panic in that case.  The numeric index is printed
+        // regardless, so substituting a placeholder loses no information.
+        let prefix = format::XATTR_PREFIXES
+            .get(usize::from(self.header.name_index))
+            .copied()
+            .and_then(|prefix| std::str::from_utf8(prefix).ok())
+            .unwrap_or("<unknown prefix>");
+
+        write!(
+            f,
+            "({} {} {}) {}{} = {}",
+            self.header.name_index,
+            self.header.name_len,
+            self.header.value_size,
+            prefix,
+            utf8_or_hex(self.suffix()),
+            utf8_or_hex(self.value()),
+        )?;
+        if self.padding().iter().any(|c| *c != 0) {
+            write!(f, " {:?}", self.padding())?;
+        }
+        Ok(())
+    }
+}
+
+// This accounts for every byte of InodeXAttrs
+//
+// Prints nothing for `None`.  Otherwise prints the shared xattr ids on one line (only if there are
+// any), then a "Local xattrs:" header followed by one line per inline xattr.
+fn print_xattrs(xattrs: Option<&InodeXAttrs>) {
+    if let Some(xattrs) = xattrs {
+        let shared = xattrs.shared();
+        if !shared.is_empty() {
+            print!("         Shared xattrs:");
+            shared.iter().for_each(|id| print!(" {id}"));
+            println!();
+        }
+
+        println!("         Local xattrs:");
+        for xattr in xattrs.local() {
+            println!("          - {xattr:?}");
+        }
+    }
+}
+
+/// Prints `block` as rows of 16 bytes: the row offset, the bytes in hex (with an extra space
+/// after each group of 8), then the same bytes as text between `|` marks.  Bytes that are not
+/// printable ASCII are shown as `.`, and a short final row is padded with spaces.
+fn hexdump(block: &[u8]) {
+    for (row, chunk) in block.chunks(16).enumerate() {
+        print!("   +{:04x}  ", row * 16);
+
+        for column in 0..16 {
+            match chunk.get(column) {
+                Some(byte) => print!("{byte:02x} "),
+                None => print!("   "),
+            }
+            if column % 8 == 7 {
+                print!(" ");
+            }
+        }
+
+        print!("|");
+        for column in 0..16 {
+            match chunk.get(column) {
+                Some(&c) if c.is_ascii() && !c.is_ascii_control() => print!("{}", c as char),
+                Some(_) => print!("."),
+                None => print!(" "),
+            }
+        }
+        println!("|");
+    }
+}
+
+/// Prints one line per entry of `block`: the entry name, its file type and the inode it refers to.
+pub fn print_directory_block(block: &DirectoryBlock) {
+    block.entries().for_each(|entry| {
+        let name = utf8_or_hex(entry.name);
+        println!(
+            "             {} {:?} -> {}",
+            name, entry.file_type, entry.inode
+        );
+    });
+}
+
+/// Prints what follows an inode's header: its xattrs, then any inline data, as directory entries
+/// for a directory or as a hexdump for anything else.
+fn print_inode_extra(inode: impl InodeOps + InodeHeader) {
+    print_xattrs(inode.xattrs());
+
+    let inline = inode.inline();
+    if inline.is_empty() {
+        return;
+    }
+
+    if inode.mode() & format::S_IFMT == format::S_IFDIR {
+        print_directory_block(DirectoryBlock::try_ref_from_bytes(inline).unwrap());
+    } else {
+        hexdump(inline);
+    }
+}
+
+/// Prints a segment-by-segment dump of the erofs image in `data` to stdout.
+///
+/// The composefs header, the superblock and every segment reachable from the root inode are
+/// printed in order of their offset in the image.  Bytes between segments are reported as
+/// "Padding", and overlapping segments are flagged.
+///
+/// Panics if the image cannot be parsed (the reader unwraps on malformed input).
+pub fn debug_img(data: &[u8]) {
+    let image = Image::open(data);
+    let visited = ImageVisitor::visit_image(&image);
+
+    // `offset` tracks the end of the previously printed segment.
+    let mut offset = 0;
+    for (start, (segment, paths)) in visited {
+        if offset > start {
+            println!("*** Overlapping segments!");
+            offset = start;
+        }
+        if offset < start {
+            println!("{offset:08x} Padding");
+            let padding = &data[offset..start];
+            if padding.iter().all(|c| *c == 0) {
+                println!("         {} * nul", padding.len());
+            } else {
+                println!("         {:?}", padding);
+            }
+            println!();
+            offset = start;
+        }
+
+        match segment {
+            SegmentType::Header(header) => {
+                println!("{offset:08x} ComposefsHeader");
+                print_fields!(
+                    ComposefsHeader, header,
+                    magic; flags; version; composefs_version; unused
+                );
+            }
+            SegmentType::Superblock(sb) => {
+                println!("{offset:08x} Superblock");
+                print_fields!(
+                    Superblock, sb,
+                    magic; checksum; feature_compat; blkszbits; extslots; root_nid; inos; build_time;
+                    build_time_nsec; blocks; meta_blkaddr; xattr_blkaddr; uuid; volume_name;
+                    feature_incompat; available_compr_algs; extra_devices; devt_slotoff; dirblkbits;
+                    xattr_prefix_count; xattr_prefix_start; packed_nid; xattr_filter_reserved; reserved2
+                );
+            }
+            SegmentType::CompactInode(inode) => {
+                println!("{offset:08x} Inode (compact) #{}", offset / 32); // TODO: doesn't take metablk into account
+                print_paths(&paths);
+                // Each field is listed exactly once: a repeated name would be printed twice.
+                print_fields!(
+                    CompactInodeHeader, inode.header,
+                    format; xattr_icount; mode; reserved; size; u; ino; uid; gid; nlink; reserved2
+                );
+                print_inode_extra(inode);
+            }
+            SegmentType::ExtendedInode(inode) => {
+                println!("{offset:08x} Inode (extended) #{}", offset / 32); // TODO: doesn't take metablk into account
+                print_paths(&paths);
+                print_fields!(
+                    ExtendedInodeHeader, inode.header,
+                    format; xattr_icount; mode; reserved; size; u; ino; uid; gid; mtime; mtime_nsec; nlink;
+                    reserved2
+                );
+                print_inode_extra(inode);
+            }
+            SegmentType::XAttr(xattr) => {
+                println!("{offset:08x} XAttr #{}", offset / 4); // TODO: doesn't take xattrblk into account
+                print_paths(&paths);
+                println!("            {:?}", xattr);
+            }
+            SegmentType::DirectoryBlock(block) => {
+                println!("{offset:08x} Directory block");
+                print_paths(&paths);
+                print_directory_block(block);
+            }
+            SegmentType::DataBlock(block) => {
+                println!("{offset:08x} Data block");
+                print_paths(&paths);
+                hexdump(block);
+            }
+        }
+        println!();
+
+        offset = start + segment.size();
+    }
+
+    // NOTE(review): unlike padding between segments, an all-zero tail prints only the "Padding"
+    // line, without its length -- confirm whether that is intended.
+    if offset < data.len() {
+        println!("{offset:08x} Padding");
+        let padding = &data[offset..data.len()];
+        if padding.iter().any(|c| *c != 0) {
+            println!("         {:?}", padding);
+        }
+        println!();
+    }
+
+    if offset > data.len() {
+        println!("*** Segments past EOF!");
+    }
+}
diff --git a/src/erofs/format.rs b/src/erofs/format.rs
new file mode 100644
index 0000000..9927ca5
--- /dev/null
+++ b/src/erofs/format.rs
@@ -0,0 +1,279 @@
+use zerocopy::{
+    little_endian::{U16, U32, U64},
+    Immutable, IntoBytes, KnownLayout, TryFromBytes,
+};
+
+/// Errors produced when decoding on-disk fields.
+#[derive(Debug)]
+pub enum FormatError {
+    /// The data layout bits of a `FormatField` do not match any `DataLayout` variant
+    InvalidDataLayout,
+}
+
+/// log2 of `BLOCK_SIZE`
+pub const BLOCK_BITS: u8 = 12;
+/// Block size in bytes (`1 << BLOCK_BITS`, ie: 4096)
+pub const BLOCK_SIZE: usize = 1 << BLOCK_BITS;
+
+/* composefs Header */
+
+// Presumably the values expected in `ComposefsHeader::composefs_version` and
+// `ComposefsHeader::magic` respectively -- confirm against the writer.
+pub const COMPOSEFS_VERSION: U32 = U32::new(1);
+pub const COMPOSEFS_MAGIC: U32 = U32::new(0xd078629a);
+
+/// Flag bits, stored as a `u32`.
+// NOTE(review): presumably these are the bits of `ComposefsHeader::flags` -- confirm.
+#[derive(Debug, Immutable, IntoBytes, TryFromBytes)]
+#[repr(u32)]
+pub enum ComposefsFlags {
+    /// Bit 0
+    HasAcl = 1 << 0,
+}
+
+/// The 32-byte composefs header.  `reader::Image::open` reads it from the very start of the image.
+///
+/// All fields are little-endian.
+#[derive(Debug, Default, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct ComposefsHeader {
+    pub magic: U32,
+    pub version: U32,
+    pub flags: U32,
+    pub composefs_version: U32,
+    pub unused: [U32; 4],
+}
+
+/* Superblock */
+
+// Presumably the expected `Superblock::magic`, followed by bit flags for
+// `Superblock::feature_compat` -- confirm against `erofs_fs.h`.
+pub const MAGIC_V1: U32 = U32::new(0xE0F5E1E2);
+pub const FEATURE_COMPAT_MTIME: U32 = U32::new(2);
+pub const FEATURE_COMPAT_XATTR_FILTER: U32 = U32::new(4);
+
+/// The 128-byte erofs superblock.  `reader::Image::open` reads it from byte offset 1024 of the
+/// image.
+///
+/// Multi-byte fields are little-endian.  The field order is the on-disk layout (`#[repr(C)]`), so
+/// it must not be rearranged.
+#[derive(Debug, Default, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct Superblock {
+    // vertical whitespace every 16 bytes (hexdump-friendly)
+    pub magic: U32,
+    pub checksum: U32,
+    pub feature_compat: U32,
+    pub blkszbits: u8, // log2 of the block size (the reader uses `1 << blkszbits`)
+    pub extslots: u8,
+    pub root_nid: U16, // inode id of the root directory
+
+    pub inos: U64,
+    pub build_time: U64,
+
+    pub build_time_nsec: U32,
+    pub blocks: U32,
+    pub meta_blkaddr: U32, // block number at which the inode area starts
+    pub xattr_blkaddr: U32, // block number at which the shared xattr area starts
+
+    pub uuid: [u8; 16],
+
+    pub volume_name: [u8; 16],
+
+    pub feature_incompat: U32,
+    pub available_compr_algs: U16,
+    pub extra_devices: U16,
+    pub devt_slotoff: U16,
+    pub dirblkbits: u8,
+    pub xattr_prefix_count: u8,
+    pub xattr_prefix_start: U32,
+
+    pub packed_nid: U64,
+    pub xattr_filter_reserved: u8,
+    pub reserved2: [u8; 23],
+}
+
+/* Inodes */
+
+/// The 32-byte header of a compact inode.
+///
+/// Multi-byte fields are little-endian.  The field order is the on-disk layout (`#[repr(C)]`).
+/// Blank lines separate groups of 8 bytes.
+#[derive(Debug, Default, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct CompactInodeHeader {
+    pub format: FormatField,
+    pub xattr_icount: U16,
+    pub mode: U16,
+    pub nlink: U16,
+
+    pub size: U32,
+    pub reserved: U32,
+
+    pub u: U32, // the reader uses this as the first block number of the inode's data
+    pub ino: U32, // only used for 32-bit stat compatibility
+
+    pub uid: U16,
+    pub gid: U16,
+    pub reserved2: [u8; 4],
+}
+
+/// The 64-byte header of an extended inode.
+///
+/// Multi-byte fields are little-endian.  The field order is the on-disk layout (`#[repr(C)]`).
+/// Blank lines separate groups of 16 bytes.
+#[derive(Debug, Default, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct ExtendedInodeHeader {
+    pub format: FormatField,
+    pub xattr_icount: U16,
+    pub mode: U16,
+    pub reserved: U16,
+    pub size: U64,
+
+    pub u: U32, // the reader uses this as the first block number of the inode's data
+    pub ino: U32, // only used for 32-bit stat compatibility
+    pub uid: U32,
+    pub gid: U32,
+
+    pub mtime: U64,
+
+    pub mtime_nsec: U32,
+    pub nlink: U32,
+
+    pub reserved2: [u8; 16],
+}
+
+/// The 12-byte header at the start of an inode's xattr area.
+#[derive(Debug, Default, Immutable, KnownLayout, IntoBytes, TryFromBytes)]
+#[repr(C)]
+pub struct InodeXAttrHeader {
+    pub name_filter: U32,
+    /// Number of 4-byte shared xattr ids that directly follow this header
+    pub shared_count: u8,
+    pub reserved: [u8; 7],
+}
+
+/// The 16-bit `format` field at the start of every inode header.
+///
+/// Bit 0 selects the `InodeLayout` and bits 1-3 select the `DataLayout`.
+#[derive(Clone, Copy, Immutable, KnownLayout, IntoBytes, PartialEq, TryFromBytes)]
+pub struct FormatField(U16);
+
+impl Default for FormatField {
+    /// Returns a field with all 16 bits set.
+    // NOTE(review): presumably chosen so that a real value (even 0) never equals the default and
+    // is therefore always shown by `print_fields!` -- confirm.
+    fn default() -> Self {
+        Self(U16::new(0xffff))
+    }
+}
+
+impl std::fmt::Debug for FormatField {
+    /// Formats as `<raw value> = <inode layout> | <data layout, or the decoding error>`.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let raw = self.0.get();
+        let inode_layout = InodeLayout::from(*self);
+        let data_layout = DataLayout::try_from(*self);
+        write!(f, "{raw} = {inode_layout:?} | {data_layout:?}")
+    }
+}
+
+// Bit 0 of the format field selects the inode header layout.
+const INODE_LAYOUT_MASK: u16 = 0b00000001;
+const INODE_LAYOUT_COMPACT: u16 = 0;
+const INODE_LAYOUT_EXTENDED: u16 = 1;
+
+/// Which of the two inode header layouts an inode uses.
+#[derive(Debug)]
+#[repr(u16)]
+pub enum InodeLayout {
+    /// `CompactInodeHeader`
+    Compact = INODE_LAYOUT_COMPACT,
+    /// `ExtendedInodeHeader`
+    Extended = INODE_LAYOUT_EXTENDED,
+}
+
+impl From<FormatField> for InodeLayout {
+    /// Decodes the inode layout from bit 0 of `value`.  Cannot fail: both values of the bit are
+    /// valid layouts.
+    fn from(value: FormatField) -> Self {
+        let layout_bits = value.0.get() & INODE_LAYOUT_MASK;
+        match layout_bits {
+            INODE_LAYOUT_EXTENDED => InodeLayout::Extended,
+            INODE_LAYOUT_COMPACT => InodeLayout::Compact,
+            _ => unreachable!(),
+        }
+    }
+}
+
+// Bits 1-3 of the format field select the data layout.
+const INODE_DATALAYOUT_MASK: u16 = 0b00001110;
+const INODE_DATALAYOUT_FLAT_PLAIN: u16 = 0;
+const INODE_DATALAYOUT_FLAT_INLINE: u16 = 4;
+const INODE_DATALAYOUT_CHUNK_BASED: u16 = 8;
+
+/// How the data of an inode is stored.  The discriminants are the (unshifted) masked bits of the
+/// format field.
+#[derive(Debug)]
+#[repr(u16)]
+pub enum DataLayout {
+    FlatPlain = INODE_DATALAYOUT_FLAT_PLAIN,
+    FlatInline = INODE_DATALAYOUT_FLAT_INLINE,
+    ChunkBased = INODE_DATALAYOUT_CHUNK_BASED,
+}
+
+impl TryFrom<FormatField> for DataLayout {
+    type Error = FormatError;
+
+    /// Decodes the data layout from bits 1-3 of `value`, failing with
+    /// `FormatError::InvalidDataLayout` for any bit pattern that is not a known layout.
+    fn try_from(value: FormatField) -> Result<Self, FormatError> {
+        let layout_bits = value.0.get() & INODE_DATALAYOUT_MASK;
+        let layout = match layout_bits {
+            INODE_DATALAYOUT_FLAT_PLAIN => DataLayout::FlatPlain,
+            INODE_DATALAYOUT_FLAT_INLINE => DataLayout::FlatInline,
+            INODE_DATALAYOUT_CHUNK_BASED => DataLayout::ChunkBased,
+            _ => return Err(FormatError::InvalidDataLayout),
+        };
+        Ok(layout)
+    }
+}
+
+impl From<(InodeLayout, DataLayout)> for FormatField {
+    /// Encodes an inode layout and a data layout into a format field by OR-ing their bits.
+    fn from(value: (InodeLayout, DataLayout)) -> FormatField {
+        let (inode_layout, data_layout) = value;
+
+        let inode_bits = match inode_layout {
+            InodeLayout::Compact => INODE_LAYOUT_COMPACT,
+            InodeLayout::Extended => INODE_LAYOUT_EXTENDED,
+        };
+        let data_bits = match data_layout {
+            DataLayout::FlatPlain => INODE_DATALAYOUT_FLAT_PLAIN,
+            DataLayout::FlatInline => INODE_DATALAYOUT_FLAT_INLINE,
+            DataLayout::ChunkBased => INODE_DATALAYOUT_CHUNK_BASED,
+        };
+
+        FormatField(U16::new(inode_bits | data_bits))
+    }
+}
+
+/* Extended attributes */
+// NOTE(review): presumably the seed for the hash behind `InodeXAttrHeader::name_filter`; it is
+// not used anywhere in this file -- confirm against the writer.
+pub const XATTR_FILTER_SEED: u32 = 0x25BBE08F;
+
+/// The 4-byte header in front of every xattr (both shared and inline ones).
+#[derive(Debug, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct XAttrHeader {
+    /// Length in bytes of the name suffix that follows the header
+    pub name_len: u8,
+    /// Index into `XATTR_PREFIXES` of the prefix that the suffix is appended to
+    pub name_index: u8,
+    /// Length in bytes of the value that follows the suffix
+    pub value_size: U16,
+}
+
+/// The xattr name prefixes, indexed by `XAttrHeader::name_index`.  Index 0 is the empty prefix.
+pub const XATTR_PREFIXES: [&[u8]; 7] = [
+    b"",
+    b"user.",
+    b"system.posix_acl_access",
+    b"system.posix_acl_default",
+    b"trusted.",
+    b"lustre.",
+    b"security.",
+];
+
+/* Directories */
+
+/// The file type byte stored in a directory entry (`DirectoryEntryHeader::file_type`).
+///
+/// The variants take the values 0 through 7 in declaration order (`#[repr(u8)]`), so their order
+/// is part of the on-disk format.
+#[derive(Clone, Copy, Debug, Default, Immutable, IntoBytes, TryFromBytes)]
+#[repr(u8)]
+pub enum FileType {
+    #[default]
+    Unknown,
+    RegularFile,
+    Directory,
+    CharacterDevice,
+    BlockDevice,
+    Fifo,
+    Socket,
+    Symlink,
+}
+// File type bits of an inode's `mode`: `mode & S_IFMT` equals exactly one of the `S_IF*` values
+// below it.
+pub const S_IFMT: u16 = 0o170000;
+pub const S_IFREG: u16 = 0o100000;
+pub const S_IFCHR: u16 = 0o020000;
+pub const S_IFDIR: u16 = 0o040000;
+pub const S_IFBLK: u16 = 0o060000;
+pub const S_IFIFO: u16 = 0o010000;
+pub const S_IFLNK: u16 = 0o120000;
+pub const S_IFSOCK: u16 = 0o140000;
+
+impl FileType {
+    /// Returns the `S_IF*` mode bits that correspond to this file type.
+    ///
+    /// Panics for `FileType::Unknown`, which has no corresponding mode bits.
+    pub fn to_ifmt(&self) -> u16 {
+        match *self {
+            Self::Unknown => unreachable!(),
+            Self::RegularFile => S_IFREG,
+            Self::Directory => S_IFDIR,
+            Self::CharacterDevice => S_IFCHR,
+            Self::BlockDevice => S_IFBLK,
+            Self::Fifo => S_IFIFO,
+            Self::Socket => S_IFSOCK,
+            Self::Symlink => S_IFLNK,
+        }
+    }
+}
+
+/// The 12-byte header of one directory entry.  A directory block starts with an array of these,
+/// followed by the entry names.
+#[derive(Debug, Default, Immutable, IntoBytes, KnownLayout, TryFromBytes)]
+#[repr(C)]
+pub struct DirectoryEntryHeader {
+    /// The inode that the entry refers to
+    pub inode_offset: U64,
+    /// Byte offset of the entry's name from the start of the directory block
+    pub name_offset: U16,
+    pub file_type: FileType, // TODO: change to u8 for trivial transmute?
+    pub reserved: u8,
+}
diff --git a/src/erofs/mod.rs b/src/erofs/mod.rs
new file mode 100644
index 0000000..8c0cc51
--- /dev/null
+++ b/src/erofs/mod.rs
@@ -0,0 +1,3 @@
+pub mod debug;
+pub mod format;
+pub mod reader;
diff --git a/src/erofs/reader.rs b/src/erofs/reader.rs
new file mode 100644
index 0000000..942e0e7
--- /dev/null
+++ b/src/erofs/reader.rs
@@ -0,0 +1,460 @@
+use core::mem::size_of;
+use std::ops::Range;
+
+use zerocopy::{little_endian::U32, Immutable, KnownLayout, TryFromBytes};
+
+use super::format::{
+    CompactInodeHeader, ComposefsHeader, DataLayout, DirectoryEntryHeader, ExtendedInodeHeader,
+    FileType, InodeXAttrHeader, Superblock, XAttrHeader,
+};
+
+/// Rounds `n` up to the nearest multiple of `to`.
+///
+/// Works for any non-zero `to`, not only for powers of two.  Panics if `to` is zero.
+fn round_up(n: usize, to: usize) -> usize {
+    n.next_multiple_of(to)
+}
+
+/// Accessors shared by the compact and extended inode headers, plus the size calculations that
+/// only depend on them.
+pub trait InodeHeader {
+    /// The data layout encoded in the header's format field
+    fn data_layout(&self) -> DataLayout;
+    /// The raw `xattr_icount` field
+    fn xattr_icount(&self) -> u16;
+    /// The raw `mode` field
+    fn mode(&self) -> u16;
+    /// The `size` field, widened to 64 bits
+    fn size(&self) -> u64;
+    /// The raw `u` field
+    fn u(&self) -> u32;
+
+    /// The number of bytes that follow the header as part of the same inode: the xattr area plus
+    /// whatever the data layout stores inline.
+    fn additional_bytes(&self, blkszbits: u8) -> usize {
+        let block_size: usize = 1 << blkszbits;
+        let inline_bytes = match self.data_layout() {
+            DataLayout::FlatPlain => 0,
+            // Only the part of the data that doesn't fill a whole block is inline
+            DataLayout::FlatInline => self.size() as usize % block_size,
+            DataLayout::ChunkBased => 4,
+        };
+        self.xattr_size() + inline_bytes
+    }
+
+    /// The size in bytes of the inode's xattr area: 0 if `xattr_icount` is 0, otherwise 12 bytes
+    /// plus 4 bytes for every count above one.
+    fn xattr_size(&self) -> usize {
+        match usize::from(self.xattr_icount()) {
+            0 => 0,
+            count => 12 + 4 * (count - 1),
+        }
+    }
+}
+
+// Plain field accessors, converting from the little-endian on-disk types.
+impl InodeHeader for ExtendedInodeHeader {
+    /// Panics if the format field holds an unknown data layout.
+    fn data_layout(&self) -> DataLayout {
+        DataLayout::try_from(self.format).unwrap()
+    }
+
+    fn xattr_icount(&self) -> u16 {
+        self.xattr_icount.get()
+    }
+
+    fn mode(&self) -> u16 {
+        self.mode.get()
+    }
+
+    fn size(&self) -> u64 {
+        self.size.get()
+    }
+
+    fn u(&self) -> u32 {
+        self.u.get()
+    }
+}
+
+// Plain field accessors, converting from the little-endian on-disk types.
+impl InodeHeader for CompactInodeHeader {
+    /// Panics if the format field holds an unknown data layout.
+    fn data_layout(&self) -> DataLayout {
+        DataLayout::try_from(self.format).unwrap()
+    }
+
+    fn xattr_icount(&self) -> u16 {
+        self.xattr_icount.get()
+    }
+
+    fn mode(&self) -> u16 {
+        self.mode.get()
+    }
+
+    /// The compact header only stores a 32-bit size, which is widened here.
+    fn size(&self) -> u64 {
+        u64::from(self.size.get())
+    }
+
+    fn u(&self) -> u32 {
+        self.u.get()
+    }
+}
+
+/// One xattr as stored in the image: a header followed by the name suffix, the value and padding.
+#[repr(C)]
+#[derive(TryFromBytes, KnownLayout, Immutable)]
+pub struct XAttr {
+    pub header: XAttrHeader,
+    /// Name suffix, then value, then padding (see `suffix()`, `value()` and `padding()`)
+    pub data: [u8],
+}
+
+/// One inode as stored in the image: a compact or extended header followed by its trailing data.
+#[repr(C)]
+#[derive(Debug, TryFromBytes, KnownLayout, Immutable)]
+pub struct Inode<Header: InodeHeader> {
+    pub header: Header,
+    /// The xattr area (if any) followed by the inline data (see `InodeOps`)
+    pub data: [u8],
+}
+
+/// The xattr area of an inode: a header followed by shared xattr ids and inline xattrs.
+#[repr(C)]
+#[derive(Debug, TryFromBytes, KnownLayout, Immutable)]
+pub struct InodeXAttrs {
+    pub header: InodeXAttrHeader,
+    /// `header.shared_count` 4-byte ids, then the inline xattrs (see `shared()` and `local()`)
+    pub data: [u8],
+}
+
+impl XAttrHeader {
+    /// The number of bytes that follow this header as part of the same xattr: the name suffix
+    /// plus the value, rounded up to a multiple of 4.
+    pub fn calculate_n_elems(&self) -> usize {
+        let payload = usize::from(self.name_len) + usize::from(self.value_size.get());
+        round_up(payload, 4)
+    }
+}
+
+impl XAttr {
+    /// Splits the xattr at the start of `data` from the bytes that follow it.
+    ///
+    /// Panics if `data` is too short to hold the header or the xattr it describes.
+    pub fn from_prefix(data: &[u8]) -> (&XAttr, &[u8]) {
+        let n_elems = XAttrHeader::try_ref_from_bytes(&data[..4])
+            .unwrap()
+            .calculate_n_elems();
+        Self::try_ref_from_prefix_with_elems(data, n_elems).unwrap()
+    }
+
+    /// The part of the name that follows the prefix selected by `header.name_index`
+    pub fn suffix(&self) -> &[u8] {
+        let name_len = usize::from(self.header.name_len);
+        &self.data[..name_len]
+    }
+
+    /// The value of the xattr
+    pub fn value(&self) -> &[u8] {
+        let name_len = usize::from(self.header.name_len);
+        let value_size = usize::from(self.header.value_size.get());
+        &self.data[name_len..][..value_size]
+    }
+
+    /// Whatever follows the value (the bytes added by rounding the size up to a multiple of 4)
+    pub fn padding(&self) -> &[u8] {
+        let name_len = usize::from(self.header.name_len);
+        let value_size = usize::from(self.header.value_size.get());
+        &self.data[name_len + value_size..]
+    }
+}
+
+/// Access to the parts of an inode that live outside its fixed-size header.
+pub trait InodeOps {
+    /// The inode's xattr area, or `None` if it has none
+    fn xattrs(&self) -> Option<&InodeXAttrs>;
+    /// The data stored inside the inode itself, after the xattr area (possibly empty)
+    fn inline(&self) -> &[u8];
+    /// The range of block numbers that hold the inode's data outside of the inode
+    fn blocks(&self, blkszbits: u8) -> Range<u64>;
+}
+
+// Forwards every accessor to the inode's header, so that a reference to a whole inode can be used
+// wherever an `InodeHeader` is expected.
+impl<Header: InodeHeader> InodeHeader for &Inode<Header> {
+    fn data_layout(&self) -> DataLayout {
+        self.header.data_layout()
+    }
+
+    fn xattr_icount(&self) -> u16 {
+        self.header.xattr_icount()
+    }
+
+    fn mode(&self) -> u16 {
+        self.header.mode()
+    }
+
+    fn size(&self) -> u64 {
+        self.header.size()
+    }
+
+    fn u(&self) -> u32 {
+        self.header.u()
+    }
+}
+
+impl<Header: InodeHeader> InodeOps for &Inode<Header> {
+    /// The first `xattr_size()` bytes of the trailing data, or `None` if that size is 0.
+    fn xattrs(&self) -> Option<&InodeXAttrs> {
+        let len = self.header.xattr_size();
+        (len != 0).then(|| InodeXAttrs::try_ref_from_bytes(&self.data[..len]).unwrap())
+    }
+
+    /// Everything in the trailing data after the xattr area.
+    fn inline(&self) -> &[u8] {
+        let xattr_len = self.header.xattr_size();
+        &self.data[xattr_len..]
+    }
+
+    /// The block numbers, starting at the header's `u` field, that hold this inode's data.
+    fn blocks(&self, blkszbits: u8) -> Range<u64> {
+        let block_size: u64 = 1 << blkszbits;
+        let start = u64::from(self.header.u());
+
+        let n_blocks = match self.header.data_layout() {
+            // Everything is in blocks, including a partial last block
+            DataLayout::FlatPlain => self.header.size().div_ceil(block_size),
+            // Only full blocks are in blocks: the remainder is stored inline
+            DataLayout::FlatInline => self.header.size() / block_size,
+            DataLayout::ChunkBased => 0,
+        };
+
+        start..start + n_blocks
+    }
+}
+
+// this lets us avoid returning Box<dyn InodeOp> from Image.inode()
+// but ... wow.
+/// A reference to an inode of either header layout.  Implements `InodeHeader` and `InodeOps` by
+/// dispatching to the inode it holds.
+#[derive(Debug)]
+pub enum InodeType<'img> {
+    Compact(&'img Inode<CompactInodeHeader>),
+    Extended(&'img Inode<ExtendedInodeHeader>),
+}
+
+// Every method dispatches to the inode held by the variant (methods are listed in the same order
+// as in the trait).
+impl InodeHeader for InodeType<'_> {
+    fn data_layout(&self) -> DataLayout {
+        match self {
+            Self::Compact(compact) => compact.data_layout(),
+            Self::Extended(extended) => extended.data_layout(),
+        }
+    }
+
+    fn xattr_icount(&self) -> u16 {
+        match self {
+            Self::Compact(compact) => compact.xattr_icount(),
+            Self::Extended(extended) => extended.xattr_icount(),
+        }
+    }
+
+    fn mode(&self) -> u16 {
+        match self {
+            Self::Compact(compact) => compact.mode(),
+            Self::Extended(extended) => extended.mode(),
+        }
+    }
+
+    fn size(&self) -> u64 {
+        match self {
+            Self::Compact(compact) => compact.size(),
+            Self::Extended(extended) => extended.size(),
+        }
+    }
+
+    fn u(&self) -> u32 {
+        match self {
+            Self::Compact(compact) => compact.u(),
+            Self::Extended(extended) => extended.u(),
+        }
+    }
+}
+
+// Every method dispatches to the inode held by the variant.
+impl InodeOps for InodeType<'_> {
+    fn xattrs(&self) -> Option<&InodeXAttrs> {
+        match self {
+            Self::Compact(inode) => inode.xattrs(),
+            Self::Extended(inode) => inode.xattrs(),
+        }
+    }
+
+    fn inline(&self) -> &[u8] {
+        match self {
+            Self::Compact(inode) => inode.inline(),
+            Self::Extended(inode) => inode.inline(),
+        }
+    }
+
+    fn blocks(&self, blkszbits: u8) -> Range<u64> {
+        match self {
+            Self::Compact(inode) => inode.blocks(blkszbits),
+            Self::Extended(inode) => inode.blocks(blkszbits),
+        }
+    }
+}
+
+/// A parsed view of an in-memory erofs image.  Everything is borrowed from the image bytes.
+#[derive(Debug)]
+pub struct Image<'i> {
+    /// The bytes of the entire image
+    pub image: &'i [u8],
+    /// The composefs header at the start of the image
+    pub header: &'i ComposefsHeader,
+    /// Copy of `sb.blkszbits`
+    pub blkszbits: u8,
+    /// `1 << blkszbits`
+    pub block_size: usize,
+    /// The superblock, at byte offset 1024
+    pub sb: &'i Superblock,
+    /// The image from block `sb.meta_blkaddr` onwards: inode ids index into this
+    pub inodes: &'i [u8],
+    /// The image from block `sb.xattr_blkaddr` onwards: shared xattr ids index into this
+    pub xattrs: &'i [u8],
+}
+
+impl<'img> Image<'img> {
+    /// Parses the header and superblock of `image` and locates the inode and shared xattr areas.
+    ///
+    /// Panics if the image is too short or the header or superblock cannot be parsed.
+    pub fn open(image: &'img [u8]) -> Self {
+        let (header, _) = ComposefsHeader::try_ref_from_prefix(image).expect("header err");
+        let (sb, _) = Superblock::try_ref_from_prefix(&image[1024..]).expect("superblock err");
+
+        let blkszbits = sb.blkszbits;
+        let block_size = 1usize << blkszbits;
+        assert!(block_size != 0);
+
+        let inodes = &image[sb.meta_blkaddr.get() as usize * block_size..];
+        let xattrs = &image[sb.xattr_blkaddr.get() as usize * block_size..];
+
+        Image {
+            image,
+            header,
+            blkszbits,
+            block_size,
+            sb,
+            inodes,
+            xattrs,
+        }
+    }
+
+    /// Returns the inode with the given id, which is found `id * 32` bytes into the inode area.
+    ///
+    /// The low bit of the first byte selects between the extended (64-byte) and compact (32-byte)
+    /// header.  Panics if the inode doesn't fit in the image or cannot be parsed.
+    pub fn inode(&self, id: u64) -> InodeType {
+        let inode_data = &self.inodes[id as usize * 32..];
+        let is_extended = inode_data[0] & 1 != 0;
+
+        if is_extended {
+            let header = ExtendedInodeHeader::try_ref_from_bytes(&inode_data[..64]).unwrap();
+            let extra = header.additional_bytes(self.blkszbits);
+            let (inode, _) =
+                Inode::<ExtendedInodeHeader>::try_ref_from_prefix_with_elems(inode_data, extra)
+                    .unwrap();
+            InodeType::Extended(inode)
+        } else {
+            let header = CompactInodeHeader::try_ref_from_bytes(&inode_data[..32]).unwrap();
+            let extra = header.additional_bytes(self.blkszbits);
+            let (inode, _) =
+                Inode::<CompactInodeHeader>::try_ref_from_prefix_with_elems(inode_data, extra)
+                    .unwrap();
+            InodeType::Compact(inode)
+        }
+    }
+
+    /// Returns the shared xattr with the given id, which is found `id * 4` bytes into the shared
+    /// xattr area.  Panics if it doesn't fit in the image.
+    pub fn shared_xattr(&self, id: u32) -> &XAttr {
+        let xattr_data = &self.xattrs[id as usize * 4..];
+        let n_elems = XAttrHeader::try_ref_from_bytes(&xattr_data[..4])
+            .unwrap()
+            .calculate_n_elems();
+        let (xattr, _) = XAttr::try_ref_from_prefix_with_elems(xattr_data, n_elems).unwrap();
+        xattr
+    }
+
+    /// Returns block number `id` of the image (counted from the start of the image).  Panics if
+    /// the block isn't entirely inside the image.
+    pub fn data_block(&self, id: u64) -> &[u8] {
+        let start = id as usize * self.block_size;
+        &self.image[start..][..self.block_size]
+    }
+
+    /// Like `data_block()`, but returns the block as a `DirectoryBlock`.
+    pub fn directory_block(&self, id: u64) -> &DirectoryBlock {
+        let bytes = self.data_block(id);
+        DirectoryBlock::try_ref_from_bytes(bytes).unwrap()
+    }
+
+    /// Returns the root inode (`sb.root_nid`).
+    pub fn root(&self) -> InodeType {
+        let root_nid = u64::from(self.sb.root_nid.get());
+        self.inode(root_nid)
+    }
+}
+
+impl InodeXAttrs {
+    /// The ids of the shared xattrs of this inode: the first `header.shared_count` 4-byte values
+    /// after the header.
+    pub fn shared(&self) -> &[U32] {
+        // TODO: there must be an easier way...
+        #[derive(TryFromBytes, KnownLayout, Immutable)]
+        #[repr(C)]
+        struct U32Array([U32]);
+
+        let count = usize::from(self.header.shared_count);
+        let (ids, _rest) = U32Array::try_ref_from_prefix_with_elems(&self.data, count).unwrap();
+        &ids.0
+    }
+
+    /// An iterator over the xattrs stored inline, which follow the shared xattr ids.
+    pub fn local(&self) -> XAttrIter {
+        let shared_len = usize::from(self.header.shared_count) * 4;
+        XAttrIter {
+            data: &self.data[shared_len..],
+        }
+    }
+}
+
+/// Iterator over a sequence of xattrs stored back to back (see `InodeXAttrs::local()`).
+#[derive(Debug)]
+pub struct XAttrIter<'img> {
+    // The bytes that have not been consumed yet
+    data: &'img [u8],
+}
+
+impl<'img> Iterator for XAttrIter<'img> {
+    type Item = &'img XAttr;
+
+    /// Splits the next xattr off the front of the remaining bytes, ending once they are used up.
+    /// Panics if the remaining bytes don't hold a complete xattr.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.data.is_empty() {
+            return None;
+        }
+
+        let (xattr, rest) = XAttr::from_prefix(self.data);
+        self.data = rest;
+        Some(xattr)
+    }
+}
+
+/// The raw bytes of one block of directory data (or of a directory's inline data): an array of
+/// `DirectoryEntryHeader` followed by the names that the headers point at.
+#[repr(C)]
+#[derive(Debug, Immutable, KnownLayout, TryFromBytes)]
+pub struct DirectoryBlock {
+    pub data: [u8],
+}
+
+impl DirectoryBlock {
+    /// Returns the `n`th entry header of the block.  Panics if it lies outside the block.
+    pub fn get_entry_header(&self, n: usize) -> &DirectoryEntryHeader {
+        const ENTRY_SIZE: usize = size_of::<DirectoryEntryHeader>();
+
+        let start = n * ENTRY_SIZE;
+        let end = (n + 1) * ENTRY_SIZE;
+        DirectoryEntryHeader::try_ref_from_bytes(&self.data[start..end]).unwrap()
+    }
+
+    /// Returns all of the entry headers of the block as a slice.
+    pub fn get_entry_headers(&self) -> &[DirectoryEntryHeader] {
+        // TODO: there must be an easier way...
+        #[derive(TryFromBytes, KnownLayout, Immutable)]
+        #[repr(C)]
+        struct EntryArray([DirectoryEntryHeader]);
+
+        let count = self.n_entries();
+        let (headers, _rest) = EntryArray::try_ref_from_prefix_with_elems(&self.data, count).unwrap();
+        &headers.0
+    }
+
+    /// The number of entries in the block.
+    ///
+    /// The names start right after the last header, so the name offset of the first entry divided
+    /// by the header size (12 bytes) gives the count.  Panics if that offset is zero or isn't a
+    /// multiple of 12.
+    pub fn n_entries(&self) -> usize {
+        let first_name_offset = self.get_entry_header(0).name_offset.get();
+        assert!(first_name_offset != 0);
+        assert!(first_name_offset % 12 == 0);
+        usize::from(first_name_offset) / 12
+    }
+
+    /// An iterator over the entries of the block, in stored order.
+    pub fn entries(&self) -> DirectoryEntries {
+        let length = self.n_entries();
+        DirectoryEntries {
+            block: self,
+            length,
+            position: 0,
+        }
+    }
+}
+
+// High-level iterator interface
+/// One decoded directory entry, as produced by `DirectoryEntries`.
+#[derive(Debug)]
+pub struct DirectoryEntry<'a> {
+    /// Copied from the entry header's `file_type`
+    pub file_type: FileType,
+    /// The entry name, borrowed from the directory block
+    pub name: &'a [u8],
+    /// Copied from the entry header's `inode_offset`
+    pub inode: u64,
+}
+
+/// Iterator over the entries of a `DirectoryBlock` (see `DirectoryBlock::entries()`).
+#[derive(Debug)]
+pub struct DirectoryEntries<'d> {
+    // The block being iterated
+    block: &'d DirectoryBlock,
+    // The total number of entries in the block
+    length: usize,
+    // The index of the next entry to return
+    position: usize,
+}
+
+impl<'d> Iterator for DirectoryEntries<'d> {
+    type Item = DirectoryEntry<'d>;
+
+    /// Decodes the next entry of the block.
+    ///
+    /// A name runs from its own offset up to the name offset of the following entry.  The last
+    /// name has no following entry, so it runs up to the first nul byte or the end of the block.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.position >= self.length {
+            return None;
+        }
+
+        let header = self.block.get_entry_header(self.position);
+        let name_start = usize::from(header.name_offset.get());
+        self.position += 1;
+
+        let name = if self.position < self.length {
+            let following = self.block.get_entry_header(self.position);
+            let name_end = usize::from(following.name_offset.get());
+            &self.block.data[name_start..name_end]
+        } else {
+            let tail = &self.block.data[name_start..];
+            let name_len = tail.partition_point(|byte| *byte != 0);
+            &tail[..name_len]
+        };
+
+        Some(DirectoryEntry {
+            name,
+            file_type: header.file_type,
+            inode: header.inode_offset.get(),
+        })
+    }
+}
diff --git a/src/fs.rs b/src/fs.rs
index 3430bde..ec99590 100644
--- a/src/fs.rs
+++ b/src/fs.rs
@@ -314,7 +314,7 @@ pub fn read_from_path(path: &Path, repo: Option<&Repository>) -> Result<FileSyst
 
 pub fn create_image(path: &Path, repo: Option<&Repository>) -> Result<Sha256HashValue> {
     let fs = read_from_path(path, repo)?;
-    let image = super::image::mkcomposefs(fs)?;
+    let image = crate::mkfs::mkfs(&fs)?;
     if let Some(repo) = repo {
         Ok(repo.write_image(None, &image)?)
     } else {
diff --git a/src/image.rs b/src/image.rs
index 5205b23..a210760 100644
--- a/src/image.rs
+++ b/src/image.rs
@@ -3,15 +3,13 @@ use std::{
     cmp::{Ord, Ordering},
     collections::BTreeMap,
     ffi::{OsStr, OsString},
-    io::Read,
     path::Path,
-    process::{Command, Stdio},
     rc::Rc,
 };
 
 use anyhow::{bail, Context, Result};
 
-use crate::{dumpfile::write_dumpfile, fsverity::Sha256HashValue};
+use crate::fsverity::Sha256HashValue;
 
 #[derive(Debug)]
 pub struct Stat {
@@ -290,26 +288,3 @@ impl FileSystem {
         }
     }
 }
-
-pub fn mkcomposefs(filesystem: FileSystem) -> Result<Vec<u8>> {
-    let mut mkcomposefs = Command::new("mkcomposefs")
-        .args(["--from-file", "-", "-"])
-        .stdin(Stdio::piped())
-        .stdout(Stdio::piped())
-        .spawn()?;
-
-    let mut stdin = mkcomposefs.stdin.take().unwrap();
-    write_dumpfile(&mut stdin, &filesystem)?;
-    drop(stdin);
-
-    let mut stdout = mkcomposefs.stdout.take().unwrap();
-    let mut image = vec![];
-    stdout.read_to_end(&mut image)?;
-    drop(stdout);
-
-    if !mkcomposefs.wait()?.success() {
-        bail!("mkcomposefs failed");
-    };
-
-    Ok(image)
-}
diff --git a/src/lib.rs b/src/lib.rs
index ff8f4e4..61507e6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,9 +2,11 @@
 
 pub mod dumpfile;
 pub mod dumpfile_parse;
+pub mod erofs;
 pub mod fs;
 pub mod fsverity;
 pub mod image;
+pub mod mkfs;
 pub mod mount;
 pub mod oci;
 pub mod repository;
diff --git a/src/mkfs.rs b/src/mkfs.rs
new file mode 100644
index 0000000..cf27589
--- /dev/null
+++ b/src/mkfs.rs
@@ -0,0 +1,749 @@
+use std::{
+    env,
+    io::Read,
+    process::{Command, Stdio},
+};
+
+use anyhow::{bail, Result};
+
+use crate::{dumpfile::write_dumpfile, image::FileSystem};
+
+use std::{
+    cell::RefCell,
+    collections::{BTreeMap, HashMap},
+    mem::{align_of_val, size_of},
+    os::unix::ffi::OsStrExt,
+    rc::Rc,
+};
+
+use log::debug;
+use xxhash_rust::xxh32::xxh32;
+use zerocopy::{Immutable, IntoBytes};
+
+use crate::{
+    erofs::{debug::debug_img, format},
+    image,
+};
+
+/// Rounds `n` up to the nearest multiple of `to`.
+///
+/// Works for any non-zero `to`, not only for powers of two.
+///
+/// # Panics
+///
+/// Panics if `to` is zero.  If the result does not fit in `usize`, this panics when overflow
+/// checks are enabled and wraps otherwise.
+fn round_up(n: usize, to: usize) -> usize {
+    n.next_multiple_of(to)
+}
+
+/// Categories of file offsets that are recorded while laying out the image.
+///
+/// The declaration order is significant: each variant is cast to `usize` and used as an index
+/// into `Layout::offset_types`, and `write_erofs` notes the categories in this same order.
+#[derive(Clone, Copy, Debug)]
+enum Offset {
+    /// Start of the composefs header.
+    Header,
+    /// Start of the erofs superblock.
+    Superblock,
+    /// Start of an inode; one offset is noted per inode.
+    Inode,
+    /// Start of a shared xattr entry; one offset is noted per shared xattr.
+    XAttr,
+    /// Position in the block area, noted once per inode just before its blocks are written.
+    Block,
+    /// End of the image.
+    End,
+}
+
+/// Sink that the image writer emits into.
+///
+/// The writer is run against two implementations: one that only measures and records offsets,
+/// and one that produces the bytes and answers offset queries from the recorded layout.
+trait Output {
+    /// Marks the current position as the start of the next item of category `offset_type`.
+    fn note_offset(&mut self, offset_type: Offset);
+    /// Returns the byte offset of item number `idx` of category `offset_type`.
+    fn get(&self, offset_type: Offset, idx: usize) -> usize;
+    /// Appends `data` to the output.
+    fn write(&mut self, data: &[u8]);
+    /// Advances the output to the next multiple of `alignment`.
+    fn pad(&mut self, alignment: usize);
+    /// Returns the number of bytes emitted so far.
+    fn len(&self) -> usize;
+
+    /// Returns the offset of the given item divided by `div`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the offset is not a multiple of `div`.
+    fn get_div(&self, offset_type: Offset, idx: usize, div: usize) -> usize {
+        let raw = self.get(offset_type, idx);
+        assert_eq!(raw % div, 0);
+        raw / div
+    }
+
+    /// Returns the inode number of inode `idx`: its offset in units of 32 bytes.
+    fn get_nid(&self, idx: usize) -> u64 {
+        let nid = self.get_div(Offset::Inode, idx, 32);
+        nid as u64
+    }
+
+    /// Returns the identifier of shared xattr `idx`: its offset in units of 4 bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the identifier does not fit in a `u32`.
+    fn get_xattr(&self, idx: usize) -> u32 {
+        let id = self.get_div(Offset::XAttr, idx, 4);
+        u32::try_from(id).unwrap()
+    }
+
+    /// Appends the raw bytes of `st`, after checking that the current position satisfies the
+    /// alignment of its type.
+    fn write_struct(&mut self, st: impl IntoBytes + Immutable) {
+        let alignment = align_of_val(&st);
+        assert_eq!(self.len() % alignment, 0); // TODO: this is less than we want
+        self.write(st.as_bytes());
+    }
+}
+
+/// One extended attribute, with its name split into a prefix index and a suffix.
+///
+/// The derived ordering compares fields in declaration order; `share_xattrs` uses this type
+/// as a `BTreeMap` key, so that ordering determines the order of the shared xattr table.
+#[derive(PartialOrd, PartialEq, Eq, Ord, Clone)]
+struct XAttr {
+    /// Index into `format::XATTR_PREFIXES` of the prefix that was stripped from the name.
+    prefix: u8,
+    /// Remainder of the name after the prefix.
+    suffix: Box<[u8]>,
+    /// Attribute value.
+    value: Box<[u8]>,
+}
+
+/// The extended attributes attached to one inode.
+#[derive(Clone, Default)]
+struct InodeXAttrs {
+    /// Indices into the shared xattr table (filled in by `share_xattrs`).
+    shared: Vec<usize>,
+    /// Attributes stored directly with the inode.
+    local: Vec<XAttr>,
+    /// Bit set with one bit per added attribute, derived from a hash of its name suffix.
+    /// It is written to the image inverted; zero means that no attribute was ever added.
+    filter: u32,
+}
+
+/// One directory entry in the intermediate representation.
+struct DirEnt<'a> {
+    /// Entry name bytes.
+    name: &'a [u8],
+    /// Index of the target in the collected inode list (converted to a nid when written).
+    inode: usize,
+    /// File type of the target inode.
+    file_type: format::FileType,
+}
+
+/// Layout of one directory's entries, as computed by `Directory::from_entries`.
+#[derive(Default)]
+struct Directory<'a> {
+    /// Groups of entries that are each written as a separate, block-aligned directory block.
+    blocks: Box<[Box<[DirEnt<'a>]>]>,
+    /// Trailing entries that are written together with the inode itself.
+    inline: Box<[DirEnt<'a>]>,
+    /// Directory size: one full block per item in `blocks` plus the bytes of the inline part.
+    size: usize,
+    /// Number of entries whose file type is `Directory`.
+    nlink: usize,
+}
+
+/// A non-directory inode in the intermediate representation.
+struct Leaf<'a> {
+    /// Content of the leaf, borrowed from the source filesystem tree.
+    content: &'a image::LeafContent,
+    /// Link count, taken from the strong reference count of the source `Rc`.
+    nlink: usize,
+}
+
+/// The type-specific part of an inode: either a directory or a leaf.
+enum InodeContent<'a> {
+    /// A directory together with the layout of its entries.
+    Directory(Directory<'a>),
+    /// Any non-directory inode.
+    Leaf(Leaf<'a>),
+}
+
+/// One inode of the image in the flattened intermediate representation.
+struct Inode<'a> {
+    /// Ownership, mode and timestamp information, borrowed from the source filesystem tree.
+    stat: &'a image::Stat,
+    /// Extended attributes of this inode, both local and shared.
+    xattrs: InodeXAttrs,
+    /// Directory or leaf content.
+    content: InodeContent<'a>,
+}
+
+impl XAttr {
+    /// Writes this attribute as an xattr entry: header, name suffix, value, and then padding
+    /// up to a 4-byte boundary.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the suffix is longer than 255 bytes or the value is longer than 65535 bytes.
+    /// The header's length fields cannot represent such sizes, and truncating them would
+    /// silently produce a corrupt entry.
+    pub fn write(&self, output: &mut impl Output) {
+        let name_len =
+            u8::try_from(self.suffix.len()).expect("xattr name suffix longer than 255 bytes");
+        let value_size =
+            u16::try_from(self.value.len()).expect("xattr value longer than 65535 bytes");
+
+        output.write_struct(format::XAttrHeader {
+            name_len,
+            name_index: self.prefix,
+            value_size: value_size.into(),
+        });
+        output.write(&self.suffix);
+        output.write(&self.value);
+        output.pad(4);
+    }
+}
+
+impl InodeXAttrs {
+    /// Records the attribute `name` = `value` as a local xattr and updates the name filter.
+    ///
+    /// The name is split into the index of a prefix from `format::XATTR_PREFIXES` and the
+    /// remaining suffix.  Prefixes are tried from the highest index down, so that index 0 (the
+    /// empty prefix) is only used as the fallback.
+    fn add(&mut self, name: &[u8], value: &[u8]) {
+        for (idx, prefix) in format::XATTR_PREFIXES.iter().enumerate().rev() {
+            let Some(suffix) = name.strip_prefix(*prefix) else {
+                continue;
+            };
+
+            // One filter bit per attribute, selected by a prefix-seeded hash of the suffix.
+            self.filter |= 1 << (xxh32(suffix, format::XATTR_FILTER_SEED + idx as u32) % 32);
+            self.local.push(XAttr {
+                prefix: idx as u8,
+                suffix: Box::from(suffix),
+                value: Box::from(value),
+            });
+            return;
+        }
+        unreachable!("{:?}", std::str::from_utf8(name)); // worst case: we matched the empty prefix (0)
+    }
+
+    /// Writes the inode's xattr area: a header, the identifiers of the shared xattrs, and then
+    /// the local xattrs.  Nothing at all is written if no attribute was ever added.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the inode references more than 255 shared xattrs, because the header's
+    /// `shared_count` field is a single byte and truncating it would corrupt the image.
+    fn write(&self, output: &mut impl Output) {
+        // Every added attribute sets a filter bit, so a zero filter means "no xattrs".
+        if self.filter != 0 {
+            debug!("  write xattrs block");
+            let shared_count = u8::try_from(self.shared.len())
+                .expect("more than 255 shared xattrs on a single inode");
+            output.write_struct(format::InodeXAttrHeader {
+                name_filter: (!self.filter).into(),
+                shared_count,
+                ..Default::default()
+            });
+            for idx in &self.shared {
+                debug!("    shared {} @{}", idx, output.len());
+                output.write(&output.get_xattr(*idx).to_le_bytes());
+            }
+            for attr in &self.local {
+                debug!("    local @{}", output.len());
+                attr.write(output);
+            }
+        }
+        // our alignment is equal to xattr alignment: no need to pad
+    }
+}
+
+impl<'a> Directory<'a> {
+    /// Builds the layout of a directory from its complete list of entries.
+    ///
+    /// Entries are packed, in order, into groups of at most 4096 bytes (entry header plus
+    /// name).  Full groups become blocks.  The final group is kept as inline data unless it is
+    /// larger than 2048 bytes, in which case it becomes a block too.  Entries of type
+    /// `Directory` are counted to obtain the link count.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a single entry is larger than 4096 bytes.
+    pub fn from_entries(entries: Vec<DirEnt<'a>>) -> Self {
+        let mut blocks = vec![];
+        let mut rest = vec![];
+
+        let mut n_bytes = 0;
+        let mut nlink = 0;
+
+        debug!("Directory with {} items", entries.len());
+
+        // The content of the directory is fixed at this point so we may as well split it into
+        // blocks.  This lets us avoid measuring and re-measuring.
+        for entry in entries {
+            let entry_size = size_of::<format::DirectoryEntryHeader>() + entry.name.len();
+            assert!(entry_size <= 4096);
+
+            debug!("    {:?}", entry.file_type);
+
+            if matches!(entry.file_type, format::FileType::Directory) {
+                nlink += 1;
+            }
+
+            n_bytes += entry_size;
+            if n_bytes <= 4096 {
+                rest.push(entry);
+            } else {
+                // It won't fit, so we need to store the existing entries in a block.
+                debug!("    block {}", rest.len());
+                blocks.push(rest.into_boxed_slice());
+
+                // Start over
+                rest = vec![entry];
+                n_bytes = entry_size;
+            }
+        }
+
+        // Don't try to store more than 2048 bytes of tail data
+        if n_bytes > 2048 {
+            blocks.push(rest.into_boxed_slice());
+            rest = vec![];
+            n_bytes = 0;
+        }
+
+        debug!(
+            "  blocks {} inline {} inline_size {n_bytes}",
+            blocks.len(),
+            rest.len()
+        );
+
+        let size = format::BLOCK_SIZE * blocks.len() + n_bytes;
+        Self {
+            blocks: blocks.into_boxed_slice(),
+            inline: rest.into_boxed_slice(),
+            size,
+            nlink,
+        }
+    }
+
+    /// Writes one group of entries: all entry headers first, then all names back to back.
+    ///
+    /// Name offsets are relative to the start of the group.
+    fn write_block(&self, output: &mut impl Output, block: &[DirEnt]) {
+        debug!("    write dir block {} @{}", block.len(), output.len());
+        let mut nameofs = size_of::<format::DirectoryEntryHeader>() * block.len();
+
+        for entry in block {
+            debug!(
+                "      entry {:?} name {} @{}",
+                entry.file_type,
+                nameofs,
+                output.len()
+            );
+            output.write_struct(format::DirectoryEntryHeader {
+                name_offset: (nameofs as u16).into(),
+                inode_offset: output.get_nid(entry.inode).into(),
+                file_type: entry.file_type,
+                ..Default::default()
+            });
+            nameofs += entry.name.len();
+        }
+
+        for entry in block {
+            debug!("      name @{}", output.len());
+            output.write(entry.name);
+        }
+    }
+
+    /// Writes the inline (tail) entries at the current position.
+    fn write_inline(&self, output: &mut impl Output) {
+        debug!(
+            "  write inline len {} expected size {} of {}",
+            self.inline.len(),
+            self.size % 4096,
+            self.size
+        );
+        self.write_block(output, &self.inline);
+    }
+
+    /// Writes every full block of the directory, padding each one to the block size.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the output is not block-aligned when a block is about to be written.
+    fn write_blocks(&self, output: &mut impl Output) {
+        for block in &self.blocks {
+            assert_eq!(output.len() % format::BLOCK_SIZE, 0);
+            self.write_block(output, block);
+            output.pad(format::BLOCK_SIZE);
+        }
+    }
+
+    /// Returns `(data layout, u field, size, nlink)` for the directory's inode header.
+    ///
+    /// `block_offset` is the byte offset at which this directory's blocks start.  The `u` field
+    /// is the corresponding block number, or 0 when the directory has inline data only.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the block number is needed and does not fit in 32 bits.
+    fn inode_meta(&self, block_offset: usize) -> (format::DataLayout, u32, u64, usize) {
+        // Convert the byte offset to a block number *before* narrowing to u32, so that offsets
+        // at or beyond 4 GiB are not truncated ahead of the division.
+        let first_block = || -> u32 {
+            (block_offset / 4096)
+                .try_into()
+                .expect("directory block number does not fit in 32 bits")
+        };
+
+        let (layout, u) = if self.inline.is_empty() {
+            (format::DataLayout::FlatPlain, first_block())
+        } else if !self.blocks.is_empty() {
+            (format::DataLayout::FlatInline, first_block())
+        } else {
+            (format::DataLayout::FlatInline, 0)
+        };
+        (layout, u, self.size as u64, self.nlink)
+    }
+}
+
+impl Leaf<'_> {
+    /// Returns `(data layout, u field, size, nlink)` for the leaf's inode header.
+    fn inode_meta(&self) -> (format::DataLayout, u32, u64, usize) {
+        let (layout, u, size) = match &self.content {
+            image::LeafContent::Fifo | image::LeafContent::Socket => {
+                (format::DataLayout::FlatPlain, 0, 0)
+            }
+            image::LeafContent::CharacterDevice(rdev) | image::LeafContent::BlockDevice(rdev) => {
+                // Device nodes carry their device number in the `u` field.
+                (format::DataLayout::FlatPlain, *rdev as u32, 0)
+            }
+            image::LeafContent::Symlink(target) => {
+                (format::DataLayout::FlatInline, 0, target.len() as u64)
+            }
+            image::LeafContent::InlineFile(data) => {
+                // Empty files have nothing to inline.
+                let layout = if data.is_empty() {
+                    format::DataLayout::FlatPlain
+                } else {
+                    format::DataLayout::FlatInline
+                };
+                (layout, 0, data.len() as u64)
+            }
+            image::LeafContent::ExternalFile(.., size) => {
+                // TODO: libcomposefs tries harder here.  Should we?
+                (format::DataLayout::ChunkBased, 31, *size)
+            }
+        };
+        (layout, u, size, self.nlink)
+    }
+
+    /// Writes the data that directly follows the inode: inline file content, the symlink
+    /// target, or a single null chunk for external files.  Other leaf types write nothing.
+    fn write_inline(&self, output: &mut impl Output) {
+        let bytes: &[u8] = match self.content {
+            image::LeafContent::InlineFile(data) => data,
+            image::LeafContent::Symlink(target) => target.as_bytes(),
+            image::LeafContent::ExternalFile(..) => b"\xff\xff\xff\xff", // null chunk
+            _ => &[],
+        };
+        output.write(bytes);
+    }
+}
+
+impl Inode<'_> {
+    /// Returns the on-disk file type that corresponds to this inode's content.
+    fn file_type(&self) -> format::FileType {
+        let leaf = match &self.content {
+            InodeContent::Directory(..) => return format::FileType::Directory,
+            InodeContent::Leaf(leaf) => leaf,
+        };
+
+        match leaf.content {
+            image::LeafContent::InlineFile(..) | image::LeafContent::ExternalFile(..) => {
+                format::FileType::RegularFile
+            }
+            image::LeafContent::Symlink(..) => format::FileType::Symlink,
+            image::LeafContent::CharacterDevice(..) => format::FileType::CharacterDevice,
+            image::LeafContent::BlockDevice(..) => format::FileType::BlockDevice,
+            image::LeafContent::Fifo => format::FileType::Fifo,
+            image::LeafContent::Socket => format::FileType::Socket,
+        }
+    }
+
+    /// Writes the inode with index `idx`: optional padding, the extended inode header, the
+    /// xattr area, the inline data, and finally padding up to the next 32-byte boundary.
+    ///
+    /// The inode notes its own `Offset::Inode` position, because it may first insert padding
+    /// to keep its inline data from straddling a block boundary.
+    fn write_inode(&self, output: &mut impl Output, idx: usize) {
+        let (layout, u, size, nlink) = match &self.content {
+            InodeContent::Leaf(leaf) => leaf.inode_meta(),
+            InodeContent::Directory(dir) => dir.inode_meta(output.get(Offset::Block, idx)),
+        };
+
+        // Measure the xattr area by writing it to a throwaway, size-only output.
+        let mut xattr_measure = FirstPass::default();
+        self.xattrs.write(&mut xattr_measure);
+        let xattr_size = xattr_measure.offset;
+
+        // The xattr size is stored in the header as a count of 4-byte units beyond the first
+        // 12 bytes, plus one; zero means "no xattr area".
+        let xattr_icount = match xattr_size {
+            0 => 0,
+            n => (1 + (n - 12) / 4) as u16,
+        };
+
+        // We need to make sure the inline part doesn't overlap a block boundary
+        if matches!(layout, format::DataLayout::FlatInline) {
+            let header_size = size_of::<format::ExtendedInodeHeader>();
+            let inline_start = output.len() + header_size + xattr_size;
+            let inline_end = inline_start + (size as usize % format::BLOCK_SIZE);
+            let first_block = inline_start / format::BLOCK_SIZE;
+            let last_block = inline_end / format::BLOCK_SIZE;
+            if first_block != last_block {
+                // If we proceed, then we'll violate the rule about crossing block boundaries.
+                // The easiest thing to do is to add padding so that the inline data starts at a
+                // fresh block boundary.
+                let pad_len = 4096 - inline_start % 4096;
+                debug!("added pad {}", pad_len);
+                output.write(&vec![0u8; pad_len]);
+            }
+        }
+
+        let format_field = format::FormatField::from((format::InodeLayout::Extended, layout));
+
+        output.pad(32);
+
+        debug!(
+            "write inode {idx} nid {} {:?} {:?} xattrsize{xattr_size} icount{} inline{} @{}",
+            output.len() / 32,
+            format_field,
+            self.file_type(),
+            xattr_icount,
+            size % 4096,
+            output.len()
+        );
+
+        output.note_offset(Offset::Inode);
+        output.write_struct(format::ExtendedInodeHeader {
+            format: format_field,
+            xattr_icount: xattr_icount.into(),
+            mode: (self.stat.st_mode as u16 | self.file_type().to_ifmt()).into(),
+            size: size.into(),
+            u: u.into(),
+            ino: ((output.len() / 32) as u32).into(),
+            uid: self.stat.st_uid.into(),
+            gid: self.stat.st_gid.into(),
+            mtime: (self.stat.st_mtim_sec as u64).into(),
+            nlink: (nlink as u32).into(),
+            ..Default::default()
+        });
+
+        self.xattrs.write(output);
+
+        match &self.content {
+            InodeContent::Leaf(leaf) => leaf.write_inline(output),
+            InodeContent::Directory(dir) => dir.write_inline(output),
+        }
+
+        output.pad(32);
+    }
+
+    /// Writes the full data blocks of this inode.  Only directories have any.
+    fn write_blocks(&self, output: &mut impl Output) {
+        match &self.content {
+            InodeContent::Directory(dir) => dir.write_blocks(output),
+            InodeContent::Leaf(..) => {}
+        }
+    }
+}
+
+/// Flattens a filesystem tree into a list of inodes.
+struct InodeCollector<'a> {
+    /// Inodes collected so far; an inode's position in this list is its index.
+    inodes: Vec<Inode<'a>>,
+    /// Maps the address of a leaf that has more than one reference to the index of the inode
+    /// already allocated for it, so that hardlinks share a single inode.
+    hardlinks: HashMap<*const image::Leaf, usize>,
+}
+
+impl<'a> InodeCollector<'a> {
+    /// Appends a new inode and returns its index.
+    ///
+    /// The inode's xattrs are assembled here, all as local xattrs: first the overlay metacopy
+    /// and redirect attributes for external files, then the attributes from `stat`, with names
+    /// under `trusted.overlay.` escaped to `trusted.overlay.overlay.`.
+    fn push_inode(&mut self, stat: &'a image::Stat, content: InodeContent<'a>) -> usize {
+        let mut xattrs = InodeXAttrs::default();
+
+        // We need to record extra xattrs for some files.  These come first.
+        if let InodeContent::Leaf(Leaf {
+            content: image::LeafContent::ExternalFile(id, ..),
+            ..
+        }) = content
+        {
+            let metacopy = [&[0, 36, 0, 1], &id[..]].concat();
+            xattrs.add(b"trusted.overlay.metacopy", &metacopy);
+
+            let redirect = format!("/{:02x}/{}", id[0], hex::encode(&id[1..]));
+            xattrs.add(b"trusted.overlay.redirect", redirect.as_bytes());
+        }
+
+        // Add the normal xattrs.  They're already listed in sorted order.
+        for (name, value) in RefCell::borrow(&stat.xattrs).iter() {
+            let name = name.as_bytes();
+
+            match name.strip_prefix(b"trusted.overlay.") {
+                Some(escapee) => {
+                    let escaped = [b"trusted.overlay.overlay.", escapee].concat();
+                    xattrs.add(&escaped, value);
+                }
+                None => xattrs.add(name, value),
+            }
+        }
+
+        // Allocate an inode for ourselves.  At first we write all xattrs as local.  Later (after
+        // we've determined which xattrs ought to be shared) we'll come and move some of them over.
+        let index = self.inodes.len();
+        self.inodes.push(Inode {
+            stat,
+            xattrs,
+            content,
+        });
+        index
+    }
+
+    /// Returns the inode index for `leaf`, allocating an inode unless this leaf is a hardlink
+    /// that has been seen before.
+    fn collect_leaf(&mut self, leaf: &'a Rc<image::Leaf>) -> usize {
+        let nlink = Rc::strong_count(leaf);
+        let is_hardlinked = nlink > 1;
+        let key = Rc::as_ptr(leaf);
+
+        if is_hardlinked {
+            if let Some(&existing) = self.hardlinks.get(&key) {
+                return existing;
+            }
+        }
+
+        let content = InodeContent::Leaf(Leaf {
+            content: &leaf.content,
+            nlink,
+        });
+        let index = self.push_inode(&leaf.stat, content);
+
+        if is_hardlinked {
+            self.hardlinks.insert(key, index);
+        }
+
+        index
+    }
+
+    /// Inserts a new entry into `entries` (which must be sorted by name) at the position that
+    /// keeps the list sorted.
+    fn insert_sorted(
+        entries: &mut Vec<DirEnt<'a>>,
+        name: &'a [u8],
+        inode: usize,
+        file_type: format::FileType,
+    ) {
+        let point = entries.partition_point(|existing| existing.name < name);
+        entries.insert(
+            point,
+            DirEnt {
+                name,
+                inode,
+                file_type,
+            },
+        );
+    }
+
+    /// Collects `dir` and everything below it, returning the index of the directory's inode.
+    ///
+    /// `parent` is the inode index that the directory's `..` entry points to.
+    fn collect_dir(&mut self, dir: &'a image::Directory, parent: usize) -> usize {
+        // The root inode number needs to fit in a u16.  That more or less compels us to write the
+        // directory inode before the inode of the children of the directory.  Reserve a slot.
+        let placeholder = InodeContent::Directory(Directory::default());
+        let me = self.push_inode(&dir.stat, placeholder);
+
+        let mut entries = vec![];
+
+        for entry in &dir.entries {
+            let child = match &entry.inode {
+                image::Inode::Leaf(leaf) => self.collect_leaf(leaf),
+                image::Inode::Directory(subdir) => self.collect_dir(subdir, me),
+            };
+            let file_type = self.inodes[child].file_type();
+            entries.push(DirEnt {
+                name: entry.name.as_bytes(),
+                inode: child,
+                file_type,
+            });
+        }
+
+        // We're expected to add those, too
+        Self::insert_sorted(&mut entries, b".", me, format::FileType::Directory);
+        Self::insert_sorted(&mut entries, b"..", parent, format::FileType::Directory);
+
+        // Now that we know the actual content, we can write it to our reserved slot
+        let layout = Directory::from_entries(entries);
+        self.inodes[me].content = InodeContent::Directory(layout);
+        me
+    }
+
+    /// Flattens the whole filesystem into a list of inodes, with the root directory at index 0.
+    pub fn collect(fs: &'a image::FileSystem) -> Vec<Inode<'a>> {
+        let mut collector = Self {
+            inodes: Vec::new(),
+            hardlinks: HashMap::new(),
+        };
+
+        // '..' of the root directory is the root directory again
+        let root_inode = collector.collect_dir(&fs.root, 0);
+        assert_eq!(root_inode, 0);
+
+        collector.inodes
+    }
+}
+
+/// Moves xattrs that are used by more than one inode into a shared table.
+///
+/// On entry every inode holds only local xattrs.  Each (name, value) pair that occurs more than
+/// once is removed from the local list of every inode that has it and replaced by an index into
+/// the returned table, which is in the sort order of `XAttr`.
+fn share_xattrs(inodes: &mut [Inode]) -> Vec<XAttr> {
+    let mut xattrs: BTreeMap<XAttr, usize> = BTreeMap::new();
+
+    // Count the users of every distinct xattr
+    for attr in inodes.iter().flat_map(|inode| &inode.xattrs.local) {
+        match xattrs.get_mut(attr) {
+            Some(users) => *users += 1,
+            None => {
+                xattrs.insert(attr.clone(), 1);
+            }
+        }
+    }
+
+    // Only xattrs with at least two users are worth sharing
+    xattrs.retain(|_attr, users| *users > 1);
+
+    // The user count is no longer needed: store each xattr's table index in its place
+    for (idx, slot) in xattrs.values_mut().enumerate() {
+        *slot = idx;
+    }
+
+    // Replace the local copy of every shared xattr with a reference to the table
+    for inode in inodes.iter_mut() {
+        let InodeXAttrs { shared, local, .. } = &mut inode.xattrs;
+        local.retain(|attr| match xattrs.get(attr) {
+            Some(idx) => {
+                shared.push(*idx);
+                false // converted to a shared reference: drop the local copy
+            }
+            None => true, // not shared: keep it local
+        });
+    }
+
+    // The keys, in order, are the shared table
+    xattrs.into_keys().collect()
+}
+
+/// Emits the complete image to `output`: composefs header, superblock, inode table, shared
+/// xattr table, and finally the block-aligned data blocks of the inodes.
+///
+/// The start of every section or item is noted on `output`, so that a measuring pass can record
+/// the layout which a later writing pass reads back through `Output::get`.
+fn write_erofs(output: &mut impl Output, inodes: &[Inode], xattrs: &[XAttr]) {
+    // Write composefs header
+    output.note_offset(Offset::Header);
+    let header = format::ComposefsHeader {
+        magic: format::COMPOSEFS_MAGIC,
+        version: format::COMPOSEFS_VERSION,
+        flags: 0.into(),
+        composefs_version: format::COMPOSEFS_VERSION,
+        ..Default::default()
+    };
+    output.write_struct(header);
+    output.pad(1024);
+
+    // Write superblock
+    output.note_offset(Offset::Superblock);
+    let root_nid = output.get_nid(0) as u16;
+    let n_blocks = (output.get(Offset::End, 0) / format::BLOCK_SIZE) as u32;
+    output.write_struct(format::Superblock {
+        magic: format::MAGIC_V1,
+        blkszbits: format::BLOCK_BITS,
+        feature_compat: format::FEATURE_COMPAT_MTIME | format::FEATURE_COMPAT_XATTR_FILTER,
+        root_nid: root_nid.into(),
+        inos: (inodes.len() as u64).into(),
+        blocks: n_blocks.into(),
+        ..Default::default()
+    });
+
+    // Write inode table
+    for (idx, inode) in inodes.iter().enumerate() {
+        // The inode may add padding to itself, so it notes its own offset
+        inode.write_inode(output, idx);
+    }
+
+    // Write shared xattr table
+    for xattr in xattrs {
+        output.note_offset(Offset::XAttr);
+        xattr.write(output);
+    }
+
+    // Write blocks from inodes that have them
+    output.pad(4096);
+    for inode in inodes {
+        output.note_offset(Offset::Block);
+        inode.write_blocks(output);
+    }
+
+    // That's it
+    output.note_offset(Offset::End);
+}
+
+/// Offsets recorded during the first pass and looked up during the second pass.
+#[derive(Default)]
+struct Layout {
+    /// For each `Offset` category (indexed by its discriminant), the index in `offsets` of the
+    /// first offset that was noted for that category.
+    offset_types: Vec<usize>,
+    /// Every noted offset, in the order in which it was noted.
+    offsets: Vec<usize>,
+}
+
+/// Output that discards the data and only tracks the position and the noted offsets.
+#[derive(Default)]
+struct FirstPass {
+    /// Number of bytes "written" so far.
+    offset: usize,
+    /// Offsets noted so far.
+    layout: Layout,
+}
+
+/// Output that collects the image bytes and answers offset queries from a finished layout.
+struct SecondPass {
+    /// Image bytes produced so far.
+    output: Vec<u8>,
+    /// Layout recorded by a previous `FirstPass` over the same input.
+    layout: Layout,
+}
+
+impl Output for SecondPass {
+    /// Does nothing: the layout was already recorded by the first pass.
+    fn note_offset(&mut self, _offset_type: Offset) {
+        /* no-op */
+    }
+
+    /// Looks up the offset of item `idx` of the given category in the recorded layout.
+    fn get(&self, offset_type: Offset, idx: usize) -> usize {
+        let Layout {
+            offset_types,
+            offsets,
+        } = &self.layout;
+        let first = offset_types[offset_type as usize];
+        offsets[first + idx]
+    }
+
+    /// Appends `data` to the image.
+    fn write(&mut self, data: &[u8]) {
+        self.output.extend_from_slice(data);
+    }
+
+    /// Appends zero bytes until the image length is a multiple of `alignment`.
+    fn pad(&mut self, alignment: usize) {
+        let padded_len = round_up(self.output.len(), alignment);
+        self.output.resize(padded_len, 0);
+    }
+
+    /// Returns the number of image bytes produced so far.
+    fn len(&self) -> usize {
+        self.output.len()
+    }
+}
+
+impl Output for FirstPass {
+    /// Records the current position as the next offset of category `offset_type`.
+    ///
+    /// Categories are expected to be noted in declaration order.  The first time a category is
+    /// noted, its starting index is registered in `offset_types`.  Categories that were skipped
+    /// entirely (for example `XAttr` when there are no shared xattrs) are registered at the
+    /// same time with zero entries, so that every later category still ends up at the index of
+    /// its own discriminant.  Without this, a skipped category would leave `offset_types` too
+    /// short and lookups of the later categories would index out of bounds.
+    fn note_offset(&mut self, offset_type: Offset) {
+        while self.layout.offset_types.len() <= offset_type as usize {
+            self.layout.offset_types.push(self.layout.offsets.len());
+        }
+        debug!(
+            "{:?} #{} @{}",
+            offset_type,
+            self.layout.offsets.len() - self.layout.offset_types[offset_type as usize],
+            self.offset
+        );
+        self.layout.offsets.push(self.offset);
+    }
+
+    /// Always returns 0: offsets are not yet known while the layout is being measured.
+    fn get(&self, _: Offset, _: usize) -> usize {
+        0 // We don't know offsets in the first pass, so fake it
+    }
+
+    /// Advances the position by the length of `data`, without storing it.
+    fn write(&mut self, data: &[u8]) {
+        self.offset += data.len();
+    }
+
+    /// Advances the position to the next multiple of `alignment`.
+    fn pad(&mut self, alignment: usize) {
+        self.offset = round_up(self.offset, alignment);
+    }
+
+    /// Returns the current position.
+    fn len(&self) -> usize {
+        self.offset
+    }
+}
+
+/// Builds an erofs image for `fs` using the built-in writer and returns its bytes.
+///
+/// The writer runs twice over the same intermediate representation: once to measure the
+/// layout, and once more to produce the bytes using the offsets from the first run.
+pub fn mkfs_erofs(fs: &image::FileSystem) -> Box<[u8]> {
+    // Create the intermediate representation: flattened inodes and shared xattrs
+    let mut inodes = InodeCollector::collect(fs);
+    let xattrs = share_xattrs(&mut inodes);
+
+    // First pass: determine the layout
+    let layout = {
+        let mut measuring = FirstPass::default();
+        write_erofs(&mut measuring, &inodes, &xattrs);
+        measuring.layout
+    };
+
+    // Second pass: produce the actual bytes
+    let mut writing = SecondPass {
+        output: Vec::new(),
+        layout,
+    };
+    write_erofs(&mut writing, &inodes, &xattrs);
+
+    writing.output.into_boxed_slice()
+}
+
+/// Builds an image for `filesystem` by running the external `mkcomposefs` program.
+///
+/// The filesystem is sent to the child's stdin in dumpfile format and the image is read back
+/// from its stdout.
+///
+/// # Errors
+///
+/// Returns an error if the program cannot be started, if writing the dumpfile or reading the
+/// image fails, or if the program exits unsuccessfully.
+pub fn mkfs_mkcomposefs(filesystem: &FileSystem) -> Result<Box<[u8]>> {
+    let mut child = Command::new("mkcomposefs")
+        .args(["--from-file", "-", "-"])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()?;
+
+    {
+        // The pipe is closed at the end of this scope, which signals end-of-input.
+        let mut dump_pipe = child.stdin.take().unwrap();
+        write_dumpfile(&mut dump_pipe, filesystem)?;
+    }
+
+    let mut image = Vec::new();
+    {
+        let mut image_pipe = child.stdout.take().unwrap();
+        image_pipe.read_to_end(&mut image)?;
+    }
+
+    let status = child.wait()?;
+    if !status.success() {
+        bail!("mkcomposefs failed");
+    }
+
+    Ok(image.into_boxed_slice())
+}
+
+/// Builds an image for `fs`, choosing the backend from the environment.
+///
+/// The built-in writer is used when `COMPOSEFS_FORMAT` is set to `new`; in every other case the
+/// external `mkcomposefs` program is run.  When `COMPOSEFS_DUMP_EROFS` is set to `1` the
+/// resulting image is additionally passed to `debug_img`.
+///
+/// # Errors
+///
+/// Returns an error if running `mkcomposefs` fails.
+pub fn mkfs(fs: &FileSystem) -> Result<Box<[u8]>> {
+    let image = match env::var("COMPOSEFS_FORMAT").as_deref() {
+        Ok("new") => mkfs_erofs(fs),
+        _ => mkfs_mkcomposefs(fs)?,
+    };
+
+    let dump_requested = matches!(env::var("COMPOSEFS_DUMP_EROFS").as_deref(), Ok("1"));
+    if dump_requested {
+        debug_img(&image);
+    }
+
+    Ok(image)
+}
diff --git a/src/oci/image.rs b/src/oci/image.rs
index 0358f6f..b75f0c7 100644
--- a/src/oci/image.rs
+++ b/src/oci/image.rs
@@ -6,7 +6,8 @@ use oci_spec::image::ImageConfiguration;
 use crate::{
     dumpfile::write_dumpfile,
     fsverity::Sha256HashValue,
-    image::{mkcomposefs, FileSystem, Inode, Leaf},
+    image::{FileSystem, Inode, Leaf},
+    mkfs::mkfs,
     oci,
     repository::Repository,
     selabel::selabel,
@@ -101,8 +102,8 @@ pub fn create_image(
     selabel(&mut filesystem, repo)?;
     filesystem.done();
 
-    let image = mkcomposefs(filesystem)?;
-    repo.write_image(name, &image)
+    let erofs = mkfs(&filesystem)?;
+    repo.write_image(name, &erofs)
 }
 
 #[cfg(test)]