diff --git a/.travis.yml b/.travis.yml index f8306b19..a69ecea8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,10 +18,11 @@ script: - make test QUIET=1 # run tests with a few different configurations - - make test QUIET=1 CFLAGS+="-DLFS_READ_SIZE=1 -DLFS_PROG_SIZE=1" - - make test QUIET=1 CFLAGS+="-DLFS_READ_SIZE=512 -DLFS_PROG_SIZE=512" - - make test QUIET=1 CFLAGS+="-DLFS_BLOCK_COUNT=1023 -DLFS_LOOKAHEAD=2048" + - make test QUIET=1 CFLAGS+="-DLFS_READ_SIZE=1 -DLFS_CACHE_SIZE=4" + - make test QUIET=1 CFLAGS+="-DLFS_READ_SIZE=512 -DLFS_CACHE_SIZE=512 -DLFS_BLOCK_CYCLES=16" + - make test QUIET=1 CFLAGS+="-DLFS_BLOCK_COUNT=1023 -DLFS_LOOKAHEAD_SIZE=256" + - make clean test QUIET=1 CFLAGS+="-DLFS_INLINE_MAX=0" - make clean test QUIET=1 CFLAGS+="-DLFS_NO_INTRINSICS" # compile and find the code size with the smallest configuration @@ -103,7 +104,7 @@ jobs: if: branch !~ -prefix$ install: - sudo apt-get install libfuse-dev - - git clone --depth 1 https://github.com/geky/littlefs-fuse + - git clone --depth 1 https://github.com/geky/littlefs-fuse -b v2-alpha - fusermount -V - gcc --version before_script: @@ -113,7 +114,7 @@ jobs: - mkdir mount - sudo chmod a+rw /dev/loop0 - - dd if=/dev/zero bs=512 count=2048 of=disk + - dd if=/dev/zero bs=512 count=4096 of=disk - losetup /dev/loop0 disk script: # self-host test @@ -126,7 +127,59 @@ jobs: - mkdir mount/littlefs - cp -r $(git ls-tree --name-only HEAD) mount/littlefs - cd mount/littlefs - - ls + - stat . 
+ - ls -flh + - make -B test_dirs test_files QUIET=1 + + # self-host with littlefs-fuse for fuzz test + - stage: test + env: + - STAGE=test + - NAME=littlefs-migration + install: + - sudo apt-get install libfuse-dev + - git clone --depth 1 https://github.com/geky/littlefs-fuse -b v2-alpha v2 + - git clone --depth 1 https://github.com/geky/littlefs-fuse v1 + - fusermount -V + - gcc --version + before_script: + # setup disk for littlefs-fuse + - rm -rf v2/littlefs/* + - cp -r $(git ls-tree --name-only HEAD) v2/littlefs + + - mkdir mount + - sudo chmod a+rw /dev/loop0 + - dd if=/dev/zero bs=512 count=4096 of=disk + - losetup /dev/loop0 disk + script: + # compile v1 and v2 + - make -C v1 + - make -C v2 + + # run self-host test with v1 + - v1/lfs --format /dev/loop0 + - v1/lfs /dev/loop0 mount + + - ls mount + - mkdir mount/littlefs + - cp -r $(git ls-tree --name-only HEAD) mount/littlefs + - cd mount/littlefs + - stat . + - ls -flh + - make -B test_dirs test_files QUIET=1 + + # attempt to migrate + - cd ../.. + - fusermount -u mount + + - v2/lfs --migrate /dev/loop0 + - v2/lfs /dev/loop0 mount + + # run self-host test with v2 right where we left off + - ls mount + - cd mount/littlefs + - stat . + - ls -flh - make -B test_dirs test_files QUIET=1 # Automatically create releases diff --git a/DESIGN.md b/DESIGN.md index 3afb0a20..da693f44 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -1,6 +1,6 @@ -## The design of the little filesystem +## The design of littlefs -A little fail-safe filesystem designed for embedded systems. +A little fail-safe filesystem designed for microcontrollers. ``` | | | .---._____ @@ -11,211 +11,581 @@ A little fail-safe filesystem designed for embedded systems. | | | ``` -For a bit of backstory, the littlefs was developed with the goal of learning -more about filesystem design by tackling the relative unsolved problem of -managing a robust filesystem resilient to power loss on devices -with limited RAM and ROM. 
- -The embedded systems the littlefs is targeting are usually 32 bit -microcontrollers with around 32KB of RAM and 512KB of ROM. These are -often paired with SPI NOR flash chips with about 4MB of flash storage. - -Flash itself is a very interesting piece of technology with quite a bit of -nuance. Unlike most other forms of storage, writing to flash requires two -operations: erasing and programming. The programming operation is relatively -cheap, and can be very granular. For NOR flash specifically, byte-level -programs are quite common. Erasing, however, requires an expensive operation -that forces the state of large blocks of memory to reset in a destructive -reaction that gives flash its name. The [Wikipedia entry](https://en.wikipedia.org/wiki/Flash_memory) -has more information if you are interested in how this works. - -This leaves us with an interesting set of limitations that can be simplified -to three strong requirements: - -1. **Power-loss resilient** - This is the main goal of the littlefs and the - focus of this project. - - Embedded systems are usually designed without a shutdown routine and a - notable lack of user interface for recovery, so filesystems targeting - embedded systems must be prepared to lose power at any given time. - - Despite this state of things, there are very few embedded filesystems that - handle power loss in a reasonable manner, and most can become corrupted if - the user is unlucky enough. - -2. **Wear leveling** - Due to the destructive nature of flash, most flash - chips have a limited number of erase cycles, usually in the order of around - 100,000 erases per block for NOR flash. Filesystems that don't take wear - into account can easily burn through blocks used to store frequently updated - metadata. - - Consider the [FAT filesystem](https://en.wikipedia.org/wiki/Design_of_the_FAT_file_system), - which stores a file allocation table (FAT) at a specific offset from the - beginning of disk. 
Every block allocation will update this table, and after - 100,000 updates, the block will likely go bad, rendering the filesystem - unusable even if there are many more erase cycles available on the storage - as a whole. - -3. **Bounded RAM/ROM** - Even with the design difficulties presented by the - previous two limitations, we have already seen several flash filesystems - developed on PCs that handle power loss just fine, such as the - logging filesystems. However, these filesystems take advantage of the - relatively cheap access to RAM, and use some rather... opportunistic... - techniques, such as reconstructing the entire directory structure in RAM. - These operations make perfect sense when the filesystem's only concern is - erase cycles, but the idea is a bit silly on embedded systems. - - To cater to embedded systems, the littlefs has the simple limitation of - using only a bounded amount of RAM and ROM. That is, no matter what is - written to the filesystem, and no matter how large the underlying storage - is, the littlefs will always use the same amount of RAM and ROM. This - presents a very unique challenge, and makes presumably simple operations, - such as iterating through the directory tree, surprisingly difficult. +littlefs was originally built as an experiment to learn about filesystem design +in the context of microcontrollers. The question was: How would you build a +filesystem that is resilient to power-loss and flash wear without using +unbounded memory? + +This document covers the high-level design of littlefs, how it is different +than other filesystems, and the design decisions that got us here. For the +low-level details covering every bit on disk, check out [SPEC.md](SPEC.md). + +## The problem + +The embedded systems littlefs targets are usually 32-bit microcontrollers with +around 32 KiB of RAM and 512 KiB of ROM. These are often paired with SPI NOR +flash chips with about 4 MiB of flash storage. 
These devices are too small for +Linux and most existing filesystems, requiring code written specifically with +size in mind. + +Flash itself is an interesting piece of technology with its own quirks and +nuance. Unlike other forms of storage, writing to flash requires two +operations: erasing and programming. Programming (setting bits to 0) is +relatively cheap and can be very granular. Erasing however (setting bits to 1), +requires an expensive and destructive operation which gives flash its name. +[Wikipedia][wikipedia-flash] has more information on how exactly flash works. + +To make the situation more annoying, it's very common for these embedded +systems to lose power at any time. Usually, microcontroller code is simple and +reactive, with no concept of a shutdown routine. This presents a big challenge +for persistent storage, where an unlucky power loss can corrupt the storage and +leave a device unrecoverable. + +This leaves us with three major requirements for an embedded filesystem. + +1. **Power-loss resilience** - On these systems, power can be lost at any time. + If a power loss corrupts any persistent data structures, this can cause the + device to become unrecoverable. An embedded filesystem must be designed to + recover from a power loss during any write operation. + +1. **Wear leveling** - Writing to flash is destructive. If a filesystem + repeatedly writes to the same block, eventually that block will wear out. + Filesystems that don't take wear into account can easily burn through blocks + used to store frequently updated metadata and cause a device's early death. + +1. **Bounded RAM/ROM** - If the above requirements weren't enough, these + systems also have very limited amounts of memory. This prevents many + existing filesystem designs, which can lean on relatively large amounts of + RAM to temporarily store filesystem metadata. + + For ROM, this means we need to keep our design simple and reuse code paths + where possible. 
For RAM we have a stronger requirement, all RAM usage is + bounded. This means RAM usage does not grow as the filesystem changes in + size or number of files. This creates a unique challenge as even presumably + simple operations, such as traversing the filesystem, become surprisingly + difficult. ## Existing designs? -There are of course, many different existing filesystem. Here is a very rough -summary of the general ideas behind some of them. - -Most of the existing filesystems fall into the one big category of filesystem -designed in the early days of spinny magnet disks. While there is a vast amount -of interesting technology and ideas in this area, the nature of spinny magnet -disks encourage properties, such as grouping writes near each other, that don't -make as much sense on recent storage types. For instance, on flash, write -locality is not important and can actually increase wear. - -One of the most popular designs for flash filesystems is called the -[logging filesystem](https://en.wikipedia.org/wiki/Log-structured_file_system). -The flash filesystems [jffs](https://en.wikipedia.org/wiki/JFFS) -and [yaffs](https://en.wikipedia.org/wiki/YAFFS) are good examples. In a -logging filesystem, data is not stored in a data structure on disk, but instead -the changes to the files are stored on disk. This has several neat advantages, -such as the fact that the data is written in a cyclic log format and naturally -wear levels as a side effect. And, with a bit of error detection, the entire -filesystem can easily be designed to be resilient to power loss. The -journaling component of most modern day filesystems is actually a reduced -form of a logging filesystem. However, logging filesystems have a difficulty -scaling as the size of storage increases. And most filesystems compensate by -caching large parts of the filesystem in RAM, a strategy that is inappropriate -for embedded systems. 
- -Another interesting filesystem design technique is that of [copy-on-write (COW)](https://en.wikipedia.org/wiki/Copy-on-write). -A good example of this is the [btrfs](https://en.wikipedia.org/wiki/Btrfs) -filesystem. COW filesystems can easily recover from corrupted blocks and have -natural protection against power loss. However, if they are not designed with -wear in mind, a COW filesystem could unintentionally wear down the root block -where the COW data structures are synchronized. +So, what's already out there? There are, of course, many different filesystems, +however they often share and borrow features from each other. If we look at +power-loss resilience and wear leveling, we can narrow these down to a handful +of designs. -## Metadata pairs +1. First we have the non-resilient, block based filesystems, such as [FAT] and + [ext2]. These are the earliest filesystem designs and often the most simple. + Here storage is divided into blocks, with each file being stored in a + collection of blocks. Without modifications, these filesystems are not + power-loss resilient, so updating a file is as simple as rewriting the blocks + in place. + + ``` + .--------. + | root | + | | + | | + '--------' + .-' '-. + v v + .--------. .--------. + | A | | B | + | | | | + | | | | + '--------' '--------' + .-' .-' '-. + v v v + .--------. .--------. .--------. + | C | | D | | E | + | | | | | | + | | | | | | + '--------' '--------' '--------' + ``` + + Because of their simplicity, these filesystems are usually both the fastest + and smallest. However the lack of power resilience is not great, and the + binding relationship of storage location and data removes the filesystem's + ability to manage wear. + +2. In a completely different direction, we have logging filesystems, such as + [JFFS], [YAFFS], and [SPIFFS]. Here storage location is not bound to a piece of + data; instead the entire storage is used for a circular log which is + appended with every change made to the filesystem. 
Writing appends new + changes, while reading requires traversing the log to reconstruct a file. + Some logging filesystems cache files to avoid the read cost, but this comes + at a tradeoff of RAM. + + ``` + v + .--------.--------.--------.--------.--------.--------.--------.--------. + | C | new B | new A | | A | B | + | | | |-> | | | + | | | | | | | + '--------'--------'--------'--------'--------'--------'--------'--------' + ``` + + Logging filesystems are beautifully elegant. With a checksum, we can easily + detect power-loss and fall back to the previous state by ignoring failed + appends. And if that wasn't good enough, their cyclic nature means that + logging filesystems distribute wear across storage perfectly. + + The main downside is performance. If we look at garbage collection, the + process of cleaning up outdated data from the end of the log, I've yet to + see a pure logging filesystem that does not have one of these two costs: + + 1. _O(n²)_ runtime + 2. _O(n)_ RAM + + SPIFFS is a very interesting case here, as it uses the fact that repeated + programs to NOR flash are both atomic and masking. This is a very neat + solution, however it limits the type of storage you can support. + +3. Perhaps the most common type of filesystem, a journaling filesystem is the + offspring that happens when you mate a block based filesystem with a logging + filesystem. [ext4] and [NTFS] are good examples. Here, we take a normal + block based filesystem and add a bounded log where we note every change + before it occurs. + + ``` + journal + .--------.--------. + .--------. | C'| D'| | E'| + | root |-->| | |-> | | + | | | | | | | + | | '--------'--------' + '--------' + .-' '-. + v v + .--------. .--------. + | A | | B | + | | | | + | | | | + '--------' '--------' + .-' .-' '-. + v v v + .--------. .--------. .--------. + | C | | D | | E | + | | | | | | + | | | | | | + '--------' '--------' '--------' + ``` + + + This sort of filesystem takes the best from both worlds. 
Performance can be + as fast as a block based filesystem (though updating the journal does have + a small cost), and atomic updates to the journal allow the filesystem to + recover in the event of a power loss. + + Unfortunately, journaling filesystems have a couple of problems. They are + fairly complex, since there are effectively two filesystems running in + parallel, which comes with a code size cost. They also offer no protection + against wear because of the strong relationship between storage location + and data. + +4. Last but not least we have copy-on-write (COW) filesystems, such as + [btrfs] and [ZFS]. These are very similar to other block based filesystems, + but instead of updating blocks in place, all updates are performed by creating + a copy with the changes and replacing any references to the old block with + our new block. This recursively pushes all of our problems upwards until we + reach the root of our filesystem, which is often stored in a very small log. + + ``` + .--------. .--------. + | root | write |new root| + | | ==> | | + | | | | + '--------' '--------' + .-' '-. | '-. + | .-------|------------------' v + v v v .--------. + .--------. .--------. | new B | + | A | | B | | | + | | | | | | + | | | | '--------' + '--------' '--------' .-' | + .-' .-' '-. .------------|------' + | | | | v + v v v v .--------. + .--------. .--------. .--------. | new D | + | C | | D | | E | | | + | | | | | | | | + | | | | | | '--------' + '--------' '--------' '--------' + ``` + + COW filesystems are interesting. They offer very similar performance to + block based filesystems while managing to pull off atomic updates without + storing data changes directly in a log. They even disassociate the storage + location of data, which creates an opportunity for wear leveling. + + Well, almost. The unbounded upwards movement of updates causes some + problems. 
Because updates to a COW filesystem don't stop until they've + reached the root, an update can cascade into a larger set of writes than + would be needed for the original data. On top of this, the upward motion + focuses these writes into the root block, which can wear out much earlier than + the rest of the filesystem. + +## littlefs + +So what does littlefs do? + +If we look at existing filesystems, there are two interesting design patterns +that stand out, but each has its own set of problems. Logging, which +provides independent atomicity, has poor runtime performance. And COW data +structures, which perform well, push the atomicity problem upwards. + +Can we work around these limitations? + +Consider logging. It has either an _O(n²)_ runtime or _O(n)_ RAM cost. We +can't avoid these costs, _but_ if we put an upper bound on the size we can at +least prevent the theoretical cost from becoming a problem. This relies on the +super secret computer science hack where you can pretend any algorithmic +complexity is _O(1)_ by bounding the input. + +In the case of COW data structures, we can try twisting the definition a bit. +Let's say that our COW structure doesn't copy after a single write, but instead +copies after _n_ writes. This doesn't change most COW properties (assuming you +can write atomically!), but what it does do is prevent the upward motion of +wear. This sort of copy-on-bounded-writes (CObW) still focuses wear, but at +each level we divide the propagation of wear by _n_. With a sufficiently +large _n_ (> branching factor) wear propagation is no longer a problem. + +See where this is going? Separately, logging and COW are imperfect solutions and +have weaknesses that limit their usefulness. But if we merge the two they can +mutually solve each other's limitations. + +This is the idea behind littlefs. At the sub-block level, littlefs is built +out of small, two block logs that provide atomic updates to metadata anywhere +on the filesystem. 
At the super-block level, littlefs is a CObW tree of blocks +that can be evicted on demand. -The core piece of technology that provides the backbone for the littlefs is -the concept of metadata pairs. The key idea here is that any metadata that -needs to be updated atomically is stored on a pair of blocks tagged with -a revision count and checksum. Every update alternates between these two -pairs, so that at any time there is always a backup containing the previous -state of the metadata. - -Consider a small example where each metadata pair has a revision count, -a number as data, and the XOR of the block as a quick checksum. If -we update the data to a value of 9, and then to a value of 5, here is -what the pair of blocks may look like after each update: ``` - block 1 block 2 block 1 block 2 block 1 block 2 -.---------.---------. .---------.---------. .---------.---------. -| rev: 1 | rev: 0 | | rev: 1 | rev: 2 | | rev: 3 | rev: 2 | -| data: 3 | data: 0 | -> | data: 3 | data: 9 | -> | data: 5 | data: 9 | -| xor: 2 | xor: 0 | | xor: 2 | xor: 11 | | xor: 6 | xor: 11 | -'---------'---------' '---------'---------' '---------'---------' - let data = 9 let data = 5 + root + .--------.--------. + | A'| B'| | + | | |-> | + | | | | + '--------'--------' + .----' '--------------. + A v B v + .--------.--------. .--------.--------. + | C'| D'| | | E'|new| | + | | |-> | | | E'|-> | + | | | | | | | | + '--------'--------' '--------'--------' + .-' '--. | '------------------. + v v .-' v +.--------. .--------. v .--------. +| C | | D | .--------. write | new E | +| | | | | E | ==> | | +| | | | | | | | +'--------' '--------' | | '--------' + '--------' .-' | + .-' '-. .-------------|------' + v v v v + .--------. .--------. .--------. + | F | | G | | new F | + | | | | | | + | | | | | | + '--------' '--------' '--------' ``` -After each update, we can find the most up to date value of data by looking -at the revision count. +There are still some minor issues. 
Small logs can be expensive in terms of +storage; in the worst case a small log costs 4x the size of the original data. +CObW structures require an efficient block allocator since allocation occurs +every _n_ writes. And there is still the challenge of keeping the RAM usage +constant. -Now consider what the blocks may look like if we suddenly lose power while -changing the value of data to 5: -``` - block 1 block 2 block 1 block 2 block 1 block 2 -.---------.---------. .---------.---------. .---------.---------. -| rev: 1 | rev: 0 | | rev: 1 | rev: 2 | | rev: 3 | rev: 2 | -| data: 3 | data: 0 | -> | data: 3 | data: 9 | -x | data: 3 | data: 9 | -| xor: 2 | xor: 0 | | xor: 2 | xor: 11 | | xor: 2 | xor: 11 | -'---------'---------' '---------'---------' '---------'---------' - let data = 9 let data = 5 - powerloss!!! -``` +## Metadata pairs + +Metadata pairs are the backbone of littlefs. These are small, two block logs +that allow atomic updates anywhere in the filesystem. + +Why two blocks? Well, logs work by appending entries to a circular buffer +stored on disk. But remember that flash has limited write granularity. We can +incrementally program new data onto erased blocks, but we need to erase a full +block at a time. This means that in order for our circular buffer to work, we +need more than one block. + +We could make our logs larger than two blocks, but the next challenge is how +do we store references to these logs? Because the blocks themselves are erased +during writes, using a data structure to track these blocks is complicated. +The simple solution here is to store two block addresses for every metadata +pair. This has the added advantage that we can change out blocks in the +metadata pair independently, and we don't reduce our block granularity for +other operations. 
+ +In order to determine which metadata block is the most recent, we store a +revision count that we compare using [sequence arithmetic][wikipedia-sna] +(very handy for avoiding problems with integer overflow). Conveniently, this +revision count also gives us a rough idea of how many erases have occurred on +the block. -In this case, block 1 was partially written with a new revision count, but -the littlefs hadn't made it to updating the value of data. However, if we -check our checksum we notice that block 1 was corrupted. So we fall back to -block 2 and use the value 9. - -Using this concept, the littlefs is able to update metadata blocks atomically. -There are a few other tweaks, such as using a 32 bit CRC and using sequence -arithmetic to handle revision count overflow, but the basic concept -is the same. These metadata pairs define the backbone of the littlefs, and the -rest of the filesystem is built on top of these atomic updates. - -## Non-meta data - -Now, the metadata pairs do come with some drawbacks. Most notably, each pair -requires two blocks for each block of data. I'm sure users would be very -unhappy if their storage was suddenly cut in half! Instead of storing -everything in these metadata blocks, the littlefs uses a COW data structure -for files which is in turn pointed to by a metadata block. When -we update a file, we create copies of any blocks that are modified until -the metadata blocks are updated with the new copy. Once the metadata block -points to the new copy, we deallocate the old blocks that are no longer in use. - -Here is what updating a one-block file may look like: ``` - block 1 block 2 block 1 block 2 block 1 block 2 -.---------.---------. .---------.---------. .---------.---------. 
-| rev: 1 | rev: 0 | | rev: 1 | rev: 0 | | rev: 1 | rev: 2 | -| file: 4 | file: 0 | -> | file: 4 | file: 0 | -> | file: 4 | file: 5 | -| xor: 5 | xor: 0 | | xor: 5 | xor: 0 | | xor: 5 | xor: 7 | -'---------'---------' '---------'---------' '---------'---------' - | | | - v v v - block 4 block 4 block 5 block 4 block 5 -.--------. .--------. .--------. .--------. .--------. -| old | | old | | new | | old | | new | -| data | | data | | data | | data | | data | -| | | | | | | | | | -'--------' '--------' '--------' '--------' '--------' - update data in file update metadata pair +metadata pair pointer: {block 0, block 1} + | '--------------------. + '-. | +disk v v +.--------.--------.--------.--------.--------.--------.--------.--------. +| | |metadata| |metadata| | +| | |block 0 | |block 1 | | +| | | | | | | +'--------'--------'--------'--------'--------'--------'--------'--------' + '--. .----' + v v + metadata pair .----------------.----------------. + | revision 11 | revision 12 | + block 1 is |----------------|----------------| + most recent | A | A'' | + |----------------|----------------| + | checksum | checksum | + |----------------|----------------| + | B | A''' | <- most recent A + |----------------|----------------| + | A'' | checksum | + |----------------|----------------| + | checksum | | | + |----------------| v | + '----------------'----------------' ``` -It doesn't matter if we lose power while writing new data to block 5, -since the old data remains unmodified in block 4. This example also -highlights how the atomic updates of the metadata blocks provide a -synchronization barrier for the rest of the littlefs. - -At this point, it may look like we are wasting an awfully large amount -of space on the metadata. Just looking at that example, we are using -three blocks to represent a file that fits comfortably in one! 
So instead -of giving each file a metadata pair, we actually store the metadata for -all files contained in a single directory in a single metadata block. - -Now we could just leave files here, copying the entire file on write -provides the synchronization without the duplicated memory requirements -of the metadata blocks. However, we can do a bit better. +So how do we atomically update our metadata pairs? Atomicity (a type of +power-loss resilience) requires two parts: redundancy and error detection. +Error detection can be provided with a checksum, and in littlefs's case we +use a 32-bit [CRC][wikipedia-crc]. Maintaining redundancy, on the other hand, +requires multiple stages. + +1. If our block is not full and the program size is small enough to let us + append more entries, we can simply append the entries to the log. Because + we don't overwrite the original entries (remember rewriting flash requires + an erase), we still have the original entries if we lose power during the + append. + + ``` + commit A + .----------------.----------------. .----------------.----------------. + | revision 1 | revision 0 | => | revision 1 | revision 0 | + |----------------|----------------| |----------------|----------------| + | | | | | A | | + | v | | |----------------| | + | | | | checksum | | + | | | |----------------| | + | | | | | | | + | | | | v | | + | | | | | | + | | | | | | + | | | | | | + | | | | | | + '----------------'----------------' '----------------'----------------' + ``` + + Note that littlefs doesn't maintain a checksum for each entry. Many logging + filesystems do this, but it limits what you can update in a single atomic + operation. What we can do instead is group multiple entries into a commit + that shares a single checksum. This lets us update multiple unrelated pieces + of metadata as long as they reside on the same metadata pair. + + ``` + commit B and A' + .----------------.----------------. .----------------.----------------. 
+ | revision 1 | revision 0 | => | revision 1 | revision 0 | + |----------------|----------------| |----------------|----------------| + | A | | | A | | + |----------------| | |----------------| | + | checksum | | | checksum | | + |----------------| | |----------------| | + | | | | | B | | + | v | | |----------------| | + | | | | A' | | + | | | |----------------| | + | | | | checksum | | + | | | |----------------| | + '----------------'----------------' '----------------'----------------' + ``` + +2. If our block _is_ full of entries, we need to somehow remove outdated + entries to make space for new ones. This process is called garbage + collection, but because littlefs has multiple garbage collectors, we + also call this specific case compaction. + + Compared to other filesystems, littlefs's garbage collector is relatively + simple. We want to avoid RAM consumption, so we use a sort of brute force + solution where for each entry we check to see if a newer entry has been + written. If the entry is the most recent we append it to our new block. This + is where having two blocks becomes important, if we lose power we still have + everything in our original block. + + During this compaction step we also erase the metadata block and increment + the revision count. Because we can commit multiple entries at once, we can + write all of these changes to the second block without worrying about power + loss. It's only when the commit's checksum is written that the compacted + entries and revision count become committed and readable. + + ``` + commit B', need to compact + .----------------.----------------. .----------------.----------------. 
+ | revision 1 | revision 0 | => | revision 1 | revision 2 | + |----------------|----------------| |----------------|----------------| + | A | | | A | A' | + |----------------| | |----------------|----------------| + | checksum | | | checksum | B' | + |----------------| | |----------------|----------------| + | B | | | B | checksum | + |----------------| | |----------------|----------------| + | A' | | | A' | | | + |----------------| | |----------------| v | + | checksum | | | checksum | | + |----------------| | |----------------| | + '----------------'----------------' '----------------'----------------' + ``` + +3. If our block is full of entries _and_ we can't find any garbage, then what? + At this point, most logging filesystems would return an error indicating no + more space is available, but because we have small logs, overflowing a log + isn't really an error condition. + + Instead, we split our original metadata pair into two metadata pairs, each + containing half of the entries, connected by a tail pointer. Instead of + increasing the size of the log and dealing with the scalability issues + associated with larger logs, we form a linked list of small bounded logs. + This is a tradeoff as this approach does use more storage space, but at the + benefit of improved scalability. + + Despite writing to two metadata pairs, we can still maintain power + resilience during this split step by first preparing the new metadata pair, + and then inserting the tail pointer during the commit to the original + metadata pair. + + ``` + commit C and D, need to split + .----------------.----------------. .----------------.----------------. 
+ | revision 1 | revision 2 | => | revision 3 | revision 2 | + |----------------|----------------| |----------------|----------------| + | A | A' | | A' | A' | + |----------------|----------------| |----------------|----------------| + | checksum | B' | | B' | B' | + |----------------|----------------| |----------------|----------------| + | B | checksum | | tail ---------------------. + |----------------|----------------| |----------------|----------------| | + | A' | | | | checksum | | | + |----------------| v | |----------------| | | + | checksum | | | | | | | + |----------------| | | v | | | + '----------------'----------------' '----------------'----------------' | + .----------------.---------' + v v + .----------------.----------------. + | revision 1 | revision 0 | + |----------------|----------------| + | C | | + |----------------| | + | D | | + |----------------| | + | checksum | | + |----------------| | + | | | | + | v | | + | | | + | | | + '----------------'----------------' + ``` + +There is another complexity that crops up when dealing with small logs. The +amortized runtime cost of garbage collection is not only dependent on its +one time cost (_O(n²)_ for littlefs), but also depends on how often +garbage collection occurs. + +Consider two extremes: + +1. Log is empty, garbage collection occurs once every _n_ updates +2. Log is full, garbage collection occurs **every** update + +Clearly we need to be more aggressive than waiting for our metadata pair to +be full. As the metadata pair approaches fullness the frequency of compactions +grows very rapidly. + +Looking at the problem generically, consider a log with ![n] bytes for each +entry, ![d] dynamic entries (entries that are outdated during garbage +collection), and ![s] static entries (entries that need to be copied during +garbage collection). 
If we look at the amortized runtime complexity of updating +this log we get this formula: + +![cost = n + n (s / d+1)][metadata-formula1] + +If we let ![r] be the ratio of static space to the size of our log in bytes, we +find an alternative representation of the number of static and dynamic entries: + +![s = r (size/n)][metadata-formula2] + +![d = (1 - r) (size/n)][metadata-formula3] + +Substituting these in for ![d] and ![s] gives us a nice formula for the cost of +updating an entry given how full the log is: + +![cost = n + n (r (size/n) / ((1-r) (size/n) + 1))][metadata-formula4] + +Assuming 100 byte entries in a 4 KiB log, we can graph this using the entry +size to find a multiplicative cost: + +![Metadata pair update cost graph][metadata-cost-graph] + +So at 50% usage, we're seeing an average of 2x cost per update, and at 75% +usage, we're already at an average of 4x cost per update. + +To avoid this exponential growth, instead of waiting for our metadata pair +to be full, we split the metadata pair once we exceed 50% capacity. We do this +lazily, waiting until we need to compact before checking if we fit in our 50% +limit. This limits the overhead of garbage collection to 2x the runtime cost, +giving us an amortized runtime complexity of _O(1)_. + +--- + +If we look at metadata pairs and linked-lists of metadata pairs at a high +level, they have fairly nice runtime costs. Assuming _n_ metadata pairs, +each containing _m_ metadata entries, the _lookup_ cost for a specific +entry has a worst case runtime complexity of _O(nm)_. For _updating_ a specific +entry, the worst case complexity is _O(nm²)_, with an amortized complexity +of only _O(nm)_. + +However, splitting at 50% capacity does mean that in the best case our +metadata pairs will only be 1/2 full. If we include the overhead of the second +block in our metadata pair, each metadata entry has an effective storage cost +of 4x the original size. 
I imagine users would not be happy if they found +that they can only use a quarter of their original storage. Metadata pairs +provide a mechanism for performing atomic updates, but we need a separate +mechanism for storing the bulk of our data. ## CTZ skip-lists -There are many different data structures for representing the actual -files in filesystems. Of these, the littlefs uses a rather unique [COW](https://upload.wikimedia.org/wikipedia/commons/0/0c/Cow_female_black_white.jpg) -data structure that allows the filesystem to reuse unmodified parts of the -file without additional metadata pairs. - -First lets consider storing files in a simple linked-list. What happens when we -append a block? We have to change the last block in the linked-list to point -to this new block, which means we have to copy out the last block, and change -the second-to-last block, and then the third-to-last, and so on until we've -copied out the entire file. +Metadata pairs provide efficient atomic updates but unfortunately have a large +storage cost. But we can work around this storage cost by only using the +metadata pairs to store references to more dense, copy-on-write (COW) data +structures. + +[Copy-on-write data structures][wikipedia-cow], also called purely functional +data structures, are a category of data structures where the underlying +elements are immutable. Making changes to the data requires creating new +elements containing a copy of the updated data and replacing any references +with references to the new elements. Generally, the performance of a COW data +structure depends on how many old elements can be reused after replacing parts +of the data. + +littlefs has several requirements of its COW structures. They need to be +efficient to read and write, but most frustrating, they need to be traversable +with a constant amount of RAM. 
Notably this rules out +[B-trees][wikipedia-B-tree], which can not be traversed with constant RAM, and +[B+-trees][wikipedia-B+-tree], which are not possible to update with COW +operations. + +--- + +So, what can we do? First let's consider storing files in a simple COW +linked-list. Appending a block, which is the basis for writing files, means we +have to update the last block to point to our new block. This requires a COW +operation, which means we need to update the second-to-last block, and then the +third-to-last, and so on until we've copied out the entire file. ``` -Exhibit A: A linked-list +A linked-list .--------. .--------. .--------. .--------. .--------. .--------. | data 0 |->| data 1 |->| data 2 |->| data 4 |->| data 5 |->| data 6 | | | | | | | | | | | | | @@ -223,17 +593,15 @@ Exhibit A: A linked-list '--------' '--------' '--------' '--------' '--------' '--------' ``` -To get around this, the littlefs, at its heart, stores files backwards. Each -block points to its predecessor, with the first block containing no pointers. -If you think about for a while, it starts to make a bit of sense. Appending -blocks just point to their predecessor and no other blocks need to be updated. -If we update a block in the middle, we will need to copy out the blocks that -follow, but can reuse the blocks before the modified block. Since most file -operations either reset the file each write or append to files, this design -avoids copying the file in the most common cases. +To avoid a full copy during appends, we can store the data backwards. Appending +blocks just requires adding the new block and no other blocks need to be +updated. If we update a block in the middle, we still need to copy the +following blocks, but can reuse any blocks before it. Since most file writes +are linear, this design gambles that appends are the most common type of data +update. ``` -Exhibit B: A backwards linked-list +A backwards linked-list .--------. .--------. .--------. .--------. 
.--------. .--------. | data 0 |<-| data 1 |<-| data 2 |<-| data 4 |<-| data 5 |<-| data 6 | | | | | | | | | | | | | @@ -241,25 +609,28 @@ Exhibit B: A backwards linked-list '--------' '--------' '--------' '--------' '--------' '--------' ``` -However, a backwards linked-list does come with a rather glaring problem. -Iterating over a file _in order_ has a runtime cost of O(n^2). Gah! A quadratic -runtime to just _read_ a file? That's awful. Keep in mind reading files is -usually the most common filesystem operation. +However, a backwards linked-list does have a rather glaring problem. Iterating +over a file _in order_ has a runtime cost of _O(n²)_. A quadratic runtime +just to read a file! That's awful. + +Fortunately we can do better. Instead of a singly linked list, littlefs +uses a multilayered linked-list often called a +[skip-list][wikipedia-skip-list]. However, unlike the most common type of +skip-list, littlefs's skip-lists are strictly deterministic built around some +interesting properties of the count-trailing-zeros (CTZ) instruction. -To avoid this problem, the littlefs uses a multilayered linked-list. For -every nth block where n is divisible by 2^x, the block contains a pointer -to block n-2^x. So each block contains anywhere from 1 to log2(n) pointers -that skip to various sections of the preceding list. If you're familiar with -data-structures, you may have recognized that this is a type of deterministic +The rules CTZ skip-lists follow are that for every _n_‍th block where _n_ +is divisible by 2‍_ˣ_, that block contains a pointer to block +_n_-2‍_ˣ_. This means that each block contains anywhere from 1 to +log₂_n_ pointers that skip to different preceding elements of the skip-list. -The name comes from the use of the -[count trailing zeros (CTZ)](https://en.wikipedia.org/wiki/Count_trailing_zeros) -instruction, which allows us to calculate the power-of-two factors efficiently. -For a given block n, the block contains ctz(n)+1 pointers. 
+The name comes from heavy use of the [CTZ instruction][wikipedia-ctz], which +lets us calculate the power-of-two factors efficiently. For a given block _n_, +that block contains ctz(_n_)+1 pointers. ``` -Exhibit C: A backwards CTZ skip-list +A backwards CTZ skip-list .--------. .--------. .--------. .--------. .--------. .--------. | data 0 |<-| data 1 |<-| data 2 |<-| data 3 |<-| data 4 |<-| data 5 | | |<-| |--| |<-| |--| | | | @@ -267,11 +638,11 @@ Exhibit C: A backwards CTZ skip-list '--------' '--------' '--------' '--------' '--------' '--------' ``` -The additional pointers allow us to navigate the data-structure on disk -much more efficiently than in a singly linked-list. +The additional pointers let us navigate the data-structure on disk much more +efficiently than in a singly linked list. -Taking exhibit C for example, here is the path from data block 5 to data -block 1. You can see how data block 3 was completely skipped: +Consider a path from data block 5 to data block 1. You can see how data block 3 +was completely skipped: ``` .--------. .--------. .--------. .--------. .--------. .--------. | data 0 | | data 1 |<-| data 2 | | data 3 | | data 4 |<-| data 5 | @@ -280,7 +651,7 @@ block 1. You can see how data block 3 was completely skipped: '--------' '--------' '--------' '--------' '--------' '--------' ``` -The path to data block 0 is even more quick, requiring only two jumps: +The path to data block 0 is even faster, requiring only two jumps: ``` .--------. .--------. .--------. .--------. .--------. .--------. | data 0 | | data 1 | | data 2 | | data 3 | | data 4 |<-| data 5 | @@ -291,193 +662,171 @@ The path to data block 0 is even more quick, requiring only two jumps: We can find the runtime complexity by looking at the path to any block from the block containing the most pointers. Every step along the path divides -the search space for the block in half. This gives us a runtime of O(log n).
-To get to the block with the most pointers, we can perform the same steps -backwards, which puts the runtime at O(2 log n) = O(log n). The interesting -part about this data structure is that this optimal path occurs naturally -if we greedily choose the pointer that covers the most distance without passing -our target block. - -So now we have a representation of files that can be appended trivially with -a runtime of O(1), and can be read with a worst case runtime of O(n log n). -Given that the the runtime is also divided by the amount of data we can store -in a block, this is pretty reasonable. - -Unfortunately, the CTZ skip-list comes with a few questions that aren't -straightforward to answer. What is the overhead? How do we handle more -pointers than we can store in a block? How do we store the skip-list in -a directory entry? - -One way to find the overhead per block is to look at the data structure as -multiple layers of linked-lists. Each linked-list skips twice as many blocks -as the previous linked-list. Another way of looking at it is that each -linked-list uses half as much storage per block as the previous linked-list. -As we approach infinity, the number of pointers per block forms a geometric -series. Solving this geometric series gives us an average of only 2 pointers -per block. - -![overhead_per_block](https://latex.codecogs.com/svg.latex?%5Clim_%7Bn%5Cto%5Cinfty%7D%5Cfrac%7B1%7D%7Bn%7D%5Csum_%7Bi%3D0%7D%5E%7Bn%7D%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%20%3D%20%5Csum_%7Bi%3D0%7D%5Cfrac%7B1%7D%7B2%5Ei%7D%20%3D%202) - -Finding the maximum number of pointers in a block is a bit more complicated, -but since our file size is limited by the integer width we use to store the -size, we can solve for it. Setting the overhead of the maximum pointers equal -to the block size we get the following equation. Note that a smaller block size -results in more pointers, and a larger word width results in larger pointers. 
- -![maximum overhead](https://latex.codecogs.com/svg.latex?B%20%3D%20%5Cfrac%7Bw%7D%7B8%7D%5Cleft%5Clceil%5Clog_2%5Cleft%28%5Cfrac%7B2%5Ew%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D%5Cright%29%5Cright%5Crceil) - -where: -B = block size in bytes -w = word width in bits - -Solving the equation for B gives us the minimum block size for various word -widths: -32 bit CTZ skip-list = minimum block size of 104 bytes -64 bit CTZ skip-list = minimum block size of 448 bytes - -Since littlefs uses a 32 bit word size, we are limited to a minimum block -size of 104 bytes. This is a perfectly reasonable minimum block size, with most -block sizes starting around 512 bytes. So we can avoid additional logic to -avoid overflowing our block's capacity in the CTZ skip-list. - -So, how do we store the skip-list in a directory entry? A naive approach would -be to store a pointer to the head of the skip-list, the length of the file -in bytes, the index of the head block in the skip-list, and the offset in the -head block in bytes. However this is a lot of information, and we can observe -that a file size maps to only one block index + offset pair. So it should be -sufficient to store only the pointer and file size. - -But there is one problem, calculating the block index + offset pair from a -file size doesn't have an obvious implementation. - -We can start by just writing down an equation. The first idea that comes to -mind is to just use a for loop to sum together blocks until we reach our -file size. We can write this equation as a summation: - -![summation1](https://latex.codecogs.com/svg.latex?N%20%3D%20%5Csum_i%5En%5Cleft%5BB-%5Cfrac%7Bw%7D%7B8%7D%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%5Cright%5D) - -where: -B = block size in bytes -w = word width in bits -n = block index in skip-list -N = file size in bytes - -And this works quite well, but is not trivial to calculate. This equation -requires O(n) to compute, which brings the entire runtime of reading a file -to O(n^2 log n). 
Fortunately, the additional O(n) does not need to touch disk, -so it is not completely unreasonable. But if we could solve this equation into -a form that is easily computable, we can avoid a big slowdown. - -Unfortunately, the summation of the CTZ instruction presents a big challenge. -How would you even begin to reason about integrating a bitwise instruction? -Fortunately, there is a powerful tool I've found useful in these situations: -The [On-Line Encyclopedia of Integer Sequences (OEIS)](https://oeis.org/). -If we work out the first couple of values in our summation, we find that CTZ -maps to [A001511](https://oeis.org/A001511), and its partial summation maps -to [A005187](https://oeis.org/A005187), and surprisingly, both of these -sequences have relatively trivial equations! This leads us to a rather -unintuitive property: - -![mindblown](https://latex.codecogs.com/svg.latex?%5Csum_i%5En%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%20%3D%202n-%5Ctext%7Bpopcount%7D%28n%29) - -where: -ctz(x) = the number of trailing bits that are 0 in x -popcount(x) = the number of bits that are 1 in x - -It's a bit bewildering that these two seemingly unrelated bitwise instructions -are related by this property. But if we start to dissect this equation we can -see that it does hold. As n approaches infinity, we do end up with an average -overhead of 2 pointers as we find earlier. And popcount seems to handle the -error from this average as it accumulates in the CTZ skip-list. - -Now we can substitute into the original equation to get a trivial equation -for a file size: - -![summation2](https://latex.codecogs.com/svg.latex?N%20%3D%20Bn%20-%20%5Cfrac%7Bw%7D%7B8%7D%5Cleft%282n-%5Ctext%7Bpopcount%7D%28n%29%5Cright%29) - -Unfortunately, we're not quite done. The popcount function is non-injective, -so we can only find the file size from the block index, not the other way -around. 
However, we can solve for an n' block index that is greater than n -with an error bounded by the range of the popcount function. We can then -repeatedly substitute this n' into the original equation until the error -is smaller than the integer division. As it turns out, we only need to -perform this substitution once. Now we directly calculate our block index: - -![formulaforn](https://latex.codecogs.com/svg.latex?n%20%3D%20%5Cleft%5Clfloor%5Cfrac%7BN-%5Cfrac%7Bw%7D%7B8%7D%5Cleft%28%5Ctext%7Bpopcount%7D%5Cleft%28%5Cfrac%7BN%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D-1%5Cright%29+2%5Cright%29%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D%5Cright%5Crfloor) - -Now that we have our block index n, we can just plug it back into the above -equation to find the offset. However, we do need to rearrange the equation -a bit to avoid integer overflow: - -![formulaforoff](https://latex.codecogs.com/svg.latex?%5Cmathit%7Boff%7D%20%3D%20N%20-%20%5Cleft%28B-2%5Cfrac%7Bw%7D%7B8%7D%5Cright%29n%20-%20%5Cfrac%7Bw%7D%7B8%7D%5Ctext%7Bpopcount%7D%28n%29) - -The solution involves quite a bit of math, but computers are very good at math. -Now we can solve for both the block index and offset from the file size in O(1). - -Here is what it might look like to update a file stored with a CTZ skip-list: +the search space for the block in half, giving us a runtime of _O(log n)_. +To get _to_ the block with the most pointers, we can perform the same steps +backwards, which puts the runtime at _O(2 log n)_ = _O(log n)_. An interesting +note is that this optimal path occurs naturally if we greedily choose the +pointer that covers the most distance without passing our target. + +So now we have a [COW] data structure that is cheap to append with a runtime +of _O(1)_, and can be read with a worst case runtime of _O(n log n)_. Given +that this runtime is also divided by the amount of data we can store in a +block, this cost is fairly reasonable. + +--- + +This is a new data structure, so we still have several questions. 
What is the +storage overage? Can the number of pointers exceed the size of a block? How do +we store a CTZ skip-list in our metadata pairs? + +To find the storage overhead, we can look at the data structure as multiple +linked-lists. Each linked-list skips twice as many blocks as the previous, +or from another perspective, each linked-list uses half as much storage as +the previous. As we approach infinity, the storage overhead forms a geometric +series. Solving this tells us that on average our storage overhead is only +2 pointers per block. + +![lim,n->inf((1/n)sum,i,0->n(ctz(i)+1)) = sum,i,0->inf(1/2^i) = 2][ctz-formula1] + +Because our file size is limited the word width we use to store sizes, we can +also solve for the maximum number of pointers we would ever need to store in a +block. If we set the overhead of pointers equal to the block size, we get the +following equation. Note that both a smaller block size (![B][bigB]) and larger +word width (![w]) result in more storage overhead. + +![B = (w/8)ceil(log2(2^w / (B-2w/8)))][ctz-formula2] + +Solving the equation for ![B][bigB] gives us the minimum block size for some +common word widths: + +1. 32-bit CTZ skip-list => minimum block size of 104 bytes +2. 64-bit CTZ skip-list => minimum block size of 448 bytes + +littlefs uses a 32-bit word width, so our blocks can only overflow with +pointers if they are smaller than 104 bytes. This is an easy requirement, as +in practice, most block sizes start at 512 bytes. As long as our block size +is larger than 104 bytes, we can avoid the extra logic needed to handle +pointer overflow. + +This last question is how do we store CTZ skip-lists? We need a pointer to the +head block, the size of the skip-list, the index of the head block, and our +offset in the head block. But it's worth noting that each size maps to a unique +index + offset pair. So in theory we can store only a single pointer and size. 
+ +However, calculating the index + offset pair from the size is a bit +complicated. We can start with a summation that loops through all of the blocks +up until our given size. Let ![B][bigB] be the block size in bytes, ![w] be the +word width in bits, ![n] be the index of the block in the skip-list, and +![N][bigN] be the file size in bytes: + +![N = sum,i,0->n(B-(w/8)(ctz(i)+1))][ctz-formula3] + +This works quite well, but requires _O(n)_ to compute, which brings the full +runtime of reading a file up to _O(n² log n)_. Fortunately, that summation +doesn't need to touch the disk, so the practical impact is minimal. + +However, despite the integration of a bitwise operation, we can actually reduce +this equation to an _O(1)_ form. While browsing the amazing resource that is +the [On-Line Encyclopedia of Integer Sequences (OEIS)][oeis], I managed to find +[A001511], which matches the iteration of the CTZ instruction, +and [A005187], which matches its partial summation. Much to my +surprise, these both result from simple equations, leading us to a rather +unintuitive property that ties together two seemingly unrelated bitwise +instructions: + +![sum,i,0->n(ctz(i)+1) = 2n-popcount(n)][ctz-formula4] + +where: + +1. ctz(![x]) = the number of trailing bits that are 0 in ![x] +2. popcount(![x]) = the number of bits that are 1 in ![x] + +Initial tests of this surprising property seem to hold. As ![n] approaches +infinity, we end up with an average overhead of 2 pointers, which matches +our assumption from earlier. During iteration, the popcount function seems to +handle deviations from this average. Of course, just to make sure I wrote a +quick script that verified this property for all 32-bit integers. + +Now we can substitute into our original equation to find a more efficient +equation for file size: + +![N = Bn - (w/8)(2n-popcount(n))][ctz-formula5] + +Unfortunately, the popcount function is non-injective, so we can't solve this +equation for our index.
But what we can do is solve for an ![n'] index that +is greater than ![n] with error bounded by the range of the popcount function. +We can repeatedly substitute ![n'] into the original equation until the error +is smaller than our integer resolution. As it turns out, we only need to +perform this substitution once, which gives us this formula for our index: + +![n = floor((N-(w/8)popcount(N/(B-2w/8))) / (B-2w/8))][ctz-formula6] + +Now that we have our index ![n], we can just plug it back into the above +equation to find the offset. We run into a bit of a problem with integer +overflow, but we can avoid this by rearranging the equation a bit: + +![off = N - (B-2w/8)n - (w/8)popcount(n)][ctz-formula7] + +Our solution requires quite a bit of math, but computers are very good at math. +Now we can find both our block index and offset from a size in _O(1)_, letting +us store CTZ skip-lists with only a pointer and size. + +CTZ skip-lists give us a COW data structure that is easily traversable in +_O(n)_, can be appended in _O(1)_, and can be read in _O(n log n)_. All of +these operations work in a bounded amount of RAM and require only two words of +storage overhead per block. In combination with metadata pairs, CTZ skip-lists +provide power resilience and compact storage of data. + ``` - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 0 | - | file: 6 | file: 0 | - | size: 4 | size: 0 | - | xor: 3 | xor: 0 | - '---------'---------' - | + .--------. + .|metadata| + || | + || | + |'--------' + '----|---' v - block 3 block 4 block 5 block 6 .--------. .--------. .--------. .--------. | data 0 |<-| data 1 |<-| data 2 |<-| data 3 | | |<-| |--| | | | | | | | | | | | '--------' '--------' '--------' '--------' -| update data in file -v - - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 0 | - | file: 6 | file: 0 | - | size: 4 | size: 0 | - | xor: 3 | xor: 0 | - '---------'---------' - | +write data to disk, create copies +=> + .--------.
+ .|metadata| + || | + || | + |'--------' + '----|---' v - block 3 block 4 block 5 block 6 .--------. .--------. .--------. .--------. -| data 0 |<-| data 1 |<-| old |<-| old | -| |<-| |--| data 2 | | data 3 | +| data 0 |<-| data 1 |<-| data 2 |<-| data 3 | +| |<-| |--| | | | | | | | | | | | '--------' '--------' '--------' '--------' ^ ^ ^ - | | | block 7 block 8 block 9 block 10 | | | .--------. .--------. .--------. .--------. | | '----| new |<-| new |<-| new |<-| new | | '----------------| data 2 |<-| data 3 |--| data 4 | | data 5 | '------------------| |--| |--| | | | '--------' '--------' '--------' '--------' -| update metadata pair -v - - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 2 | - | file: 6 | file: 10| - | size: 4 | size: 6 | - | xor: 3 | xor: 14 | - '---------'---------' - | +commit to metadata pair +=> + .--------. + .|new | + ||metadata| + || | + |'--------' + '----|---' | - block 3 block 4 block 5 block 6 | .--------. .--------. .--------. .--------. | -| data 0 |<-| data 1 |<-| old |<-| old | | -| |<-| |--| data 2 | | data 3 | | +| data 0 |<-| data 1 |<-| data 2 |<-| data 3 | | +| |<-| |--| | | | | | | | | | | | | | '--------' '--------' '--------' '--------' | ^ ^ ^ v - | | | block 7 block 8 block 9 block 10 | | | .--------. .--------. .--------. .--------. | | '----| new |<-| new |<-| new |<-| new | | '----------------| data 2 |<-| data 3 |--| data 4 | | data 5 | @@ -485,68 +834,98 @@ v '--------' '--------' '--------' '--------' ``` -## Block allocation - -So those two ideas provide the grounds for the filesystem. The metadata pairs -give us directories, and the CTZ skip-lists give us files. But this leaves -one big [elephant](https://upload.wikimedia.org/wikipedia/commons/3/37/African_Bush_Elephant.jpg) -of a question. How do we get those blocks in the first place? - -One common strategy is to store unallocated blocks in a big free list, and -initially the littlefs was designed with this in mind. 
By storing a reference -to the free list in every single metadata pair, additions to the free list -could be updated atomically at the same time the replacement blocks were -stored in the metadata pair. During boot, every metadata pair had to be -scanned to find the most recent free list, but once the list was found the -state of all free blocks becomes known. - -However, this approach had several issues: - -- There was a lot of nuanced logic for adding blocks to the free list without - modifying the blocks, since the blocks remain active until the metadata is - updated. -- The free list had to support both additions and removals in FIFO order while - minimizing block erases. -- The free list had to handle the case where the file system completely ran - out of blocks and may no longer be able to add blocks to the free list. -- If we used a revision count to track the most recently updated free list, - metadata blocks that were left unmodified were ticking time bombs that would - cause the system to go haywire if the revision count overflowed. -- Every single metadata block wasted space to store these free list references. - -Actually, to simplify, this approach had one massive glaring issue: complexity. - -> Complexity leads to fallibility. -> Fallibility leads to unmaintainability. -> Unmaintainability leads to suffering. - -Or at least, complexity leads to increased code size, which is a problem -for embedded systems. - -In the end, the littlefs adopted more of a "drop it on the floor" strategy. -That is, the littlefs doesn't actually store information about which blocks -are free on the storage. The littlefs already stores which files _are_ in -use, so to find a free block, the littlefs just takes all of the blocks that -exist and subtract the blocks that are in use. - -Of course, it's not quite that simple. 
Most filesystems that adopt this "drop -it on the floor" strategy either rely on some properties inherent to the -filesystem, such as the cyclic-buffer structure of logging filesystems, -or use a bitmap or table stored in RAM to track free blocks, which scales -with the size of storage and is problematic when you have limited RAM. You -could iterate through every single block in storage and check it against -every single block in the filesystem on every single allocation, but that -would have an abhorrent runtime. - -So the littlefs compromises. It doesn't store a bitmap the size of the storage, -but it does store a little bit-vector that contains a fixed set lookahead -for block allocations. During a block allocation, the lookahead vector is -checked for any free blocks. If there are none, the lookahead region jumps -forward and the entire filesystem is scanned for free blocks. +## The block allocator + +So we now have the framework for an atomic, wear leveling filesystem. Small two +block metadata pairs provide atomic updates, while CTZ skip-lists provide +compact storage of data in COW blocks. + +But now we need to look at the [elephant] in the room. Where do all these +blocks come from? + +Deciding which block to use next is the responsibility of the block allocator. +In filesystem design, block allocation is often a second-class citizen, but in +a COW filesystem its role becomes much more important as it is needed for +nearly every write to the filesystem. + +Normally, block allocation involves some sort of free list or bitmap stored on +the filesystem that is updated with free blocks. However, with power +resilience, keeping these structures consistent becomes difficult. It doesn't +help that any mistake in updating these structures can result in lost blocks +that are impossible to recover. + +littlefs takes a cautious approach.
Instead of trusting a free list on disk, +littlefs relies on the fact that the filesystem on disk is a mirror image of +the free blocks on the disk. The block allocator operates much like a garbage +collector in a scripting language, scanning for unused blocks on demand. + +``` + .----. + |root| + | | + '----' + v-------' '-------v +.----. . . .----. +| A | . . | B | +| | . . | | +'----' . . '----' +. . . . v--' '------------v---------v +. . . .----. . .----. .----. +. . . | C | . | D | | E | +. . . | | . | | | | +. . . '----' . '----' '----' +. . . . . . . . . . +.----.----.----.----.----.----.----.----.----.----.----.----. +| A | |root| C | B | | D | | E | | +| | | | | | | | | | | +'----'----'----'----'----'----'----'----'----'----'----'----' + ^ ^ ^ ^ ^ + '-------------------'----'-------------------'----'-- free blocks +``` + +While this approach may sound complicated, the decision to not maintain a free +list greatly simplifies the overall design of littlefs. Unlike programming +languages, there are only a handful of data structures we need to traverse. +And block deallocation, which occurs nearly as often as block allocation, +is simply a noop. This "drop it on the floor" strategy greatly reduces the +complexity of managing on disk data structures, especially when handling +high-risk error conditions. + +--- + +Our block allocator needs to find free blocks efficiently. You could traverse +through every block on storage and check each one against our filesystem tree, +however the runtime would be abhorrent. We need to somehow collect multiple +blocks each traversal. + +Looking at existing designs, some larger filesystems that use a similar "drop +it on the floor" strategy store a bitmap of the entire storage in [RAM]. This +works well because bitmaps are surprisingly compact. We can't use the same +strategy here, as it violates our constant RAM requirement, but we may be able +to modify the idea into a workable solution. 
+ +``` +.----.----.----.----.----.----.----.----.----.----.----.----. +| A | |root| C | B | | D | | E | | +| | | | | | | | | | | +'----'----'----'----'----'----'----'----'----'----'----'----' + 1 0 1 1 1 0 0 1 0 1 0 0 + \---------------------------+----------------------------/ + v + bitmap: 0xb94 (0b101110010100) +``` + +The block allocator in littlefs is a compromise between a disk-sized bitmap and +a brute force traversal. Instead of a bitmap the size of storage, we keep track +of a small, fixed-size bitmap called the lookahead buffer. During block +allocation, we take blocks from the lookahead buffer. If the lookahead buffer +is empty, we scan the filesystem for more free blocks, populating our lookahead +buffer. Each scan we use an increasing offset, circling the storage as blocks +are allocated. Here's what it might look like to allocate 4 blocks on a decently busy -filesystem with a 32bit lookahead and a total of -128 blocks (512Kbytes of storage if blocks are 4Kbyte): +filesystem with a 32 bit lookahead and a total of 128 blocks (512 KiB +of storage if blocks are 4 KiB): ``` boot... lookahead: fs blocks: fffff9fffffffffeffffffffffff0000 @@ -570,40 +949,557 @@ alloc = 112 lookahead: ffff8000 fs blocks: ffffffffffffffffffffffffffff8000 ``` -While this lookahead approach still has an asymptotic runtime of O(n^2) to -scan all of storage, the lookahead reduces the practical runtime to a -reasonable amount. Bit-vectors are surprisingly compact, given only 16 bytes, -the lookahead could track 128 blocks. For a 4Mbyte flash chip with 4Kbyte -blocks, the littlefs would only need 8 passes to scan the entire storage. +This lookahead approach has a runtime complexity of _O(n²)_ to completely +scan storage, however, bitmaps are surprisingly compact, and in practice only +one or two passes are usually needed to find free blocks. 
Additionally, the +performance of the allocator can be optimized by adjusting the block size or +size of the lookahead buffer, trading either write granularity or RAM for +allocator performance. + +## Wear leveling + +The block allocator has a secondary role: wear leveling. + +Wear leveling is the process of distributing wear across all blocks in the +storage to prevent the filesystem from experiencing an early death due to +wear on a single block in the storage. + +littlefs has two methods of protecting against wear: +1. Detection and recovery from bad blocks +2. Evenly distributing wear across dynamic blocks + +--- + +Recovery from bad blocks doesn't actually have anything to do with the block +allocator itself. Instead, it relies on the ability of the filesystem to detect +and evict bad blocks when they occur. + +In littlefs, it is fairly straightforward to detect bad blocks at write time. +All writes must be sourced by some form of data in RAM, so immediately after we +write to a block, we can read the data back and verify that it was written +correctly. If we find that the data on disk does not match the copy we have in +RAM, a write error has occurred and we most likely have a bad block. + +Once we detect a bad block, we need to recover from it. In the case of write +errors, we have a copy of the corrupted data in RAM, so all we need to do is +evict the bad block, allocate a new, hopefully good block, and repeat the write +that previously failed. + +The actual act of evicting the bad block and replacing it with a new block is +left up to the filesystem's copy-on-bounded-writes (CObW) data structures. One +property of CObW data structures is that any block can be replaced during a +COW operation. The bounded-writes part is normally triggered by a counter, but +nothing prevents us from triggering a COW operation as soon as we find a bad +block. + +``` + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. 
+| A | | B | +| | | | +'----' '----' +. . v---' . +. . .----. . +. . | C | . +. . | | . +. . '----' . +. . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| | C | B | | +| | | | | | | +'----'----'----'----'----'----'----'----'----'----' + +update C +=> + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. +| A | | B | +| | | | +'----' '----' +. . v---' . +. . .----. . +. . |bad | . +. . |blck| . +. . '----' . +. . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| |bad | B | | +| | | |blck| | | +'----'----'----'----'----'----'----'----'----'----' + +oh no! bad block! relocate C +=> + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. +| A | | B | +| | | | +'----' '----' +. . v---' . +. . .----. . +. . |bad | . +. . |blck| . +. . '----' . +. . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| |bad | B |bad | | +| | | |blck| |blck| | +'----'----'----'----'----'----'----'----'----'----' + ---------> +oh no! bad block! relocate C +=> + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. +| A | | B | +| | | | +'----' '----' +. . v---' . +. . .----. . .----. +. . |bad | . | C' | +. . |blck| . | | +. . '----' . '----' +. . . . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| |bad | B |bad | C' | | +| | | |blck| |blck| | | +'----'----'----'----'----'----'----'----'----'----' + --------------> +successfully relocated C, update B +=> + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. +| A | |bad | +| | |blck| +'----' '----' +. . v---' . +. . .----. . .----. +. . |bad | . | C' | +. . |blck| . | | +. . '----' . '----' +. . . . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| |bad |bad |bad | C' | | +| | | |blck|blck|blck| | | +'----'----'----'----'----'----'----'----'----'----' + +oh no! bad block! relocate B +=> + .----. 
+ |root| + | | + '----' + v--' '----------------------v +.----. .----. .----. +| A | |bad | |bad | +| | |blck| |blck| +'----' '----' '----' +. . v---' . . . +. . .----. . .----. . +. . |bad | . | C' | . +. . |blck| . | | . +. . '----' . '----' . +. . . . . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| |bad |bad |bad | C' |bad | +| | | |blck|blck|blck| |blck| +'----'----'----'----'----'----'----'----'----'----' + --------------> +oh no! bad block! relocate B +=> + .----. + |root| + | | + '----' + v--' '----------------------v +.----. .----. .----. +| A | | B' | |bad | +| | | | |blck| +'----' '----' '----' +. . . | . .---' . +. . . '--------------v-------------v +. . . . .----. . .----. +. . . . |bad | . | C' | +. . . . |blck| . | | +. . . . '----' . '----' +. . . . . . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| B' | |bad |bad |bad | C' |bad | +| | | | |blck|blck|blck| |blck| +'----'----'----'----'----'----'----'----'----'----' +------------> ------------------ +successfully relocated B, update root +=> + .----. + |root| + | | + '----' + v--' '--v +.----. .----. +| A | | B' | +| | | | +'----' '----' +. . . '---------------------------v +. . . . .----. +. . . . | C' | +. . . . | | +. . . . '----' +. . . . . . +.----.----.----.----.----.----.----.----.----.----. +| A |root| B' | |bad |bad |bad | C' |bad | +| | | | |blck|blck|blck| |blck| +'----'----'----'----'----'----'----'----'----'----' +``` + +We may find that the new block is also bad, but hopefully after repeating this +cycle we'll eventually find a new block where a write succeeds. If we don't, +that means that all blocks in our storage are bad, and we've reached the end of +our device's usable life. At this point, littlefs will return an "out of space" +error, which is technically true, there are no more good blocks, but as an +added benefit also matches the error condition expected by users of dynamically +sized data. 
+ +--- + +Read errors, on the other hand, are quite a bit more complicated. We don't have +a copy of the data lingering around in RAM, so we need a way to reconstruct the +original data even after it has been corrupted. One such mechanism for this is +[error-correction-codes (ECC)][wikipedia-ecc]. + +ECC is an extension to the idea of a checksum. Where a checksum such as CRC can +detect that an error has occurred in the data, ECC can detect and actually +correct some amount of errors. However, there is a limit to how many errors ECC +can detect, called the [Hamming bound][wikipedia-hamming-bound]. As the number of +errors approaches the Hamming bound, we may still be able to detect errors, but +can no longer fix the data. If we've reached this point the block is +unrecoverable. + +littlefs by itself does **not** provide ECC. The block nature and relatively +large footprint of ECC do not work well with the dynamically sized data of +filesystems, correcting errors without RAM is complicated, and ECC fits better +with the geometry of block devices. In fact, several NOR flash chips have extra +storage intended for ECC, and many NAND chips can even calculate ECC on the +chip itself. + +In littlefs, ECC is entirely optional. Read errors can instead be prevented +proactively by wear leveling. But it's important to note that ECC can be used +at the block device level to modestly extend the life of a device. littlefs +respects any errors reported by the block device, allowing a block device to +provide additional aggressive error detection. + +--- + +To avoid read errors, we need to be proactive, as opposed to reactive as we +were with write errors. + +One way to do this is to detect when the number of errors in a block exceeds +some threshold, but is still recoverable. With ECC we can do this at write +time, and treat the error as a write error, evicting the block before fatal +read errors have a chance to develop. 
+ +A different, more generic strategy, is to proactively distribute wear across +all blocks in the storage, with the hope that no single block fails before the +rest of storage is approaching the end of its usable life. This is called +wear leveling. + +Generally, wear leveling algorithms fall into one of two categories: + +1. [Dynamic wear leveling][wikipedia-dynamic-wear-leveling], where we + distribute wear over "dynamic" blocks. This can be accomplished by + only considering unused blocks. + +2. [Static wear leveling][wikipedia-static-wear-leveling], where we + distribute wear over both "dynamic" and "static" blocks. To make this work, + we need to consider all blocks, including blocks that already contain data. + +As a tradeoff for code size and complexity, littlefs (currently) only provides +dynamic wear leveling. This is a best effort solution. Wear is not distributed +perfectly, but it is distributed among the free blocks and greatly extends the +life of a device. + +On top of this, littlefs uses a statistical wear leveling algorithm. What this +means is that we don't actively track wear, instead we rely on a uniform +distribution of wear across storage to approximate a dynamic wear leveling +algorithm. Despite the long name, this is actually a simplification of dynamic +wear leveling. + +The uniform distribution of wear is left up to the block allocator, which +creates a uniform distribution in two parts. The easy part is when the device +is powered, in which case we allocate the blocks linearly, circling the device. +The harder part is what to do when the device loses power. We can't just +restart the allocator at the beginning of storage, as this would bias the wear. +Instead, we start the allocator at a random offset every time we mount the +filesystem. As long as this random offset is uniform, the combined allocation +pattern is also a uniform distribution. 
+ +![Cumulative wear distribution graph][wear-distribution-graph] + +Initially, this approach to wear leveling looks like it creates a difficult +dependency on a power-independent random number generator, which must return +different random numbers on each boot. However, the filesystem is in a +relatively unique situation in that it is sitting on top of a large amount +of entropy that persists across power loss. + +We can actually use the data on disk to directly drive our random number +generator. In practice, this is implemented by xoring the checksums of each +metadata pair, which are already calculated to fetch and mount the filesystem. + +``` + .--------. \ probably random + .|metadata| | ^ + || | +-> crc ----------------------> xor + || | | ^ + |'--------' / | + '---|--|-' | + .-' '-------------------------. | + | | | + | .--------------> xor ------------> xor + | | ^ | ^ + v crc crc v crc + .--------. \ ^ .--------. \ ^ .--------. \ ^ + .|metadata|-|--|-->|metadata| | | .|metadata| | | + || | +--' || | +--' || | +--' + || | | || | | || | | + |'--------' / |'--------' / |'--------' / + '---|--|-' '----|---' '---|--|-' + .-' '-. | .-' '-. + v v v v v +.--------. .--------. .--------. .--------. .--------. +| data | | data | | data | | data | | data | +| | | | | | | | | | +| | | | | | | | | | +'--------' '--------' '--------' '--------' '--------' +``` + +Note that this random number generator is not perfect. It only returns unique +random numbers when the filesystem is modified. This is exactly what we want +for distributing wear in the allocator, but means this random number generator +is not useful for general use. + +--- + +Together, bad block detection and dynamic wear leveling provide a best effort +solution for avoiding the early death of a filesystem due to wear. Importantly, +littlefs's wear leveling algorithm provides a key feature: You can increase the +life of a device simply by increasing the size of storage. 
And if more +aggressive wear leveling is desired, you can always combine littlefs with a +[flash translation layer (FTL)][wikipedia-ftl] to get a small power resilient +filesystem with static wear leveling. + +## Files + +Now that we have our building blocks out of the way, we can start looking at +our filesystem as a whole. + +The first step: How do we actually store our files? + +We've determined that CTZ skip-lists are pretty good at storing data compactly, +so following the precedent found in other filesystems we could give each file +a skip-list stored in a metadata pair that acts as an inode for the file. + + +``` + .--------. + .|metadata| + || | + || | + |'--------' + '----|---' + v +.--------. .--------. .--------. .--------. +| data 0 |<-| data 1 |<-| data 2 |<-| data 3 | +| |<-| |--| | | | +| | | | | | | | +'--------' '--------' '--------' '--------' +``` + +However, this doesn't work well when files are small, which is common for +embedded systems. Compared to PCs, _all_ data in an embedded system is small. + +Consider a small 4-byte file. With a two block metadata-pair and one block for +the CTZ skip-list, we find ourselves using a full 3 blocks. On most NOR flash +with 4 KiB blocks, this is 12 KiB of overhead. A ridiculous 3072x increase. + +``` +file stored as inode, 4 bytes costs ~12 KiB + + .----------------. \ +.| revision | | +||----------------| \ | +|| skiplist ---. +- metadata | +||----------------| | / 4x8 bytes | +|| checksum | | 32 bytes | +||----------------| | | +|| | | | +- metadata pair +|| v | | | 2x4 KiB +|| | | | 8 KiB +|| | | | +|| | | | +|| | | | +|'----------------' | | +'----------------' | / + .--------' + v + .----------------. \ \ + | data | +- data | + |----------------| / 4 bytes | + | | | + | | | + | | | + | | +- data block + | | | 4 KiB + | | | + | | | + | | | + | | | + | | | + '----------------' / +``` + +We can make several improvements. 
First, instead of giving each file its own +metadata pair, we can store multiple files in a single metadata pair. One way +to do this is to directly associate a directory with a metadata pair (or a +linked list of metadata pairs). This makes it easy for multiple files to share +the directory's metadata pair for logging and reduce the collective storage +overhead. + +The strict binding of metadata pairs and directories also gives users +direct control over storage utilization depending on how they organize their +directories. + +``` +multiple files stored in metadata pair, 4 bytes costs ~4 KiB + + .----------------. + .| revision | + ||----------------| + || A name | + || A skiplist -----. + ||----------------| | \ + || B name | | +- metadata + || B skiplist ---. | | 4x8 bytes + ||----------------| | | / 32 bytes + || checksum | | | + ||----------------| | | + || | | | | + || v | | | + |'----------------' | | + '----------------' | | + .----------------' | + v v +.----------------. .----------------. \ \ +| A data | | B data | +- data | +| | |----------------| / 4 bytes | +| | | | | +| | | | | +| | | | | +| | | | + data block +| | | | | 4 KiB +| | | | | +|----------------| | | | +| | | | | +| | | | | +| | | | | +'----------------' '----------------' / +``` + +The second improvement we can make is noticing that for very small files, our +attempts to use CTZ skip-lists for compact storage backfires. Metadata pairs +have a ~4x storage cost, so if our file is smaller than 1/4 the block size, +there's actually no benefit in storing our file outside of our metadata pair. + +In this case, we can store the file directly in our directory's metadata pair. +We call this an inline file, and it allows a directory to store many small +files quite efficiently. Our previous 4 byte file now only takes up a +theoretical 16 bytes on disk. + +``` +inline files stored in metadata pair, 4 bytes costs ~16 bytes + + .----------------. 
+.| revision | +||----------------| +|| A name | +|| A skiplist ---. +||----------------| | \ +|| B name | | +- data +|| B data | | | 4x4 bytes +||----------------| | / 16 bytes +|| checksum | | +||----------------| | +|| | | | +|| v | | +|'----------------' | +'----------------' | + .---------' + v + .----------------. + | A data | + | | + | | + | | + | | + | | + | | + | | + |----------------| + | | + | | + | | + '----------------' +``` + +Once the file exceeds 1/4 the block size, we switch to a CTZ skip-list. This +means that our files never use more than 4x storage overhead, decreasing as +the file grows in size. -The real benefit of this approach is just how much it simplified the design -of the littlefs. Deallocating blocks is as simple as simply forgetting they -exist, and there is absolutely no concern of bugs in the deallocation code -causing difficult to detect memory leaks. +![File storage cost graph][file-cost-graph] ## Directories -Now we just need directories to store our files. Since we already have -metadata blocks that store information about files, lets just use these -metadata blocks as the directories. Maybe turn the directories into linked -lists of metadata blocks so it isn't limited by the number of files that fit -in a single block. Add entries that represent other nested directories. -Drop "." and ".." entries, cause who needs them. Dust off our hands and -we now have a directory tree. +Now we just need directories to store our files. As mentioned above we want +a strict binding of directories and metadata pairs, but there are a few +complications we need to sort out. + +On their own, each directory is a linked-list of metadata pairs. This lets us +store an unlimited number of files in each directory, and we don't need to +worry about the runtime complexity of unbounded logs. We can store other +directory pointers in our metadata pairs, which gives us a directory tree, much +like what you find on other filesystems. ``` .--------. 
- |root dir| - | pair 0 | - | | - '--------' + .| root | + || | + || | + |'--------' + '---|--|-' .-' '-------------------------. v v .--------. .--------. .--------. - | dir A |------->| dir A | | dir B | - | pair 0 | | pair 1 | | pair 0 | - | | | | | | - '--------' '--------' '--------' + .| dir A |------->| dir A | .| dir B | + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '---|--|-' '----|---' '---|--|-' .-' '-. | .-' '-. v v v v v .--------. .--------. .--------. .--------. .--------. @@ -613,34 +1509,30 @@ we now have a directory tree. '--------' '--------' '--------' '--------' '--------' ``` -Unfortunately it turns out it's not that simple. See, iterating over a -directory tree isn't actually all that easy, especially when you're trying -to fit in a bounded amount of RAM, which rules out any recursive solution. -And since our block allocator involves iterating over the entire filesystem -tree, possibly multiple times in a single allocation, iteration needs to be -efficient. - -So, as a solution, the littlefs adopted a sort of threaded tree. Each -directory not only contains pointers to all of its children, but also a -pointer to the next directory. These pointers create a linked-list that -is threaded through all of the directories in the filesystem. Since we -only use this linked list to check for existence, the order doesn't actually -matter. As an added plus, we can repurpose the pointer for the individual -directory linked-lists and avoid using any additional space. +The main complication is, once again, traversal with a constant amount of +[RAM]. The directory tree is a tree, and the unfortunate fact is you can't +traverse a tree with constant RAM. + +Fortunately, the elements of our tree are metadata pairs, so unlike CTZ +skip-lists, we're not limited to strict COW operations. One thing we can do is +thread a linked-list through our tree, explicitly enabling cheap traversal +over the entire filesystem. ``` .--------. 
- |root dir|-. - | pair 0 | | - .--------| |-' - | '--------' + .| root |-. + || | | + .-------|| |-' + | |'--------' + | '---|--|-' | .-' '-------------------------. | v v | .--------. .--------. .--------. '->| dir A |------->| dir A |------->| dir B | - | pair 0 | | pair 1 | | pair 0 | - | | | | | | - '--------' '--------' '--------' + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '---|--|-' '----|---' '---|--|-' .-' '-. | .-' '-. v v v v v .--------. .--------. .--------. .--------. .--------. @@ -650,577 +1542,632 @@ directory linked-lists and avoid using any additional space. '--------' '--------' '--------' '--------' '--------' ``` -This threaded tree approach does come with a few tradeoffs. Now, anytime we -want to manipulate the directory tree, we find ourselves having to update two -pointers instead of one. For anyone familiar with creating atomic data -structures this should set off a whole bunch of red flags. +Unfortunately, not sticking to pure COW operations creates some problems. Now, +whenever we want to manipulate the directory tree, multiple pointers need to be +updated. If you're familiar with designing atomic data structures this should +set off a bunch of red flags. -But unlike the data structure guys, we can update a whole block atomically! So -as long as we're really careful (and cheat a little bit), we can still -manipulate the directory tree in a way that is resilient to power loss. - -Consider how we might add a new directory. Since both pointers that reference -it can come from the same directory, we only need a single atomic update to -finagle the directory into the filesystem: -``` - .--------. - |root dir|-. - | pair 0 | | -.--| |-' -| '--------' -| | -| v -| .--------. -'->| dir A | - | pair 0 | - | | - '--------' +To work around this, our threaded linked-list has a bit of leeway. 
Instead of +only containing metadata pairs found in our filesystem, it is allowed to +contain metadata pairs that have no parent because of a power loss. These are +called orphaned metadata pairs. -| create the new directory block -v +With the possibility of orphans, we can build power loss resilient operations +that maintain a filesystem tree threaded with a linked-list for traversal. - .--------. - |root dir|-. - | pair 0 | | - .--| |-' - | '--------' - | | - | v - | .--------. -.--------. '->| dir A | -| dir B |---->| pair 0 | -| pair 0 | | | -| | '--------' -'--------' - -| update root to point to directory B -v +Adding a directory to our tree: +``` .--------. - |root dir|-. - | pair 0 | | -.--------| |-' -| '--------' + .| root |-. + || | | +.-------|| |-' +| |'--------' +| '---|--|-' | .-' '-. | v v | .--------. .--------. -'->| dir B |->| dir A | - | pair 0 | | pair 0 | - | | | | - '--------' '--------' -``` +'->| dir A |->| dir C | + || | || | + || | || | + |'--------' |'--------' + '--------' '--------' -Note that even though directory B was added after directory A, we insert -directory B before directory A in the linked-list because it is convenient. +allocate dir B +=> + .--------. + .| root |-. + || | | +.-------|| |-' +| |'--------' +| '---|--|-' +| .-' '-. +| v v +| .--------. .--------. +'->| dir A |--->| dir C | + || | .->| | + || | | || | + |'--------' | |'--------' + '--------' | '--------' + | + .--------. | + .| dir B |-' + || | + || | + |'--------' + '--------' + +insert dir B into threaded linked-list, creating an orphan +=> + .--------. + .| root |-. + || | | +.-------|| |-' +| |'--------' +| '---|--|-' +| .-' '-------------. +| v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || orphan!| || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' -Now how about removal: -``` - .--------. .--------. - |root dir|------->|root dir|-. 
- | pair 0 | | pair 1 | | -.--------| |--------| |-' -| '--------' '--------' -| .-' '-. | +add dir B to parent directory +=> + .--------. + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. | v v v | .--------. .--------. .--------. '->| dir A |->| dir B |->| dir C | - | pair 0 | | pair 0 | | pair 0 | - | | | | | | - '--------' '--------' '--------' + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' +``` -| update root to no longer contain directory B -v +Removing a directory: - .--------. .--------. - |root dir|------------->|root dir|-. - | pair 0 | | pair 1 | | -.--| |--------------| |-' -| '--------' '--------' -| | | -| v v +``` + .--------. + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v | .--------. .--------. .--------. '->| dir A |->| dir B |->| dir C | - | pair 0 | | pair 0 | | pair 0 | - | | | | | | - '--------' '--------' '--------' + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' -| remove directory B from the linked-list -v +remove dir B from parent directory, creating an orphan +=> + .--------. + .| root |-. + || | | +.-------|| |-' +| |'--------' +| '---|--|-' +| .-' '-------------. +| v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || orphan!| || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' - .--------. .--------. - |root dir|->|root dir|-. - | pair 0 | | pair 1 | | -.--| |--| |-' -| '--------' '--------' -| | | -| v v +remove dir B from threaded linked-list, returning dir B to free blocks +=> + .--------. + .| root |-. + || | | +.-------|| |-' +| |'--------' +| '---|--|-' +| .-' '-. +| v v | .--------. .--------. 
'->| dir A |->| dir C | - | pair 0 | | pair 0 | - | | | | - '--------' '--------' + || | || | + || | || | + |'--------' |'--------' + '--------' '--------' ``` -Wait, wait, wait, that's not atomic at all! If power is lost after removing -directory B from the root, directory B is still in the linked-list. We've -just created a memory leak! +In addition to normal directory tree operations, we can use orphans to evict +blocks in a metadata pair when the block goes bad or exceeds its allocated +erases. If we lose power while evicting a metadata block we may end up with +a situation where the filesystem references the replacement block while the +threaded linked-list still contains the evicted block. We call this a +half-orphan. -And to be honest, I don't have a clever solution for this case. As a -side-effect of using multiple pointers in the threaded tree, the littlefs -can end up with orphan blocks that have no parents and should have been -removed. +``` + .--------. + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' + +try to write to dir B +=> + .--------. + .| root |-. + || | | +.----------------|| |-' +| |'--------' +| '-|-||-|-' +| .--------' || '-----. +| v |v v +| .--------. .--------. .--------. +'->| dir A |---->| dir B |->| dir C | + || |-. | | || | + || | | | | || | + |'--------' | '--------' |'--------' + '--------' | v '--------' + | .--------. + '->| dir B | + | bad | + | block! | + '--------' + +oh no! bad block detected, allocate replacement +=> + .--------. + .| root |-. + || | | +.----------------|| |-' +| |'--------' +| '-|-||-|-' +| .--------' || '-------. +| v |v v +| .--------. .--------. .--------. +'->| dir A |---->| dir B |--->| dir C | + || |-. 
| | .->| | + || | | | | | || | + |'--------' | '--------' | |'--------' + '--------' | v | '--------' + | .--------. | + '->| dir B | | + | bad | | + | block! | | + '--------' | + | + .--------. | + | dir B |--' + | | + | | + '--------' + +insert replacement in threaded linked-list, creating a half-orphan +=> + .--------. + .| root |-. + || | | +.----------------|| |-' +| |'--------' +| '-|-||-|-' +| .--------' || '-------. +| v |v v +| .--------. .--------. .--------. +'->| dir A |---->| dir B |--->| dir C | + || |-. | | .->| | + || | | | | | || | + |'--------' | '--------' | |'--------' + '--------' | v | '--------' + | .--------. | + | | dir B | | + | | bad | | + | | block! | | + | '--------' | + | | + | .--------. | + '->| dir B |--' + | half | + | orphan!| + '--------' + +fix reference in parent directory +=> + .--------. + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' +``` + +Finding orphans and half-orphans is expensive, requiring a _O(n²)_ +comparison of every metadata pair with every directory entry. But the tradeoff +is a power resilient filesystem that works with only a bounded amount of RAM. +Fortunately, we only need to check for orphans on the first allocation after +boot, and a read-only littlefs can ignore the threaded linked-list entirely. -To keep these orphan blocks from becoming a problem, the littlefs has a -deorphan step that simply iterates through every directory in the linked-list -and checks it against every directory entry in the filesystem to see if it -has a parent. The deorphan step occurs on the first block allocation after -boot, so orphans should never cause the littlefs to run out of storage -prematurely. Note that the deorphan step never needs to run in a read-only -filesystem. 
+If we only had some sort of global state, then we could also store a flag and +avoid searching for orphans unless we knew we were specifically interrupted +while manipulating the directory tree (foreshadowing!). ## The move problem -Now we have a real problem. How do we move things between directories while -remaining power resilient? Even looking at the problem from a high level, -it seems impossible. We can update directory blocks atomically, but atomically -updating two independent directory blocks is not an atomic operation. +We have one last challenge. The move problem. Phrasing the problem is simple: + +How do you atomically move a file between two directories? + +In littlefs we can atomically commit to directories, but we can't create +an atomic commit that spans multiple directories. The filesystem must go +through a minimum of two distinct states to complete a move. + +To make matters worse, file moves are a common form of synchronization for +filesystems. As a filesystem designed for power-loss, it's important we get +atomic moves right. + +So what can we do? + +- We definitely can't just let power-loss result in duplicated or lost files. + This could easily break users' code and would only reveal itself in extreme + cases. We were only able to be lazy about the threaded linked-list because + it isn't user facing and we can handle the corner cases internally. + +- Some filesystems propagate COW operations up the tree until finding a common + parent. Unfortunately this interacts poorly with our threaded tree and brings + back the issue of upward propagation of wear. + +- In a previous version of littlefs we tried to solve this problem by going + back and forth between the source and destination, marking and unmarking the + file as moving in order to make the move atomic from the user perspective. + This worked, but not well. Finding failed moves was expensive and required + a unique identifier for each file. 
+ +In the end, solving the move problem required creating a new mechanism for +sharing knowledge between multiple metadata pairs. In littlefs this led to the +introduction of a mechanism called "global state". + +--- + +Global state is a small set of state that can be updated from _any_ metadata +pair. Combining global state with metadata pair's ability to update multiple +entries in one commit gives us a powerful tool for crafting complex atomic +operations. + +How does global state work? + +Global state exists as a set of deltas that are distributed across the metadata +pairs in the filesystem. The actual global state can be built out of these +deltas by xoring together all of the deltas in the filesystem. -Here's the steps the filesystem may go through to move a directory: ``` - .--------. - |root dir|-. - | pair 0 | | -.--------| |-' -| '--------' -| .-' '-. -| v v -| .--------. .--------. -'->| dir A |->| dir B | - | pair 0 | | pair 0 | - | | | | - '--------' '--------' + .--------. .--------. .--------. .--------. .--------. +.| |->| gdelta |->| |->| gdelta |->| gdelta | +|| | || 0x23 | || | || 0xff | || 0xce | +|| | || | || | || | || | +|'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '----|---' '--------' '----|---' '----|---' + v v v + 0x00 --> xor ------------------> xor ------> xor --> gstate 0x12 +``` -| update directory B to point to directory A -v +To update the global state from a metadata pair, we take the global state we +know and xor it with both our changes and any existing delta in the metadata +pair. Committing this new delta to the metadata pair commits the changes to +the filesystem's global state. - .--------. - |root dir|-. - | pair 0 | | -.--------| |-' -| '--------' -| .-----' '-. -| | v -| | .--------. -| | .->| dir B | -| | | | pair 0 | -| | | | | -| | | '--------' -| | .-------' -| v v | -| .--------. | -'->| dir A |-' - | pair 0 | +``` + .--------. .--------. .--------. .--------. .--------. 
+.| |->| gdelta |->| |->| gdelta |->| gdelta | +|| | || 0x23 | || | || 0xff | || 0xce | +|| | || | || | || | || | +|'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '----|---' '--------' '--|---|-' '----|---' + v v | v + 0x00 --> xor ----------------> xor -|------> xor --> gstate = 0x12 + | | + | | +change gstate to 0xab --> xor <------------|--------------------------' +=> | v + '------------> xor + | + v + .--------. .--------. .--------. .--------. .--------. +.| |->| gdelta |->| |->| gdelta |->| gdelta | +|| | || 0x23 | || | || 0x46 | || 0xce | +|| | || | || | || | || | +|'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '----|---' '--------' '----|---' '----|---' + v v v + 0x00 --> xor ------------------> xor ------> xor --> gstate = 0xab +``` + +To make this efficient, we always keep a copy of the global state in RAM. We +only need to iterate over our metadata pairs and build the global state when +the filesystem is mounted. + +You may have noticed that global state is very expensive. We keep a copy in +RAM and a delta in an unbounded number of metadata pairs. Even if we reset +the global state to its initial value we can't easily clean up the deltas on +disk. For this reason, it's very important that we keep the size of global +state bounded and extremely small. But, even with a strict budget, global +state is incredibly valuable. + +--- + +Now we can solve the move problem. We can create global state describing our +move atomically with the creation of the new file, and we can clear this move +state atomically with the removal of the old file. + +``` + .--------. gstate = no move + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || | || | + || | || | || | + |'--------' |'--------' |'--------' + '----|---' '--------' '--------' + v + .--------. 
+ | file D | + | | | | '--------' -| update root to no longer contain directory A -v +begin move, add reference in dir C, change gstate to have move +=> + .--------. gstate = moving file D in dir A (m1) + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || | || | || gdelta | + || | || | || =m1 | + |'--------' |'--------' |'--------' + '----|---' '--------' '----|---' + | .----------------' + v v .--------. - |root dir|-. - | pair 0 | | -.----| |-' -| '--------' -| | -| v -| .--------. -| .->| dir B | -| | | pair 0 | -| '--| |-. -| '--------' | -| | | -| v | -| .--------. | -'--->| dir A |-' - | pair 0 | + | file D | + | | | | '--------' + +complete move, remove reference in dir A, change gstate to no move +=> + .--------. gstate = no move (m1^~m1) + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || gdelta | || | || gdelta | + || =~m1 | || | || =m1 | + |'--------' |'--------' |'--------' + '--------' '--------' '----|---' + v + .--------. + | file D | + | | + | | + '--------' ``` -We can leave any orphans up to the deorphan step to collect, but that doesn't -help the case where dir A has both dir B and the root dir as parents if we -lose power inconveniently. - -Initially, you might think this is fine. Dir A _might_ end up with two parents, -but the filesystem will still work as intended. But then this raises the -question of what do we do when the dir A wears out? For other directory blocks -we can update the parent pointer, but for a dir with two parents we would need -work out how to update both parents. And the check for multiple parents would -need to be carried out for every directory, even if the directory has never -been moved. 
- -It also presents a bad user-experience, since the condition of ending up with -two parents is rare, it's unlikely user-level code will be prepared. Just think -about how a user would recover from a multi-parented directory. They can't just -remove one directory, since remove would report the directory as "not empty". - -Other atomic filesystems simple COW the entire directory tree. But this -introduces a significant bit of complexity, which leads to code size, along -with a surprisingly expensive runtime cost during what most users assume is -a single pointer update. - -Another option is to update the directory block we're moving from to point -to the destination with a sort of predicate that we have moved if the -destination exists. Unfortunately, the omnipresent concern of wear could -cause any of these directory entries to change blocks, and changing the -entry size before a move introduces complications if it spills out of -the current directory block. - -So how do we go about moving a directory atomically? - -We rely on the improbableness of power loss. - -Power loss during a move is certainly possible, but it's actually relatively -rare. Unless a device is writing to a filesystem constantly, it's unlikely that -a power loss will occur during filesystem activity. We still need to handle -the condition, but runtime during a power loss takes a back seat to the runtime -during normal operations. - -So what littlefs does is inelegantly simple. When littlefs moves a file, it -marks the file as "moving". This is stored as a single bit in the directory -entry and doesn't take up much space. Then littlefs moves the directory, -finishing with the complete remove of the "moving" directory entry. -``` - .--------. - |root dir|-. - | pair 0 | | -.--------| |-' -| '--------' -| .-' '-. -| v v -| .--------. .--------. 
-'->| dir A |->| dir B | - | pair 0 | | pair 0 | - | | | | - '--------' '--------' - -| update root directory to mark directory A as moving -v - - .----------. - |root dir |-. - | pair 0 | | -.-------| moving A!|-' -| '----------' -| .-' '-. -| v v -| .--------. .--------. -'->| dir A |->| dir B | - | pair 0 | | pair 0 | - | | | | - '--------' '--------' - -| update directory B to point to directory A -v - - .----------. - |root dir |-. - | pair 0 | | -.-------| moving A!|-' -| '----------' -| .-----' '-. -| | v -| | .--------. -| | .->| dir B | -| | | | pair 0 | -| | | | | -| | | '--------' -| | .-------' -| v v | -| .--------. | -'->| dir A |-' - | pair 0 | - | | - '--------' +If, after building our global state during mount, we find information +describing an ongoing move, we know we lost power during a move and the file +is duplicated in both the source and destination directories. If this happens, +we can resolve the move using the information in the global state to remove +one of the files. -| update root to no longer contain directory A -v +``` + .--------. gstate = moving file D in dir A (m1) + .| root |-. ^ + || |------------> xor +.---------------|| |-' ^ +| |'--------' | +| '--|-|-|-' | +| .--------' | '---------. | +| | | | | +| | .----------> xor --------> xor +| v | v ^ v ^ +| .--------. | .--------. | .--------. | +'->| dir A |-|->| dir B |-|->| dir C | | + || |-' || |-' || gdelta |-' + || | || | || =m1 | + |'--------' |'--------' |'--------' + '----|---' '--------' '----|---' + | .---------------------' + v v .--------. - |root dir|-. - | pair 0 | | -.----| |-' -| '--------' -| | -| v -| .--------. -| .->| dir B | -| | | pair 0 | -| '--| |-. -| '--------' | -| | | -| v | -| .--------. | -'--->| dir A |-' - | pair 0 | + | file D | + | | | | '--------' ``` -Now, if we run into a directory entry that has been marked as "moved", one -of two things is possible. Either the directory entry exists elsewhere in the -filesystem, or it doesn't. 
This is a O(n) operation, but only occurs in the -unlikely case we lost power during a move. - -And we can easily fix the "moved" directory entry. Since we're already scanning -the filesystem during the deorphan step, we can also check for moved entries. -If we find one, we either remove the "moved" marking or remove the whole entry -if it exists elsewhere in the filesystem. - -## Wear awareness - -So now that we have all of the pieces of a filesystem, we can look at a more -subtle attribute of embedded storage: The wear down of flash blocks. - -The first concern for the littlefs, is that perfectly valid blocks can suddenly -become unusable. As a nice side-effect of using a COW data-structure for files, -we can simply move on to a different block when a file write fails. All -modifications to files are performed in copies, so we will only replace the -old file when we are sure none of the new file has errors. Directories, on -the other hand, need a different strategy. - -The solution to directory corruption in the littlefs relies on the redundant -nature of the metadata pairs. If an error is detected during a write to one -of the metadata pairs, we seek out a new block to take its place. Once we find -a block without errors, we iterate through the directory tree, updating any -references to the corrupted metadata pair to point to the new metadata block. -Just like when we remove directories, we can lose power during this operation -and end up with a desynchronized metadata pair in our filesystem. And just like -when we remove directories, we leave the possibility of a desynchronized -metadata pair up to the deorphan step to clean up. - -Here's what encountering a directory error may look like with all of -the directories and directory pointers fully expanded: +We can also move directories the same way we move files. There is the threaded +linked-list to consider, but leaving the threaded linked-list unchanged works +fine as the order doesn't really matter. 
+ ``` - root dir - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 0 |--. - | | |-.| -.------| | |-|' -|.-----| | |-' -|| '---------'---------' -|| |||||'--------------------------------------------------. -|| ||||'-----------------------------------------. | -|| |||'-----------------------------. | | -|| ||'--------------------. | | | -|| |'-------. | | | | -|| v v v v v v -|| dir A dir B dir C -|| block 3 block 4 block 5 block 6 block 7 block 8 -|| .---------.---------. .---------.---------. .---------.---------. -|'->| rev: 1 | rev: 0 |->| rev: 1 | rev: 0 |->| rev: 1 | rev: 0 | -'-->| | |->| | |->| | | - | | | | | | | - | | | | | | | | | - '---------'---------' '---------'---------' '---------'---------' - -| update directory B -v - - root dir - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 0 |--. - | | |-.| -.------| | |-|' -|.-----| | |-' -|| '---------'---------' -|| |||||'--------------------------------------------------. -|| ||||'-----------------------------------------. | -|| |||'-----------------------------. | | -|| ||'--------------------. | | | -|| |'-------. | | | | -|| v v v v v v -|| dir A dir B dir C -|| block 3 block 4 block 5 block 6 block 7 block 8 -|| .---------.---------. .---------.---------. .---------.---------. -|'->| rev: 1 | rev: 0 |->| rev: 1 | rev: 2 |->| rev: 1 | rev: 0 | -'-->| | |->| | corrupt!|->| | | - | | | | | corrupt!| | | | - | | | | | corrupt!| | | | - '---------'---------' '---------'---------' '---------'---------' - -| oh no! corruption detected -v allocate a replacement block - - root dir - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 0 |--. - | | |-.| -.------| | |-|' -|.-----| | |-' -|| '---------'---------' -|| |||||'----------------------------------------------------. -|| ||||'-------------------------------------------. | -|| |||'-----------------------------. | | -|| ||'--------------------. | | | -|| |'-------. 
| | | | -|| v v v v v v -|| dir A dir B dir C -|| block 3 block 4 block 5 block 6 block 7 block 8 -|| .---------.---------. .---------.---------. .---------.---------. -|'->| rev: 1 | rev: 0 |->| rev: 1 | rev: 2 |--->| rev: 1 | rev: 0 | -'-->| | |->| | corrupt!|--->| | | - | | | | | corrupt!| .->| | | - | | | | | corrupt!| | | | | - '---------'---------' '---------'---------' | '---------'---------' - block 9 | - .---------. | - | rev: 2 |-' - | | - | | - | | - '---------' - -| update root directory to contain block 9 -v - - root dir - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 2 |--. - | | |-.| -.-----| | |-|' -|.----| | |-' -|| '---------'---------' -|| .--------'||||'----------------------------------------------. -|| | |||'-------------------------------------. | -|| | ||'-----------------------. | | -|| | |'------------. | | | -|| | | | | | | -|| v v v v v v -|| dir A dir B dir C -|| block 3 block 4 block 5 block 9 block 7 block 8 -|| .---------.---------. .---------. .---------. .---------.---------. -|'->| rev: 1 | rev: 0 |-->| rev: 1 |-| rev: 2 |--->| rev: 1 | rev: 0 | -'-->| | |-. | | | |--->| | | - | | | | | | | | .->| | | - | | | | | | | | | | | | - '---------'---------' | '---------' '---------' | '---------'---------' - | block 6 | - | .---------. | - '------------>| rev: 2 |-' - | corrupt!| - | corrupt!| - | corrupt!| - '---------' - -| remove corrupted block from linked-list -v - - root dir - block 1 block 2 - .---------.---------. - | rev: 1 | rev: 2 |--. - | | |-.| -.-----| | |-|' -|.----| | |-' -|| '---------'---------' -|| .--------'||||'-----------------------------------------. -|| | |||'--------------------------------. | -|| | ||'--------------------. | | -|| | |'-----------. | | | -|| | | | | | | -|| v v v v v v -|| dir A dir B dir C -|| block 3 block 4 block 5 block 9 block 7 block 8 -|| .---------.---------. .---------.---------. .---------.---------. 
-|'->| rev: 1 | rev: 2 |->| rev: 1 | rev: 2 |->| rev: 1 | rev: 0 | -'-->| | |->| | |->| | | - | | | | | | | | | - | | | | | | | | | - '---------'---------' '---------'---------' '---------'---------' + .--------. gstate = no move (m1^~m1) + .| root |-. + || | | +.-------------|| |-' +| |'--------' +| '--|-|-|-' +| .------' | '-------. +| v v v +| .--------. .--------. .--------. +'->| dir A |->| dir B |->| dir C | + || gdelta | || | || gdelta | + || =~m1 | || | || =m1 | + |'--------' |'--------' |'--------' + '--------' '--------' '----|---' + v + .--------. + | file D | + | | + | | + '--------' + +begin move, add reference in dir C, change gstate to have move +=> + .--------. gstate = moving dir B in root (m1^~m1^m2) + .| root |-. + || | | +.--------------|| |-' +| |'--------' +| '--|-|-|-' +| .-------' | '----------. +| v | v +| .--------. | .--------. +'->| dir A |-. | .->| dir C | + || gdelta | | | | || gdelta | + || =~m1 | | | | || =m1^m2 | + |'--------' | | | |'--------' + '--------' | | | '---|--|-' + | | .-------' | + | v v | v + | .--------. | .--------. + '->| dir B |-' | file D | + || | | | + || | | | + |'--------' '--------' + '--------' + +complete move, remove reference in root, change gstate to no move +=> + .--------. gstate = no move (m1^~m1^m2^~m2) + .| root |-. + || gdelta | | +.-----------|| =~m2 |-' +| |'--------' +| '---|--|-' +| .-----' '-----. +| v v +| .--------. .--------. +'->| dir A |-. .->| dir C | + || gdelta | | | || gdelta | + || =~m1 | | '-|| =m1^m2 |-------. + |'--------' | |'--------' | + '--------' | '---|--|-' | + | .-' '-. | + | v v | + | .--------. .--------. | + '->| dir B |--| file D |-' + || | | | + || | | | + |'--------' '--------' + '--------' ``` -Also one question I've been getting is, what about the root directory? -It can't move so wouldn't the filesystem die as soon as the root blocks -develop errors? And you would be correct. 
So instead of storing the root -in the first few blocks of the storage, the root is actually pointed to -by the superblock. The superblock contains a few bits of static data, but -outside of when the filesystem is formatted, it is only updated when the root -develops errors and needs to be moved. - -## Wear leveling - -The second concern for the littlefs is that blocks in the filesystem may wear -unevenly. In this situation, a filesystem may meet an early demise where -there are no more non-corrupted blocks that aren't in use. It's common to -have files that were written once and left unmodified, wasting the potential -erase cycles of the blocks it sits on. - -Wear leveling is a term that describes distributing block writes evenly to -avoid the early termination of a flash part. There are typically two levels -of wear leveling: -1. Dynamic wear leveling - Wear is distributed evenly across all **dynamic** - blocks. Usually this is accomplished by simply choosing the unused block - with the lowest amount of wear. Note this does not solve the problem of - static data. -2. Static wear leveling - Wear is distributed evenly across all **dynamic** - and **static** blocks. Unmodified blocks may be evicted for new block - writes. This does handle the problem of static data but may lead to - wear amplification. - -In littlefs's case, it's possible to use the revision count on metadata pairs -to approximate the wear of a metadata block. And combined with the COW nature -of files, littlefs could provide your usual implementation of dynamic wear -leveling. - -However, the littlefs does not. This is for a few reasons. Most notably, even -if the littlefs did implement dynamic wear leveling, this would still not -handle the case of write-once files, and near the end of the lifetime of a -flash device, you would likely end up with uneven wear on the blocks anyways. 
- -As a flash device reaches the end of its life, the metadata blocks will -naturally be the first to go since they are updated most often. In this -situation, the littlefs is designed to simply move on to another set of -metadata blocks. This travelling means that at the end of a flash device's -life, the filesystem will have worn the device down nearly as evenly as the -usual dynamic wear leveling could. More aggressive wear leveling would come -with a code-size cost for marginal benefit. - - -One important takeaway to note, if your storage stack uses highly sensitive -storage such as NAND flash, static wear leveling is the only valid solution. -In most cases you are going to be better off using a full [flash translation layer (FTL)](https://en.wikipedia.org/wiki/Flash_translation_layer). -NAND flash already has many limitations that make it poorly suited for an -embedded system: low erase cycles, very large blocks, errors that can develop -even during reads, errors that can develop during writes of neighboring blocks. -Managing sensitive storage such as NAND flash is out of scope for the littlefs. -The littlefs does have some properties that may be beneficial on top of a FTL, -such as limiting the number of writes where possible, but if you have the -storage requirements that necessitate the need of NAND flash, you should have -the RAM to match and just use an FTL or flash filesystem. - -## Summary - -So, to summarize: - -1. The littlefs is composed of directory blocks -2. Each directory is a linked-list of metadata pairs -3. These metadata pairs can be updated atomically by alternating which - metadata block is active -4. Directory blocks contain either references to other directories or files -5. Files are represented by copy-on-write CTZ skip-lists which support O(1) - append and O(n log n) reading -6. Blocks are allocated by scanning the filesystem for used blocks in a - fixed-size lookahead region that is stored in a bit-vector -7. 
To facilitate scanning the filesystem, all directories are part of a - linked-list that is threaded through the entire filesystem -8. If a block develops an error, the littlefs allocates a new block, and - moves the data and references of the old block to the new. -9. Any case where an atomic operation is not possible, mistakes are resolved - by a deorphan step that occurs on the first allocation after boot - -That's the little filesystem. Thanks for reading! - +Global state gives us a powerful tool we can use to solve the move problem. +And the result is surprisingly performant, only needing the minimum number +of states and using the same number of commits as a naive move. Additionally, +global state gives us a bit of persistent state we can use for some other +small improvements. + +## Conclusion + +And that's littlefs, thanks for reading! + + +[wikipedia-flash]: https://en.wikipedia.org/wiki/Flash_memory +[wikipedia-sna]: https://en.wikipedia.org/wiki/Serial_number_arithmetic +[wikipedia-crc]: https://en.wikipedia.org/wiki/Cyclic_redundancy_check +[wikipedia-cow]: https://en.wikipedia.org/wiki/Copy-on-write +[wikipedia-B-tree]: https://en.wikipedia.org/wiki/B-tree +[wikipedia-B+-tree]: https://en.wikipedia.org/wiki/B%2B_tree +[wikipedia-skip-list]: https://en.wikipedia.org/wiki/Skip_list +[wikipedia-ctz]: https://en.wikipedia.org/wiki/Count_trailing_zeros +[wikipedia-ecc]: https://en.wikipedia.org/wiki/Error_correction_code +[wikipedia-hamming-bound]: https://en.wikipedia.org/wiki/Hamming_bound +[wikipedia-dynamic-wear-leveling]: https://en.wikipedia.org/wiki/Wear_leveling#Dynamic_wear_leveling +[wikipedia-static-wear-leveling]: https://en.wikipedia.org/wiki/Wear_leveling#Static_wear_leveling +[wikipedia-ftl]: https://en.wikipedia.org/wiki/Flash_translation_layer + +[oeis]: https://oeis.org +[A001511]: https://oeis.org/A001511 +[A005187]: https://oeis.org/A005187 + +[fat]: https://en.wikipedia.org/wiki/Design_of_the_FAT_file_system +[ext2]: 
http://e2fsprogs.sourceforge.net/ext2intro.html +[jffs]: https://www.sourceware.org/jffs2/jffs2-html +[yaffs]: https://yaffs.net/documents/how-yaffs-works +[spiffs]: https://github.com/pellepl/spiffs/blob/master/docs/TECH_SPEC +[ext4]: https://ext4.wiki.kernel.org/index.php/Ext4_Design +[ntfs]: https://en.wikipedia.org/wiki/NTFS +[btrfs]: https://btrfs.wiki.kernel.org/index.php/Btrfs_design +[zfs]: https://en.wikipedia.org/wiki/ZFS + +[cow]: https://upload.wikimedia.org/wikipedia/commons/0/0c/Cow_female_black_white.jpg +[elephant]: https://upload.wikimedia.org/wikipedia/commons/3/37/African_Bush_Elephant.jpg +[ram]: https://upload.wikimedia.org/wikipedia/commons/9/97/New_Mexico_Bighorn_Sheep.JPG + +[metadata-formula1]: https://latex.codecogs.com/svg.latex?cost%20%3D%20n%20+%20n%20%5Cfrac%7Bs%7D%7Bd+1%7D +[metadata-formula2]: https://latex.codecogs.com/svg.latex?s%20%3D%20r%20%5Cfrac%7Bsize%7D%7Bn%7D +[metadata-formula3]: https://latex.codecogs.com/svg.latex?d%20%3D%20%281-r%29%20%5Cfrac%7Bsize%7D%7Bn%7D +[metadata-formula4]: https://latex.codecogs.com/svg.latex?cost%20%3D%20n%20+%20n%20%5Cfrac%7Br%5Cfrac%7Bsize%7D%7Bn%7D%7D%7B%281-r%29%5Cfrac%7Bsize%7D%7Bn%7D+1%7D + +[ctz-formula1]: https://latex.codecogs.com/svg.latex?%5Clim_%7Bn%5Cto%5Cinfty%7D%5Cfrac%7B1%7D%7Bn%7D%5Csum_%7Bi%3D0%7D%5E%7Bn%7D%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%20%3D%20%5Csum_%7Bi%3D0%7D%5Cfrac%7B1%7D%7B2%5Ei%7D%20%3D%202 +[ctz-formula2]: https://latex.codecogs.com/svg.latex?B%20%3D%20%5Cfrac%7Bw%7D%7B8%7D%5Cleft%5Clceil%5Clog_2%5Cleft%28%5Cfrac%7B2%5Ew%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D%5Cright%29%5Cright%5Crceil +[ctz-formula3]: https://latex.codecogs.com/svg.latex?N%20%3D%20%5Csum_i%5En%5Cleft%5BB-%5Cfrac%7Bw%7D%7B8%7D%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%5Cright%5D +[ctz-formula4]: https://latex.codecogs.com/svg.latex?%5Csum_i%5En%5Cleft%28%5Ctext%7Bctz%7D%28i%29+1%5Cright%29%20%3D%202n-%5Ctext%7Bpopcount%7D%28n%29 +[ctz-formula5]: 
https://latex.codecogs.com/svg.latex?N%20%3D%20Bn%20-%20%5Cfrac%7Bw%7D%7B8%7D%5Cleft%282n-%5Ctext%7Bpopcount%7D%28n%29%5Cright%29 +[ctz-formula6]: https://latex.codecogs.com/svg.latex?n%20%3D%20%5Cleft%5Clfloor%5Cfrac%7BN-%5Cfrac%7Bw%7D%7B8%7D%5Cleft%28%5Ctext%7Bpopcount%7D%5Cleft%28%5Cfrac%7BN%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D-1%5Cright%29+2%5Cright%29%7D%7BB-2%5Cfrac%7Bw%7D%7B8%7D%7D%5Cright%5Crfloor +[ctz-formula7]: https://latex.codecogs.com/svg.latex?%5Cmathit%7Boff%7D%20%3D%20N%20-%20%5Cleft%28B-2%5Cfrac%7Bw%7D%7B8%7D%5Cright%29n%20-%20%5Cfrac%7Bw%7D%7B8%7D%5Ctext%7Bpopcount%7D%28n%29 + +[bigB]: https://latex.codecogs.com/svg.latex?B +[d]: https://latex.codecogs.com/svg.latex?d +[m]: https://latex.codecogs.com/svg.latex?m +[bigN]: https://latex.codecogs.com/svg.latex?N +[n]: https://latex.codecogs.com/svg.latex?n +[n']: https://latex.codecogs.com/svg.latex?n%27 +[r]: https://latex.codecogs.com/svg.latex?r +[s]: https://latex.codecogs.com/svg.latex?s +[w]: https://latex.codecogs.com/svg.latex?w +[x]: https://latex.codecogs.com/svg.latex?x + +[metadata-cost-graph]: https://raw.githubusercontent.com/geky/littlefs/gh-images/metadata-cost.svg?sanitize=true +[wear-distribution-graph]: https://raw.githubusercontent.com/geky/littlefs/gh-images/wear-distribution.svg?sanitize=true +[file-cost-graph]: https://raw.githubusercontent.com/geky/littlefs/gh-images/file-cost.svg?sanitize=true diff --git a/Makefile b/Makefile index 17d3616c..185d8e59 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,9 @@ override CFLAGS += -m$(WORD) endif override CFLAGS += -I. 
override CFLAGS += -std=c99 -Wall -pedantic -override CFLAGS += -Wshadow -Wunused-parameter -Wjump-misses-init -Wsign-compare +override CFLAGS += -Wextra -Wshadow -Wjump-misses-init +# Remove missing-field-initializers because of GCC bug +override CFLAGS += -Wno-missing-field-initializers all: $(TARGET) @@ -38,7 +40,8 @@ size: $(OBJ) .SUFFIXES: test: test_format test_dirs test_files test_seek test_truncate \ - test_interspersed test_alloc test_paths test_orphan test_move test_corrupt + test_entries test_interspersed test_alloc test_paths test_attrs \ + test_move test_orphan test_corrupt @rm test.c test_%: tests/test_%.sh diff --git a/README.md b/README.md index a47f6e23..b50dd310 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -## The little filesystem +## littlefs -A little fail-safe filesystem designed for embedded systems. +A little fail-safe filesystem designed for microcontrollers. ``` | | | .---._____ @@ -11,17 +11,19 @@ A little fail-safe filesystem designed for embedded systems. | | | ``` -**Bounded RAM/ROM** - The littlefs is designed to work with a limited amount -of memory. Recursion is avoided and dynamic memory is limited to configurable -buffers that can be provided statically. +**Power-loss resilience** - littlefs is designed to handle random power +failures. All file operations have strong copy-on-write guarantees and if +power is lost the filesystem will fall back to the last known good state. -**Power-loss resilient** - The littlefs is designed for systems that may have -random power failures. The littlefs has strong copy-on-write guarantees and -storage on disk is always kept in a valid state. +**Dynamic wear leveling** - littlefs is designed with flash in mind, and +provides wear leveling over dynamic blocks. Additionally, littlefs can +detect bad blocks and work around them. 
-**Wear leveling** - Since the most common form of embedded storage is erodible -flash memories, littlefs provides a form of dynamic wear leveling for systems -that can not fit a full flash translation layer. +**Bounded RAM/ROM** - littlefs is designed to work with a small amount of +memory. RAM usage is strictly bounded, which means RAM consumption does not +change as the filesystem grows. The filesystem contains no unbounded +recursion and dynamic memory is limited to configurable buffers that can be +provided statically. ## Example @@ -49,7 +51,8 @@ const struct lfs_config cfg = { .prog_size = 16, .block_size = 4096, .block_count = 128, - .lookahead = 128, + .cache_size = 16, + .lookahead_size = 16, }; // entry point @@ -90,11 +93,11 @@ int main(void) { Detailed documentation (or at least as much detail as is currently available) can be found in the comments in [lfs.h](lfs.h). -As you may have noticed, littlefs takes in a configuration structure that -defines how the filesystem operates. The configuration struct provides the -filesystem with the block device operations and dimensions, tweakable -parameters that tradeoff memory usage for performance, and optional -static buffers if the user wants to avoid dynamic memory. +littlefs takes in a configuration structure that defines how the filesystem +operates. The configuration struct provides the filesystem with the block +device operations and dimensions, tweakable parameters that tradeoff memory +usage for performance, and optional static buffers if the user wants to avoid +dynamic memory. The state of the littlefs is stored in the `lfs_t` type which is left up to the user to allocate, allowing multiple filesystems to be in use @@ -106,13 +109,13 @@ directory functions, with the deviation that the allocation of filesystem structures must be provided by the user. All POSIX operations, such as remove and rename, are atomic, even in event -of power-loss. 
Additionally, no file updates are actually committed to the -filesystem until sync or close is called on the file. +of power-loss. Additionally, file updates are not actually committed to +the filesystem until sync or close is called on the file. ## Other notes -All littlefs calls have the potential to return a negative error code. The -errors can be either one of those found in the `enum lfs_error` in +All littlefs calls have the potential to return a negative error code. The +errors can be either one of those found in the `enum lfs_error` in [lfs.h](lfs.h), or an error returned by the user's block device operations. In the configuration struct, the `prog` and `erase` function provided by the @@ -127,14 +130,60 @@ from memory, otherwise data integrity can not be guaranteed. If the `write` function does not perform caching, and therefore each `read` or `write` call hits the memory, the `sync` function can simply return 0. -## Reference material +## Design -[DESIGN.md](DESIGN.md) - DESIGN.md contains a fully detailed dive into how -littlefs actually works. I would encourage you to read it since the -solutions and tradeoffs at work here are quite interesting. +At a high level, littlefs is a block based filesystem that uses small logs to +store metadata and larger copy-on-write (COW) structures to store file data. -[SPEC.md](SPEC.md) - SPEC.md contains the on-disk specification of littlefs -with all the nitty-gritty details. Can be useful for developing tooling. +In littlefs, these ingredients form a sort of two-layered cake, with the small +logs (called metadata pairs) providing fast updates to metadata anywhere on +storage, while the COW structures store file data compactly and without any +wear amplification cost. + +Both of these data structures are built out of blocks, which are fed by a +common block allocator. By limiting the number of erases allowed on a block +per allocation, the allocator provides dynamic wear leveling over the entire +filesystem.
+ +``` + root + .--------.--------. + | A'| B'| | + | | |-> | + | | | | + '--------'--------' + .----' '--------------. + A v B v + .--------.--------. .--------.--------. + | C'| D'| | | E'|new| | + | | |-> | | | E'|-> | + | | | | | | | | + '--------'--------' '--------'--------' + .-' '--. | '------------------. + v v .-' v +.--------. .--------. v .--------. +| C | | D | .--------. write | new E | +| | | | | E | ==> | | +| | | | | | | | +'--------' '--------' | | '--------' + '--------' .-' | + .-' '-. .-------------|------' + v v v v + .--------. .--------. .--------. + | F | | G | | new F | + | | | | | | + | | | | | | + '--------' '--------' '--------' +``` + +More details on how littlefs works can be found in [DESIGN.md](DESIGN.md) and +[SPEC.md](SPEC.md). + +- [DESIGN.md](DESIGN.md) - A fully detailed dive into how littlefs works. + I would suggest reading it as the tradeoffs at work are quite interesting. + +- [SPEC.md](SPEC.md) - The on-disk specification of littlefs with all the + nitty-gritty details. May be useful for tooling development. ## Testing @@ -148,9 +197,9 @@ make test ## License -The littlefs is provided under the [BSD-3-Clause](https://spdx.org/licenses/BSD-3-Clause.html) -license. See [LICENSE.md](LICENSE.md) for more information. Contributions to -this project are accepted under the same license. +The littlefs is provided under the [BSD-3-Clause] license. See +[LICENSE.md](LICENSE.md) for more information. Contributions to this project +are accepted under the same license. Individual files contain the following tag instead of the full license text. @@ -161,32 +210,39 @@ License Identifiers that are here available: http://spdx.org/licenses/ ## Related projects -[Mbed OS](https://github.com/ARMmbed/mbed-os/tree/master/features/filesystem/littlefs) - -The easiest way to get started with littlefs is to jump into [Mbed](https://os.mbed.com/), -which already has block device drivers for most forms of embedded storage. 
The -littlefs is available in Mbed OS as the [LittleFileSystem](https://os.mbed.com/docs/latest/reference/littlefilesystem.html) -class. - -[littlefs-fuse](https://github.com/geky/littlefs-fuse) - A [FUSE](https://github.com/libfuse/libfuse) -wrapper for littlefs. The project allows you to mount littlefs directly on a -Linux machine. Can be useful for debugging littlefs if you have an SD card -handy. - -[littlefs-js](https://github.com/geky/littlefs-js) - A javascript wrapper for -littlefs. I'm not sure why you would want this, but it is handy for demos. -You can see it in action [here](http://littlefs.geky.net/demo.html). - -[mklfs](https://github.com/whitecatboard/Lua-RTOS-ESP32/tree/master/components/mklfs/src) - -A command line tool built by the [Lua RTOS](https://github.com/whitecatboard/Lua-RTOS-ESP32) -guys for making littlefs images from a host PC. Supports Windows, Mac OS, -and Linux. - -[SPIFFS](https://github.com/pellepl/spiffs) - Another excellent embedded -filesystem for NOR flash. As a more traditional logging filesystem with full -static wear-leveling, SPIFFS will likely outperform littlefs on small -memories such as the internal flash on microcontrollers. - -[Dhara](https://github.com/dlbeer/dhara) - An interesting NAND flash -translation layer designed for small MCUs. It offers static wear-leveling and -power-resilience with only a fixed O(|address|) pointer structure stored on -each block and in RAM. +- [littlefs-fuse] - A [FUSE] wrapper for littlefs. The project allows you to + mount littlefs directly on a Linux machine. Can be useful for debugging + littlefs if you have an SD card handy. + +- [littlefs-js] - A javascript wrapper for littlefs. I'm not sure why you would + want this, but it is handy for demos. You can see it in action + [here][littlefs-js-demo]. + +- [mklfs] - A command line tool built by the [Lua RTOS] guys for making + littlefs images from a host PC. Supports Windows, Mac OS, and Linux. 
+ +- [Mbed OS] - The easiest way to get started with littlefs is to jump into Mbed + which already has block device drivers for most forms of embedded storage. + littlefs is available in Mbed OS as the [LittleFileSystem] class. + +- [SPIFFS] - Another excellent embedded filesystem for NOR flash. As a more + traditional logging filesystem with full static wear-leveling, SPIFFS will + likely outperform littlefs on small memories such as the internal flash on + microcontrollers. + +- [Dhara] - An interesting NAND flash translation layer designed for small + MCUs. It offers static wear-leveling and power-resilience with only a fixed + _O(|address|)_ pointer structure stored on each block and in RAM. + + +[BSD-3-Clause]: https://spdx.org/licenses/BSD-3-Clause.html +[littlefs-fuse]: https://github.com/geky/littlefs-fuse +[FUSE]: https://github.com/libfuse/libfuse +[littlefs-js]: https://github.com/geky/littlefs-js +[littlefs-js-demo]:http://littlefs.geky.net/demo.html +[mklfs]: https://github.com/whitecatboard/Lua-RTOS-ESP32/tree/master/components/mklfs/src +[Lua RTOS]: https://github.com/whitecatboard/Lua-RTOS-ESP32 +[Mbed OS]: https://github.com/armmbed/mbed-os +[LittleFileSystem]: https://os.mbed.com/docs/mbed-os/v5.12/apis/littlefilesystem.html +[SPIFFS]: https://github.com/pellepl/spiffs +[Dhara]: https://github.com/dlbeer/dhara diff --git a/SPEC.md b/SPEC.md index 2a1f9eca..e6622d38 100644 --- a/SPEC.md +++ b/SPEC.md @@ -1,10 +1,10 @@ -## The little filesystem technical specification +## littlefs technical specification This is the technical specification of the little filesystem. This document covers the technical details of how the littlefs is stored on disk for -introspection and tooling development. This document assumes you are -familiar with the design of the littlefs, for more info on how littlefs -works check out [DESIGN.md](DESIGN.md). +introspection and tooling. 
This document assumes you are familiar with the +design of the littlefs, for more info on how littlefs works check +out [DESIGN.md](DESIGN.md). ``` | | | .---._____ @@ -15,356 +15,773 @@ works check out [DESIGN.md](DESIGN.md). | | | ``` -## Some important details +## Some quick notes -- The littlefs is a block-based filesystem. This is, the disk is divided into - an array of evenly sized blocks that are used as the logical unit of storage - in littlefs. Block pointers are stored in 32 bits. +- littlefs is a block-based filesystem. The disk is divided into an array of + evenly sized blocks that are used as the logical unit of storage. -- There is no explicit free-list stored on disk, the littlefs only knows what - is in use in the filesystem. +- Block pointers are stored in 32 bits, with the special value `0xffffffff` + representing a null block address. -- The littlefs uses the value of 0xffffffff to represent a null block-pointer. +- In addition to the logical block size (which usually matches the erase + block size), littlefs also uses a program block size and read block size. + These determine the alignment of block device operations, but don't need + to be consistent for portability. -- All values in littlefs are stored in little-endian byte order. +- By default, all values in littlefs are stored in little-endian byte order. ## Directories / Metadata pairs -Metadata pairs form the backbone of the littlefs and provide a system for -atomic updates. Even the superblock is stored in a metadata pair. +Metadata pairs form the backbone of littlefs and provide a system for +distributed atomic updates. Even the superblock is stored in a metadata pair. As their name suggests, a metadata pair is stored in two blocks, with one block -acting as a redundant backup in case the other is corrupted. These two blocks -could be anywhere in the disk and may not be next to each other, so any -pointers to directory pairs need to be stored as two block pointers. 
- -Here's the layout of metadata blocks on disk: - -| offset | size | description | -|--------|---------------|----------------| -| 0x00 | 32 bits | revision count | -| 0x04 | 32 bits | dir size | -| 0x08 | 64 bits | tail pointer | -| 0x10 | size-16 bytes | dir entries | -| 0x00+s | 32 bits | CRC | - -**Revision count** - Incremented every update, only the uncorrupted -metadata-block with the most recent revision count contains the valid metadata. -Comparison between revision counts must use sequence comparison since the -revision counts may overflow. - -**Dir size** - Size in bytes of the contents in the current metadata block, -including the metadata-pair metadata. Additionally, the highest bit of the -dir size may be set to indicate that the directory's contents continue on the -next metadata-pair pointed to by the tail pointer. - -**Tail pointer** - Pointer to the next metadata-pair in the filesystem. -A null pair-pointer (0xffffffff, 0xffffffff) indicates the end of the list. -If the highest bit in the dir size is set, this points to the next -metadata-pair in the current directory, otherwise it points to an arbitrary -metadata-pair. Starting with the superblock, the tail-pointers form a -linked-list containing all metadata-pairs in the filesystem. - -**CRC** - 32 bit CRC used to detect corruption from power-lost, from block -end-of-life, or just from noise on the storage bus. The CRC is appended to -the end of each metadata-block. The littlefs uses the standard CRC-32, which -uses a polynomial of 0x04c11db7, initialized with 0xffffffff. - -Here's an example of a simple directory stored on disk: -``` -(32 bits) revision count = 10 (0x0000000a) -(32 bits) dir size = 154 bytes, end of dir (0x0000009a) -(64 bits) tail pointer = 37, 36 (0x00000025, 0x00000024) -(32 bits) CRC = 0xc86e3106 - -00000000: 0a 00 00 00 9a 00 00 00 25 00 00 00 24 00 00 00 ........%...$... 
-00000010: 22 08 00 03 05 00 00 00 04 00 00 00 74 65 61 22 "...........tea" -00000020: 08 00 06 07 00 00 00 06 00 00 00 63 6f 66 66 65 ...........coffe -00000030: 65 22 08 00 04 09 00 00 00 08 00 00 00 73 6f 64 e"...........sod -00000040: 61 22 08 00 05 1d 00 00 00 1c 00 00 00 6d 69 6c a"...........mil -00000050: 6b 31 22 08 00 05 1f 00 00 00 1e 00 00 00 6d 69 k1"...........mi -00000060: 6c 6b 32 22 08 00 05 21 00 00 00 20 00 00 00 6d lk2"...!... ...m -00000070: 69 6c 6b 33 22 08 00 05 23 00 00 00 22 00 00 00 ilk3"...#..."... -00000080: 6d 69 6c 6b 34 22 08 00 05 25 00 00 00 24 00 00 milk4"...%...$.. -00000090: 00 6d 69 6c 6b 35 06 31 6e c8 .milk5.1n. -``` - -A note about the tail pointer linked-list: Normally, this linked-list is -threaded through the entire filesystem. However, after power-loss this -linked-list may become out of sync with the rest of the filesystem. -- The linked-list may contain a directory that has actually been removed -- The linked-list may contain a metadata pair that has not been updated after - a block in the pair has gone bad. - -The threaded linked-list must be checked for these errors before it can be -used reliably. Fortunately, the threaded linked-list can simply be ignored -if littlefs is mounted read-only. - -## Entries - -Each metadata block contains a series of entries that follow a standard -layout. An entry contains the type of the entry, along with a section for -entry-specific data, attributes, and a name. 
- -Here's the layout of entries on disk: - -| offset | size | description | -|---------|------------------------|----------------------------| -| 0x0 | 8 bits | entry type | -| 0x1 | 8 bits | entry length | -| 0x2 | 8 bits | attribute length | -| 0x3 | 8 bits | name length | -| 0x4 | entry length bytes | entry-specific data | -| 0x4+e | attribute length bytes | system-specific attributes | -| 0x4+e+a | name length bytes | entry name | - -**Entry type** - Type of the entry, currently this is limited to the following: -- 0x11 - file entry -- 0x22 - directory entry -- 0x2e - superblock entry - -Additionally, the type is broken into two 4 bit nibbles, with the upper nibble -specifying the type's data structure used when scanning the filesystem. The -lower nibble clarifies the type further when multiple entries share the same -data structure. - -The highest bit is reserved for marking the entry as "moved". If an entry -is marked as "moved", the entry may also exist somewhere else in the -filesystem. If the entry exists elsewhere, this entry must be treated as -though it does not exist. - -**Entry length** - Length in bytes of the entry-specific data. This does -not include the entry type size, attributes, or name. The full size in bytes -of the entry is 4 + entry length + attribute length + name length. - -**Attribute length** - Length of system-specific attributes in bytes. Since -attributes are system specific, there is not much guarantee on the values in -this section, and systems are expected to work even when it is empty. See the -[attributes](#entry-attributes) section for more details. - -**Name length** - Length of the entry name. Entry names are stored as UTF8, -although most systems will probably only support ASCII. Entry names can not -contain '/' and can not be '.' or '..' as these are a part of the syntax of -filesystem paths. 
- -Here's an example of a simple entry stored on disk: -``` -(8 bits) entry type = file (0x11) -(8 bits) entry length = 8 bytes (0x08) -(8 bits) attribute length = 0 bytes (0x00) -(8 bits) name length = 12 bytes (0x0c) -(8 bytes) entry data = 05 00 00 00 20 00 00 00 -(12 bytes) entry name = smallavacado - -00000000: 11 08 00 0c 05 00 00 00 20 00 00 00 73 6d 61 6c ........ ...smal -00000010: 6c 61 76 61 63 61 64 6f lavacado -``` - -## Superblock - -The superblock is the anchor for the littlefs. The superblock is stored as -a metadata pair containing a single superblock entry. It is through the -superblock that littlefs can access the rest of the filesystem. - -The superblock can always be found in blocks 0 and 1, however fetching the -superblock requires knowing the block size. The block size can be guessed by -searching the beginning of disk for the string "littlefs", although currently -the filesystems relies on the user providing the correct block size. - -The superblock is the most valuable block in the filesystem. It is updated -very rarely, only during format or when the root directory must be moved. It -is encouraged to always write out both superblock pairs even though it is not -required. - -Here's the layout of the superblock entry: - -| offset | size | description | -|--------|------------------------|----------------------------------------| -| 0x00 | 8 bits | entry type (0x2e for superblock entry) | -| 0x01 | 8 bits | entry length (20 bytes) | -| 0x02 | 8 bits | attribute length | -| 0x03 | 8 bits | name length (8 bytes) | -| 0x04 | 64 bits | root directory | -| 0x0c | 32 bits | block size | -| 0x10 | 32 bits | block count | -| 0x14 | 32 bits | version | -| 0x18 | attribute length bytes | system-specific attributes | -| 0x18+a | 8 bytes | magic string ("littlefs") | - -**Root directory** - Pointer to the root directory's metadata pair. - -**Block size** - Size of the logical block size used by the filesystem. 
- -**Block count** - Number of blocks in the filesystem. - -**Version** - The littlefs version encoded as a 32 bit value. The upper 16 bits -encodes the major version, which is incremented when a breaking-change is -introduced in the filesystem specification. The lower 16 bits encodes the -minor version, which is incremented when a backwards-compatible change is -introduced. Non-standard Attribute changes do not change the version. This -specification describes version 1.1 (0x00010001), which is the first version -of littlefs. - -**Magic string** - The magic string "littlefs" takes the place of an entry -name. - -Here's an example of a complete superblock: -``` -(32 bits) revision count = 3 (0x00000003) -(32 bits) dir size = 52 bytes, end of dir (0x00000034) -(64 bits) tail pointer = 3, 2 (0x00000003, 0x00000002) -(8 bits) entry type = superblock (0x2e) -(8 bits) entry length = 20 bytes (0x14) -(8 bits) attribute length = 0 bytes (0x00) -(8 bits) name length = 8 bytes (0x08) -(64 bits) root directory = 3, 2 (0x00000003, 0x00000002) -(32 bits) block size = 512 bytes (0x00000200) -(32 bits) block count = 1024 blocks (0x00000400) -(32 bits) version = 1.1 (0x00010001) -(8 bytes) magic string = littlefs -(32 bits) CRC = 0xc50b74fa - -00000000: 03 00 00 00 34 00 00 00 03 00 00 00 02 00 00 00 ....4........... -00000010: 2e 14 00 08 03 00 00 00 02 00 00 00 00 02 00 00 ................ -00000020: 00 04 00 00 01 00 01 00 6c 69 74 74 6c 65 66 73 ........littlefs -00000030: fa 74 0b c5 .t.. -``` - -## Directory entries - -Directories are stored in entries with a pointer to the first metadata pair -in the directory. Keep in mind that a directory may be composed of multiple -metadata pairs connected by the tail pointer when the highest bit in the dir -size is set. +providing a backup during erase cycles in case power is lost. These two blocks +are not necessarily sequential and may be anywhere on disk, so a "pointer" to a +metadata pair is stored as two block pointers. 
-Here's the layout of a directory entry: - -| offset | size | description | -|--------|------------------------|-----------------------------------------| -| 0x0 | 8 bits | entry type (0x22 for directory entries) | -| 0x1 | 8 bits | entry length (8 bytes) | -| 0x2 | 8 bits | attribute length | -| 0x3 | 8 bits | name length | -| 0x4 | 64 bits | directory pointer | -| 0xc | attribute length bytes | system-specific attributes | -| 0xc+a | name length bytes | directory name | +On top of this, each metadata block behaves as an appendable log, containing a +variable number of commits. Commits can be appended to the metadata log in +order to update the metadata without requiring an erase cycle. Note that +successive commits may supersede the metadata in previous commits. Only the +most recent metadata should be considered valid. + +The high-level layout of a metadata block is fairly simple: + +``` + .---------------------------------------. +.-| revision count | entries | \ +| |-------------------+ | | +| | | | +| | | +-- 1st commit +| | | | +| | +-------------------| | +| | | CRC | / +| |-------------------+-------------------| +| | entries | \ +| | | | +| | | +-- 2nd commit +| | +-------------------+--------------| | +| | | CRC | padding | / +| |----+-------------------+--------------| +| | entries | \ +| | | | +| | | +-- 3rd commit +| | +-------------------+---------| | +| | | CRC | | / +| |---------+-------------------+ | +| | unwritten storage | more commits +| | | | +| | | v +| | | +| | | +| '---------------------------------------' +'---------------------------------------' +``` + +Each metadata block contains a 32-bit revision count followed by a number of +commits. Each commit contains a variable number of metadata entries followed +by a 32-bit CRC. + +Note also that entries aren't necessarily word-aligned. This allows us to +store metadata more compactly, however we can only write to addresses that are +aligned to our program block size.
This means each commit may have padding for +alignment. + +Metadata block fields: + +1. **Revision count (32-bits)** - Incremented every erase cycle. If both blocks + contain valid commits, only the block with the most recent revision count + should be used. Sequence comparison must be used to avoid issues with + integer overflow. + +2. **CRC (32-bits)** - Detects corruption from power-loss or other write + issues. Uses a CRC-32 with a polynomial of `0x04c11db7` initialized + with `0xffffffff`. + +Entries themselves are stored as a 32-bit tag followed by a variable length +blob of data. But exactly how these tags are stored is a little bit tricky. + +Metadata blocks support both forward and backward iteration. In order to do +this without duplicating the space for each tag, neighboring entries have their +tags XORed together, starting with `0xffffffff`. -**Directory pointer** - Pointer to the first metadata pair in the directory. - -Here's an example of a directory entry: ``` -(8 bits) entry type = directory (0x22) -(8 bits) entry length = 8 bytes (0x08) -(8 bits) attribute length = 0 bytes (0x00) -(8 bits) name length = 3 bytes (0x03) -(64 bits) directory pointer = 5, 4 (0x00000005, 0x00000004) -(3 bytes) name = tea - -00000000: 22 08 00 03 05 00 00 00 04 00 00 00 74 65 61 "...........tea -``` - -## File entries - -Files are stored in entries with a pointer to the head of the file and the -size of the file. This is enough information to determine the state of the -CTZ skip-list that is being referenced. - -How files are actually stored on disk is a bit complicated. The full -explanation of CTZ skip-lists can be found in [DESIGN.md](DESIGN.md#ctz-skip-lists). - -A terribly quick summary: For every nth block where n is divisible by 2^x, -the block contains a pointer to block n-2^x. These pointers are stored in -increasing order of x in each block of the file preceding the data in the + Forward iteration Backward iteration + +.-------------------. 
0xffffffff .-------------------. +| revision count | | | revision count | +|-------------------| v |-------------------| +| tag ~A |---> xor -> tag A | tag ~A |---> xor -> 0xffffffff +|-------------------| | |-------------------| ^ +| data A | | | data A | | +| | | | | | +| | | | | | +|-------------------| v |-------------------| | +| tag AxB |---> xor -> tag B | tag AxB |---> xor -> tag A +|-------------------| | |-------------------| ^ +| data B | | | data B | | +| | | | | | +| | | | | | +|-------------------| v |-------------------| | +| tag BxC |---> xor -> tag C | tag BxC |---> xor -> tag B +|-------------------| |-------------------| ^ +| data C | | data C | | +| | | | tag C +| | | | +| | | | +'-------------------' '-------------------' +``` + +One last thing to note before we get into the details around tag encoding. Each +tag contains a valid bit used to indicate if the tag and containing commit is +valid. This valid bit is the first bit found in the tag and the commit and can +be used to tell if we've attempted to write to the remaining space in the block. -The maximum number of pointers in a block is bounded by the maximum file size -divided by the block size. With 32 bits for file size, this results in a -minimum block size of 104 bytes. +Here's a more complete example of metadata block containing 4 entries: -Here's the layout of a file entry: +``` + .---------------------------------------. +.-| revision count | tag ~A | \ +| |-------------------+-------------------| | +| | data A | | +| | | | +| |-------------------+-------------------| | +| | tag AxB | data B | <--. 
| +| |-------------------+ | | | +| | | | +-- 1st commit +| | +-------------------+---------| | | +| | | tag BxC | | <-.| | +| |---------+-------------------+ | || | +| | data C | || | +| | | || | +| |-------------------+-------------------| || | +| | tag CxCRC | CRC | || / +| |-------------------+-------------------| || +| | tag CRCxA' | data A' | || \ +| |-------------------+ | || | +| | | || | +| | +-------------------+----| || +-- 2nd commit +| | | tag CRCxA' | | || | +| |--------------+-------------------+----| || | +| | CRC | padding | || / +| |--------------+----+-------------------| || +| | tag CRCxA'' | data A'' | <---. \ +| |-------------------+ | ||| | +| | | ||| | +| | +-------------------+---------| ||| | +| | | tag A''xD | | < ||| | +| |---------+-------------------+ | |||| +-- 3rd commit +| | data D | |||| | +| | +---------| |||| | +| | | tag Dx| |||| | +| |---------+-------------------+---------| |||| | +| |CRC | CRC | | |||| / +| |---------+-------------------+ | |||| +| | unwritten storage | |||| more commits +| | | |||| | +| | | |||| v +| | | |||| +| | | |||| +| '---------------------------------------' |||| +'---------------------------------------' |||'- most recent A + ||'-- most recent B + |'--- most recent C + '---- most recent D +``` -| offset | size | description | -|--------|------------------------|------------------------------------| -| 0x0 | 8 bits | entry type (0x11 for file entries) | -| 0x1 | 8 bits | entry length (8 bytes) | -| 0x2 | 8 bits | attribute length | -| 0x3 | 8 bits | name length | -| 0x4 | 32 bits | file head | -| 0x8 | 32 bits | file size | -| 0xc | attribute length bytes | system-specific attributes | -| 0xc+a | name length bytes | directory name | +## Metadata tags -**File head** - Pointer to the block that is the head of the file's CTZ -skip-list. +So in littlefs, 32-bit tags describe every type of metadata. And this means +_every_ type of metadata, including file entries, directory fields, and +global state. 
Even the CRCs used to mark the end of commits get their own tag. -**File size** - Size of file in bytes. +Because of this, the tag format contains some densely packed information. Note +that there are multiple levels of types which break down into more info: -Here's an example of a file entry: ``` -(8 bits) entry type = file (0x11) -(8 bits) entry length = 8 bytes (0x08) -(8 bits) attribute length = 0 bytes (0x00) -(8 bits) name length = 12 bytes (0x03) -(32 bits) file head = 543 (0x0000021f) -(32 bits) file size = 256 KB (0x00040000) -(12 bytes) name = largeavacado +[---- 32 ----] +[1|-- 11 --|-- 10 --|-- 10 --] + ^. ^ . ^ ^- length + |. | . '------------ id + |. '-----.------------------ type (type3) + '.-----------.------------------ valid bit + [-3-|-- 8 --] + ^ ^- chunk + '------- type (type1) +``` + + +Before we go further, there's one important thing to note. These tags are +**not** stored in little-endian. Tags stored in commits are actually stored +in big-endian (and are the only thing in littlefs stored in big-endian). This +little bit of craziness comes from the fact that the valid bit must be the +first bit in a commit, and when converted to little-endian, the valid bit finds +itself in byte 4. We could restructure the tag to store the valid bit lower, +but, because none of the fields are byte-aligned, this would be more +complicated than just storing the tag in big-endian. + +Another thing to note is that both the tags `0x00000000` and `0xffffffff` are +invalid and can be used for null values. + +Metadata tag fields: + +1. **Valid bit (1-bit)** - Indicates if the tag is valid. + +2. **Type3 (11-bits)** - Type of the tag. This field is broken down further + into a 3-bit abstract type and an 8-bit chunk field. Note that the value + `0x000` is invalid and not assigned a type. -00000000: 11 08 00 0c 1f 02 00 00 00 00 04 00 6c 61 72 67 ............larg -00000010: 65 61 76 61 63 61 64 6f eavacado +3. **Type1 (3-bits)** - Abstract type of the tag.
Groups the tags into + 8 categories that facilitate bitmasked lookups. + +4. **Chunk (8-bits)** - Chunk field used for various purposes by the different + abstract types. type1+chunk+id form a unique identifier for each tag in the + metadata block. + +5. **Id (10-bits)** - File id associated with the tag. Each file in a metadata + block gets a unique id which is used to associate tags with that file. The + special value `0x3ff` is used for any tags that are not associated with a + file, such as directory and global metadata. + +6. **Length (10-bits)** - Length of the data in bytes. The special value + `0x3ff` indicates that this tag has been deleted. + +## Metadata types + +What follows is an exhaustive list of metadata in littlefs. + +--- +#### `0x401` LFS_TYPE_CREATE + +Creates a new file with this id. Note that files in a metadata block +don't necessarily need a create tag. All a create does is move over any +files using this id. In this sense a create is similar to insertion into +an imaginary array of files. + +The create and delete tags allow littlefs to keep files in a directory +ordered alphabetically by filename. + +--- +#### `0x4ff` LFS_TYPE_DELETE + +Deletes the file with this id. An inverse to create, this tag moves over +any files neighboring this id similar to a deletion from an imaginary +array of files. + +--- +#### `0x0xx` LFS_TYPE_NAME + +Associates the id with a file name and file type. + +The data contains the file name stored as an ASCII string (may be expanded to +UTF8 in the future). + +The chunk field in this tag indicates an 8-bit file type which can be one of +the following. + +Currently, the name tag must precede any other tags associated with the id and +can not be reassigned without deleting the file. 
+ +Layout of the name tag: + +``` + tag data +[-- 32 --][--- variable length ---] +[1| 3| 8 | 10 | 10 ][--- (size) ---] + ^ ^ ^ ^ ^- size ^- file name + | | | '------ id + | | '----------- file type + | '-------------- type1 (0x0) + '----------------- valid bit ``` -## Entry attributes +Name fields: + +1. **file type (8-bits)** - Type of the file. + +2. **file name** - File name stored as an ASCII string. + +--- +#### `0x001` LFS_TYPE_REG -Each dir entry can have up to 256 bytes of system-specific attributes. Since -these attributes are system-specific, they may not be portable between -different systems. For this reason, all attributes must be optional. A minimal -littlefs driver must be able to get away with supporting no attributes at all. +Initializes the id + name as a regular file. -For some level of portability, littlefs has a simple scheme for attributes. -Each attribute is prefixes with an 8-bit type that indicates what the attribute -is. The length of attributes may also be determined from this type. Attributes -in an entry should be sorted based on portability, since attribute parsing -will end when it hits the first attribute it does not understand. +How each file is stored depends on its struct tag, which is described below. -Each system should choose a 4-bit value to prefix all attribute types with to -avoid conflicts with other systems. Additionally, littlefs drivers that support -attributes should provide a "ignore attributes" flag to users in case attribute -conflicts do occur. +--- +#### `0x002` LFS_TYPE_DIR -Attribute types prefixes with 0x0 and 0xf are currently reserved for future -standard attributes. Standard attributes will be added to this document in -that case. +Initializes the id + name as a directory. -Here's an example of non-standard time attribute: +Directories in littlefs are stored on disk as a linked-list of metadata pairs, +each pair containing any number of files in alphabetical order. 
A pointer to +the directory is stored in the struct tag, which is described below. + +--- +#### `0x0ff` LFS_TYPE_SUPERBLOCK + +Initializes the id as a superblock entry. + +The superblock entry is a special entry used to store format-time configuration +and identify the filesystem. + +The name is a bit of a misnomer. While the superblock entry serves the same +purpose as a superblock found in other filesystems, in littlefs the superblock +does not get a dedicated block. Instead, the superblock entry is duplicated +across a linked-list of metadata pairs rooted on the blocks 0 and 1. The last +metadata pair doubles as the root directory of the filesystem. + +``` + .--------. .--------. .--------. .--------. .--------. +.| super |->| super |->| super |->| super |->| file B | +|| block | || block | || block | || block | || file C | +|| | || | || | || file A | || file D | +|'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '--------' '--------' '--------' '--------' + +\----------------+----------------/ \----------+----------/ + superblock pairs root directory ``` -(8 bits) attribute type = time (0xc1) -(72 bits) time in seconds = 1506286115 (0x0059c81a23) -00000000: c1 23 1a c8 59 00 .#..Y. +The filesystem starts with only the root directory. The superblock metadata +pairs grow every time the root pair is compacted in order to prolong the +life of the device exponentially. + +The contents of the superblock entry are stored in a name tag with the +superblock type and an inline-struct tag. The name tag contains the magic +string "littlefs", while the inline-struct tag contains version and +configuration information. 
+ +Layout of the superblock name tag and inline-struct tag: + ``` + tag data +[-- 32 --][-- 32 --|-- 32 --] +[1|- 11 -| 10 | 10 ][--- 64 ---] + ^ ^ ^ ^- size (8) ^- magic string ("littlefs") + | | '------ id (0) + | '------------ type (0x0ff) + '----------------- valid bit + + tag data +[-- 32 --][-- 32 --|-- 32 --|-- 32 --] +[1|- 11 -| 10 | 10 ][-- 32 --|-- 32 --|-- 32 --] + ^ ^ ^ ^ ^- version ^- block size ^- block count + | | | | [-- 32 --|-- 32 --|-- 32 --] + | | | | [-- 32 --|-- 32 --|-- 32 --] + | | | | ^- name max ^- file max ^- attr max + | | | '- size (24) + | | '------ id (0) + | '------------ type (0x201) + '----------------- valid bit +``` + +Superblock fields: + +1. **Magic string (8-bytes)** - Magic string indicating the presence of + littlefs on the device. Must be the string "littlefs". + +2. **Version (32-bits)** - The version of littlefs at format time. The version + is encoded in a 32-bit value with the upper 16-bits containing the major + version, and the lower 16-bits containing the minor version. + + This specification describes version 2.0 (`0x00020000`). + +3. **Block size (32-bits)** - Size of the logical blocks used by the + filesystem in bytes. + +4. **Block count (32-bits)** - Number of blocks in the filesystem. + +5. **Name max (32-bits)** - Maximum size of file names in bytes. + +6. **File max (32-bits)** - Maximum size of files in bytes. + +7. **Attr max (32-bits)** - Maximum size of file attributes in bytes. + +The superblock must always be the first entry (id 0) in a metadata pair as well +as be the first entry written to the block. This means that the superblock +entry can be read from a device using offsets alone. + +--- +#### `0x2xx` LFS_TYPE_STRUCT + +Associates the id with an on-disk data structure. -Here's an example of non-standard permissions attribute: +The exact layout of the data depends on the data structure type stored in the +chunk field and can be one of the following.
+ +Any type of struct supersedes all other structs associated with the id. For +example, appending a ctz-struct replaces an inline-struct on the same file. + +--- +#### `0x200` LFS_TYPE_DIRSTRUCT + +Gives the id a directory data structure. + +Directories in littlefs are stored on disk as a linked-list of metadata pairs, +each pair containing any number of files in alphabetical order. + +``` + | + v + .--------. .--------. .--------. .--------. .--------. .--------. +.| file A |->| file D |->| file G |->| file I |->| file J |->| file M | +|| file B | || file E | || file H | || | || file K | || file N | +|| file C | || file F | || | || | || file L | || | +|'--------' |'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '--------' '--------' '--------' '--------' '--------' ``` -(8 bits) attribute type = permissions (0xc2) -(16 bits) permission bits = rw-rw-r-- (0x01b4) -00000000: c2 b4 01 ... +The dir-struct tag contains only the pointer to the first metadata-pair in the +directory. The directory size is not known without traversing the directory. + +The pointer to the next metadata-pair in the directory is stored in a tail tag, +which is described below. + +Layout of the dir-struct tag: + ``` + tag data +[-- 32 --][-- 32 --|-- 32 --] +[1|- 11 -| 10 | 10 ][--- 64 ---] + ^ ^ ^ ^- size (8) ^- metadata pair + | | '------ id + | '------------ type (0x200) + '----------------- valid bit +``` + +Dir-struct fields: + +1. **Metadata pair (8-bytes)** - Pointer to the first metadata-pair + in the directory. + +--- +#### `0x201` LFS_TYPE_INLINESTRUCT + +Gives the id an inline data structure. + +Inline structs store small files that can fit in the metadata pair. In this +case, the file data is stored directly in the tag's data area. 
+ +Layout of the inline-struct tag: -Here's what a dir entry may look like with these attributes: ``` -(8 bits) entry type = file (0x11) -(8 bits) entry length = 8 bytes (0x08) -(8 bits) attribute length = 9 bytes (0x09) -(8 bits) name length = 12 bytes (0x0c) -(8 bytes) entry data = 05 00 00 00 20 00 00 00 -(8 bits) attribute type = time (0xc1) -(72 bits) time in seconds = 1506286115 (0x0059c81a23) -(8 bits) attribute type = permissions (0xc2) -(16 bits) permission bits = rw-rw-r-- (0x01b4) -(12 bytes) entry name = smallavacado + tag data +[-- 32 --][--- variable length ---] +[1|- 11 -| 10 | 10 ][--- (size) ---] + ^ ^ ^ ^- size ^- inline data + | | '------ id + | '------------ type (0x201) + '----------------- valid bit +``` + +Inline-struct fields: + +1. **Inline data** - File data stored directly in the metadata-pair. + +--- +#### `0x202` LFS_TYPE_CTZSTRUCT + +Gives the id a CTZ skip-list data structure. + +CTZ skip-lists store files that can not fit in the metadata pair. These files +are stored in a skip-list in reverse, with a pointer to the head of the +skip-list. Note that the head of the skip-list and the file size is enough +information to read the file. + +How exactly CTZ skip-lists work is a bit complicated. A full explanation can be +found in the [DESIGN.md](DESIGN.md#ctz-skip-lists). + +A quick summary: For every _n_‍th block where _n_ is divisible by +2‍_ˣ_, that block contains a pointer to block _n_-2‍_ˣ_. +These pointers are stored in increasing order of _x_ in each block of the file +before the actual data. + +``` + | + v +.--------. .--------. .--------. .--------. .--------. .--------. 
+| A |<-| D |<-| G |<-| J |<-| M |<-| P | +| B |<-| E |--| H |<-| K |--| N | | Q | +| C |<-| F |--| I |--| L |--| O | | | +'--------' '--------' '--------' '--------' '--------' '--------' + block 0 block 1 block 2 block 3 block 4 block 5 + 1 skip 2 skips 1 skip 3 skips 1 skip +``` + +Note that the maximum number of pointers in a block is bounded by the maximum +file size divided by the block size. With 32 bits for file size, this results +in a minimum block size of 104 bytes. + +Layout of the CTZ-struct tag: + +``` + tag data +[-- 32 --][-- 32 --|-- 32 --] +[1|- 11 -| 10 | 10 ][-- 32 --|-- 32 --] + ^ ^ ^ ^ ^ ^- file size + | | | | '-------------------- file head + | | | '- size (8) + | | '------ id + | '------------ type (0x202) + '----------------- valid bit +``` + +CTZ-struct fields: + +1. **File head (32-bits)** - Pointer to the block that is the head of the + file's CTZ skip-list. + +2. **File size (32-bits)** - Size of the file in bytes. + +--- +#### `0x3xx` LFS_TYPE_USERATTR + +Attaches a user attribute to an id. + +littlefs has a concept of "user attributes". These are small user-provided +attributes that can be used to store things like timestamps, hashes, +permissions, etc. + +Each user attribute is uniquely identified by an 8-bit type which is stored in +the chunk field, and the user attribute itself can be found in the tag's data. + +There are currently no standard user attributes and a portable littlefs +implementation should work with any user attributes missing. + +Layout of the user-attr tag: -00000000: 11 08 09 0c 05 00 00 00 20 00 00 00 c1 23 1a c8 ........ ....#.. -00000010: 59 00 c2 b4 01 73 6d 61 6c 6c 61 76 61 63 61 64 Y....smallavacad -00000020: 6f o ``` + tag data +[-- 32 --][--- variable length ---] +[1| 3| 8 | 10 | 10 ][--- (size) ---] + ^ ^ ^ ^ ^- size ^- attr data + | | | '------ id + | | '----------- attr type + | '-------------- type1 (0x3) + '----------------- valid bit +``` + +User-attr fields: + +1. 
**Attr type (8-bits)** - Type of the user attributes. + +2. **Attr data** - The data associated with the user attribute. + +--- +#### `0x6xx` LFS_TYPE_TAIL + +Provides the tail pointer for the metadata pair itself. + +The metadata pair's tail pointer is used in littlefs for a linked-list +containing all metadata pairs. The chunk field contains the type of the tail, +which indicates if the following metadata pair is a part of the directory +(hard-tail) or only used to traverse the filesystem (soft-tail). + +``` + .--------. + .| dir A |-. + ||softtail| | +.--------| |-' +| |'--------' +| '---|--|-' +| .-' '-------------. +| v v +| .--------. .--------. .--------. +'->| dir B |->| dir B |->| dir C | + ||hardtail| ||softtail| || | + || | || | || | + |'--------' |'--------' |'--------' + '--------' '--------' '--------' +``` + +Currently any type supersedes any other preceding tails in the metadata pair, +but this may change if additional metadata pair state is added. + +A note about the metadata pair linked-list: Normally, this linked-list contains +every metadata pair in the filesystem. However, there are some operations that +can cause this linked-list to become out of sync if a power-loss were to occur. +When this happens, littlefs sets the "sync" flag in the global state. How +exactly this flag is stored is described below. + +When the sync flag is set: + +1. The linked-list may contain an orphaned directory that has been removed in + the filesystem. +2. The linked-list may contain a metadata pair with a bad block that has been + replaced in the filesystem. + +If the sync flag is set, the threaded linked-list must be checked for these +errors before it can be used reliably. Note that the threaded linked-list can +be ignored if littlefs is mounted read-only. 
+ +Layout of the tail tag: + +``` + tag data +[-- 32 --][-- 32 --|-- 32 --] +[1| 3| 8 | 10 | 10 ][--- 64 ---] + ^ ^ ^ ^ ^- size (8) ^- metadata pair + | | | '------ id + | | '---------- tail type + | '------------- type1 (0x6) + '---------------- valid bit +``` + +Tail fields: + +1. **Tail type (8-bits)** - Type of the tail pointer. + +2. **Metadata pair (8-bytes)** - Pointer to the next metadata-pair. + +--- +#### `0x600` LFS_TYPE_SOFTTAIL + +Provides a tail pointer that points to the next metadata pair in the +filesystem. + +In this case, the next metadata pair is not a part of our current directory +and should only be followed when traversing the entire filesystem. + +--- +#### `0x601` LFS_TYPE_HARDTAIL + +Provides a tail pointer that points to the next metadata pair in the +directory. + +In this case, the next metadata pair belongs to the current directory. Note +that because directories in littlefs are sorted alphabetically, the next +metadata pair should only contain filenames greater than any filename in the +current pair. + +--- +#### `0x7xx` LFS_TYPE_GSTATE + +Provides delta bits for global state entries. + +littlefs has a concept of "global state". This is a small set of state that +can be updated by a commit to _any_ metadata pair in the filesystem. + +The way this works is that the global state is stored as a set of deltas +distributed across the filesystem such that the global state can be found by +the xor-sum of these deltas. + +``` + .--------. .--------. .--------. .--------. .--------. +.| |->| gdelta |->| |->| gdelta |->| gdelta | +|| | || 0x23 | || | || 0xff | || 0xce | +|| | || | || | || | || | +|'--------' |'--------' |'--------' |'--------' |'--------' +'--------' '----|---' '--------' '----|---' '----|---' + v v v + 0x00 --> xor ------------------> xor ------> xor --> gstate = 0x12 +``` + +Note that storing globals this way is very expensive in terms of storage usage, +so any global state should be kept very small. 
+ +The size and format of each piece of global state depends on the type, which +is stored in the chunk field. Currently, the only global state is move state, +which is outlined below. + +--- +#### `0x7ff` LFS_TYPE_MOVESTATE + +Provides delta bits for the global move state. + +The move state in littlefs is used to store info about operations that could +cause the filesystem to go out of sync if the power is lost. The operations +where this could occur are moves of files between metadata pairs and any +operation that changes metadata pairs on the threaded linked-list. + +In the case of moves, the move state contains a tag + metadata pair describing +the source of the ongoing move. If this tag is non-zero, that means that power +was lost during a move, and the file exists in two different locations. If this +happens, the source of the move should be considered deleted, and the move +should be completed (the source should be deleted) before any other write +operations to the filesystem. + +In the case of operations to the threaded linked-list, a single "sync" bit is +used to indicate that a modification is ongoing. If this sync flag is set, the +threaded linked-list will need to be checked for errors before it can be used +reliably. The exact cases to check for are described above in the tail tag. + +Layout of the move state: + +``` + tag data +[-- 32 --][-- 32 --|-- 32 --|-- 32 --] +[1|- 11 -| 10 | 10 ][1|- 11 -| 10 | 10 |--- 64 ---] + ^ ^ ^ ^ ^ ^ ^ ^- padding (0) ^- metadata pair + | | | | | | '------ move id + | | | | | '------------ move type + | | | | '----------------- sync bit + | | | | + | | | '- size (12) + | | '------ id (0x3ff) + | '------------ type (0x7ff) + '----------------- valid bit +``` + +Move state fields: + +1. **Sync bit (1-bit)** - Indicates if the metadata pair threaded linked-list + is in-sync. If set, the threaded linked-list should be checked for errors. + +2. **Move type (11-bits)** - Type of move being performed. 
Must be either + `0x000`, indicating no move, or `0x4ff` indicating the source file should + be deleted. + +3. **Move id (10-bits)** - The file id being moved. + +4. **Metadata pair (8-bytes)** - Pointer to the metadata-pair containing + the move. + +--- +#### `0x5xx` LFS_TYPE_CRC + +Last but not least, the CRC tag marks the end of a commit and provides a +checksum for any commits to the metadata block. + +The first 32-bits of the data contain a CRC-32 with a polynomial of +`0x04c11db7` initialized with `0xffffffff`. This CRC provides a checksum for +all metadata since the previous CRC tag, including the CRC tag itself. For +the first commit, this includes the revision count for the metadata block. + +However, the size of the data is not limited to 32-bits. The data field may +be larger to pad the commit to the next program-aligned boundary. + +In addition, the CRC tag's chunk field contains a set of flags which can +change the behaviour of commits. Currently the only flag in use is the lowest +bit, which determines the expected state of the valid bit for any following +tags. This is used to guarantee that unwritten storage in a metadata block +will be detected as invalid. + +Layout of the CRC tag: + +``` + tag data +[-- 32 --][-- 32 --|--- variable length ---] +[1| 3| 8 | 10 | 10 ][-- 32 --|--- (size) ---] + ^ ^ ^ ^ ^ ^- crc ^- padding + | | | | '- size (12) + | | | '------ id (0x3ff) + | | '----------- valid state + | '-------------- type1 (0x5) + '----------------- valid bit +``` + +CRC fields: + +1. **Valid state (1-bit)** - Indicates the expected value of the valid bit for + any tags in the next commit. + +2. **CRC (32-bits)** - CRC-32 with a polynomial of `0x04c11db7` initialized + with `0xffffffff`. + +3. **Padding** - Padding to the next program-aligned boundary. No guarantees + are made about the contents. 
+ +--- diff --git a/emubd/lfs_emubd.c b/emubd/lfs_emubd.c index e44602c2..3f31bfac 100644 --- a/emubd/lfs_emubd.c +++ b/emubd/lfs_emubd.c @@ -19,6 +19,40 @@ #include +// Emulated block device utils +static inline void lfs_emubd_tole32(lfs_emubd_t *emu) { + emu->cfg.read_size = lfs_tole32(emu->cfg.read_size); + emu->cfg.prog_size = lfs_tole32(emu->cfg.prog_size); + emu->cfg.block_size = lfs_tole32(emu->cfg.block_size); + emu->cfg.block_count = lfs_tole32(emu->cfg.block_count); + + emu->stats.read_count = lfs_tole32(emu->stats.read_count); + emu->stats.prog_count = lfs_tole32(emu->stats.prog_count); + emu->stats.erase_count = lfs_tole32(emu->stats.erase_count); + + for (unsigned i = 0; i < sizeof(emu->history.blocks) / + sizeof(emu->history.blocks[0]); i++) { + emu->history.blocks[i] = lfs_tole32(emu->history.blocks[i]); + } +} + +static inline void lfs_emubd_fromle32(lfs_emubd_t *emu) { + emu->cfg.read_size = lfs_fromle32(emu->cfg.read_size); + emu->cfg.prog_size = lfs_fromle32(emu->cfg.prog_size); + emu->cfg.block_size = lfs_fromle32(emu->cfg.block_size); + emu->cfg.block_count = lfs_fromle32(emu->cfg.block_count); + + emu->stats.read_count = lfs_fromle32(emu->stats.read_count); + emu->stats.prog_count = lfs_fromle32(emu->stats.prog_count); + emu->stats.erase_count = lfs_fromle32(emu->stats.erase_count); + + for (unsigned i = 0; i < sizeof(emu->history.blocks) / + sizeof(emu->history.blocks[0]); i++) { + emu->history.blocks[i] = lfs_fromle32(emu->history.blocks[i]); + } +} + + // Block device emulated on existing filesystem int lfs_emubd_create(const struct lfs_config *cfg, const char *path) { lfs_emubd_t *emu = cfg->context; @@ -46,17 +80,31 @@ int lfs_emubd_create(const struct lfs_config *cfg, const char *path) { } // Load stats to continue incrementing - snprintf(emu->child, LFS_NAME_MAX, "stats"); - + snprintf(emu->child, LFS_NAME_MAX, ".stats"); FILE *f = fopen(emu->path, "r"); - if (!f && errno != ENOENT) { - return -errno; + if (!f) { + memset(&emu->stats, 
0, sizeof(emu->stats)); + } else { + size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f); + lfs_emubd_fromle32(emu); + if (res < 1) { + return -errno; + } + + err = fclose(f); + if (err) { + return -errno; + } } - if (errno == ENOENT) { - memset(&emu->stats, 0x0, sizeof(emu->stats)); + // Load history + snprintf(emu->child, LFS_NAME_MAX, ".history"); + f = fopen(emu->path, "r"); + if (!f) { + memset(&emu->history, 0, sizeof(emu->history)); } else { - size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f); + size_t res = fread(&emu->history, sizeof(emu->history), 1, f); + lfs_emubd_fromle32(emu); if (res < 1) { return -errno; } @@ -166,6 +214,13 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block, return -errno; } + // update history and stats + if (block != emu->history.blocks[0]) { + memcpy(&emu->history.blocks[1], &emu->history.blocks[0], + sizeof(emu->history) - sizeof(emu->history.blocks[0])); + emu->history.blocks[0] = block; + } + emu->stats.prog_count += 1; return 0; } @@ -211,13 +266,15 @@ int lfs_emubd_sync(const struct lfs_config *cfg) { lfs_emubd_t *emu = cfg->context; // Just write out info/stats for later lookup - snprintf(emu->child, LFS_NAME_MAX, "config"); + snprintf(emu->child, LFS_NAME_MAX, ".config"); FILE *f = fopen(emu->path, "w"); if (!f) { return -errno; } + lfs_emubd_tole32(emu); size_t res = fwrite(&emu->cfg, sizeof(emu->cfg), 1, f); + lfs_emubd_fromle32(emu); if (res < 1) { return -errno; } @@ -227,13 +284,33 @@ int lfs_emubd_sync(const struct lfs_config *cfg) { return -errno; } - snprintf(emu->child, LFS_NAME_MAX, "stats"); + snprintf(emu->child, LFS_NAME_MAX, ".stats"); f = fopen(emu->path, "w"); if (!f) { return -errno; } + lfs_emubd_tole32(emu); res = fwrite(&emu->stats, sizeof(emu->stats), 1, f); + lfs_emubd_fromle32(emu); + if (res < 1) { + return -errno; + } + + err = fclose(f); + if (err) { + return -errno; + } + + snprintf(emu->child, LFS_NAME_MAX, ".history"); + f = fopen(emu->path, "w"); + if (!f) { + 
return -errno; + } + + lfs_emubd_tole32(emu); + res = fwrite(&emu->history, sizeof(emu->history), 1, f); + lfs_emubd_fromle32(emu); if (res < 1) { return -errno; } diff --git a/emubd/lfs_emubd.h b/emubd/lfs_emubd.h index 0fd43875..64afa3ee 100644 --- a/emubd/lfs_emubd.h +++ b/emubd/lfs_emubd.h @@ -45,6 +45,10 @@ typedef struct lfs_emubd { uint64_t erase_count; } stats; + struct { + lfs_block_t blocks[4]; + } history; + struct { uint32_t read_size; uint32_t prog_size; diff --git a/lfs.c b/lfs.c index ed7f6876..25c88c60 100644 --- a/lfs.c +++ b/lfs.c @@ -1,52 +1,89 @@ /* * The little filesystem * - * Copyright (c) 2017, Arm Limited. All rights reserved. - * SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2017 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ #include "lfs.h" #include "lfs_util.h" -#include - /// Caching block device operations /// -static int lfs_cache_read(lfs_t *lfs, lfs_cache_t *rcache, - const lfs_cache_t *pcache, lfs_block_t block, - lfs_off_t off, void *buffer, lfs_size_t size) { +static inline void lfs_cache_drop(lfs_t *lfs, lfs_cache_t *rcache) { + // do not zero, cheaper if cache is readonly or only going to be + // written with identical data (during relocates) + (void)lfs; + rcache->block = 0xffffffff; +} + +static inline void lfs_cache_zero(lfs_t *lfs, lfs_cache_t *pcache) { + // zero to avoid information leak + memset(pcache->buffer, 0xff, lfs->cfg->prog_size); + pcache->block = 0xffffffff; +} + +static int lfs_bd_read(lfs_t *lfs, + const lfs_cache_t *pcache, lfs_cache_t *rcache, lfs_size_t hint, + lfs_block_t block, lfs_off_t off, + void *buffer, lfs_size_t size) { uint8_t *data = buffer; - LFS_ASSERT(block < lfs->cfg->block_count); + LFS_ASSERT(block != 0xffffffff); + if (off+size > lfs->cfg->block_size) { + return LFS_ERR_CORRUPT; + } while (size > 0) { - if (pcache && block == pcache->block && off >= pcache->off && - off < pcache->off + lfs->cfg->prog_size) { - // is already in pcache? - lfs_size_t diff = lfs_min(size, - lfs->cfg->prog_size - (off-pcache->off)); - memcpy(data, &pcache->buffer[off-pcache->off], diff); + lfs_size_t diff = size; + + if (pcache && block == pcache->block && + off < pcache->off + pcache->size) { + if (off >= pcache->off) { + // is already in pcache? + diff = lfs_min(diff, pcache->size - (off-pcache->off)); + memcpy(data, &pcache->buffer[off-pcache->off], diff); + + data += diff; + off += diff; + size -= diff; + continue; + } - data += diff; - off += diff; - size -= diff; - continue; + // pcache takes priority + diff = lfs_min(diff, pcache->off-off); } - if (block == rcache->block && off >= rcache->off && - off < rcache->off + lfs->cfg->read_size) { - // is already in rcache? 
- lfs_size_t diff = lfs_min(size, - lfs->cfg->read_size - (off-rcache->off)); - memcpy(data, &rcache->buffer[off-rcache->off], diff); + if (block == rcache->block && + off < rcache->off + rcache->size) { + if (off >= rcache->off) { + // is already in rcache? + diff = lfs_min(diff, rcache->size - (off-rcache->off)); + memcpy(data, &rcache->buffer[off-rcache->off], diff); - data += diff; - off += diff; - size -= diff; - continue; + data += diff; + off += diff; + size -= diff; + continue; + } + + // rcache takes priority + diff = lfs_min(diff, rcache->off-off); } - if (off % lfs->cfg->read_size == 0 && size >= lfs->cfg->read_size) { + if (size >= hint && off % lfs->cfg->read_size == 0 && + size >= lfs->cfg->read_size) { // bypass cache? - lfs_size_t diff = size - (size % lfs->cfg->read_size); + diff = lfs_aligndown(diff, lfs->cfg->read_size); int err = lfs->cfg->read(lfs->cfg, block, off, data, diff); if (err) { return err; @@ -59,10 +96,14 @@ static int lfs_cache_read(lfs_t *lfs, lfs_cache_t *rcache, } // load to cache, first condition can no longer fail + LFS_ASSERT(block < lfs->cfg->block_count); rcache->block = block; - rcache->off = off - (off % lfs->cfg->read_size); + rcache->off = lfs_aligndown(off, lfs->cfg->read_size); + rcache->size = lfs_min(lfs_alignup(off+hint, lfs->cfg->read_size), + lfs_min(lfs->cfg->block_size - rcache->off, + lfs->cfg->cache_size)); int err = lfs->cfg->read(lfs->cfg, rcache->block, - rcache->off, rcache->buffer, lfs->cfg->read_size); + rcache->off, rcache->buffer, rcache->size); if (err) { return err; } @@ -71,74 +112,57 @@ static int lfs_cache_read(lfs_t *lfs, lfs_cache_t *rcache, return 0; } -static int lfs_cache_cmp(lfs_t *lfs, lfs_cache_t *rcache, - const lfs_cache_t *pcache, lfs_block_t block, - lfs_off_t off, const void *buffer, lfs_size_t size) { +enum { + LFS_CMP_EQ = 0, + LFS_CMP_LT = 1, + LFS_CMP_GT = 2, +}; + +static int lfs_bd_cmp(lfs_t *lfs, + const lfs_cache_t *pcache, lfs_cache_t *rcache, lfs_size_t hint, + lfs_block_t 
block, lfs_off_t off, + const void *buffer, lfs_size_t size) { const uint8_t *data = buffer; for (lfs_off_t i = 0; i < size; i++) { - uint8_t c; - int err = lfs_cache_read(lfs, rcache, pcache, - block, off+i, &c, 1); + uint8_t dat; + int err = lfs_bd_read(lfs, + pcache, rcache, hint-i, + block, off+i, &dat, 1); if (err) { return err; } - if (c != data[i]) { - return false; - } - } - - return true; -} - -static int lfs_cache_crc(lfs_t *lfs, lfs_cache_t *rcache, - const lfs_cache_t *pcache, lfs_block_t block, - lfs_off_t off, lfs_size_t size, uint32_t *crc) { - for (lfs_off_t i = 0; i < size; i++) { - uint8_t c; - int err = lfs_cache_read(lfs, rcache, pcache, - block, off+i, &c, 1); - if (err) { - return err; + if (dat != data[i]) { + return (dat < data[i]) ? LFS_CMP_LT : LFS_CMP_GT; } - - lfs_crc(crc, &c, 1); } - return 0; -} - -static inline void lfs_cache_drop(lfs_t *lfs, lfs_cache_t *rcache) { - // do not zero, cheaper if cache is readonly or only going to be - // written with identical data (during relocates) - (void)lfs; - rcache->block = 0xffffffff; -} - -static inline void lfs_cache_zero(lfs_t *lfs, lfs_cache_t *pcache) { - // zero to avoid information leak - memset(pcache->buffer, 0xff, lfs->cfg->prog_size); - pcache->block = 0xffffffff; + return LFS_CMP_EQ; } -static int lfs_cache_flush(lfs_t *lfs, - lfs_cache_t *pcache, lfs_cache_t *rcache) { - if (pcache->block != 0xffffffff) { +static int lfs_bd_flush(lfs_t *lfs, + lfs_cache_t *pcache, lfs_cache_t *rcache, bool validate) { + if (pcache->block != 0xffffffff && pcache->block != 0xfffffffe) { + LFS_ASSERT(pcache->block < lfs->cfg->block_count); + lfs_size_t diff = lfs_alignup(pcache->size, lfs->cfg->prog_size); int err = lfs->cfg->prog(lfs->cfg, pcache->block, - pcache->off, pcache->buffer, lfs->cfg->prog_size); + pcache->off, pcache->buffer, diff); if (err) { return err; } - if (rcache) { - int res = lfs_cache_cmp(lfs, rcache, NULL, pcache->block, - pcache->off, pcache->buffer, lfs->cfg->prog_size); + if 
(validate) { + // check data on disk + lfs_cache_drop(lfs, rcache); + int res = lfs_bd_cmp(lfs, + NULL, rcache, diff, + pcache->block, pcache->off, pcache->buffer, diff); if (res < 0) { return res; } - if (!res) { + if (res != LFS_CMP_EQ) { return LFS_ERR_CORRUPT; } } @@ -149,27 +173,43 @@ static int lfs_cache_flush(lfs_t *lfs, return 0; } -static int lfs_cache_prog(lfs_t *lfs, lfs_cache_t *pcache, - lfs_cache_t *rcache, lfs_block_t block, - lfs_off_t off, const void *buffer, lfs_size_t size) { +static int lfs_bd_sync(lfs_t *lfs, + lfs_cache_t *pcache, lfs_cache_t *rcache, bool validate) { + lfs_cache_drop(lfs, rcache); + + int err = lfs_bd_flush(lfs, pcache, rcache, validate); + if (err) { + return err; + } + + return lfs->cfg->sync(lfs->cfg); +} + +static int lfs_bd_prog(lfs_t *lfs, + lfs_cache_t *pcache, lfs_cache_t *rcache, bool validate, + lfs_block_t block, lfs_off_t off, + const void *buffer, lfs_size_t size) { const uint8_t *data = buffer; - LFS_ASSERT(block < lfs->cfg->block_count); + LFS_ASSERT(block != 0xffffffff); + LFS_ASSERT(off + size <= lfs->cfg->block_size); while (size > 0) { - if (block == pcache->block && off >= pcache->off && - off < pcache->off + lfs->cfg->prog_size) { - // is already in pcache? + if (block == pcache->block && + off >= pcache->off && + off < pcache->off + lfs->cfg->cache_size) { + // already fits in pcache? 
lfs_size_t diff = lfs_min(size, - lfs->cfg->prog_size - (off-pcache->off)); + lfs->cfg->cache_size - (off-pcache->off)); memcpy(&pcache->buffer[off-pcache->off], data, diff); data += diff; off += diff; size -= diff; - if (off % lfs->cfg->prog_size == 0) { + pcache->size = off - pcache->off; + if (pcache->size == lfs->cfg->cache_size) { // eagerly flush out pcache if we fill up - int err = lfs_cache_flush(lfs, pcache, rcache); + int err = lfs_bd_flush(lfs, pcache, rcache, validate); if (err) { return err; } @@ -182,98 +222,231 @@ static int lfs_cache_prog(lfs_t *lfs, lfs_cache_t *pcache, // entire block or manually flushing the pcache LFS_ASSERT(pcache->block == 0xffffffff); - if (off % lfs->cfg->prog_size == 0 && - size >= lfs->cfg->prog_size) { - // bypass pcache? - lfs_size_t diff = size - (size % lfs->cfg->prog_size); - int err = lfs->cfg->prog(lfs->cfg, block, off, data, diff); - if (err) { - return err; - } - - if (rcache) { - int res = lfs_cache_cmp(lfs, rcache, NULL, - block, off, data, diff); - if (res < 0) { - return res; - } - - if (!res) { - return LFS_ERR_CORRUPT; - } - } - - data += diff; - off += diff; - size -= diff; - continue; - } - // prepare pcache, first condition can no longer fail pcache->block = block; - pcache->off = off - (off % lfs->cfg->prog_size); + pcache->off = lfs_aligndown(off, lfs->cfg->prog_size); + pcache->size = 0; } return 0; } +static int lfs_bd_erase(lfs_t *lfs, lfs_block_t block) { + LFS_ASSERT(block < lfs->cfg->block_count); + return lfs->cfg->erase(lfs->cfg, block); +} + -/// General lfs block device operations /// -static int lfs_bd_read(lfs_t *lfs, lfs_block_t block, - lfs_off_t off, void *buffer, lfs_size_t size) { - // if we ever do more than writes to alternating pairs, - // this may need to consider pcache - return lfs_cache_read(lfs, &lfs->rcache, NULL, - block, off, buffer, size); +/// Small type-level utilities /// +// operations on block pairs +static inline void lfs_pair_swap(lfs_block_t pair[2]) { + lfs_block_t 
t = pair[0]; + pair[0] = pair[1]; + pair[1] = t; } -static int lfs_bd_prog(lfs_t *lfs, lfs_block_t block, - lfs_off_t off, const void *buffer, lfs_size_t size) { - return lfs_cache_prog(lfs, &lfs->pcache, NULL, - block, off, buffer, size); +static inline bool lfs_pair_isnull(const lfs_block_t pair[2]) { + return pair[0] == 0xffffffff || pair[1] == 0xffffffff; +} + +static inline int lfs_pair_cmp( + const lfs_block_t paira[2], + const lfs_block_t pairb[2]) { + return !(paira[0] == pairb[0] || paira[1] == pairb[1] || + paira[0] == pairb[1] || paira[1] == pairb[0]); } -static int lfs_bd_cmp(lfs_t *lfs, lfs_block_t block, - lfs_off_t off, const void *buffer, lfs_size_t size) { - return lfs_cache_cmp(lfs, &lfs->rcache, NULL, block, off, buffer, size); +static inline bool lfs_pair_sync( + const lfs_block_t paira[2], + const lfs_block_t pairb[2]) { + return (paira[0] == pairb[0] && paira[1] == pairb[1]) || + (paira[0] == pairb[1] && paira[1] == pairb[0]); } -static int lfs_bd_crc(lfs_t *lfs, lfs_block_t block, - lfs_off_t off, lfs_size_t size, uint32_t *crc) { - return lfs_cache_crc(lfs, &lfs->rcache, NULL, block, off, size, crc); +static inline void lfs_pair_fromle32(lfs_block_t pair[2]) { + pair[0] = lfs_fromle32(pair[0]); + pair[1] = lfs_fromle32(pair[1]); } -static int lfs_bd_erase(lfs_t *lfs, lfs_block_t block) { - return lfs->cfg->erase(lfs->cfg, block); +static inline void lfs_pair_tole32(lfs_block_t pair[2]) { + pair[0] = lfs_tole32(pair[0]); + pair[1] = lfs_tole32(pair[1]); } -static int lfs_bd_sync(lfs_t *lfs) { - lfs_cache_drop(lfs, &lfs->rcache); +// operations on 32-bit entry tags +typedef uint32_t lfs_tag_t; +typedef int32_t lfs_stag_t; - int err = lfs_cache_flush(lfs, &lfs->pcache, NULL); - if (err) { - return err; +#define LFS_MKTAG(type, id, size) \ + (((lfs_tag_t)(type) << 20) | ((lfs_tag_t)(id) << 10) | (lfs_tag_t)(size)) + +static inline bool lfs_tag_isvalid(lfs_tag_t tag) { + return !(tag & 0x80000000); +} + +static inline bool 
lfs_tag_isdelete(lfs_tag_t tag) { + return ((int32_t)(tag << 22) >> 22) == -1; +} + +static inline uint16_t lfs_tag_type1(lfs_tag_t tag) { + return (tag & 0x70000000) >> 20; +} + +static inline uint16_t lfs_tag_type3(lfs_tag_t tag) { + return (tag & 0x7ff00000) >> 20; +} + +static inline uint8_t lfs_tag_chunk(lfs_tag_t tag) { + return (tag & 0x0ff00000) >> 20; +} + +static inline int8_t lfs_tag_splice(lfs_tag_t tag) { + return (int8_t)lfs_tag_chunk(tag); +} + +static inline uint16_t lfs_tag_id(lfs_tag_t tag) { + return (tag & 0x000ffc00) >> 10; +} + +static inline lfs_size_t lfs_tag_size(lfs_tag_t tag) { + return tag & 0x000003ff; +} + +static inline lfs_size_t lfs_tag_dsize(lfs_tag_t tag) { + return sizeof(tag) + lfs_tag_size(tag + lfs_tag_isdelete(tag)); +} + +// operations on attributes in attribute lists +struct lfs_mattr { + lfs_tag_t tag; + const void *buffer; +}; + +struct lfs_diskoff { + lfs_block_t block; + lfs_off_t off; +}; + +#define LFS_MKATTRS(...) \ + (struct lfs_mattr[]){__VA_ARGS__}, \ + sizeof((struct lfs_mattr[]){__VA_ARGS__}) / sizeof(struct lfs_mattr) + +// operations on global state +static inline void lfs_gstate_xor(struct lfs_gstate *a, + const struct lfs_gstate *b) { + for (int i = 0; i < 3; i++) { + ((uint32_t*)a)[i] ^= ((const uint32_t*)b)[i]; } +} - return lfs->cfg->sync(lfs->cfg); +static inline bool lfs_gstate_iszero(const struct lfs_gstate *a) { + for (int i = 0; i < 3; i++) { + if (((uint32_t*)a)[i] != 0) { + return false; + } + } + return true; } +static inline bool lfs_gstate_hasorphans(const struct lfs_gstate *a) { + return lfs_tag_size(a->tag); +} + +static inline uint8_t lfs_gstate_getorphans(const struct lfs_gstate *a) { + return lfs_tag_size(a->tag); +} + +static inline bool lfs_gstate_hasmove(const struct lfs_gstate *a) { + return lfs_tag_type1(a->tag); +} + +static inline bool lfs_gstate_hasmovehere(const struct lfs_gstate *a, + const lfs_block_t *pair) { + return lfs_tag_type1(a->tag) && lfs_pair_cmp(a->pair, pair) == 0; +} 
+ +static inline void lfs_gstate_xororphans(struct lfs_gstate *a, + const struct lfs_gstate *b, bool orphans) { + a->tag ^= LFS_MKTAG(0x800, 0, 0) & (b->tag ^ (orphans << 31)); +} + +static inline void lfs_gstate_xormove(struct lfs_gstate *a, + const struct lfs_gstate *b, uint16_t id, const lfs_block_t pair[2]) { + a->tag ^= LFS_MKTAG(0x7ff, 0x3ff, 0) & (b->tag ^ ( + (id != 0x3ff) ? LFS_MKTAG(LFS_TYPE_DELETE, id, 0) : 0)); + a->pair[0] ^= b->pair[0] ^ ((id != 0x3ff) ? pair[0] : 0); + a->pair[1] ^= b->pair[1] ^ ((id != 0x3ff) ? pair[1] : 0); +} + +static inline void lfs_gstate_fromle32(struct lfs_gstate *a) { + a->tag = lfs_fromle32(a->tag); + a->pair[0] = lfs_fromle32(a->pair[0]); + a->pair[1] = lfs_fromle32(a->pair[1]); +} + +static inline void lfs_gstate_tole32(struct lfs_gstate *a) { + a->tag = lfs_tole32(a->tag); + a->pair[0] = lfs_tole32(a->pair[0]); + a->pair[1] = lfs_tole32(a->pair[1]); +} + +// other endianness operations +static void lfs_ctz_fromle32(struct lfs_ctz *ctz) { + ctz->head = lfs_fromle32(ctz->head); + ctz->size = lfs_fromle32(ctz->size); +} + +static void lfs_ctz_tole32(struct lfs_ctz *ctz) { + ctz->head = lfs_tole32(ctz->head); + ctz->size = lfs_tole32(ctz->size); +} + +static inline void lfs_superblock_fromle32(lfs_superblock_t *superblock) { + superblock->version = lfs_fromle32(superblock->version); + superblock->block_size = lfs_fromle32(superblock->block_size); + superblock->block_count = lfs_fromle32(superblock->block_count); + superblock->name_max = lfs_fromle32(superblock->name_max); + superblock->file_max = lfs_fromle32(superblock->file_max); + superblock->attr_max = lfs_fromle32(superblock->attr_max); +} + +static inline void lfs_superblock_tole32(lfs_superblock_t *superblock) { + superblock->version = lfs_tole32(superblock->version); + superblock->block_size = lfs_tole32(superblock->block_size); + superblock->block_count = lfs_tole32(superblock->block_count); + superblock->name_max = lfs_tole32(superblock->name_max); + 
superblock->file_max = lfs_tole32(superblock->file_max); + superblock->attr_max = lfs_tole32(superblock->attr_max); +} -/// Internal operations predeclared here /// -int lfs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data); -static int lfs_pred(lfs_t *lfs, const lfs_block_t dir[2], lfs_dir_t *pdir); -static int lfs_parent(lfs_t *lfs, const lfs_block_t dir[2], - lfs_dir_t *parent, lfs_entry_t *entry); -static int lfs_moved(lfs_t *lfs, const void *e); -static int lfs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], const lfs_block_t newpair[2]); -int lfs_deorphan(lfs_t *lfs); +/// Internal operations predeclared here /// +static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount); +static int lfs_dir_compact(lfs_t *lfs, + lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *source, uint16_t begin, uint16_t end); +static int lfs_file_relocate(lfs_t *lfs, lfs_file_t *file); +static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file); +static void lfs_fs_preporphans(lfs_t *lfs, int8_t orphans); +static void lfs_fs_prepmove(lfs_t *lfs, + uint16_t id, const lfs_block_t pair[2]); +static int lfs_fs_pred(lfs_t *lfs, const lfs_block_t dir[2], + lfs_mdir_t *pdir); +static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t dir[2], + lfs_mdir_t *parent); +static int lfs_fs_relocate(lfs_t *lfs, + const lfs_block_t oldpair[2], lfs_block_t newpair[2]); +static int lfs_fs_forceconsistency(lfs_t *lfs); +static int lfs_deinit(lfs_t *lfs); +#ifdef LFS_MIGRATE +static int lfs1_traverse(lfs_t *lfs, + int (*cb)(void*, lfs_block_t), void *data); +#endif /// Block allocator /// static int lfs_alloc_lookahead(void *p, lfs_block_t block) { - lfs_t *lfs = p; - + lfs_t *lfs = (lfs_t*)p; lfs_block_t off = ((block - lfs->free.off) + lfs->cfg->block_count) % lfs->cfg->block_count; @@ -310,19 +483,19 @@ static int lfs_alloc(lfs_t *lfs, lfs_block_t *block) { // check if we have looked at all blocks since last 
ack if (lfs->free.ack == 0) { - LFS_WARN("No more free space %" PRIu32, + LFS_WARN("No more free space %"PRIu32, lfs->free.i + lfs->free.off); return LFS_ERR_NOSPC; } lfs->free.off = (lfs->free.off + lfs->free.size) % lfs->cfg->block_count; - lfs->free.size = lfs_min(lfs->cfg->lookahead, lfs->free.ack); + lfs->free.size = lfs_min(8*lfs->cfg->lookahead_size, lfs->free.ack); lfs->free.i = 0; // find mask of free blocks from tree - memset(lfs->free.buffer, 0, lfs->cfg->lookahead/8); - int err = lfs_traverse(lfs, lfs_alloc_lookahead, lfs); + memset(lfs->free.buffer, 0, lfs->cfg->lookahead_size); + int err = lfs_fs_traverse(lfs, lfs_alloc_lookahead, lfs); if (err) { return err; } @@ -334,494 +507,599 @@ static void lfs_alloc_ack(lfs_t *lfs) { } -/// Endian swapping functions /// -static void lfs_dir_fromle32(struct lfs_disk_dir *d) { - d->rev = lfs_fromle32(d->rev); - d->size = lfs_fromle32(d->size); - d->tail[0] = lfs_fromle32(d->tail[0]); - d->tail[1] = lfs_fromle32(d->tail[1]); -} +/// Metadata pair and directory operations /// +static lfs_stag_t lfs_dir_getslice(lfs_t *lfs, const lfs_mdir_t *dir, + lfs_tag_t gmask, lfs_tag_t gtag, + lfs_off_t goff, void *gbuffer, lfs_size_t gsize) { + lfs_off_t off = dir->off; + lfs_tag_t ntag = dir->etag; + lfs_stag_t gdiff = 0; + + if (lfs_gstate_hasmovehere(&lfs->gstate, dir->pair) && + lfs_tag_id(gtag) <= lfs_tag_id(lfs->gstate.tag)) { + // synthetic moves + gdiff -= LFS_MKTAG(0, 1, 0); + } -static void lfs_dir_tole32(struct lfs_disk_dir *d) { - d->rev = lfs_tole32(d->rev); - d->size = lfs_tole32(d->size); - d->tail[0] = lfs_tole32(d->tail[0]); - d->tail[1] = lfs_tole32(d->tail[1]); -} + // iterate over dir block backwards (for faster lookups) + while (off >= sizeof(lfs_tag_t) + lfs_tag_dsize(ntag)) { + off -= lfs_tag_dsize(ntag); + lfs_tag_t tag = ntag; + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(ntag), + dir->pair[0], off, &ntag, sizeof(ntag)); + if (err) { + return err; + } -static void lfs_entry_fromle32(struct 
lfs_disk_entry *d) { - d->u.dir[0] = lfs_fromle32(d->u.dir[0]); - d->u.dir[1] = lfs_fromle32(d->u.dir[1]); -} + ntag = (lfs_frombe32(ntag) ^ tag) & 0x7fffffff; -static void lfs_entry_tole32(struct lfs_disk_entry *d) { - d->u.dir[0] = lfs_tole32(d->u.dir[0]); - d->u.dir[1] = lfs_tole32(d->u.dir[1]); -} + if (lfs_tag_id(gmask) != 0 && + lfs_tag_type1(tag) == LFS_TYPE_SPLICE && + lfs_tag_id(tag) <= lfs_tag_id(gtag - gdiff)) { + if (tag == (LFS_MKTAG(LFS_TYPE_CREATE, 0, 0) | + (LFS_MKTAG(0, 0x3ff, 0) & (gtag - gdiff)))) { + // found where we were created + return LFS_ERR_NOENT; + } -static void lfs_superblock_fromle32(struct lfs_disk_superblock *d) { - d->root[0] = lfs_fromle32(d->root[0]); - d->root[1] = lfs_fromle32(d->root[1]); - d->block_size = lfs_fromle32(d->block_size); - d->block_count = lfs_fromle32(d->block_count); - d->version = lfs_fromle32(d->version); -} + // move around splices + gdiff += LFS_MKTAG(0, lfs_tag_splice(tag), 0); + } -static void lfs_superblock_tole32(struct lfs_disk_superblock *d) { - d->root[0] = lfs_tole32(d->root[0]); - d->root[1] = lfs_tole32(d->root[1]); - d->block_size = lfs_tole32(d->block_size); - d->block_count = lfs_tole32(d->block_count); - d->version = lfs_tole32(d->version); -} + if ((gmask & tag) == (gmask & (gtag - gdiff))) { + if (lfs_tag_isdelete(tag)) { + return LFS_ERR_NOENT; + } + lfs_size_t diff = lfs_min(lfs_tag_size(tag), gsize); + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, diff, + dir->pair[0], off+sizeof(tag)+goff, gbuffer, diff); + if (err) { + return err; + } -/// Metadata pair and directory operations /// -static inline void lfs_pairswap(lfs_block_t pair[2]) { - lfs_block_t t = pair[0]; - pair[0] = pair[1]; - pair[1] = t; -} + memset((uint8_t*)gbuffer + diff, 0, gsize - diff); -static inline bool lfs_pairisnull(const lfs_block_t pair[2]) { - return pair[0] == 0xffffffff || pair[1] == 0xffffffff; -} + return tag + gdiff; + } + } -static inline int lfs_paircmp( - const lfs_block_t paira[2], - const lfs_block_t 
pairb[2]) { - return !(paira[0] == pairb[0] || paira[1] == pairb[1] || - paira[0] == pairb[1] || paira[1] == pairb[0]); + return LFS_ERR_NOENT; } -static inline bool lfs_pairsync( - const lfs_block_t paira[2], - const lfs_block_t pairb[2]) { - return (paira[0] == pairb[0] && paira[1] == pairb[1]) || - (paira[0] == pairb[1] && paira[1] == pairb[0]); +static lfs_stag_t lfs_dir_get(lfs_t *lfs, const lfs_mdir_t *dir, + lfs_tag_t gmask, lfs_tag_t gtag, void *buffer) { + return lfs_dir_getslice(lfs, dir, + gmask, gtag, + 0, buffer, lfs_tag_size(gtag)); } -static inline lfs_size_t lfs_entry_size(const lfs_entry_t *entry) { - return 4 + entry->d.elen + entry->d.alen + entry->d.nlen; -} +static int lfs_dir_getread(lfs_t *lfs, const lfs_mdir_t *dir, + const lfs_cache_t *pcache, lfs_cache_t *rcache, lfs_size_t hint, + lfs_tag_t gmask, lfs_tag_t gtag, + lfs_off_t off, void *buffer, lfs_size_t size) { + uint8_t *data = buffer; + if (off+size > lfs->cfg->block_size) { + return LFS_ERR_CORRUPT; + } -static int lfs_dir_alloc(lfs_t *lfs, lfs_dir_t *dir) { - // allocate pair of dir blocks - for (int i = 0; i < 2; i++) { - int err = lfs_alloc(lfs, &dir->pair[i]); - if (err) { - return err; + while (size > 0) { + lfs_size_t diff = size; + + if (pcache && pcache->block == 0xfffffffe && + off < pcache->off + pcache->size) { + if (off >= pcache->off) { + // is already in pcache? + diff = lfs_min(diff, pcache->size - (off-pcache->off)); + memcpy(data, &pcache->buffer[off-pcache->off], diff); + + data += diff; + off += diff; + size -= diff; + continue; + } + + // pcache takes priority + diff = lfs_min(diff, pcache->off-off); } - } - // rather than clobbering one of the blocks we just pretend - // the revision may be valid - int err = lfs_bd_read(lfs, dir->pair[0], 0, &dir->d.rev, 4); - if (err && err != LFS_ERR_CORRUPT) { - return err; - } + if (rcache->block == 0xfffffffe && + off < rcache->off + rcache->size) { + if (off >= rcache->off) { + // is already in rcache? 
+ diff = lfs_min(diff, rcache->size - (off-rcache->off)); + memcpy(data, &rcache->buffer[off-rcache->off], diff); - if (err != LFS_ERR_CORRUPT) { - dir->d.rev = lfs_fromle32(dir->d.rev); - } + data += diff; + off += diff; + size -= diff; + continue; + } - // set defaults - dir->d.rev += 1; - dir->d.size = sizeof(dir->d)+4; - dir->d.tail[0] = 0xffffffff; - dir->d.tail[1] = 0xffffffff; - dir->off = sizeof(dir->d); + // rcache takes priority + diff = lfs_min(diff, rcache->off-off); + } + + // load to cache, first condition can no longer fail + rcache->block = 0xfffffffe; + rcache->off = lfs_aligndown(off, lfs->cfg->read_size); + rcache->size = lfs_min(lfs_alignup(off+hint, lfs->cfg->read_size), + lfs->cfg->cache_size); + int err = lfs_dir_getslice(lfs, dir, gmask, gtag, + rcache->off, rcache->buffer, rcache->size); + if (err) { + return err; + } + } - // don't write out yet, let caller take care of that return 0; } -static int lfs_dir_fetch(lfs_t *lfs, - lfs_dir_t *dir, const lfs_block_t pair[2]) { - // copy out pair, otherwise may be aliasing dir - const lfs_block_t tpair[2] = {pair[0], pair[1]}; - bool valid = false; +static int lfs_dir_traverse_filter(void *p, + lfs_tag_t tag, const void *buffer) { + lfs_tag_t *filtertag = p; + (void)buffer; + + // check for redundancy + uint32_t mask = LFS_MKTAG(0x7ff, 0x3ff, 0); + if ((mask & tag) == (mask & *filtertag) || + (mask & tag) == (LFS_MKTAG(LFS_TYPE_DELETE, 0, 0) | + (LFS_MKTAG(0, 0x3ff, 0) & *filtertag))) { + return true; + } - // check both blocks for the most recent revision - for (int i = 0; i < 2; i++) { - struct lfs_disk_dir test; - int err = lfs_bd_read(lfs, tpair[i], 0, &test, sizeof(test)); - lfs_dir_fromle32(&test); - if (err) { - if (err == LFS_ERR_CORRUPT) { - continue; + // check if we need to adjust for created/deleted tags + if (lfs_tag_type1(tag) == LFS_TYPE_SPLICE && + lfs_tag_id(tag) <= lfs_tag_id(*filtertag)) { + *filtertag += LFS_MKTAG(0, lfs_tag_splice(tag), 0); + } + + return false; +} + +static 
int lfs_dir_traverse(lfs_t *lfs, + const lfs_mdir_t *dir, lfs_off_t off, lfs_tag_t ptag, + const struct lfs_mattr *attrs, int attrcount, bool hasseenmove, + lfs_tag_t tmask, lfs_tag_t ttag, + uint16_t begin, uint16_t end, int16_t diff, + int (*cb)(void *data, lfs_tag_t tag, const void *buffer), void *data) { + // iterate over directory and attrs + while (true) { + lfs_tag_t tag; + const void *buffer; + struct lfs_diskoff disk; + if (off+lfs_tag_dsize(ptag) < dir->off) { + off += lfs_tag_dsize(ptag); + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(tag), + dir->pair[0], off, &tag, sizeof(tag)); + if (err) { + return err; } - return err; - } - if (valid && lfs_scmp(test.rev, dir->d.rev) < 0) { - continue; + tag = (lfs_frombe32(tag) ^ ptag) | 0x80000000; + disk.block = dir->pair[0]; + disk.off = off+sizeof(lfs_tag_t); + buffer = &disk; + ptag = tag; + } else if (attrcount > 0) { + tag = attrs[0].tag; + buffer = attrs[0].buffer; + attrs += 1; + attrcount -= 1; + } else if (!hasseenmove && + lfs_gstate_hasmovehere(&lfs->gpending, dir->pair)) { + // Wait, we have pending move? Handle this here (we need to + // or else we risk letting moves fall out of date) + tag = lfs->gpending.tag & LFS_MKTAG(0x7ff, 0x3ff, 0); + buffer = NULL; + hasseenmove = true; + } else { + return 0; } - if ((0x7fffffff & test.size) < sizeof(test)+4 || - (0x7fffffff & test.size) > lfs->cfg->block_size) { + lfs_tag_t mask = LFS_MKTAG(0x7ff, 0, 0); + if ((mask & tmask & tag) != (mask & tmask & ttag)) { continue; } - uint32_t crc = 0xffffffff; - lfs_dir_tole32(&test); - lfs_crc(&crc, &test, sizeof(test)); - lfs_dir_fromle32(&test); - err = lfs_bd_crc(lfs, tpair[i], sizeof(test), - (0x7fffffff & test.size) - sizeof(test), &crc); - if (err) { - if (err == LFS_ERR_CORRUPT) { + // do we need to filter? 
inlining the filtering logic here allows + // for some minor optimizations + if (lfs_tag_id(tmask) != 0) { + // scan for duplicates and update tag based on creates/deletes + int filter = lfs_dir_traverse(lfs, + dir, off, ptag, attrs, attrcount, hasseenmove, + 0, 0, 0, 0, 0, + lfs_dir_traverse_filter, &tag); + if (filter < 0) { + return filter; + } + + if (filter) { continue; } - return err; - } - if (crc != 0) { - continue; + // in filter range? + if (!(lfs_tag_id(tag) >= begin && lfs_tag_id(tag) < end)) { + continue; + } } - valid = true; - - // setup dir in case it's valid - dir->pair[0] = tpair[(i+0) % 2]; - dir->pair[1] = tpair[(i+1) % 2]; - dir->off = sizeof(dir->d); - dir->d = test; + // handle special cases for mcu-side operations + if (lfs_tag_type3(tag) == LFS_FROM_NOOP) { + // do nothing + } else if (lfs_tag_type3(tag) == LFS_FROM_MOVE) { + uint16_t fromid = lfs_tag_size(tag); + uint16_t toid = lfs_tag_id(tag); + int err = lfs_dir_traverse(lfs, + buffer, 0, 0xffffffff, NULL, 0, true, + LFS_MKTAG(0x600, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, 0, 0), + fromid, fromid+1, toid-fromid+diff, + cb, data); + if (err) { + return err; + } + } else if (lfs_tag_type3(tag) == LFS_FROM_USERATTRS) { + for (unsigned i = 0; i < lfs_tag_size(tag); i++) { + const struct lfs_attr *a = buffer; + int err = cb(data, LFS_MKTAG(LFS_TYPE_USERATTR + a[i].type, + lfs_tag_id(tag) + diff, a[i].size), a[i].buffer); + if (err) { + return err; + } + } + } else { + int err = cb(data, tag + LFS_MKTAG(0, diff, 0), buffer); + if (err) { + return err; + } + } } +} - if (!valid) { - LFS_ERROR("Corrupted dir pair at %" PRIu32 " %" PRIu32 , - tpair[0], tpair[1]); - return LFS_ERR_CORRUPT; - } +static lfs_stag_t lfs_dir_fetchmatch(lfs_t *lfs, + lfs_mdir_t *dir, const lfs_block_t pair[2], + lfs_tag_t fmask, lfs_tag_t ftag, uint16_t *id, + int (*cb)(void *data, lfs_tag_t tag, const void *buffer), void *data) { + // we can find tag very efficiently during a fetch, since we're already + // scanning 
the entire directory + lfs_stag_t besttag = -1; + + // find the block with the most recent revision + uint32_t revs[2] = {0, 0}; + int r = 0; + for (int i = 0; i < 2; i++) { + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(revs[i]), + pair[i], 0, &revs[i], sizeof(revs[i])); + revs[i] = lfs_fromle32(revs[i]); + if (err && err != LFS_ERR_CORRUPT) { + return err; + } - return 0; -} + if (err != LFS_ERR_CORRUPT && + lfs_scmp(revs[i], revs[(i+1)%2]) > 0) { + r = i; + } + } -struct lfs_region { - lfs_off_t oldoff; - lfs_size_t oldlen; - const void *newdata; - lfs_size_t newlen; -}; + dir->pair[0] = pair[(r+0)%2]; + dir->pair[1] = pair[(r+1)%2]; + dir->rev = revs[(r+0)%2]; + dir->off = 0; // nonzero = found some commits -static int lfs_dir_commit(lfs_t *lfs, lfs_dir_t *dir, - const struct lfs_region *regions, int count) { - // increment revision count - dir->d.rev += 1; + // now scan tags to fetch the actual dir and find possible match + for (int i = 0; i < 2; i++) { + lfs_off_t off = 0; + lfs_tag_t ptag = 0xffffffff; - // keep pairs in order such that pair[0] is most recent - lfs_pairswap(dir->pair); - for (int i = 0; i < count; i++) { - dir->d.size += regions[i].newlen - regions[i].oldlen; - } + uint16_t tempcount = 0; + lfs_block_t temptail[2] = {0xffffffff, 0xffffffff}; + bool tempsplit = false; + lfs_stag_t tempbesttag = besttag; - const lfs_block_t oldpair[2] = {dir->pair[0], dir->pair[1]}; - bool relocated = false; + dir->rev = lfs_tole32(dir->rev); + uint32_t crc = lfs_crc(0xffffffff, &dir->rev, sizeof(dir->rev)); + dir->rev = lfs_fromle32(dir->rev); - while (true) { - if (true) { - int err = lfs_bd_erase(lfs, dir->pair[0]); + while (true) { + // extract next tag + lfs_tag_t tag; + off += lfs_tag_dsize(ptag); + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, lfs->cfg->block_size, + dir->pair[0], off, &tag, sizeof(tag)); if (err) { if (err == LFS_ERR_CORRUPT) { - goto relocate; + // can't continue? 
+ dir->erased = false; + break; } return err; } - uint32_t crc = 0xffffffff; - lfs_dir_tole32(&dir->d); - lfs_crc(&crc, &dir->d, sizeof(dir->d)); - err = lfs_bd_prog(lfs, dir->pair[0], 0, &dir->d, sizeof(dir->d)); - lfs_dir_fromle32(&dir->d); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } - return err; - } + crc = lfs_crc(crc, &tag, sizeof(tag)); + tag = lfs_frombe32(tag) ^ ptag; - int i = 0; - lfs_off_t oldoff = sizeof(dir->d); - lfs_off_t newoff = sizeof(dir->d); - while (newoff < (0x7fffffff & dir->d.size)-4) { - if (i < count && regions[i].oldoff == oldoff) { - lfs_crc(&crc, regions[i].newdata, regions[i].newlen); - err = lfs_bd_prog(lfs, dir->pair[0], - newoff, regions[i].newdata, regions[i].newlen); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } - return err; - } + // next commit not yet programmed or we're not in valid range + if (!lfs_tag_isvalid(tag) || + off + lfs_tag_dsize(tag) > lfs->cfg->block_size) { + dir->erased = (lfs_tag_type1(ptag) == LFS_TYPE_CRC && + dir->off % lfs->cfg->prog_size == 0); + break; + } - oldoff += regions[i].oldlen; - newoff += regions[i].newlen; - i += 1; - } else { - uint8_t data; - err = lfs_bd_read(lfs, oldpair[1], oldoff, &data, 1); - if (err) { - return err; - } + ptag = tag; - lfs_crc(&crc, &data, 1); - err = lfs_bd_prog(lfs, dir->pair[0], newoff, &data, 1); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } - return err; + if (lfs_tag_type1(tag) == LFS_TYPE_CRC) { + // check the crc attr + uint32_t dcrc; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, lfs->cfg->block_size, + dir->pair[0], off+sizeof(tag), &dcrc, sizeof(dcrc)); + if (err) { + if (err == LFS_ERR_CORRUPT) { + dir->erased = false; + break; } + return err; + } + dcrc = lfs_fromle32(dcrc); - oldoff += 1; - newoff += 1; + if (crc != dcrc) { + dir->erased = false; + break; } + + // reset the next bit if we need to + ptag ^= (lfs_tag_chunk(tag) & 1U) << 31; + + // toss our crc into the filesystem seed for + // 
pseudorandom numbers + lfs->seed ^= crc; + + // update with what's found so far + besttag = tempbesttag; + dir->off = off + lfs_tag_dsize(tag); + dir->etag = ptag; + dir->count = tempcount; + dir->tail[0] = temptail[0]; + dir->tail[1] = temptail[1]; + dir->split = tempsplit; + + // reset crc + crc = 0xffffffff; + continue; } - crc = lfs_tole32(crc); - err = lfs_bd_prog(lfs, dir->pair[0], newoff, &crc, 4); - crc = lfs_fromle32(crc); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; + // crc the entry first, hopefully leaving it in the cache + for (lfs_off_t j = sizeof(tag); j < lfs_tag_dsize(tag); j++) { + uint8_t dat; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, lfs->cfg->block_size, + dir->pair[0], off+j, &dat, 1); + if (err) { + if (err == LFS_ERR_CORRUPT) { + dir->erased = false; + break; + } + return err; } - return err; + + crc = lfs_crc(crc, &dat, 1); } - err = lfs_bd_sync(lfs); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; + // directory modification tags? 
+ if (lfs_tag_type1(tag) == LFS_TYPE_NAME) { + // increase count of files if necessary + if (lfs_tag_id(tag) >= tempcount) { + tempcount = lfs_tag_id(tag) + 1; } - return err; - } + } else if (lfs_tag_type1(tag) == LFS_TYPE_SPLICE) { + tempcount += lfs_tag_splice(tag); + + if (tag == (LFS_MKTAG(LFS_TYPE_DELETE, 0, 0) | + (LFS_MKTAG(0, 0x3ff, 0) & tempbesttag))) { + tempbesttag |= 0x80000000; + } else if (tempbesttag != -1 && + lfs_tag_id(tag) <= lfs_tag_id(tempbesttag)) { + tempbesttag += LFS_MKTAG(0, lfs_tag_splice(tag), 0); + } + } else if (lfs_tag_type1(tag) == LFS_TYPE_TAIL) { + tempsplit = (lfs_tag_chunk(tag) & 1); - // successful commit, check checksum to make sure - uint32_t ncrc = 0xffffffff; - err = lfs_bd_crc(lfs, dir->pair[0], 0, - (0x7fffffff & dir->d.size)-4, &ncrc); - if (err) { - return err; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, lfs->cfg->block_size, + dir->pair[0], off+sizeof(tag), &temptail, 8); + if (err) { + if (err == LFS_ERR_CORRUPT) { + dir->erased = false; + break; + } + } + lfs_pair_fromle32(temptail); } - if (ncrc != crc) { - goto relocate; + // found a match for our fetcher? 
+ if ((fmask & tag) == (fmask & ftag)) { + int res = cb(data, tag, &(struct lfs_diskoff){ + dir->pair[0], off+sizeof(tag)}); + if (res < 0) { + if (res == LFS_ERR_CORRUPT) { + dir->erased = false; + break; + } + return res; + } + + if (res == LFS_CMP_EQ) { + // found a match + tempbesttag = tag; + } else if (res == LFS_CMP_GT && + lfs_tag_id(tag) <= lfs_tag_id(tempbesttag)) { + // found a greater match, keep track to keep things sorted + tempbesttag = tag | 0x80000000; + } } } - break; -relocate: - //commit was corrupted - LFS_DEBUG("Bad block at %" PRIu32, dir->pair[0]); - - // drop caches and prepare to relocate block - relocated = true; - lfs_cache_drop(lfs, &lfs->pcache); - - // can't relocate superblock, filesystem is now frozen - if (lfs_paircmp(oldpair, (const lfs_block_t[2]){0, 1}) == 0) { - LFS_WARN("Superblock %" PRIu32 " has become unwritable", - oldpair[0]); - return LFS_ERR_CORRUPT; - } + // consider what we have good enough + if (dir->off > 0) { + // synthetic move + if (lfs_gstate_hasmovehere(&lfs->gstate, dir->pair)) { + if (lfs_tag_id(lfs->gstate.tag) == lfs_tag_id(besttag)) { + besttag |= 0x80000000; + } else if (besttag != -1 && + lfs_tag_id(lfs->gstate.tag) < lfs_tag_id(besttag)) { + besttag -= LFS_MKTAG(0, 1, 0); + } + } - // relocate half of pair - int err = lfs_alloc(lfs, &dir->pair[0]); - if (err) { - return err; - } - } + // found tag? or found best id? 
+ if (id) { + *id = lfs_min(lfs_tag_id(besttag), dir->count); + } - if (relocated) { - // update references if we relocated - LFS_DEBUG("Relocating %" PRIu32 " %" PRIu32 " to %" PRIu32 " %" PRIu32, - oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]); - int err = lfs_relocate(lfs, oldpair, dir->pair); - if (err) { - return err; + if (lfs_tag_isvalid(besttag)) { + return besttag; + } else if (lfs_tag_id(besttag) < dir->count) { + return LFS_ERR_NOENT; + } else { + return 0; + } } - } - // shift over any directories that are affected - for (lfs_dir_t *d = lfs->dirs; d; d = d->next) { - if (lfs_paircmp(d->pair, dir->pair) == 0) { - d->pair[0] = dir->pair[0]; - d->pair[1] = dir->pair[1]; - } + // failed, try the other block? + lfs_pair_swap(dir->pair); + dir->rev = revs[(r+1)%2]; } - return 0; + LFS_ERROR("Corrupted dir pair at %"PRIu32" %"PRIu32, + dir->pair[0], dir->pair[1]); + return LFS_ERR_CORRUPT; } -static int lfs_dir_update(lfs_t *lfs, lfs_dir_t *dir, - lfs_entry_t *entry, const void *data) { - lfs_entry_tole32(&entry->d); - int err = lfs_dir_commit(lfs, dir, (struct lfs_region[]){ - {entry->off, sizeof(entry->d), &entry->d, sizeof(entry->d)}, - {entry->off+sizeof(entry->d), entry->d.nlen, data, entry->d.nlen} - }, data ? 
2 : 1); - lfs_entry_fromle32(&entry->d); - return err; +static int lfs_dir_fetch(lfs_t *lfs, + lfs_mdir_t *dir, const lfs_block_t pair[2]) { + // note, mask=-1, tag=0 can never match a tag since this + // pattern has the invalid bit set + return lfs_dir_fetchmatch(lfs, dir, pair, -1, 0, NULL, NULL, NULL); } -static int lfs_dir_append(lfs_t *lfs, lfs_dir_t *dir, - lfs_entry_t *entry, const void *data) { - // check if we fit, if top bit is set we do not and move on - while (true) { - if (dir->d.size + lfs_entry_size(entry) <= lfs->cfg->block_size) { - entry->off = dir->d.size - 4; - - lfs_entry_tole32(&entry->d); - int err = lfs_dir_commit(lfs, dir, (struct lfs_region[]){ - {entry->off, 0, &entry->d, sizeof(entry->d)}, - {entry->off, 0, data, entry->d.nlen} - }, 2); - lfs_entry_fromle32(&entry->d); - return err; - } - - // we need to allocate a new dir block - if (!(0x80000000 & dir->d.size)) { - lfs_dir_t olddir = *dir; - int err = lfs_dir_alloc(lfs, dir); - if (err) { - return err; - } - - dir->d.tail[0] = olddir.d.tail[0]; - dir->d.tail[1] = olddir.d.tail[1]; - entry->off = dir->d.size - 4; - lfs_entry_tole32(&entry->d); - err = lfs_dir_commit(lfs, dir, (struct lfs_region[]){ - {entry->off, 0, &entry->d, sizeof(entry->d)}, - {entry->off, 0, data, entry->d.nlen} - }, 2); - lfs_entry_fromle32(&entry->d); - if (err) { - return err; - } - - olddir.d.size |= 0x80000000; - olddir.d.tail[0] = dir->pair[0]; - olddir.d.tail[1] = dir->pair[1]; - return lfs_dir_commit(lfs, &olddir, NULL, 0); - } +static int lfs_dir_getgstate(lfs_t *lfs, const lfs_mdir_t *dir, + struct lfs_gstate *gstate) { + struct lfs_gstate temp; + lfs_stag_t res = lfs_dir_get(lfs, dir, LFS_MKTAG(0x7ff, 0, 0), + LFS_MKTAG(LFS_TYPE_MOVESTATE, 0, sizeof(temp)), &temp); + if (res < 0 && res != LFS_ERR_NOENT) { + return res; + } - int err = lfs_dir_fetch(lfs, dir, dir->d.tail); - if (err) { - return err; - } + if (res != LFS_ERR_NOENT) { + // xor together to find resulting gstate + lfs_gstate_fromle32(&temp); 
+ lfs_gstate_xor(gstate, &temp); } -} -static int lfs_dir_remove(lfs_t *lfs, lfs_dir_t *dir, lfs_entry_t *entry) { - // check if we should just drop the directory block - if ((dir->d.size & 0x7fffffff) == sizeof(dir->d)+4 - + lfs_entry_size(entry)) { - lfs_dir_t pdir; - int res = lfs_pred(lfs, dir->pair, &pdir); - if (res < 0) { - return res; - } + return 0; +} - if (pdir.d.size & 0x80000000) { - pdir.d.size &= dir->d.size | 0x7fffffff; - pdir.d.tail[0] = dir->d.tail[0]; - pdir.d.tail[1] = dir->d.tail[1]; - return lfs_dir_commit(lfs, &pdir, NULL, 0); - } +static int lfs_dir_getinfo(lfs_t *lfs, lfs_mdir_t *dir, + uint16_t id, struct lfs_info *info) { + if (id == 0x3ff) { + // special case for root + strcpy(info->name, "/"); + info->type = LFS_TYPE_DIR; + return 0; } - // shift out the entry - int err = lfs_dir_commit(lfs, dir, (struct lfs_region[]){ - {entry->off, lfs_entry_size(entry), NULL, 0}, - }, 1); - if (err) { - return err; + lfs_stag_t tag = lfs_dir_get(lfs, dir, LFS_MKTAG(0x780, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_NAME, id, lfs->name_max+1), info->name); + if (tag < 0) { + return tag; } - // shift over any files/directories that are affected - for (lfs_file_t *f = lfs->files; f; f = f->next) { - if (lfs_paircmp(f->pair, dir->pair) == 0) { - if (f->poff == entry->off) { - f->pair[0] = 0xffffffff; - f->pair[1] = 0xffffffff; - } else if (f->poff > entry->off) { - f->poff -= lfs_entry_size(entry); - } - } + info->type = lfs_tag_type3(tag); + + struct lfs_ctz ctz; + tag = lfs_dir_get(lfs, dir, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, id, sizeof(ctz)), &ctz); + if (tag < 0) { + return tag; } + lfs_ctz_fromle32(&ctz); - for (lfs_dir_t *d = lfs->dirs; d; d = d->next) { - if (lfs_paircmp(d->pair, dir->pair) == 0) { - if (d->off > entry->off) { - d->off -= lfs_entry_size(entry); - d->pos -= lfs_entry_size(entry); - } - } + if (lfs_tag_type3(tag) == LFS_TYPE_CTZSTRUCT) { + info->size = ctz.size; + } else if (lfs_tag_type3(tag) == LFS_TYPE_INLINESTRUCT) { 
+ info->size = lfs_tag_size(tag); } return 0; } -static int lfs_dir_next(lfs_t *lfs, lfs_dir_t *dir, lfs_entry_t *entry) { - while (dir->off + sizeof(entry->d) > (0x7fffffff & dir->d.size)-4) { - if (!(0x80000000 & dir->d.size)) { - entry->off = dir->off; - return LFS_ERR_NOENT; - } - - int err = lfs_dir_fetch(lfs, dir, dir->d.tail); - if (err) { - return err; - } +struct lfs_dir_find_match { + lfs_t *lfs; + const void *name; + lfs_size_t size; +}; - dir->off = sizeof(dir->d); - dir->pos += sizeof(dir->d) + 4; +static int lfs_dir_find_match(void *data, + lfs_tag_t tag, const void *buffer) { + struct lfs_dir_find_match *name = data; + lfs_t *lfs = name->lfs; + const struct lfs_diskoff *disk = buffer; + + // compare with disk + lfs_size_t diff = lfs_min(name->size, lfs_tag_size(tag)); + int res = lfs_bd_cmp(lfs, + NULL, &lfs->rcache, diff, + disk->block, disk->off, name->name, diff); + if (res != LFS_CMP_EQ) { + return res; } - int err = lfs_bd_read(lfs, dir->pair[0], dir->off, - &entry->d, sizeof(entry->d)); - lfs_entry_fromle32(&entry->d); - if (err) { - return err; + // only equal if our size is still the same + if (name->size != lfs_tag_size(tag)) { + return (name->size < lfs_tag_size(tag)) ? LFS_CMP_LT : LFS_CMP_GT; } - entry->off = dir->off; - dir->off += lfs_entry_size(entry); - dir->pos += lfs_entry_size(entry); - return 0; + // found a match! 
+ return LFS_CMP_EQ; } -static int lfs_dir_find(lfs_t *lfs, lfs_dir_t *dir, - lfs_entry_t *entry, const char **path) { - const char *pathname = *path; - size_t pathlen; - entry->d.type = LFS_TYPE_DIR; - entry->d.elen = sizeof(entry->d) - 4; - entry->d.alen = 0; - entry->d.nlen = 0; - entry->d.u.dir[0] = lfs->root[0]; - entry->d.u.dir[1] = lfs->root[1]; +static int lfs_dir_find(lfs_t *lfs, lfs_mdir_t *dir, + const char **path, uint16_t *id) { + // we reduce path to a single name if we can find it + const char *name = *path; + if (id) { + *id = 0x3ff; + } + + // default to root dir + lfs_stag_t tag = LFS_MKTAG(LFS_TYPE_DIR, 0x3ff, 0); + dir->tail[0] = lfs->root[0]; + dir->tail[1] = lfs->root[1]; while (true) { nextname: // skip slashes - pathname += strspn(pathname, "/"); - pathlen = strcspn(pathname, "/"); + name += strspn(name, "/"); + lfs_size_t namelen = strcspn(name, "/"); // skip '.' and root '..' - if ((pathlen == 1 && memcmp(pathname, ".", 1) == 0) || - (pathlen == 2 && memcmp(pathname, "..", 2) == 0)) { - pathname += pathlen; + if ((namelen == 1 && memcmp(name, ".", 1) == 0) || + (namelen == 2 && memcmp(name, "..", 2) == 0)) { + name += namelen; goto nextname; } // skip if matched by '..' 
in name - const char *suffix = pathname + pathlen; - size_t sufflen; + const char *suffix = name + namelen; + lfs_size_t sufflen; int depth = 1; while (true) { suffix += strspn(suffix, "/"); @@ -833,7 +1111,7 @@ static int lfs_dir_find(lfs_t *lfs, lfs_dir_t *dir, if (sufflen == 2 && memcmp(suffix, "..", 2) == 0) { depth -= 1; if (depth == 0) { - pathname = suffix + sufflen; + name = suffix + sufflen; goto nextname; } } else { @@ -844,327 +1122,405 @@ static int lfs_dir_find(lfs_t *lfs, lfs_dir_t *dir, } // found path - if (pathname[0] == '\0') { - return 0; + if (name[0] == '\0') { + return tag; } - // update what we've found - *path = pathname; + // update what we've found so far + *path = name; - // continue on if we hit a directory - if (entry->d.type != LFS_TYPE_DIR) { + // only continue if we hit a directory + if (lfs_tag_type3(tag) != LFS_TYPE_DIR) { return LFS_ERR_NOTDIR; } - int err = lfs_dir_fetch(lfs, dir, entry->d.u.dir); - if (err) { - return err; + // grab the entry data + if (lfs_tag_id(tag) != 0x3ff) { + lfs_stag_t res = lfs_dir_get(lfs, dir, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, lfs_tag_id(tag), 8), dir->tail); + if (res < 0) { + return res; + } + lfs_pair_fromle32(dir->tail); } // find entry matching name while (true) { - err = lfs_dir_next(lfs, dir, entry); - if (err) { - return err; - } - - if (((0x7f & entry->d.type) != LFS_TYPE_REG && - (0x7f & entry->d.type) != LFS_TYPE_DIR) || - entry->d.nlen != pathlen) { - continue; + tag = lfs_dir_fetchmatch(lfs, dir, dir->tail, + LFS_MKTAG(0x780, 0, 0), + LFS_MKTAG(LFS_TYPE_NAME, 0, namelen), + // are we last name? + (strchr(name, '/') == NULL) ? 
id : NULL, + lfs_dir_find_match, &(struct lfs_dir_find_match){ + lfs, name, namelen}); + if (tag < 0) { + return tag; } - int res = lfs_bd_cmp(lfs, dir->pair[0], - entry->off + 4+entry->d.elen+entry->d.alen, - pathname, pathlen); - if (res < 0) { - return res; - } - - // found match - if (res) { + if (tag) { break; } - } - // check that entry has not been moved - if (!lfs->moving && entry->d.type & 0x80) { - int moved = lfs_moved(lfs, &entry->d.u); - if (moved < 0 || moved) { - return (moved < 0) ? moved : LFS_ERR_NOENT; + if (!dir->split) { + return LFS_ERR_NOENT; } - - entry->d.type &= ~0x80; } // to next name - pathname += pathlen; + name += namelen; } } +// commit logic +struct lfs_commit { + lfs_block_t block; + lfs_off_t off; + lfs_tag_t ptag; + uint32_t crc; -/// Top level directory operations /// -int lfs_mkdir(lfs_t *lfs, const char *path) { - // deorphan if we haven't yet, needed at most once after poweron - if (!lfs->deorphaned) { - int err = lfs_deorphan(lfs); - if (err) { - return err; - } - } - - // fetch parent directory - lfs_dir_t cwd; - lfs_entry_t entry; - int err = lfs_dir_find(lfs, &cwd, &entry, &path); - if (err != LFS_ERR_NOENT || strchr(path, '/') != NULL) { - return err ? 
err : LFS_ERR_EXIST; - } + lfs_off_t begin; + lfs_off_t end; +}; - // build up new directory - lfs_alloc_ack(lfs); - - lfs_dir_t dir; - err = lfs_dir_alloc(lfs, &dir); +static int lfs_dir_commitprog(lfs_t *lfs, struct lfs_commit *commit, + const void *buffer, lfs_size_t size) { + int err = lfs_bd_prog(lfs, + &lfs->pcache, &lfs->rcache, false, + commit->block, commit->off , + (const uint8_t*)buffer, size); if (err) { return err; } - dir.d.tail[0] = cwd.d.tail[0]; - dir.d.tail[1] = cwd.d.tail[1]; - err = lfs_dir_commit(lfs, &dir, NULL, 0); + commit->crc = lfs_crc(commit->crc, buffer, size); + commit->off += size; + return 0; +} + +static int lfs_dir_commitattr(lfs_t *lfs, struct lfs_commit *commit, + lfs_tag_t tag, const void *buffer) { + // check if we fit + lfs_size_t dsize = lfs_tag_dsize(tag); + if (commit->off + dsize > commit->end) { + return LFS_ERR_NOSPC; + } + + // write out tag + lfs_tag_t ntag = lfs_tobe32((tag & 0x7fffffff) ^ commit->ptag); + int err = lfs_dir_commitprog(lfs, commit, &ntag, sizeof(ntag)); if (err) { return err; } - entry.d.type = LFS_TYPE_DIR; - entry.d.elen = sizeof(entry.d) - 4; - entry.d.alen = 0; - entry.d.nlen = strlen(path); - entry.d.u.dir[0] = dir.pair[0]; - entry.d.u.dir[1] = dir.pair[1]; - - cwd.d.tail[0] = dir.pair[0]; - cwd.d.tail[1] = dir.pair[1]; + if (!(tag & 0x80000000)) { + // from memory + err = lfs_dir_commitprog(lfs, commit, buffer, dsize-sizeof(tag)); + if (err) { + return err; + } + } else { + // from disk + const struct lfs_diskoff *disk = buffer; + for (lfs_off_t i = 0; i < dsize-sizeof(tag); i++) { + // rely on caching to make this efficient + uint8_t dat; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, dsize-sizeof(tag)-i, + disk->block, disk->off+i, &dat, 1); + if (err) { + return err; + } - err = lfs_dir_append(lfs, &cwd, &entry, path); - if (err) { - return err; + err = lfs_dir_commitprog(lfs, commit, &dat, 1); + if (err) { + return err; + } + } } - lfs_alloc_ack(lfs); + commit->ptag = tag & 0x7fffffff; return 
0; } -int lfs_dir_open(lfs_t *lfs, lfs_dir_t *dir, const char *path) { - dir->pair[0] = lfs->root[0]; - dir->pair[1] = lfs->root[1]; +static int lfs_dir_commitcrc(lfs_t *lfs, struct lfs_commit *commit) { + // align to program units + lfs_off_t off = lfs_alignup(commit->off + 2*sizeof(uint32_t), + lfs->cfg->prog_size); + + // read erased state from next program unit + lfs_tag_t tag; + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(tag), + commit->block, off, &tag, sizeof(tag)); + if (err && err != LFS_ERR_CORRUPT) { + return err; + } - lfs_entry_t entry; - int err = lfs_dir_find(lfs, dir, &entry, &path); + // build crc tag + bool reset = ~lfs_frombe32(tag) >> 31; + tag = LFS_MKTAG(LFS_TYPE_CRC + reset, 0x3ff, + off - (commit->off+sizeof(lfs_tag_t))); + + // write out crc + uint32_t footer[2]; + footer[0] = lfs_tobe32(tag ^ commit->ptag); + commit->crc = lfs_crc(commit->crc, &footer[0], sizeof(footer[0])); + footer[1] = lfs_tole32(commit->crc); + err = lfs_bd_prog(lfs, + &lfs->pcache, &lfs->rcache, false, + commit->block, commit->off, &footer, sizeof(footer)); if (err) { return err; - } else if (entry.d.type != LFS_TYPE_DIR) { - return LFS_ERR_NOTDIR; } + commit->off += sizeof(tag)+lfs_tag_size(tag); + commit->ptag = tag ^ (reset << 31); - err = lfs_dir_fetch(lfs, dir, entry.d.u.dir); + // flush buffers + err = lfs_bd_sync(lfs, &lfs->pcache, &lfs->rcache, false); if (err) { return err; } - // setup head dir - // special offset for '.' and '..' 
- dir->head[0] = dir->pair[0]; - dir->head[1] = dir->pair[1]; - dir->pos = sizeof(dir->d) - 2; - dir->off = sizeof(dir->d); + // successful commit, check checksum to make sure + uint32_t crc = 0xffffffff; + lfs_size_t size = commit->off - lfs_tag_size(tag) - commit->begin; + for (lfs_off_t i = 0; i < size; i++) { + // leave it up to caching to make this efficient + uint8_t dat; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, size-i, + commit->block, commit->begin+i, &dat, 1); + if (err) { + return err; + } - // add to list of directories - dir->next = lfs->dirs; - lfs->dirs = dir; + crc = lfs_crc(crc, &dat, 1); + } - return 0; -} + if (err) { + return err; + } -int lfs_dir_close(lfs_t *lfs, lfs_dir_t *dir) { - // remove from list of directories - for (lfs_dir_t **p = &lfs->dirs; *p; p = &(*p)->next) { - if (*p == dir) { - *p = dir->next; - break; - } + if (crc != commit->crc) { + return LFS_ERR_CORRUPT; } return 0; } -int lfs_dir_read(lfs_t *lfs, lfs_dir_t *dir, struct lfs_info *info) { - memset(info, 0, sizeof(*info)); - - // special offset for '.' and '..' - if (dir->pos == sizeof(dir->d) - 2) { - info->type = LFS_TYPE_DIR; - strcpy(info->name, "."); - dir->pos += 1; - return 1; - } else if (dir->pos == sizeof(dir->d) - 1) { - info->type = LFS_TYPE_DIR; - strcpy(info->name, ".."); - dir->pos += 1; - return 1; - } - - lfs_entry_t entry; - while (true) { - int err = lfs_dir_next(lfs, dir, &entry); +static int lfs_dir_alloc(lfs_t *lfs, lfs_mdir_t *dir) { + // allocate pair of dir blocks (backwards, so we write block 1 first) + for (int i = 0; i < 2; i++) { + int err = lfs_alloc(lfs, &dir->pair[(i+1)%2]); if (err) { - return (err == LFS_ERR_NOENT) ? 
0 : err; + return err; } + } - if ((0x7f & entry.d.type) != LFS_TYPE_REG && - (0x7f & entry.d.type) != LFS_TYPE_DIR) { - continue; - } + // rather than clobbering one of the blocks we just pretend + // the revision may be valid + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(dir->rev), + dir->pair[0], 0, &dir->rev, sizeof(dir->rev)); + dir->rev = lfs_fromle32(dir->rev); + if (err && err != LFS_ERR_CORRUPT) { + return err; + } - // check that entry has not been moved - if (entry.d.type & 0x80) { - int moved = lfs_moved(lfs, &entry.d.u); - if (moved < 0) { - return moved; - } + // make sure we don't immediately evict + dir->rev += dir->rev & 1; - if (moved) { - continue; - } + // set defaults + dir->off = sizeof(dir->rev); + dir->etag = 0xffffffff; + dir->count = 0; + dir->tail[0] = 0xffffffff; + dir->tail[1] = 0xffffffff; + dir->erased = false; + dir->split = false; - entry.d.type &= ~0x80; - } + // don't write out yet, let caller take care of that + return 0; +} - break; +static int lfs_dir_drop(lfs_t *lfs, lfs_mdir_t *dir, lfs_mdir_t *tail) { + // steal state + int err = lfs_dir_getgstate(lfs, tail, &lfs->gdelta); + if (err) { + return err; } - info->type = entry.d.type; - if (info->type == LFS_TYPE_REG) { - info->size = entry.d.u.file.size; + // steal tail + lfs_pair_tole32(tail->tail); + err = lfs_dir_commit(lfs, dir, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + tail->split, 0x3ff, 8), tail->tail})); + lfs_pair_fromle32(tail->tail); + if (err) { + return err; } - int err = lfs_bd_read(lfs, dir->pair[0], - entry.off + 4+entry.d.elen+entry.d.alen, - info->name, entry.d.nlen); + return 0; +} + +static int lfs_dir_split(lfs_t *lfs, + lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *source, uint16_t split, uint16_t end) { + // create tail directory + lfs_mdir_t tail; + int err = lfs_dir_alloc(lfs, &tail); if (err) { return err; } - return 1; -} + tail.split = dir->split; + tail.tail[0] = dir->tail[0]; + tail.tail[1] = dir->tail[1]; 
-int lfs_dir_seek(lfs_t *lfs, lfs_dir_t *dir, lfs_off_t off) { - // simply walk from head dir - int err = lfs_dir_rewind(lfs, dir); + err = lfs_dir_compact(lfs, &tail, attrs, attrcount, source, split, end); if (err) { return err; } - dir->pos = off; - while (off > (0x7fffffff & dir->d.size)) { - off -= 0x7fffffff & dir->d.size; - if (!(0x80000000 & dir->d.size)) { - return LFS_ERR_INVAL; - } + dir->tail[0] = tail.pair[0]; + dir->tail[1] = tail.pair[1]; + dir->split = true; - err = lfs_dir_fetch(lfs, dir, dir->d.tail); - if (err) { - return err; - } + // update root if needed + if (lfs_pair_cmp(dir->pair, lfs->root) == 0 && split == 0) { + lfs->root[0] = tail.pair[0]; + lfs->root[1] = tail.pair[1]; } - dir->off = off; return 0; } -lfs_soff_t lfs_dir_tell(lfs_t *lfs, lfs_dir_t *dir) { - (void)lfs; - return dir->pos; -} - -int lfs_dir_rewind(lfs_t *lfs, lfs_dir_t *dir) { - // reload the head dir - int err = lfs_dir_fetch(lfs, dir, dir->head); - if (err) { - return err; - } +static int lfs_dir_commit_size(void *p, lfs_tag_t tag, const void *buffer) { + lfs_size_t *size = p; + (void)buffer; - dir->pair[0] = dir->head[0]; - dir->pair[1] = dir->head[1]; - dir->pos = sizeof(dir->d) - 2; - dir->off = sizeof(dir->d); + *size += lfs_tag_dsize(tag); return 0; } +struct lfs_dir_commit_commit { + lfs_t *lfs; + struct lfs_commit *commit; +}; -/// File index list operations /// -static int lfs_ctz_index(lfs_t *lfs, lfs_off_t *off) { - lfs_off_t size = *off; - lfs_off_t b = lfs->cfg->block_size - 2*4; - lfs_off_t i = size / b; - if (i == 0) { - return 0; - } - - i = (size - 4*(lfs_popc(i-1)+2)) / b; - *off = size - b*i - 4*lfs_popc(i); - return i; +static int lfs_dir_commit_commit(void *p, lfs_tag_t tag, const void *buffer) { + struct lfs_dir_commit_commit *commit = p; + return lfs_dir_commitattr(commit->lfs, commit->commit, tag, buffer); } -static int lfs_ctz_find(lfs_t *lfs, - lfs_cache_t *rcache, const lfs_cache_t *pcache, - lfs_block_t head, lfs_size_t size, - lfs_size_t pos, 
lfs_block_t *block, lfs_off_t *off) { - if (size == 0) { - *block = 0xffffffff; - *off = 0; - return 0; - } - - lfs_off_t current = lfs_ctz_index(lfs, &(lfs_off_t){size-1}); - lfs_off_t target = lfs_ctz_index(lfs, &pos); +static int lfs_dir_compact(lfs_t *lfs, + lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *source, uint16_t begin, uint16_t end) { + // save some state in case block is bad + const lfs_block_t oldpair[2] = {dir->pair[1], dir->pair[0]}; + bool relocated = false; + bool exhausted = false; + + // should we split? + while (end - begin > 1) { + // find size + lfs_size_t size = 0; + int err = lfs_dir_traverse(lfs, + source, 0, 0xffffffff, attrs, attrcount, false, + LFS_MKTAG(0x400, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_NAME, 0, 0), + begin, end, -begin, + lfs_dir_commit_size, &size); + if (err) { + return err; + } - while (current > target) { - lfs_size_t skip = lfs_min( - lfs_npw2(current-target+1) - 1, - lfs_ctz(current)); + // space is complicated, we need room for tail, crc, gstate, + // cleanup delete, and we cap at half a block to give room + // for metadata updates. 
+ if (end - begin < 0xff && + size <= lfs_min(lfs->cfg->block_size - 36, + lfs_alignup(lfs->cfg->block_size/2, + lfs->cfg->prog_size))) { + break; + } - int err = lfs_cache_read(lfs, rcache, pcache, head, 4*skip, &head, 4); - head = lfs_fromle32(head); + // can't fit, need to split, we should really be finding the + // largest size that fits with a small binary search, but right now + // it's not worth the code size + uint16_t split = (end - begin) / 2; + err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, begin+split, end); if (err) { + // if we fail to split, we may be able to overcompact, unless + // we're too big for even the full block, in which case our + // only option is to error + if (err == LFS_ERR_NOSPC && size <= lfs->cfg->block_size - 36) { + break; + } return err; } - LFS_ASSERT(head >= 2 && head <= lfs->cfg->block_count); - current -= 1 << skip; + end = begin + split; } - *block = head; - *off = pos; - return 0; -} + // increment revision count + dir->rev += 1; + if (lfs->cfg->block_cycles && + (dir->rev % (lfs->cfg->block_cycles+1) == 0)) { + if (lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) { + // oh no! we're writing too much to the superblock, + // should we expand? + lfs_ssize_t res = lfs_fs_size(lfs); + if (res < 0) { + return res; + } -static int lfs_ctz_extend(lfs_t *lfs, - lfs_cache_t *rcache, lfs_cache_t *pcache, - lfs_block_t head, lfs_size_t size, - lfs_block_t *block, lfs_off_t *off) { - while (true) { - // go ahead and grab a block - lfs_block_t nblock; - int err = lfs_alloc(lfs, &nblock); - if (err) { - return err; + // do we have extra space? 
littlefs can't reclaim this space + // by itself, so expand cautiously + if ((lfs_size_t)res < lfs->cfg->block_count/2) { + LFS_DEBUG("Expanding superblock at rev %"PRIu32, dir->rev); + int err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, begin, end); + if (err && err != LFS_ERR_NOSPC) { + return err; + } + + // welp, we tried, if we ran out of space there's not much + // we can do, we'll error later if we've become frozen + if (!err) { + end = begin; + } + } + } else { + // we're writing too much, time to relocate + exhausted = true; + goto relocate; } - LFS_ASSERT(nblock >= 2 && nblock <= lfs->cfg->block_count); + } - if (true) { - err = lfs_bd_erase(lfs, nblock); + // begin loop to commit compaction to blocks until a compact sticks + while (true) { + { + // There's nothing special about our global delta, so feed it into + // our local global delta + int err = lfs_dir_getgstate(lfs, dir, &lfs->gdelta); + if (err) { + return err; + } + + // setup commit state + struct lfs_commit commit = { + .block = dir->pair[1], + .off = 0, + .ptag = 0xffffffff, + .crc = 0xffffffff, + + .begin = 0, + .end = lfs->cfg->block_size - 8, + }; + + // erase block to write to + err = lfs_bd_erase(lfs, dir->pair[1]); if (err) { if (err == LFS_ERR_CORRUPT) { goto relocate; @@ -1172,741 +1528,2152 @@ static int lfs_ctz_extend(lfs_t *lfs, return err; } - if (size == 0) { - *block = nblock; - *off = 0; - return 0; + // write out header + dir->rev = lfs_tole32(dir->rev); + err = lfs_dir_commitprog(lfs, &commit, + &dir->rev, sizeof(dir->rev)); + dir->rev = lfs_fromle32(dir->rev); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; } - size -= 1; - lfs_off_t index = lfs_ctz_index(lfs, &size); - size += 1; - - // just copy out the last block if it is incomplete - if (size != lfs->cfg->block_size) { - for (lfs_off_t i = 0; i < size; i++) { - uint8_t data; - err = lfs_cache_read(lfs, rcache, NULL, - head, i, &data, 1); - if (err) { - return err; - } - - err = 
lfs_cache_prog(lfs, pcache, rcache, - nblock, i, &data, 1); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } - return err; - } + // traverse the directory, this time writing out all unique tags + err = lfs_dir_traverse(lfs, + source, 0, 0xffffffff, attrs, attrcount, false, + LFS_MKTAG(0x400, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_NAME, 0, 0), + begin, end, -begin, + lfs_dir_commit_commit, &(struct lfs_dir_commit_commit){ + lfs, &commit}); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; } - - *block = nblock; - *off = size; - return 0; + return err; } - // append block - index += 1; - lfs_size_t skips = lfs_ctz(index) + 1; - - for (lfs_off_t i = 0; i < skips; i++) { - head = lfs_tole32(head); - err = lfs_cache_prog(lfs, pcache, rcache, - nblock, 4*i, &head, 4); - head = lfs_fromle32(head); + // commit tail, which may be new after last size check + if (!lfs_pair_isnull(dir->tail)) { + lfs_pair_tole32(dir->tail); + err = lfs_dir_commitattr(lfs, &commit, + LFS_MKTAG(LFS_TYPE_TAIL + dir->split, 0x3ff, 8), + dir->tail); + lfs_pair_fromle32(dir->tail); if (err) { if (err == LFS_ERR_CORRUPT) { goto relocate; } return err; } + } - if (i != skips-1) { - err = lfs_cache_read(lfs, rcache, NULL, - head, 4*i, &head, 4); - head = lfs_fromle32(head); - if (err) { - return err; + if (!relocated && !lfs_gstate_iszero(&lfs->gdelta)) { + // commit any globals, unless we're relocating, + // in which case our parent will steal our globals + lfs_gstate_tole32(&lfs->gdelta); + err = lfs_dir_commitattr(lfs, &commit, + LFS_MKTAG(LFS_TYPE_MOVESTATE, 0x3ff, + sizeof(lfs->gdelta)), &lfs->gdelta); + lfs_gstate_fromle32(&lfs->gdelta); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; } + return err; } + } - LFS_ASSERT(head >= 2 && head <= lfs->cfg->block_count); + err = lfs_dir_commitcrc(lfs, &commit); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; } - *block = nblock; - *off = 4*skips; - return 0; + // successful compaction, 
swap dir pair to indicate most recent + lfs_pair_swap(dir->pair); + dir->count = end - begin; + dir->off = commit.off; + dir->etag = commit.ptag; + dir->erased = (dir->off % lfs->cfg->prog_size == 0); + // note we able to have already handled move here + if (lfs_gstate_hasmovehere(&lfs->gpending, dir->pair)) { + lfs_gstate_xormove(&lfs->gpending, + &lfs->gpending, 0x3ff, NULL); + } } + break; relocate: - LFS_DEBUG("Bad block at %" PRIu32, nblock); - - // just clear cache and try a new block + // commit was corrupted, drop caches and prepare to relocate block + relocated = true; lfs_cache_drop(lfs, &lfs->pcache); - } -} + if (!exhausted) { + LFS_DEBUG("Bad block at %"PRIu32, dir->pair[1]); + } -static int lfs_ctz_traverse(lfs_t *lfs, - lfs_cache_t *rcache, const lfs_cache_t *pcache, - lfs_block_t head, lfs_size_t size, - int (*cb)(void*, lfs_block_t), void *data) { - if (size == 0) { - return 0; - } - - lfs_off_t index = lfs_ctz_index(lfs, &(lfs_off_t){size-1}); + // can't relocate superblock, filesystem is now frozen + if (lfs_pair_cmp(oldpair, (const lfs_block_t[2]){0, 1}) == 0) { + LFS_WARN("Superblock %"PRIu32" has become unwritable", oldpair[1]); + return LFS_ERR_NOSPC; + } - while (true) { - int err = cb(data, head); - if (err) { + // relocate half of pair + int err = lfs_alloc(lfs, &dir->pair[1]); + if (err && (err != LFS_ERR_NOSPC && !exhausted)) { return err; } - if (index == 0) { - return 0; - } + continue; + } - lfs_block_t heads[2]; - int count = 2 - (index & 1); - err = lfs_cache_read(lfs, rcache, pcache, head, 0, &heads, count*4); - heads[0] = lfs_fromle32(heads[0]); - heads[1] = lfs_fromle32(heads[1]); + if (!relocated) { + lfs->gstate = lfs->gpending; + lfs->gdelta = (struct lfs_gstate){0}; + } else { + // update references if we relocated + LFS_DEBUG("Relocating %"PRIu32" %"PRIu32" to %"PRIu32" %"PRIu32, + oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]); + int err = lfs_fs_relocate(lfs, oldpair, dir->pair); if (err) { return err; } + } - for 
(int i = 0; i < count-1; i++) { - err = cb(data, heads[i]); + return 0; +} + +static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount) { + // check for any inline files that aren't RAM backed and + // forcefully evict them, needed for filesystem consistency + for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { + if (dir != &f->m && lfs_pair_cmp(f->m.pair, dir->pair) == 0 && + f->type == LFS_TYPE_REG && (f->flags & LFS_F_INLINE) && + f->ctz.size > lfs->cfg->cache_size) { + f->flags &= ~LFS_F_READING; + f->off = 0; + + lfs_alloc_ack(lfs); + int err = lfs_file_relocate(lfs, f); + if (err) { + return err; + } + + err = lfs_file_flush(lfs, f); if (err) { return err; } } + } - head = heads[count-1]; - index -= count; + // calculate changes to the directory + lfs_tag_t deletetag = 0xffffffff; + lfs_tag_t createtag = 0xffffffff; + for (int i = 0; i < attrcount; i++) { + if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE) { + createtag = attrs[i].tag; + dir->count += 1; + } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE) { + deletetag = attrs[i].tag; + LFS_ASSERT(dir->count > 0); + dir->count -= 1; + } else if (lfs_tag_type1(attrs[i].tag) == LFS_TYPE_TAIL) { + dir->tail[0] = ((lfs_block_t*)attrs[i].buffer)[0]; + dir->tail[1] = ((lfs_block_t*)attrs[i].buffer)[1]; + dir->split = (lfs_tag_chunk(attrs[i].tag) & 1); + lfs_pair_fromle32(dir->tail); + } } -} + // do we have a pending move? 
+ if (lfs_gstate_hasmovehere(&lfs->gpending, dir->pair)) { + deletetag = lfs->gpending.tag & LFS_MKTAG(0x7ff, 0x3ff, 0); + LFS_ASSERT(dir->count > 0); + dir->count -= 1; -/// Top level file operations /// -int lfs_file_opencfg(lfs_t *lfs, lfs_file_t *file, - const char *path, int flags, - const struct lfs_file_config *cfg) { - // deorphan if we haven't yet, needed at most once after poweron - if ((flags & 3) != LFS_O_RDONLY && !lfs->deorphaned) { - int err = lfs_deorphan(lfs); - if (err) { + // mark gdelta so we reflect the move we will fix + lfs_gstate_xormove(&lfs->gdelta, &lfs->gpending, 0x3ff, NULL); + } + + // should we actually drop the directory block? + if (lfs_tag_isvalid(deletetag) && dir->count == 0) { + lfs_mdir_t pdir; + int err = lfs_fs_pred(lfs, dir->pair, &pdir); + if (err && err != LFS_ERR_NOENT) { return err; } - } - // allocate entry for file if it doesn't exist - lfs_dir_t cwd; - lfs_entry_t entry; - int err = lfs_dir_find(lfs, &cwd, &entry, &path); - if (err && (err != LFS_ERR_NOENT || strchr(path, '/') != NULL)) { - return err; + if (err != LFS_ERR_NOENT && pdir.split) { + return lfs_dir_drop(lfs, &pdir, dir); + } } - if (err == LFS_ERR_NOENT) { - if (!(flags & LFS_O_CREAT)) { - return LFS_ERR_NOENT; - } + if (dir->erased || dir->count >= 0xff) { + // try to commit + struct lfs_commit commit = { + .block = dir->pair[0], + .off = dir->off, + .ptag = dir->etag, + .crc = 0xffffffff, - // create entry to remember name - entry.d.type = LFS_TYPE_REG; - entry.d.elen = sizeof(entry.d) - 4; - entry.d.alen = 0; - entry.d.nlen = strlen(path); - entry.d.u.file.head = 0xffffffff; - entry.d.u.file.size = 0; - err = lfs_dir_append(lfs, &cwd, &entry, path); + .begin = dir->off, + .end = lfs->cfg->block_size - 8, + }; + + // traverse attrs that need to be written out + lfs_pair_tole32(dir->tail); + int err = lfs_dir_traverse(lfs, + dir, dir->off, dir->etag, attrs, attrcount, false, + 0, 0, 0, 0, 0, + lfs_dir_commit_commit, &(struct lfs_dir_commit_commit){ + 
lfs, &commit}); + lfs_pair_fromle32(dir->tail); if (err) { + if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { + goto compact; + } return err; } - } else if (entry.d.type == LFS_TYPE_DIR) { - return LFS_ERR_ISDIR; - } else if (flags & LFS_O_EXCL) { - return LFS_ERR_EXIST; - } - // setup file struct - file->cfg = cfg; - file->pair[0] = cwd.pair[0]; - file->pair[1] = cwd.pair[1]; - file->poff = entry.off; - file->head = entry.d.u.file.head; - file->size = entry.d.u.file.size; - file->flags = flags; - file->pos = 0; + // commit any global diffs if we have any + if (!lfs_gstate_iszero(&lfs->gdelta)) { + err = lfs_dir_getgstate(lfs, dir, &lfs->gdelta); + if (err) { + return err; + } - if (flags & LFS_O_TRUNC) { - if (file->size != 0) { - file->flags |= LFS_F_DIRTY; + lfs_gstate_tole32(&lfs->gdelta); + err = lfs_dir_commitattr(lfs, &commit, + LFS_MKTAG(LFS_TYPE_MOVESTATE, 0x3ff, + sizeof(lfs->gdelta)), &lfs->gdelta); + lfs_gstate_fromle32(&lfs->gdelta); + if (err) { + if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { + goto compact; + } + return err; + } } - file->head = 0xffffffff; - file->size = 0; - } - // allocate buffer if needed - file->cache.block = 0xffffffff; - if (file->cfg && file->cfg->buffer) { - file->cache.buffer = file->cfg->buffer; - } else if (lfs->cfg->file_buffer) { - if (lfs->files) { - // already in use - return LFS_ERR_NOMEM; - } - file->cache.buffer = lfs->cfg->file_buffer; - } else if ((file->flags & 3) == LFS_O_RDONLY) { - file->cache.buffer = lfs_malloc(lfs->cfg->read_size); - if (!file->cache.buffer) { - return LFS_ERR_NOMEM; - } - } else { - file->cache.buffer = lfs_malloc(lfs->cfg->prog_size); - if (!file->cache.buffer) { - return LFS_ERR_NOMEM; + // finalize commit with the crc + err = lfs_dir_commitcrc(lfs, &commit); + if (err) { + if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { + goto compact; + } + return err; } - } - // zero to avoid information leak - lfs_cache_drop(lfs, &file->cache); - if ((file->flags & 3) != 
LFS_O_RDONLY) { - lfs_cache_zero(lfs, &file->cache); - } - - // add to list of files - file->next = lfs->files; - lfs->files = file; - - return 0; -} + // successful commit, update dir + dir->off = commit.off; + dir->etag = commit.ptag; -int lfs_file_open(lfs_t *lfs, lfs_file_t *file, - const char *path, int flags) { - return lfs_file_opencfg(lfs, file, path, flags, NULL); -} + // note we able to have already handled move here + if (lfs_gstate_hasmovehere(&lfs->gpending, dir->pair)) { + lfs_gstate_xormove(&lfs->gpending, &lfs->gpending, 0x3ff, NULL); + } -int lfs_file_close(lfs_t *lfs, lfs_file_t *file) { - int err = lfs_file_sync(lfs, file); + // update gstate + lfs->gstate = lfs->gpending; + lfs->gdelta = (struct lfs_gstate){0}; + } else { +compact: + // fall back to compaction + lfs_cache_drop(lfs, &lfs->pcache); - // remove from list of files - for (lfs_file_t **p = &lfs->files; *p; p = &(*p)->next) { - if (*p == file) { - *p = file->next; - break; + int err = lfs_dir_compact(lfs, dir, attrs, attrcount, + dir, 0, dir->count); + if (err) { + return err; } } - // clean up memory - if (!(file->cfg && file->cfg->buffer) && !lfs->cfg->file_buffer) { - lfs_free(file->cache.buffer); + // update any directories that are affected + lfs_mdir_t copy = *dir; + + // two passes, once for things that aren't us, and one + // for things that are + for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { + if (lfs_pair_cmp(d->m.pair, copy.pair) == 0) { + d->m = *dir; + if (d->id == lfs_tag_id(deletetag)) { + d->m.pair[0] = 0xffffffff; + d->m.pair[1] = 0xffffffff; + } else if (d->id > lfs_tag_id(deletetag)) { + d->id -= 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos -= 1; + } + } else if (&d->m != dir && d->id >= lfs_tag_id(createtag)) { + d->id += 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos += 1; + } + } + + while (d->id >= d->m.count && d->m.split) { + // we split and id is on tail now + d->id -= d->m.count; + int err = lfs_dir_fetch(lfs, &d->m, 
d->m.tail); + if (err) { + return err; + } + } + } } - return err; + return 0; } -static int lfs_file_relocate(lfs_t *lfs, lfs_file_t *file) { -relocate: - LFS_DEBUG("Bad block at %" PRIu32, file->block); - // just relocate what exists into new block - lfs_block_t nblock; - int err = lfs_alloc(lfs, &nblock); +/// Top level directory operations /// +int lfs_mkdir(lfs_t *lfs, const char *path) { + // deorphan if we haven't yet, needed at most once after poweron + int err = lfs_fs_forceconsistency(lfs); if (err) { return err; } - err = lfs_bd_erase(lfs, nblock); + lfs_mdir_t cwd; + uint16_t id; + err = lfs_dir_find(lfs, &cwd, &path, &id); + if (!(err == LFS_ERR_NOENT && id != 0x3ff)) { + return (err < 0) ? err : LFS_ERR_EXIST; + } + + // check that name fits + lfs_size_t nlen = strlen(path); + if (nlen > lfs->name_max) { + return LFS_ERR_NAMETOOLONG; + } + + // build up new directory + lfs_alloc_ack(lfs); + lfs_mdir_t dir; + err = lfs_dir_alloc(lfs, &dir); if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } return err; } - // either read from dirty cache or disk - for (lfs_off_t i = 0; i < file->off; i++) { - uint8_t data; - err = lfs_cache_read(lfs, &lfs->rcache, &file->cache, - file->block, i, &data, 1); + // find end of list + lfs_mdir_t pred = cwd; + while (pred.split) { + err = lfs_dir_fetch(lfs, &pred, pred.tail); if (err) { return err; } + } + + // setup dir + lfs_pair_tole32(pred.tail); + err = lfs_dir_commit(lfs, &dir, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), pred.tail})); + lfs_pair_fromle32(pred.tail); + if (err) { + return err; + } - err = lfs_cache_prog(lfs, &lfs->pcache, &lfs->rcache, - nblock, i, &data, 1); + // current block end of list? 
+ if (cwd.split) { + // update tails, this creates a desync + lfs_fs_preporphans(lfs, +1); + lfs_pair_tole32(dir.pair); + err = lfs_dir_commit(lfs, &pred, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), dir.pair})); + lfs_pair_fromle32(dir.pair); if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } return err; } + lfs_fs_preporphans(lfs, -1); } - // copy over new state of file - memcpy(file->cache.buffer, lfs->pcache.buffer, lfs->cfg->prog_size); - file->cache.block = lfs->pcache.block; - file->cache.off = lfs->pcache.off; - lfs_cache_zero(lfs, &lfs->pcache); + // now insert into our parent block + lfs_pair_tole32(dir.pair); + err = lfs_dir_commit(lfs, &cwd, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_CREATE, id, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_DIR, id, nlen), path}, + {LFS_MKTAG(LFS_TYPE_DIRSTRUCT, id, 8), dir.pair}, + {!cwd.split + ? LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8) + : LFS_MKTAG(LFS_FROM_NOOP, 0, 0), dir.pair})); + lfs_pair_fromle32(dir.pair); + if (err) { + return err; + } - file->block = nblock; return 0; } -static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file) { - if (file->flags & LFS_F_READING) { - // just drop read cache - lfs_cache_drop(lfs, &file->cache); - file->flags &= ~LFS_F_READING; +int lfs_dir_open(lfs_t *lfs, lfs_dir_t *dir, const char *path) { + lfs_stag_t tag = lfs_dir_find(lfs, &dir->m, &path, NULL); + if (tag < 0) { + return tag; } - if (file->flags & LFS_F_WRITING) { - lfs_off_t pos = file->pos; + if (lfs_tag_type3(tag) != LFS_TYPE_DIR) { + return LFS_ERR_NOTDIR; + } - // copy over anything after current branch - lfs_file_t orig = { - .head = file->head, - .size = file->size, - .flags = LFS_O_RDONLY, - .pos = file->pos, - .cache = lfs->rcache, - }; - lfs_cache_drop(lfs, &lfs->rcache); + lfs_block_t pair[2]; + if (lfs_tag_id(tag) == 0x3ff) { + // handle root dir separately + pair[0] = lfs->root[0]; + pair[1] = lfs->root[1]; + } else { + // get dir pair from parent + lfs_stag_t res = lfs_dir_get(lfs, &dir->m, 
LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, lfs_tag_id(tag), 8), pair); + if (res < 0) { + return res; + } + lfs_pair_fromle32(pair); + } - while (file->pos < file->size) { - // copy over a byte at a time, leave it up to caching - // to make this efficient - uint8_t data; - lfs_ssize_t res = lfs_file_read(lfs, &orig, &data, 1); - if (res < 0) { - return res; - } + // fetch first pair + int err = lfs_dir_fetch(lfs, &dir->m, pair); + if (err) { + return err; + } - res = lfs_file_write(lfs, file, &data, 1); - if (res < 0) { - return res; - } + // setup entry + dir->head[0] = dir->m.pair[0]; + dir->head[1] = dir->m.pair[1]; + dir->id = 0; + dir->pos = 0; - // keep our reference to the rcache in sync - if (lfs->rcache.block != 0xffffffff) { - lfs_cache_drop(lfs, &orig.cache); - lfs_cache_drop(lfs, &lfs->rcache); - } + // add to list of mdirs + dir->type = LFS_TYPE_DIR; + dir->next = (lfs_dir_t*)lfs->mlist; + lfs->mlist = (struct lfs_mlist*)dir; + + return 0; +} + +int lfs_dir_close(lfs_t *lfs, lfs_dir_t *dir) { + // remove from list of mdirs + for (struct lfs_mlist **p = &lfs->mlist; *p; p = &(*p)->next) { + if (*p == (struct lfs_mlist*)dir) { + *p = (*p)->next; + break; } + } - // write out what we have - while (true) { - int err = lfs_cache_flush(lfs, &file->cache, &lfs->rcache); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; - } - return err; + return 0; +} + +int lfs_dir_read(lfs_t *lfs, lfs_dir_t *dir, struct lfs_info *info) { + memset(info, 0, sizeof(*info)); + + // special offset for '.' and '..' 
+ if (dir->pos == 0) { + info->type = LFS_TYPE_DIR; + strcpy(info->name, "."); + dir->pos += 1; + return 1; + } else if (dir->pos == 1) { + info->type = LFS_TYPE_DIR; + strcpy(info->name, ".."); + dir->pos += 1; + return 1; + } + + while (true) { + if (dir->id == dir->m.count) { + if (!dir->m.split) { + return false; } - break; -relocate: - err = lfs_file_relocate(lfs, file); + int err = lfs_dir_fetch(lfs, &dir->m, dir->m.tail); if (err) { return err; } + + dir->id = 0; } - // actual file updates - file->head = file->block; - file->size = file->pos; - file->flags &= ~LFS_F_WRITING; - file->flags |= LFS_F_DIRTY; + int err = lfs_dir_getinfo(lfs, &dir->m, dir->id, info); + if (err && err != LFS_ERR_NOENT) { + return err; + } - file->pos = pos; + dir->id += 1; + if (err != LFS_ERR_NOENT) { + break; + } } - return 0; + dir->pos += 1; + return true; } -int lfs_file_sync(lfs_t *lfs, lfs_file_t *file) { - int err = lfs_file_flush(lfs, file); +int lfs_dir_seek(lfs_t *lfs, lfs_dir_t *dir, lfs_off_t off) { + // simply walk from head dir + int err = lfs_dir_rewind(lfs, dir); if (err) { return err; } - if ((file->flags & LFS_F_DIRTY) && - !(file->flags & LFS_F_ERRED) && - !lfs_pairisnull(file->pair)) { - // update dir entry - lfs_dir_t cwd; - err = lfs_dir_fetch(lfs, &cwd, file->pair); - if (err) { - return err; - } + // first two for ./.. 
+ dir->pos = lfs_min(2, off); + off -= dir->pos; - lfs_entry_t entry = {.off = file->poff}; - err = lfs_bd_read(lfs, cwd.pair[0], entry.off, - &entry.d, sizeof(entry.d)); - lfs_entry_fromle32(&entry.d); - if (err) { - return err; - } + while (off != 0) { + dir->id = lfs_min(dir->m.count, off); + dir->pos += dir->id; + off -= dir->id; - LFS_ASSERT(entry.d.type == LFS_TYPE_REG); - entry.d.u.file.head = file->head; - entry.d.u.file.size = file->size; + if (dir->id == dir->m.count) { + if (!dir->m.split) { + return LFS_ERR_INVAL; + } - err = lfs_dir_update(lfs, &cwd, &entry, NULL); - if (err) { - return err; + err = lfs_dir_fetch(lfs, &dir->m, dir->m.tail); + if (err) { + return err; + } } - - file->flags &= ~LFS_F_DIRTY; } return 0; } -lfs_ssize_t lfs_file_read(lfs_t *lfs, lfs_file_t *file, - void *buffer, lfs_size_t size) { - uint8_t *data = buffer; - lfs_size_t nsize = size; +lfs_soff_t lfs_dir_tell(lfs_t *lfs, lfs_dir_t *dir) { + (void)lfs; + return dir->pos; +} - if ((file->flags & 3) == LFS_O_WRONLY) { - return LFS_ERR_BADF; +int lfs_dir_rewind(lfs_t *lfs, lfs_dir_t *dir) { + // reload the head dir + int err = lfs_dir_fetch(lfs, &dir->m, dir->head); + if (err) { + return err; } - if (file->flags & LFS_F_WRITING) { - // flush out any writes - int err = lfs_file_flush(lfs, file); - if (err) { - return err; - } - } + dir->m.pair[0] = dir->head[0]; + dir->m.pair[1] = dir->head[1]; + dir->id = 0; + dir->pos = 0; + return 0; +} - if (file->pos >= file->size) { - // eof if past end + +/// File index list operations /// +static int lfs_ctz_index(lfs_t *lfs, lfs_off_t *off) { + lfs_off_t size = *off; + lfs_off_t b = lfs->cfg->block_size - 2*4; + lfs_off_t i = size / b; + if (i == 0) { return 0; } - size = lfs_min(size, file->size - file->pos); - nsize = size; + i = (size - 4*(lfs_popc(i-1)+2)) / b; + *off = size - b*i - 4*lfs_popc(i); + return i; +} + +static int lfs_ctz_find(lfs_t *lfs, + const lfs_cache_t *pcache, lfs_cache_t *rcache, + lfs_block_t head, lfs_size_t 
size, + lfs_size_t pos, lfs_block_t *block, lfs_off_t *off) { + if (size == 0) { + *block = 0xffffffff; + *off = 0; + return 0; + } + + lfs_off_t current = lfs_ctz_index(lfs, &(lfs_off_t){size-1}); + lfs_off_t target = lfs_ctz_index(lfs, &pos); + + while (current > target) { + lfs_size_t skip = lfs_min( + lfs_npw2(current-target+1) - 1, + lfs_ctz(current)); + + int err = lfs_bd_read(lfs, + pcache, rcache, sizeof(head), + head, 4*skip, &head, sizeof(head)); + head = lfs_fromle32(head); + if (err) { + return err; + } + + LFS_ASSERT(head >= 2 && head <= lfs->cfg->block_count); + current -= 1 << skip; + } + + *block = head; + *off = pos; + return 0; +} + +static int lfs_ctz_extend(lfs_t *lfs, + lfs_cache_t *pcache, lfs_cache_t *rcache, + lfs_block_t head, lfs_size_t size, + lfs_block_t *block, lfs_off_t *off) { + while (true) { + // go ahead and grab a block + lfs_block_t nblock; + int err = lfs_alloc(lfs, &nblock); + if (err) { + return err; + } + LFS_ASSERT(nblock >= 2 && nblock <= lfs->cfg->block_count); + + { + err = lfs_bd_erase(lfs, nblock); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + + if (size == 0) { + *block = nblock; + *off = 0; + return 0; + } + + size -= 1; + lfs_off_t index = lfs_ctz_index(lfs, &size); + size += 1; + + // just copy out the last block if it is incomplete + if (size != lfs->cfg->block_size) { + for (lfs_off_t i = 0; i < size; i++) { + uint8_t data; + err = lfs_bd_read(lfs, + NULL, rcache, size-i, + head, i, &data, 1); + if (err) { + return err; + } + + err = lfs_bd_prog(lfs, + pcache, rcache, true, + nblock, i, &data, 1); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + } + + *block = nblock; + *off = size; + return 0; + } + + // append block + index += 1; + lfs_size_t skips = lfs_ctz(index) + 1; + + for (lfs_off_t i = 0; i < skips; i++) { + head = lfs_tole32(head); + err = lfs_bd_prog(lfs, pcache, rcache, true, + nblock, 4*i, &head, 4); + head = 
lfs_fromle32(head); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + + if (i != skips-1) { + err = lfs_bd_read(lfs, + NULL, rcache, sizeof(head), + head, 4*i, &head, sizeof(head)); + head = lfs_fromle32(head); + if (err) { + return err; + } + } + + LFS_ASSERT(head >= 2 && head <= lfs->cfg->block_count); + } + + *block = nblock; + *off = 4*skips; + return 0; + } + +relocate: + LFS_DEBUG("Bad block at %"PRIu32, nblock); + + // just clear cache and try a new block + lfs_cache_drop(lfs, pcache); + } +} + +static int lfs_ctz_traverse(lfs_t *lfs, + const lfs_cache_t *pcache, lfs_cache_t *rcache, + lfs_block_t head, lfs_size_t size, + int (*cb)(void*, lfs_block_t), void *data) { + if (size == 0) { + return 0; + } + + lfs_off_t index = lfs_ctz_index(lfs, &(lfs_off_t){size-1}); + + while (true) { + int err = cb(data, head); + if (err) { + return err; + } + + if (index == 0) { + return 0; + } + + lfs_block_t heads[2]; + int count = 2 - (index & 1); + err = lfs_bd_read(lfs, + pcache, rcache, count*sizeof(head), + head, 0, &heads, count*sizeof(head)); + heads[0] = lfs_fromle32(heads[0]); + heads[1] = lfs_fromle32(heads[1]); + if (err) { + return err; + } + + for (int i = 0; i < count-1; i++) { + err = cb(data, heads[i]); + if (err) { + return err; + } + } + + head = heads[count-1]; + index -= count; + } +} + + +/// Top level file operations /// +int lfs_file_opencfg(lfs_t *lfs, lfs_file_t *file, + const char *path, int flags, + const struct lfs_file_config *cfg) { + // deorphan if we haven't yet, needed at most once after poweron + if ((flags & 3) != LFS_O_RDONLY) { + int err = lfs_fs_forceconsistency(lfs); + if (err) { + return err; + } + } + + // setup simple file details + int err; + file->cfg = cfg; + file->flags = flags; + file->pos = 0; + file->cache.buffer = NULL; + + // allocate entry for file if it doesn't exist + lfs_stag_t tag = lfs_dir_find(lfs, &file->m, &path, &file->id); + if (tag < 0 && !(tag == LFS_ERR_NOENT && file->id != 
0x3ff)) { + err = tag; + goto cleanup; + } + + // get id, add to list of mdirs to catch update changes + file->type = LFS_TYPE_REG; + file->next = (lfs_file_t*)lfs->mlist; + lfs->mlist = (struct lfs_mlist*)file; + + if (tag == LFS_ERR_NOENT) { + if (!(flags & LFS_O_CREAT)) { + err = LFS_ERR_NOENT; + goto cleanup; + } + + // check that name fits + lfs_size_t nlen = strlen(path); + if (nlen > lfs->name_max) { + err = LFS_ERR_NAMETOOLONG; + goto cleanup; + } + + // get next slot and create entry to remember name + err = lfs_dir_commit(lfs, &file->m, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_CREATE, file->id, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_REG, file->id, nlen), path}, + {LFS_MKTAG(LFS_TYPE_INLINESTRUCT, file->id, 0), NULL})); + if (err) { + err = LFS_ERR_NAMETOOLONG; + goto cleanup; + } + + tag = LFS_MKTAG(LFS_TYPE_INLINESTRUCT, 0, 0); + } else if (flags & LFS_O_EXCL) { + err = LFS_ERR_EXIST; + goto cleanup; + } else if (lfs_tag_type3(tag) != LFS_TYPE_REG) { + err = LFS_ERR_ISDIR; + goto cleanup; + } else if (flags & LFS_O_TRUNC) { + // truncate if requested + tag = LFS_MKTAG(LFS_TYPE_INLINESTRUCT, file->id, 0); + file->flags |= LFS_F_DIRTY; + } else { + // try to load what's on disk, if it's inlined we'll fix it later + tag = lfs_dir_get(lfs, &file->m, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, file->id, 8), &file->ctz); + if (tag < 0) { + err = tag; + goto cleanup; + } + lfs_ctz_fromle32(&file->ctz); + } + + // fetch attrs + for (unsigned i = 0; i < file->cfg->attr_count; i++) { + if ((file->flags & 3) != LFS_O_WRONLY) { + lfs_stag_t res = lfs_dir_get(lfs, &file->m, + LFS_MKTAG(0x7ff, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_USERATTR + file->cfg->attrs[i].type, + file->id, file->cfg->attrs[i].size), + file->cfg->attrs[i].buffer); + if (res < 0 && res != LFS_ERR_NOENT) { + err = res; + goto cleanup; + } + } + + if ((file->flags & 3) != LFS_O_RDONLY) { + if (file->cfg->attrs[i].size > lfs->attr_max) { + err = LFS_ERR_NOSPC; + goto cleanup; + } + + file->flags |= 
LFS_F_DIRTY; + } + } + + // allocate buffer if needed + if (file->cfg->buffer) { + file->cache.buffer = file->cfg->buffer; + } else { + file->cache.buffer = lfs_malloc(lfs->cfg->cache_size); + if (!file->cache.buffer) { + err = LFS_ERR_NOMEM; + goto cleanup; + } + } + + // zero to avoid information leak + lfs_cache_zero(lfs, &file->cache); + + if (lfs_tag_type3(tag) == LFS_TYPE_INLINESTRUCT) { + // load inline files + file->ctz.head = 0xfffffffe; + file->ctz.size = lfs_tag_size(tag); + file->flags |= LFS_F_INLINE; + file->cache.block = file->ctz.head; + file->cache.off = 0; + file->cache.size = lfs->cfg->cache_size; + + // don't always read (may be new/trunc file) + if (file->ctz.size > 0) { + lfs_stag_t res = lfs_dir_get(lfs, &file->m, + LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, file->id, + lfs_min(file->cache.size, 0x3fe)), + file->cache.buffer); + if (res < 0) { + err = res; + goto cleanup; + } + } + } + + return 0; + +cleanup: + // clean up lingering resources + file->flags |= LFS_F_ERRED; + lfs_file_close(lfs, file); + return err; +} + +int lfs_file_open(lfs_t *lfs, lfs_file_t *file, + const char *path, int flags) { + static const struct lfs_file_config defaults = {0}; + return lfs_file_opencfg(lfs, file, path, flags, &defaults); +} + +int lfs_file_close(lfs_t *lfs, lfs_file_t *file) { + int err = lfs_file_sync(lfs, file); + + // remove from list of mdirs + for (struct lfs_mlist **p = &lfs->mlist; *p; p = &(*p)->next) { + if (*p == (struct lfs_mlist*)file) { + *p = (*p)->next; + break; + } + } + + // clean up memory + if (!file->cfg->buffer) { + lfs_free(file->cache.buffer); + } + + return err; +} + +static int lfs_file_relocate(lfs_t *lfs, lfs_file_t *file) { + while (true) { + // just relocate what exists into new block + lfs_block_t nblock; + int err = lfs_alloc(lfs, &nblock); + if (err) { + return err; + } + + err = lfs_bd_erase(lfs, nblock); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + + // either 
read from dirty cache or disk + for (lfs_off_t i = 0; i < file->off; i++) { + uint8_t data; + if (file->flags & LFS_F_INLINE) { + err = lfs_dir_getread(lfs, &file->m, + // note we evict inline files before they can be dirty + NULL, &file->cache, file->off-i, + LFS_MKTAG(0xfff, 0x1ff, 0), + LFS_MKTAG(LFS_TYPE_INLINESTRUCT, file->id, 0), + i, &data, 1); + if (err) { + return err; + } + } else { + err = lfs_bd_read(lfs, + &file->cache, &lfs->rcache, file->off-i, + file->block, i, &data, 1); + if (err) { + return err; + } + } + + err = lfs_bd_prog(lfs, + &lfs->pcache, &lfs->rcache, true, + nblock, i, &data, 1); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + } + + // copy over new state of file + memcpy(file->cache.buffer, lfs->pcache.buffer, lfs->cfg->cache_size); + file->cache.block = lfs->pcache.block; + file->cache.off = lfs->pcache.off; + file->cache.size = lfs->pcache.size; + lfs_cache_zero(lfs, &lfs->pcache); + + file->block = nblock; + file->flags &= ~LFS_F_INLINE; + file->flags |= LFS_F_WRITING; + return 0; + +relocate: + LFS_DEBUG("Bad block at %"PRIu32, nblock); + + // just clear cache and try a new block + lfs_cache_drop(lfs, &lfs->pcache); + } +} + +static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file) { + if (file->flags & LFS_F_READING) { + if (!(file->flags & LFS_F_INLINE)) { + lfs_cache_drop(lfs, &file->cache); + } + file->flags &= ~LFS_F_READING; + } + + if (file->flags & LFS_F_WRITING) { + lfs_off_t pos = file->pos; + + if (!(file->flags & LFS_F_INLINE)) { + // copy over anything after current branch + lfs_file_t orig = { + .ctz.head = file->ctz.head, + .ctz.size = file->ctz.size, + .flags = LFS_O_RDONLY, + .pos = file->pos, + .cache = lfs->rcache, + }; + lfs_cache_drop(lfs, &lfs->rcache); + + while (file->pos < file->ctz.size) { + // copy over a byte at a time, leave it up to caching + // to make this efficient + uint8_t data; + lfs_ssize_t res = lfs_file_read(lfs, &orig, &data, 1); + if (res < 0) { + return 
res; + } + + res = lfs_file_write(lfs, file, &data, 1); + if (res < 0) { + return res; + } + + // keep our reference to the rcache in sync + if (lfs->rcache.block != 0xffffffff) { + lfs_cache_drop(lfs, &orig.cache); + lfs_cache_drop(lfs, &lfs->rcache); + } + } + + // write out what we have + while (true) { + int err = lfs_bd_flush(lfs, &file->cache, &lfs->rcache, true); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + return err; + } + + break; + +relocate: + LFS_DEBUG("Bad block at %"PRIu32, file->block); + err = lfs_file_relocate(lfs, file); + if (err) { + return err; + } + } + } else { + file->ctz.size = lfs_max(file->pos, file->ctz.size); + } + + // actual file updates + file->ctz.head = file->block; + file->ctz.size = file->pos; + file->flags &= ~LFS_F_WRITING; + file->flags |= LFS_F_DIRTY; + + file->pos = pos; + } + + return 0; +} + +int lfs_file_sync(lfs_t *lfs, lfs_file_t *file) { + while (true) { + int err = lfs_file_flush(lfs, file); + if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + + if ((file->flags & LFS_F_DIRTY) && + !(file->flags & LFS_F_ERRED) && + !lfs_pair_isnull(file->m.pair)) { + // update dir entry + uint16_t type; + const void *buffer; + lfs_size_t size; + struct lfs_ctz ctz; + if (file->flags & LFS_F_INLINE) { + // inline the whole file + type = LFS_TYPE_INLINESTRUCT; + buffer = file->cache.buffer; + size = file->ctz.size; + } else { + // update the ctz reference + type = LFS_TYPE_CTZSTRUCT; + // copy ctz so alloc will work during a relocate + ctz = file->ctz; + lfs_ctz_tole32(&ctz); + buffer = &ctz; + size = sizeof(ctz); + } + + // commit file data and attributes + err = lfs_dir_commit(lfs, &file->m, LFS_MKATTRS( + {LFS_MKTAG(type, file->id, size), buffer}, + {LFS_MKTAG(LFS_FROM_USERATTRS, file->id, + file->cfg->attr_count), file->cfg->attrs})); + if (err) { + if (err == LFS_ERR_NOSPC && (file->flags & LFS_F_INLINE)) { + goto relocate; + } + file->flags |= LFS_F_ERRED; + return err; + } + + file->flags &= 
~LFS_F_DIRTY; + } + + return 0; + +relocate: + // inline file doesn't fit anymore + file->off = file->pos; + err = lfs_file_relocate(lfs, file); + if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + } +} + +lfs_ssize_t lfs_file_read(lfs_t *lfs, lfs_file_t *file, + void *buffer, lfs_size_t size) { + uint8_t *data = buffer; + lfs_size_t nsize = size; + + if ((file->flags & 3) == LFS_O_WRONLY) { + return LFS_ERR_BADF; + } + + if (file->flags & LFS_F_WRITING) { + // flush out any writes + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + } + + if (file->pos >= file->ctz.size) { + // eof if past end + return 0; + } + + size = lfs_min(size, file->ctz.size - file->pos); + nsize = size; + + while (nsize > 0) { + // check if we need a new block + if (!(file->flags & LFS_F_READING) || + file->off == lfs->cfg->block_size) { + if (!(file->flags & LFS_F_INLINE)) { + int err = lfs_ctz_find(lfs, NULL, &file->cache, + file->ctz.head, file->ctz.size, + file->pos, &file->block, &file->off); + if (err) { + return err; + } + } else { + file->block = 0xfffffffe; + file->off = file->pos; + } + + file->flags |= LFS_F_READING; + } + + // read as much as we can in current block + lfs_size_t diff = lfs_min(nsize, lfs->cfg->block_size - file->off); + if (file->flags & LFS_F_INLINE) { + int err = lfs_dir_getread(lfs, &file->m, + NULL, &file->cache, lfs->cfg->block_size, + LFS_MKTAG(0xfff, 0x1ff, 0), + LFS_MKTAG(LFS_TYPE_INLINESTRUCT, file->id, 0), + file->off, data, diff); + if (err) { + return err; + } + } else { + int err = lfs_bd_read(lfs, + NULL, &file->cache, lfs->cfg->block_size, + file->block, file->off, data, diff); + if (err) { + return err; + } + } + + file->pos += diff; + file->off += diff; + data += diff; + nsize -= diff; + } + + return size; +} + +lfs_ssize_t lfs_file_write(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size) { + const uint8_t *data = buffer; + lfs_size_t nsize = size; + + if ((file->flags & 3) == LFS_O_RDONLY) { + 
return LFS_ERR_BADF; + } + + if (file->flags & LFS_F_READING) { + // drop any reads + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + } + + if ((file->flags & LFS_O_APPEND) && file->pos < file->ctz.size) { + file->pos = file->ctz.size; + } + + if (file->pos + size > lfs->file_max) { + // Larger than file limit? + return LFS_ERR_FBIG; + } + + if (!(file->flags & LFS_F_WRITING) && file->pos > file->ctz.size) { + // fill with zeros + lfs_off_t pos = file->pos; + file->pos = file->ctz.size; + + while (file->pos < pos) { + lfs_ssize_t res = lfs_file_write(lfs, file, &(uint8_t){0}, 1); + if (res < 0) { + return res; + } + } + } + + if ((file->flags & LFS_F_INLINE) && + lfs_max(file->pos+nsize, file->ctz.size) > + lfs_min(LFS_ATTR_MAX, lfs_min( + lfs->cfg->cache_size, lfs->cfg->block_size/8))) { + // inline file doesn't fit anymore + file->off = file->pos; + lfs_alloc_ack(lfs); + int err = lfs_file_relocate(lfs, file); + if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + } + + while (nsize > 0) { + // check if we need a new block + if (!(file->flags & LFS_F_WRITING) || + file->off == lfs->cfg->block_size) { + if (!(file->flags & LFS_F_INLINE)) { + if (!(file->flags & LFS_F_WRITING) && file->pos > 0) { + // find out which block we're extending from + int err = lfs_ctz_find(lfs, NULL, &file->cache, + file->ctz.head, file->ctz.size, + file->pos-1, &file->block, &file->off); + if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + + // mark cache as dirty since we may have read data into it + lfs_cache_zero(lfs, &file->cache); + } + + // extend file with new blocks + lfs_alloc_ack(lfs); + int err = lfs_ctz_extend(lfs, &file->cache, &lfs->rcache, + file->block, file->pos, + &file->block, &file->off); + if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + } else { + file->block = 0xfffffffe; + file->off = file->pos; + } + + file->flags |= LFS_F_WRITING; + } + + // program as much as we can in current block + lfs_size_t diff = 
lfs_min(nsize, lfs->cfg->block_size - file->off); + while (true) { + int err = lfs_bd_prog(lfs, &file->cache, &lfs->rcache, true, + file->block, file->off, data, diff); + if (err) { + if (err == LFS_ERR_CORRUPT) { + goto relocate; + } + file->flags |= LFS_F_ERRED; + return err; + } - while (nsize > 0) { - // check if we need a new block - if (!(file->flags & LFS_F_READING) || - file->off == lfs->cfg->block_size) { - int err = lfs_ctz_find(lfs, &file->cache, NULL, - file->head, file->size, - file->pos, &file->block, &file->off); + break; +relocate: + err = lfs_file_relocate(lfs, file); if (err) { + file->flags |= LFS_F_ERRED; + return err; + } + } + + file->pos += diff; + file->off += diff; + data += diff; + nsize -= diff; + + lfs_alloc_ack(lfs); + } + + file->flags &= ~LFS_F_ERRED; + return size; +} + +lfs_soff_t lfs_file_seek(lfs_t *lfs, lfs_file_t *file, + lfs_soff_t off, int whence) { + // write out everything beforehand, may be noop if rdonly + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + + // find new pos + lfs_off_t npos = file->pos; + if (whence == LFS_SEEK_SET) { + npos = off; + } else if (whence == LFS_SEEK_CUR) { + npos = file->pos + off; + } else if (whence == LFS_SEEK_END) { + npos = file->ctz.size + off; + } + + if (npos > lfs->file_max) { + // file position out of range + return LFS_ERR_INVAL; + } + + // update pos + file->pos = npos; + return npos; +} + +int lfs_file_truncate(lfs_t *lfs, lfs_file_t *file, lfs_off_t size) { + if ((file->flags & 3) == LFS_O_RDONLY) { + return LFS_ERR_BADF; + } + + lfs_off_t oldsize = lfs_file_size(lfs, file); + if (size < oldsize) { + // need to flush since directly changing metadata + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + + // lookup new head in ctz skip list + err = lfs_ctz_find(lfs, NULL, &file->cache, + file->ctz.head, file->ctz.size, + size, &file->ctz.head, &(lfs_off_t){0}); + if (err) { + return err; + } + + file->ctz.size = size; + file->flags |= 
LFS_F_DIRTY; + } else if (size > oldsize) { + lfs_off_t pos = file->pos; + + // flush+seek if not already at end + if (file->pos != oldsize) { + int err = lfs_file_seek(lfs, file, 0, LFS_SEEK_END); + if (err < 0) { return err; } + } + + // fill with zeros + while (file->pos < size) { + lfs_ssize_t res = lfs_file_write(lfs, file, &(uint8_t){0}, 1); + if (res < 0) { + return res; + } + } + + // restore pos + int err = lfs_file_seek(lfs, file, pos, LFS_SEEK_SET); + if (err < 0) { + return err; + } + } + + return 0; +} + +lfs_soff_t lfs_file_tell(lfs_t *lfs, lfs_file_t *file) { + (void)lfs; + return file->pos; +} + +int lfs_file_rewind(lfs_t *lfs, lfs_file_t *file) { + lfs_soff_t res = lfs_file_seek(lfs, file, 0, LFS_SEEK_SET); + if (res < 0) { + return res; + } + + return 0; +} + +lfs_soff_t lfs_file_size(lfs_t *lfs, lfs_file_t *file) { + (void)lfs; + if (file->flags & LFS_F_WRITING) { + return lfs_max(file->pos, file->ctz.size); + } else { + return file->ctz.size; + } +} + + +/// General fs operations /// +int lfs_stat(lfs_t *lfs, const char *path, struct lfs_info *info) { + lfs_mdir_t cwd; + lfs_stag_t tag = lfs_dir_find(lfs, &cwd, &path, NULL); + if (tag < 0) { + return tag; + } + + return lfs_dir_getinfo(lfs, &cwd, lfs_tag_id(tag), info); +} + +int lfs_remove(lfs_t *lfs, const char *path) { + // deorphan if we haven't yet, needed at most once after poweron + int err = lfs_fs_forceconsistency(lfs); + if (err) { + return err; + } + + lfs_mdir_t cwd; + lfs_stag_t tag = lfs_dir_find(lfs, &cwd, &path, NULL); + if (tag < 0 || lfs_tag_id(tag) == 0x3ff) { + return (tag < 0) ? 
tag : LFS_ERR_INVAL; + } + + lfs_mdir_t dir; + if (lfs_tag_type3(tag) == LFS_TYPE_DIR) { + // must be empty before removal + lfs_block_t pair[2]; + lfs_stag_t res = lfs_dir_get(lfs, &cwd, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, lfs_tag_id(tag), 8), pair); + if (res < 0) { + return res; + } + lfs_pair_fromle32(pair); + + err = lfs_dir_fetch(lfs, &dir, pair); + if (err) { + return err; + } + + if (dir.count > 0 || dir.split) { + return LFS_ERR_NOTEMPTY; + } + + // mark fs as orphaned + lfs_fs_preporphans(lfs, +1); + } + + // delete the entry + err = lfs_dir_commit(lfs, &cwd, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_DELETE, lfs_tag_id(tag), 0), NULL})); + if (err) { + return err; + } + + if (lfs_tag_type3(tag) == LFS_TYPE_DIR) { + // fix orphan + lfs_fs_preporphans(lfs, -1); + + err = lfs_fs_pred(lfs, dir.pair, &cwd); + if (err) { + return err; + } + + err = lfs_dir_drop(lfs, &cwd, &dir); + if (err) { + return err; + } + } + + return 0; +} + +int lfs_rename(lfs_t *lfs, const char *oldpath, const char *newpath) { + // deorphan if we haven't yet, needed at most once after poweron + int err = lfs_fs_forceconsistency(lfs); + if (err) { + return err; + } + + // find old entry + lfs_mdir_t oldcwd; + lfs_stag_t oldtag = lfs_dir_find(lfs, &oldcwd, &oldpath, NULL); + if (oldtag < 0 || lfs_tag_id(oldtag) == 0x3ff) { + return (oldtag < 0) ? oldtag : LFS_ERR_INVAL; + } + + // find new entry + lfs_mdir_t newcwd; + uint16_t newid; + lfs_stag_t prevtag = lfs_dir_find(lfs, &newcwd, &newpath, &newid); + if ((prevtag < 0 || lfs_tag_id(prevtag) == 0x3ff) && + !(prevtag == LFS_ERR_NOENT && newid != 0x3ff)) { + return (prevtag < 0) ? 
prevtag : LFS_ERR_INVAL; + } + + lfs_mdir_t prevdir; + if (prevtag == LFS_ERR_NOENT) { + // check that name fits + lfs_size_t nlen = strlen(newpath); + if (nlen > lfs->name_max) { + return LFS_ERR_NAMETOOLONG; + } + } else if (lfs_tag_type3(prevtag) != lfs_tag_type3(oldtag)) { + return LFS_ERR_ISDIR; + } else if (lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { + // must be empty before removal + lfs_block_t prevpair[2]; + lfs_stag_t res = lfs_dir_get(lfs, &newcwd, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, newid, 8), prevpair); + if (res < 0) { + return res; + } + lfs_pair_fromle32(prevpair); + + // must be empty before removal + err = lfs_dir_fetch(lfs, &prevdir, prevpair); + if (err) { + return err; + } + + if (prevdir.count > 0 || prevdir.split) { + return LFS_ERR_NOTEMPTY; + } + + // mark fs as orphaned + lfs_fs_preporphans(lfs, +1); + } + + // create move to fix later + uint16_t newoldtagid = lfs_tag_id(oldtag); + if (lfs_pair_cmp(oldcwd.pair, newcwd.pair) == 0 && + prevtag == LFS_ERR_NOENT && newid <= newoldtagid) { + // there is a small chance we are being renamed in the same directory + // to an id less than our old id, the global update to handle this + // is a bit messy + newoldtagid += 1; + } + + lfs_fs_prepmove(lfs, newoldtagid, oldcwd.pair); + + // move over all attributes + err = lfs_dir_commit(lfs, &newcwd, LFS_MKATTRS( + {prevtag != LFS_ERR_NOENT + ? LFS_MKTAG(LFS_TYPE_DELETE, newid, 0) + : LFS_MKTAG(LFS_FROM_NOOP, 0, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_CREATE, newid, 0), NULL}, + {LFS_MKTAG(lfs_tag_type3(oldtag), newid, strlen(newpath)), + newpath}, + {LFS_MKTAG(LFS_FROM_MOVE, newid, lfs_tag_id(oldtag)), &oldcwd})); + if (err) { + return err; + } + + // let commit clean up after move (if we're different! 
otherwise move + // logic already fixed it for us) + if (lfs_pair_cmp(oldcwd.pair, newcwd.pair) != 0) { + err = lfs_dir_commit(lfs, &oldcwd, NULL, 0); + if (err) { + return err; + } + } + + if (prevtag != LFS_ERR_NOENT && lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { + // fix orphan + lfs_fs_preporphans(lfs, -1); + + err = lfs_fs_pred(lfs, prevdir.pair, &newcwd); + if (err) { + return err; + } + + err = lfs_dir_drop(lfs, &newcwd, &prevdir); + if (err) { + return err; + } + } + + return 0; +} + +lfs_ssize_t lfs_getattr(lfs_t *lfs, const char *path, + uint8_t type, void *buffer, lfs_size_t size) { + lfs_mdir_t cwd; + lfs_stag_t tag = lfs_dir_find(lfs, &cwd, &path, NULL); + if (tag < 0) { + return tag; + } + + uint16_t id = lfs_tag_id(tag); + if (id == 0x3ff) { + // special case for root + id = 0; + int err = lfs_dir_fetch(lfs, &cwd, lfs->root); + if (err) { + return err; + } + } + + tag = lfs_dir_get(lfs, &cwd, LFS_MKTAG(0x7ff, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_USERATTR + type, + id, lfs_min(size, lfs->attr_max)), + buffer); + if (tag < 0) { + if (tag == LFS_ERR_NOENT) { + return LFS_ERR_NOATTR; + } + return tag; + } + + return lfs_tag_size(tag); +} + +static int lfs_commitattr(lfs_t *lfs, const char *path, + uint8_t type, const void *buffer, lfs_size_t size) { + lfs_mdir_t cwd; + lfs_stag_t tag = lfs_dir_find(lfs, &cwd, &path, NULL); + if (tag < 0) { + return tag; + } + + uint16_t id = lfs_tag_id(tag); + if (id == 0x3ff) { + // special case for root + id = 0; + int err = lfs_dir_fetch(lfs, &cwd, lfs->root); + if (err) { + return err; + } + } + + return lfs_dir_commit(lfs, &cwd, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_USERATTR + type, id, size), buffer})); +} + +int lfs_setattr(lfs_t *lfs, const char *path, + uint8_t type, const void *buffer, lfs_size_t size) { + if (size > lfs->attr_max) { + return LFS_ERR_NOSPC; + } + + return lfs_commitattr(lfs, path, type, buffer, size); +} + +int lfs_removeattr(lfs_t *lfs, const char *path, uint8_t type) { + return lfs_commitattr(lfs, 
path, type, NULL, 0x3ff); +} + + +/// Filesystem operations /// +static int lfs_init(lfs_t *lfs, const struct lfs_config *cfg) { + lfs->cfg = cfg; + int err = 0; + + // check that block size is a multiple of cache size is a multiple + // of prog and read sizes + LFS_ASSERT(lfs->cfg->cache_size % lfs->cfg->read_size == 0); + LFS_ASSERT(lfs->cfg->cache_size % lfs->cfg->prog_size == 0); + LFS_ASSERT(lfs->cfg->block_size % lfs->cfg->cache_size == 0); + + // check that the block size is large enough to fit ctz pointers + LFS_ASSERT(4*lfs_npw2(0xffffffff / (lfs->cfg->block_size-2*4)) + <= lfs->cfg->block_size); + + // we don't support some corner cases + LFS_ASSERT(lfs->cfg->block_cycles < 0xffffffff); + + // setup read cache + if (lfs->cfg->read_buffer) { + lfs->rcache.buffer = lfs->cfg->read_buffer; + } else { + lfs->rcache.buffer = lfs_malloc(lfs->cfg->cache_size); + if (!lfs->rcache.buffer) { + err = LFS_ERR_NOMEM; + goto cleanup; + } + } - file->flags |= LFS_F_READING; + // setup program cache + if (lfs->cfg->prog_buffer) { + lfs->pcache.buffer = lfs->cfg->prog_buffer; + } else { + lfs->pcache.buffer = lfs_malloc(lfs->cfg->cache_size); + if (!lfs->pcache.buffer) { + err = LFS_ERR_NOMEM; + goto cleanup; } + } - // read as much as we can in current block - lfs_size_t diff = lfs_min(nsize, lfs->cfg->block_size - file->off); - int err = lfs_cache_read(lfs, &file->cache, NULL, - file->block, file->off, data, diff); - if (err) { - return err; + // zero to avoid information leaks + lfs_cache_zero(lfs, &lfs->rcache); + lfs_cache_zero(lfs, &lfs->pcache); + + // setup lookahead, must be multiple of 64-bits + LFS_ASSERT(lfs->cfg->lookahead_size > 0); + LFS_ASSERT(lfs->cfg->lookahead_size % 8 == 0 && + (uintptr_t)lfs->cfg->lookahead_buffer % 8 == 0); + if (lfs->cfg->lookahead_buffer) { + lfs->free.buffer = lfs->cfg->lookahead_buffer; + } else { + lfs->free.buffer = lfs_malloc(lfs->cfg->lookahead_size); + if (!lfs->free.buffer) { + err = LFS_ERR_NOMEM; + goto cleanup; } + } - 
file->pos += diff; - file->off += diff; - data += diff; - nsize -= diff; + // check that the size limits are sane + LFS_ASSERT(lfs->cfg->name_max <= LFS_NAME_MAX); + lfs->name_max = lfs->cfg->name_max; + if (!lfs->name_max) { + lfs->name_max = LFS_NAME_MAX; } - return size; + LFS_ASSERT(lfs->cfg->file_max <= LFS_FILE_MAX); + lfs->file_max = lfs->cfg->file_max; + if (!lfs->file_max) { + lfs->file_max = LFS_FILE_MAX; + } + + LFS_ASSERT(lfs->cfg->attr_max <= LFS_ATTR_MAX); + lfs->attr_max = lfs->cfg->attr_max; + if (!lfs->attr_max) { + lfs->attr_max = LFS_ATTR_MAX; + } + + // setup default state + lfs->root[0] = 0xffffffff; + lfs->root[1] = 0xffffffff; + lfs->mlist = NULL; + lfs->seed = 0; + lfs->gstate = (struct lfs_gstate){0}; + lfs->gpending = (struct lfs_gstate){0}; + lfs->gdelta = (struct lfs_gstate){0}; +#ifdef LFS_MIGRATE + lfs->lfs1 = NULL; +#endif + + return 0; + +cleanup: + lfs_deinit(lfs); + return err; } -lfs_ssize_t lfs_file_write(lfs_t *lfs, lfs_file_t *file, - const void *buffer, lfs_size_t size) { - const uint8_t *data = buffer; - lfs_size_t nsize = size; +static int lfs_deinit(lfs_t *lfs) { + // free allocated memory + if (!lfs->cfg->read_buffer) { + lfs_free(lfs->rcache.buffer); + } - if ((file->flags & 3) == LFS_O_RDONLY) { - return LFS_ERR_BADF; + if (!lfs->cfg->prog_buffer) { + lfs_free(lfs->pcache.buffer); } - if (file->flags & LFS_F_READING) { - // drop any reads - int err = lfs_file_flush(lfs, file); + if (!lfs->cfg->lookahead_buffer) { + lfs_free(lfs->free.buffer); + } + + return 0; +} + +int lfs_format(lfs_t *lfs, const struct lfs_config *cfg) { + int err = 0; + { + err = lfs_init(lfs, cfg); if (err) { return err; } - } - if ((file->flags & LFS_O_APPEND) && file->pos < file->size) { - file->pos = file->size; + // create free lookahead + memset(lfs->free.buffer, 0, lfs->cfg->lookahead_size); + lfs->free.off = 0; + lfs->free.size = lfs_min(8*lfs->cfg->lookahead_size, + lfs->cfg->block_count); + lfs->free.i = 0; + lfs_alloc_ack(lfs); + + // 
create root dir + lfs_mdir_t root; + err = lfs_dir_alloc(lfs, &root); + if (err) { + goto cleanup; + } + + // write one superblock + lfs_superblock_t superblock = { + .version = LFS_DISK_VERSION, + .block_size = lfs->cfg->block_size, + .block_count = lfs->cfg->block_count, + .name_max = lfs->name_max, + .file_max = lfs->file_max, + .attr_max = lfs->attr_max, + }; + + lfs_superblock_tole32(&superblock); + err = lfs_dir_commit(lfs, &root, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_CREATE, 0, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_SUPERBLOCK, 0, 8), "littlefs"}, + {LFS_MKTAG(LFS_TYPE_INLINESTRUCT, 0, sizeof(superblock)), + &superblock})); + if (err) { + goto cleanup; + } + + // sanity check that fetch works + err = lfs_dir_fetch(lfs, &root, (const lfs_block_t[2]){0, 1}); + if (err) { + goto cleanup; + } } - if (file->pos + size > LFS_FILE_MAX) { - // larger than file limit? - return LFS_ERR_FBIG; +cleanup: + lfs_deinit(lfs); + return err; +} + +int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) { + int err = lfs_init(lfs, cfg); + if (err) { + return err; } - if (!(file->flags & LFS_F_WRITING) && file->pos > file->size) { - // fill with zeros - lfs_off_t pos = file->pos; - file->pos = file->size; + // scan directory blocks for superblock and any global updates + lfs_mdir_t dir = {.tail = {0, 1}}; + while (!lfs_pair_isnull(dir.tail)) { + // fetch next block in tail list + lfs_stag_t tag = lfs_dir_fetchmatch(lfs, &dir, dir.tail, + LFS_MKTAG(0x7ff, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_SUPERBLOCK, 0, 8), + NULL, + lfs_dir_find_match, &(struct lfs_dir_find_match){ + lfs, "littlefs", 8}); + if (tag < 0) { + err = tag; + goto cleanup; + } - while (file->pos < pos) { - lfs_ssize_t res = lfs_file_write(lfs, file, &(uint8_t){0}, 1); - if (res < 0) { - return res; + // has superblock? 
+ if (tag && !lfs_tag_isdelete(tag)) { + // update root + lfs->root[0] = dir.pair[0]; + lfs->root[1] = dir.pair[1]; + + // grab superblock + lfs_superblock_t superblock; + tag = lfs_dir_get(lfs, &dir, LFS_MKTAG(0x7ff, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_INLINESTRUCT, 0, sizeof(superblock)), + &superblock); + if (tag < 0) { + err = tag; + goto cleanup; + } + lfs_superblock_fromle32(&superblock); + + // check version + uint16_t major_version = (0xffff & (superblock.version >> 16)); + uint16_t minor_version = (0xffff & (superblock.version >> 0)); + if ((major_version != LFS_DISK_VERSION_MAJOR || + minor_version > LFS_DISK_VERSION_MINOR)) { + LFS_ERROR("Invalid version %"PRIu16".%"PRIu16, + major_version, minor_version); + err = LFS_ERR_INVAL; + goto cleanup; } - } - } - while (nsize > 0) { - // check if we need a new block - if (!(file->flags & LFS_F_WRITING) || - file->off == lfs->cfg->block_size) { - if (!(file->flags & LFS_F_WRITING) && file->pos > 0) { - // find out which block we're extending from - int err = lfs_ctz_find(lfs, &file->cache, NULL, - file->head, file->size, - file->pos-1, &file->block, &file->off); - if (err) { - file->flags |= LFS_F_ERRED; - return err; + // check superblock configuration + if (superblock.name_max) { + if (superblock.name_max > lfs->name_max) { + LFS_ERROR("Unsupported name_max (%"PRIu32" > %"PRIu32")", + superblock.name_max, lfs->name_max); + err = LFS_ERR_INVAL; + goto cleanup; } - // mark cache as dirty since we may have read data into it - lfs_cache_zero(lfs, &file->cache); + lfs->name_max = superblock.name_max; } - // extend file with new blocks - lfs_alloc_ack(lfs); - int err = lfs_ctz_extend(lfs, &lfs->rcache, &file->cache, - file->block, file->pos, - &file->block, &file->off); - if (err) { - file->flags |= LFS_F_ERRED; - return err; - } + if (superblock.file_max) { + if (superblock.file_max > lfs->file_max) { + LFS_ERROR("Unsupported file_max (%"PRIu32" > %"PRIu32")", + superblock.file_max, lfs->file_max); + err = 
LFS_ERR_INVAL; + goto cleanup; + } - file->flags |= LFS_F_WRITING; - } + lfs->file_max = superblock.file_max; + } - // program as much as we can in current block - lfs_size_t diff = lfs_min(nsize, lfs->cfg->block_size - file->off); - while (true) { - int err = lfs_cache_prog(lfs, &file->cache, &lfs->rcache, - file->block, file->off, data, diff); - if (err) { - if (err == LFS_ERR_CORRUPT) { - goto relocate; + if (superblock.attr_max) { + if (superblock.attr_max > lfs->attr_max) { + LFS_ERROR("Unsupported attr_max (%"PRIu32" > %"PRIu32")", + superblock.attr_max, lfs->attr_max); + err = LFS_ERR_INVAL; + goto cleanup; } - file->flags |= LFS_F_ERRED; - return err; - } - break; -relocate: - err = lfs_file_relocate(lfs, file); - if (err) { - file->flags |= LFS_F_ERRED; - return err; + lfs->attr_max = superblock.attr_max; } } - file->pos += diff; - file->off += diff; - data += diff; - nsize -= diff; - - lfs_alloc_ack(lfs); + // has gstate? + err = lfs_dir_getgstate(lfs, &dir, &lfs->gpending); + if (err) { + return err; + } } - file->flags &= ~LFS_F_ERRED; - return size; -} - -lfs_soff_t lfs_file_seek(lfs_t *lfs, lfs_file_t *file, - lfs_soff_t off, int whence) { - // write out everything beforehand, may be noop if rdonly - int err = lfs_file_flush(lfs, file); - if (err) { - return err; + // found superblock? 
+ if (lfs_pair_isnull(lfs->root)) { + err = LFS_ERR_INVAL; + goto cleanup; } - // find new pos - lfs_soff_t npos = file->pos; - if (whence == LFS_SEEK_SET) { - npos = off; - } else if (whence == LFS_SEEK_CUR) { - npos = file->pos + off; - } else if (whence == LFS_SEEK_END) { - npos = file->size + off; + // update littlefs with gstate + lfs->gpending.tag += !lfs_tag_isvalid(lfs->gpending.tag); + lfs->gstate = lfs->gpending; + if (lfs_gstate_hasmove(&lfs->gstate)) { + LFS_DEBUG("Found move %"PRIu32" %"PRIu32" %"PRIu16, + lfs->gstate.pair[0], + lfs->gstate.pair[1], + lfs_tag_id(lfs->gstate.tag)); } - if (npos < 0 || npos > LFS_FILE_MAX) { - // file position out of range - return LFS_ERR_INVAL; - } + // setup free lookahead + lfs->free.off = lfs->seed % lfs->cfg->block_size; + lfs->free.size = 0; + lfs->free.i = 0; + lfs_alloc_ack(lfs); - // update pos - file->pos = npos; - return npos; + return 0; + +cleanup: + lfs_unmount(lfs); + return err; } -int lfs_file_truncate(lfs_t *lfs, lfs_file_t *file, lfs_off_t size) { - if ((file->flags & 3) == LFS_O_RDONLY) { - return LFS_ERR_BADF; - } +int lfs_unmount(lfs_t *lfs) { + return lfs_deinit(lfs); +} - lfs_off_t oldsize = lfs_file_size(lfs, file); - if (size < oldsize) { - // need to flush since directly changing metadata - int err = lfs_file_flush(lfs, file); - if (err) { - return err; - } - // lookup new head in ctz skip list - err = lfs_ctz_find(lfs, &file->cache, NULL, - file->head, file->size, - size, &file->head, &(lfs_off_t){0}); +/// Filesystem filesystem operations /// +int lfs_fs_traverse(lfs_t *lfs, + int (*cb)(void *data, lfs_block_t block), void *data) { + // iterate over metadata pairs + lfs_mdir_t dir = {.tail = {0, 1}}; + +#ifdef LFS_MIGRATE + // also consider v1 blocks during migration + if (lfs->lfs1) { + int err = lfs1_traverse(lfs, cb, data); if (err) { return err; } - file->size = size; - file->flags |= LFS_F_DIRTY; - } else if (size > oldsize) { - lfs_off_t pos = file->pos; + dir.tail[0] = lfs->root[0]; + 
dir.tail[1] = lfs->root[1]; + } +#endif - // flush+seek if not already at end - if (file->pos != oldsize) { - int err = lfs_file_seek(lfs, file, 0, LFS_SEEK_END); - if (err < 0) { + while (!lfs_pair_isnull(dir.tail)) { + for (int i = 0; i < 2; i++) { + int err = cb(data, dir.tail[i]); + if (err) { return err; } } - // fill with zeros - while (file->pos < size) { - lfs_ssize_t res = lfs_file_write(lfs, file, &(uint8_t){0}, 1); - if (res < 0) { - return res; - } + // iterate through ids in directory + int err = lfs_dir_fetch(lfs, &dir, dir.tail); + if (err) { + return err; } - // restore pos - int err = lfs_file_seek(lfs, file, pos, LFS_SEEK_SET); - if (err < 0) { - return err; + for (uint16_t id = 0; id < dir.count; id++) { + struct lfs_ctz ctz; + lfs_stag_t tag = lfs_dir_get(lfs, &dir, LFS_MKTAG(0x700, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_STRUCT, id, sizeof(ctz)), &ctz); + if (tag < 0) { + if (tag == LFS_ERR_NOENT) { + continue; + } + return tag; + } + lfs_ctz_fromle32(&ctz); + + if (lfs_tag_type3(tag) == LFS_TYPE_CTZSTRUCT) { + err = lfs_ctz_traverse(lfs, NULL, &lfs->rcache, + ctz.head, ctz.size, cb, data); + if (err) { + return err; + } + } } } - return 0; -} + // iterate over any open files + for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { + if (f->type != LFS_TYPE_REG) { + continue; + } -lfs_soff_t lfs_file_tell(lfs_t *lfs, lfs_file_t *file) { - (void)lfs; - return file->pos; -} + if ((f->flags & LFS_F_DIRTY) && !(f->flags & LFS_F_INLINE)) { + int err = lfs_ctz_traverse(lfs, &f->cache, &lfs->rcache, + f->ctz.head, f->ctz.size, cb, data); + if (err) { + return err; + } + } -int lfs_file_rewind(lfs_t *lfs, lfs_file_t *file) { - lfs_soff_t res = lfs_file_seek(lfs, file, 0, LFS_SEEK_SET); - if (res < 0) { - return res; + if ((f->flags & LFS_F_WRITING) && !(f->flags & LFS_F_INLINE)) { + int err = lfs_ctz_traverse(lfs, &f->cache, &lfs->rcache, + f->block, f->pos, cb, data); + if (err) { + return err; + } + } } return 0; } -lfs_soff_t lfs_file_size(lfs_t 
*lfs, lfs_file_t *file) { - (void)lfs; - if (file->flags & LFS_F_WRITING) { - return lfs_max(file->pos, file->size); - } else { - return file->size; +static int lfs_fs_pred(lfs_t *lfs, + const lfs_block_t pair[2], lfs_mdir_t *pdir) { + // iterate over all directory directory entries + pdir->tail[0] = 0; + pdir->tail[1] = 1; + while (!lfs_pair_isnull(pdir->tail)) { + if (lfs_pair_cmp(pdir->tail, pair) == 0) { + return 0; + } + + int err = lfs_dir_fetch(lfs, pdir, pdir->tail); + if (err) { + return err; + } } + + return LFS_ERR_NOENT; } +struct lfs_fs_parent_match { + lfs_t *lfs; + const lfs_block_t pair[2]; +}; -/// General fs operations /// -int lfs_stat(lfs_t *lfs, const char *path, struct lfs_info *info) { - lfs_dir_t cwd; - lfs_entry_t entry; - int err = lfs_dir_find(lfs, &cwd, &entry, &path); +static int lfs_fs_parent_match(void *data, + lfs_tag_t tag, const void *buffer) { + struct lfs_fs_parent_match *find = data; + lfs_t *lfs = find->lfs; + const struct lfs_diskoff *disk = buffer; + (void)tag; + + lfs_block_t child[2]; + int err = lfs_bd_read(lfs, + &lfs->pcache, &lfs->rcache, lfs->cfg->block_size, + disk->block, disk->off, &child, sizeof(child)); if (err) { return err; } - memset(info, 0, sizeof(*info)); - info->type = entry.d.type; - if (info->type == LFS_TYPE_REG) { - info->size = entry.d.u.file.size; - } + lfs_pair_fromle32(child); + return (lfs_pair_cmp(child, find->pair) == 0) ? 
LFS_CMP_EQ : LFS_CMP_LT; +} - if (lfs_paircmp(entry.d.u.dir, lfs->root) == 0) { - strcpy(info->name, "/"); - } else { - err = lfs_bd_read(lfs, cwd.pair[0], - entry.off + 4+entry.d.elen+entry.d.alen, - info->name, entry.d.nlen); - if (err) { - return err; +static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t pair[2], + lfs_mdir_t *parent) { + // use fetchmatch with callback to find pairs + parent->tail[0] = 0; + parent->tail[1] = 1; + while (!lfs_pair_isnull(parent->tail)) { + lfs_stag_t tag = lfs_dir_fetchmatch(lfs, parent, parent->tail, + LFS_MKTAG(0x7ff, 0, 0x3ff), + LFS_MKTAG(LFS_TYPE_DIRSTRUCT, 0, 8), + NULL, + lfs_fs_parent_match, &(struct lfs_fs_parent_match){ + lfs, {pair[0], pair[1]}}); + if (tag && tag != LFS_ERR_NOENT) { + return tag; } } - return 0; + return LFS_ERR_NOENT; } -int lfs_remove(lfs_t *lfs, const char *path) { - // deorphan if we haven't yet, needed at most once after poweron - if (!lfs->deorphaned) { - int err = lfs_deorphan(lfs); - if (err) { - return err; +static int lfs_fs_relocate(lfs_t *lfs, + const lfs_block_t oldpair[2], lfs_block_t newpair[2]) { + // update internal root + if (lfs_pair_cmp(oldpair, lfs->root) == 0) { + LFS_DEBUG("Relocating root %"PRIu32" %"PRIu32, + newpair[0], newpair[1]); + lfs->root[0] = newpair[0]; + lfs->root[1] = newpair[1]; + } + + // update internally tracked dirs + for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { + if (lfs_pair_cmp(oldpair, d->m.pair) == 0) { + d->m.pair[0] = newpair[0]; + d->m.pair[1] = newpair[1]; } } - lfs_dir_t cwd; - lfs_entry_t entry; - int err = lfs_dir_find(lfs, &cwd, &entry, &path); - if (err) { - return err; + // find parent + lfs_mdir_t parent; + lfs_stag_t tag = lfs_fs_parent(lfs, oldpair, &parent); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; } - lfs_dir_t dir; - if (entry.d.type == LFS_TYPE_DIR) { - // must be empty before removal, checking size - // without masking top bit checks for any case where - // dir is not empty - err = lfs_dir_fetch(lfs, 
&dir, entry.d.u.dir); + if (tag != LFS_ERR_NOENT) { + // update disk, this creates a desync + lfs_fs_preporphans(lfs, +1); + + lfs_pair_tole32(newpair); + int err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS({tag, newpair})); + lfs_pair_fromle32(newpair); if (err) { return err; - } else if (dir.d.size != sizeof(dir.d)+4) { - return LFS_ERR_NOTEMPTY; } + + // next step, clean up orphans + lfs_fs_preporphans(lfs, -1); } - // remove the entry - err = lfs_dir_remove(lfs, &cwd, &entry); - if (err) { + // find pred + int err = lfs_fs_pred(lfs, oldpair, &parent); + if (err && err != LFS_ERR_NOENT) { return err; } - // if we were a directory, find pred, replace tail - if (entry.d.type == LFS_TYPE_DIR) { - int res = lfs_pred(lfs, dir.pair, &cwd); - if (res < 0) { - return res; - } - - LFS_ASSERT(res); // must have pred - cwd.d.tail[0] = dir.d.tail[0]; - cwd.d.tail[1] = dir.d.tail[1]; - - err = lfs_dir_commit(lfs, &cwd, NULL, 0); + // if we can't find dir, it must be new + if (err != LFS_ERR_NOENT) { + // replace bad pair, either we clean up desync, or no desync occured + lfs_pair_tole32(newpair); + err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + parent.split, 0x3ff, 8), newpair})); + lfs_pair_fromle32(newpair); if (err) { return err; } @@ -1915,348 +3682,402 @@ int lfs_remove(lfs_t *lfs, const char *path) { return 0; } -int lfs_rename(lfs_t *lfs, const char *oldpath, const char *newpath) { - // deorphan if we haven't yet, needed at most once after poweron - if (!lfs->deorphaned) { - int err = lfs_deorphan(lfs); - if (err) { - return err; - } +static void lfs_fs_preporphans(lfs_t *lfs, int8_t orphans) { + lfs->gpending.tag += orphans; + lfs_gstate_xororphans(&lfs->gdelta, &lfs->gpending, + lfs_gstate_hasorphans(&lfs->gpending)); + lfs_gstate_xororphans(&lfs->gpending, &lfs->gpending, + lfs_gstate_hasorphans(&lfs->gpending)); +} + +static void lfs_fs_prepmove(lfs_t *lfs, + uint16_t id, const lfs_block_t pair[2]) { + lfs_gstate_xormove(&lfs->gdelta, 
&lfs->gpending, id, pair); + lfs_gstate_xormove(&lfs->gpending, &lfs->gpending, id, pair); +} + + +static int lfs_fs_demove(lfs_t *lfs) { + if (!lfs_gstate_hasmove(&lfs->gstate)) { + return 0; } - // find old entry - lfs_dir_t oldcwd; - lfs_entry_t oldentry; - int err = lfs_dir_find(lfs, &oldcwd, &oldentry, &(const char *){oldpath}); + // Fix bad moves + LFS_DEBUG("Fixing move %"PRIu32" %"PRIu32" %"PRIu16, + lfs->gstate.pair[0], + lfs->gstate.pair[1], + lfs_tag_id(lfs->gstate.tag)); + + // fetch and delete the moved entry + lfs_mdir_t movedir; + int err = lfs_dir_fetch(lfs, &movedir, lfs->gstate.pair); if (err) { return err; } - // mark as moving - oldentry.d.type |= 0x80; - err = lfs_dir_update(lfs, &oldcwd, &oldentry, NULL); + // rely on cancel logic inside commit + err = lfs_dir_commit(lfs, &movedir, NULL, 0); if (err) { return err; } - // allocate new entry - lfs_dir_t newcwd; - lfs_entry_t preventry; - err = lfs_dir_find(lfs, &newcwd, &preventry, &newpath); - if (err && (err != LFS_ERR_NOENT || strchr(newpath, '/') != NULL)) { - return err; - } + return 0; +} - // must have same type - bool prevexists = (err != LFS_ERR_NOENT); - if (prevexists && preventry.d.type != (0x7f & oldentry.d.type)) { - return LFS_ERR_ISDIR; +static int lfs_fs_deorphan(lfs_t *lfs) { + if (!lfs_gstate_hasorphans(&lfs->gstate)) { + return 0; } - lfs_dir_t dir; - if (prevexists && preventry.d.type == LFS_TYPE_DIR) { - // must be empty before removal, checking size - // without masking top bit checks for any case where - // dir is not empty - err = lfs_dir_fetch(lfs, &dir, preventry.d.u.dir); + // Fix any orphans + lfs_mdir_t pdir = {.split = true}; + lfs_mdir_t dir = {.tail = {0, 1}}; + + // iterate over all directory directory entries + while (!lfs_pair_isnull(dir.tail)) { + int err = lfs_dir_fetch(lfs, &dir, dir.tail); if (err) { return err; - } else if (dir.d.size != sizeof(dir.d)+4) { - return LFS_ERR_NOTEMPTY; } - } - // move to new location - lfs_entry_t newentry = preventry; - 
newentry.d = oldentry.d; - newentry.d.type &= ~0x80; - newentry.d.nlen = strlen(newpath); + // check head blocks for orphans + if (!pdir.split) { + // check if we have a parent + lfs_mdir_t parent; + lfs_stag_t tag = lfs_fs_parent(lfs, pdir.tail, &parent); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; + } - if (prevexists) { - err = lfs_dir_update(lfs, &newcwd, &newentry, newpath); - if (err) { - return err; - } - } else { - err = lfs_dir_append(lfs, &newcwd, &newentry, newpath); - if (err) { - return err; - } - } + if (tag == LFS_ERR_NOENT) { + // we are an orphan + LFS_DEBUG("Fixing orphan %"PRIu32" %"PRIu32, + pdir.tail[0], pdir.tail[1]); - // fetch old pair again in case dir block changed - lfs->moving = true; - err = lfs_dir_find(lfs, &oldcwd, &oldentry, &oldpath); - if (err) { - return err; - } - lfs->moving = false; + err = lfs_dir_drop(lfs, &pdir, &dir); + if (err) { + return err; + } - // remove old entry - err = lfs_dir_remove(lfs, &oldcwd, &oldentry); - if (err) { - return err; - } + break; + } - // if we were a directory, find pred, replace tail - if (prevexists && preventry.d.type == LFS_TYPE_DIR) { - int res = lfs_pred(lfs, dir.pair, &newcwd); - if (res < 0) { - return res; - } + lfs_block_t pair[2]; + lfs_stag_t res = lfs_dir_get(lfs, &parent, + LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair); + if (res < 0) { + return res; + } + lfs_pair_fromle32(pair); - LFS_ASSERT(res); // must have pred - newcwd.d.tail[0] = dir.d.tail[0]; - newcwd.d.tail[1] = dir.d.tail[1]; + if (!lfs_pair_sync(pair, pdir.tail)) { + // we have desynced + LFS_DEBUG("Fixing half-orphan %"PRIu32" %"PRIu32, + pair[0], pair[1]); - err = lfs_dir_commit(lfs, &newcwd, NULL, 0); - if (err) { - return err; + lfs_pair_tole32(pair); + err = lfs_dir_commit(lfs, &pdir, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), pair})); + lfs_pair_fromle32(pair); + if (err) { + return err; + } + + break; + } } + + memcpy(&pdir, &dir, sizeof(pdir)); } + // mark orphans as fixed + 
lfs_fs_preporphans(lfs, -lfs_gstate_getorphans(&lfs->gstate)); + lfs->gstate = lfs->gpending; return 0; } - -/// Filesystem operations /// -static void lfs_deinit(lfs_t *lfs) { - // free allocated memory - if (!lfs->cfg->read_buffer) { - lfs_free(lfs->rcache.buffer); +static int lfs_fs_forceconsistency(lfs_t *lfs) { + int err = lfs_fs_demove(lfs); + if (err) { + return err; } - if (!lfs->cfg->prog_buffer) { - lfs_free(lfs->pcache.buffer); + err = lfs_fs_deorphan(lfs); + if (err) { + return err; } - if (!lfs->cfg->lookahead_buffer) { - lfs_free(lfs->free.buffer); - } + return 0; } -static int lfs_init(lfs_t *lfs, const struct lfs_config *cfg) { - lfs->cfg = cfg; +static int lfs_fs_size_count(void *p, lfs_block_t block) { + (void)block; + lfs_size_t *size = p; + *size += 1; + return 0; +} - // setup read cache - if (lfs->cfg->read_buffer) { - lfs->rcache.buffer = lfs->cfg->read_buffer; - } else { - lfs->rcache.buffer = lfs_malloc(lfs->cfg->read_size); - if (!lfs->rcache.buffer) { - goto cleanup; - } +lfs_ssize_t lfs_fs_size(lfs_t *lfs) { + lfs_size_t size = 0; + int err = lfs_fs_traverse(lfs, lfs_fs_size_count, &size); + if (err) { + return err; } - // setup program cache - if (lfs->cfg->prog_buffer) { - lfs->pcache.buffer = lfs->cfg->prog_buffer; - } else { - lfs->pcache.buffer = lfs_malloc(lfs->cfg->prog_size); - if (!lfs->pcache.buffer) { - goto cleanup; - } - } + return size; +} - // zero to avoid information leaks - lfs_cache_zero(lfs, &lfs->pcache); - lfs_cache_drop(lfs, &lfs->rcache); +#ifdef LFS_MIGRATE +////// Migration from littelfs v1 below this ////// - // setup lookahead, round down to nearest 32-bits - LFS_ASSERT(lfs->cfg->lookahead % 32 == 0); - LFS_ASSERT(lfs->cfg->lookahead > 0); - if (lfs->cfg->lookahead_buffer) { - lfs->free.buffer = lfs->cfg->lookahead_buffer; - } else { - lfs->free.buffer = lfs_malloc(lfs->cfg->lookahead/8); - if (!lfs->free.buffer) { - goto cleanup; - } - } +/// Version info /// - // check that program and read sizes are 
multiples of the block size - LFS_ASSERT(lfs->cfg->prog_size % lfs->cfg->read_size == 0); - LFS_ASSERT(lfs->cfg->block_size % lfs->cfg->prog_size == 0); +// Software library version +// Major (top-nibble), incremented on backwards incompatible changes +// Minor (bottom-nibble), incremented on feature additions +#define LFS1_VERSION 0x00010007 +#define LFS1_VERSION_MAJOR (0xffff & (LFS1_VERSION >> 16)) +#define LFS1_VERSION_MINOR (0xffff & (LFS1_VERSION >> 0)) - // check that the block size is large enough to fit ctz pointers - LFS_ASSERT(4*lfs_npw2(0xffffffff / (lfs->cfg->block_size-2*4)) - <= lfs->cfg->block_size); +// Version of On-disk data structures +// Major (top-nibble), incremented on backwards incompatible changes +// Minor (bottom-nibble), incremented on feature additions +#define LFS1_DISK_VERSION 0x00010001 +#define LFS1_DISK_VERSION_MAJOR (0xffff & (LFS1_DISK_VERSION >> 16)) +#define LFS1_DISK_VERSION_MINOR (0xffff & (LFS1_DISK_VERSION >> 0)) - // setup default state - lfs->root[0] = 0xffffffff; - lfs->root[1] = 0xffffffff; - lfs->files = NULL; - lfs->dirs = NULL; - lfs->deorphaned = false; - lfs->moving = false; - return 0; +/// v1 Definitions /// -cleanup: - lfs_deinit(lfs); - return LFS_ERR_NOMEM; +// File types +enum lfs1_type { + LFS1_TYPE_REG = 0x11, + LFS1_TYPE_DIR = 0x22, + LFS1_TYPE_SUPERBLOCK = 0x2e, +}; + +typedef struct lfs1 { + lfs_block_t root[2]; +} lfs1_t; + +typedef struct lfs1_entry { + lfs_off_t off; + + struct lfs1_disk_entry { + uint8_t type; + uint8_t elen; + uint8_t alen; + uint8_t nlen; + union { + struct { + lfs_block_t head; + lfs_size_t size; + } file; + lfs_block_t dir[2]; + } u; + } d; +} lfs1_entry_t; + +typedef struct lfs1_dir { + struct lfs1_dir *next; + lfs_block_t pair[2]; + lfs_off_t off; + + lfs_block_t head[2]; + lfs_off_t pos; + + struct lfs1_disk_dir { + uint32_t rev; + lfs_size_t size; + lfs_block_t tail[2]; + } d; +} lfs1_dir_t; + +typedef struct lfs1_superblock { + lfs_off_t off; + + struct lfs1_disk_superblock 
{ + uint8_t type; + uint8_t elen; + uint8_t alen; + uint8_t nlen; + lfs_block_t root[2]; + uint32_t block_size; + uint32_t block_count; + uint32_t version; + char magic[8]; + } d; +} lfs1_superblock_t; + + +/// Low-level wrappers v1->v2 /// +void lfs1_crc(uint32_t *crc, const void *buffer, size_t size) { + *crc = lfs_crc(*crc, buffer, size); } -int lfs_format(lfs_t *lfs, const struct lfs_config *cfg) { - int err = 0; - if (true) { - err = lfs_init(lfs, cfg); +static int lfs1_bd_read(lfs_t *lfs, lfs_block_t block, + lfs_off_t off, void *buffer, lfs_size_t size) { + // if we ever do more than writes to alternating pairs, + // this may need to consider pcache + return lfs_bd_read(lfs, &lfs->pcache, &lfs->rcache, size, + block, off, buffer, size); +} + +static int lfs1_bd_crc(lfs_t *lfs, lfs_block_t block, + lfs_off_t off, lfs_size_t size, uint32_t *crc) { + for (lfs_off_t i = 0; i < size; i++) { + uint8_t c; + int err = lfs1_bd_read(lfs, block, off+i, &c, 1); if (err) { return err; } - // create free lookahead - memset(lfs->free.buffer, 0, lfs->cfg->lookahead/8); - lfs->free.off = 0; - lfs->free.size = lfs_min(lfs->cfg->lookahead, lfs->cfg->block_count); - lfs->free.i = 0; - lfs_alloc_ack(lfs); + lfs1_crc(crc, &c, 1); + } - // create superblock dir - lfs_dir_t superdir; - err = lfs_dir_alloc(lfs, &superdir); - if (err) { - goto cleanup; - } + return 0; +} - // write root directory - lfs_dir_t root; - err = lfs_dir_alloc(lfs, &root); - if (err) { - goto cleanup; - } - err = lfs_dir_commit(lfs, &root, NULL, 0); - if (err) { - goto cleanup; - } +/// Endian swapping functions /// +static void lfs1_dir_fromle32(struct lfs1_disk_dir *d) { + d->rev = lfs_fromle32(d->rev); + d->size = lfs_fromle32(d->size); + d->tail[0] = lfs_fromle32(d->tail[0]); + d->tail[1] = lfs_fromle32(d->tail[1]); +} + +static void lfs1_dir_tole32(struct lfs1_disk_dir *d) { + d->rev = lfs_tole32(d->rev); + d->size = lfs_tole32(d->size); + d->tail[0] = lfs_tole32(d->tail[0]); + d->tail[1] = 
lfs_tole32(d->tail[1]); +} - lfs->root[0] = root.pair[0]; - lfs->root[1] = root.pair[1]; +static void lfs1_entry_fromle32(struct lfs1_disk_entry *d) { + d->u.dir[0] = lfs_fromle32(d->u.dir[0]); + d->u.dir[1] = lfs_fromle32(d->u.dir[1]); +} - // write superblocks - lfs_superblock_t superblock = { - .off = sizeof(superdir.d), - .d.type = LFS_TYPE_SUPERBLOCK, - .d.elen = sizeof(superblock.d) - sizeof(superblock.d.magic) - 4, - .d.nlen = sizeof(superblock.d.magic), - .d.version = LFS_DISK_VERSION, - .d.magic = {"littlefs"}, - .d.block_size = lfs->cfg->block_size, - .d.block_count = lfs->cfg->block_count, - .d.root = {lfs->root[0], lfs->root[1]}, - }; - superdir.d.tail[0] = root.pair[0]; - superdir.d.tail[1] = root.pair[1]; - superdir.d.size = sizeof(superdir.d) + sizeof(superblock.d) + 4; +static void lfs1_entry_tole32(struct lfs1_disk_entry *d) { + d->u.dir[0] = lfs_tole32(d->u.dir[0]); + d->u.dir[1] = lfs_tole32(d->u.dir[1]); +} - // write both pairs to be safe - lfs_superblock_tole32(&superblock.d); - bool valid = false; - for (int i = 0; i < 2; i++) { - err = lfs_dir_commit(lfs, &superdir, (struct lfs_region[]){ - {sizeof(superdir.d), sizeof(superblock.d), - &superblock.d, sizeof(superblock.d)} - }, 1); - if (err && err != LFS_ERR_CORRUPT) { - goto cleanup; - } +static void lfs1_superblock_fromle32(struct lfs1_disk_superblock *d) { + d->root[0] = lfs_fromle32(d->root[0]); + d->root[1] = lfs_fromle32(d->root[1]); + d->block_size = lfs_fromle32(d->block_size); + d->block_count = lfs_fromle32(d->block_count); + d->version = lfs_fromle32(d->version); +} - valid = valid || !err; - } - if (!valid) { - err = LFS_ERR_CORRUPT; - goto cleanup; - } +///// Metadata pair and directory operations /// +static inline lfs_size_t lfs1_entry_size(const lfs1_entry_t *entry) { + return 4 + entry->d.elen + entry->d.alen + entry->d.nlen; +} - // sanity check that fetch works - err = lfs_dir_fetch(lfs, &superdir, (const lfs_block_t[2]){0, 1}); +static int lfs1_dir_fetch(lfs_t *lfs, + 
lfs1_dir_t *dir, const lfs_block_t pair[2]) { + // copy out pair, otherwise may be aliasing dir + const lfs_block_t tpair[2] = {pair[0], pair[1]}; + bool valid = false; + + // check both blocks for the most recent revision + for (int i = 0; i < 2; i++) { + struct lfs1_disk_dir test; + int err = lfs1_bd_read(lfs, tpair[i], 0, &test, sizeof(test)); + lfs1_dir_fromle32(&test); if (err) { - goto cleanup; + if (err == LFS_ERR_CORRUPT) { + continue; + } + return err; } - lfs_alloc_ack(lfs); - } + if (valid && lfs_scmp(test.rev, dir->d.rev) < 0) { + continue; + } -cleanup: - lfs_deinit(lfs); - return err; -} + if ((0x7fffffff & test.size) < sizeof(test)+4 || + (0x7fffffff & test.size) > lfs->cfg->block_size) { + continue; + } -int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) { - int err = 0; - if (true) { - err = lfs_init(lfs, cfg); + uint32_t crc = 0xffffffff; + lfs1_dir_tole32(&test); + lfs1_crc(&crc, &test, sizeof(test)); + lfs1_dir_fromle32(&test); + err = lfs1_bd_crc(lfs, tpair[i], sizeof(test), + (0x7fffffff & test.size) - sizeof(test), &crc); if (err) { + if (err == LFS_ERR_CORRUPT) { + continue; + } return err; - } - - // setup free lookahead - lfs->free.off = 0; - lfs->free.size = 0; - lfs->free.i = 0; - lfs_alloc_ack(lfs); + } - // load superblock - lfs_dir_t dir; - lfs_superblock_t superblock; - err = lfs_dir_fetch(lfs, &dir, (const lfs_block_t[2]){0, 1}); - if (err && err != LFS_ERR_CORRUPT) { - goto cleanup; + if (crc != 0) { + continue; } - if (!err) { - err = lfs_bd_read(lfs, dir.pair[0], sizeof(dir.d), - &superblock.d, sizeof(superblock.d)); - lfs_superblock_fromle32(&superblock.d); - if (err) { - goto cleanup; - } + valid = true; - lfs->root[0] = superblock.d.root[0]; - lfs->root[1] = superblock.d.root[1]; - } + // setup dir in case it's valid + dir->pair[0] = tpair[(i+0) % 2]; + dir->pair[1] = tpair[(i+1) % 2]; + dir->off = sizeof(dir->d); + dir->d = test; + } - if (err || memcmp(superblock.d.magic, "littlefs", 8) != 0) { - LFS_ERROR("Invalid 
superblock at %d %d", 0, 1); - err = LFS_ERR_CORRUPT; - goto cleanup; + if (!valid) { + LFS_ERROR("Corrupted dir pair at %" PRIu32 " %" PRIu32 , + tpair[0], tpair[1]); + return LFS_ERR_CORRUPT; + } + + return 0; +} + +static int lfs1_dir_next(lfs_t *lfs, lfs1_dir_t *dir, lfs1_entry_t *entry) { + while (dir->off + sizeof(entry->d) > (0x7fffffff & dir->d.size)-4) { + if (!(0x80000000 & dir->d.size)) { + entry->off = dir->off; + return LFS_ERR_NOENT; } - uint16_t major_version = (0xffff & (superblock.d.version >> 16)); - uint16_t minor_version = (0xffff & (superblock.d.version >> 0)); - if ((major_version != LFS_DISK_VERSION_MAJOR || - minor_version > LFS_DISK_VERSION_MINOR)) { - LFS_ERROR("Invalid version %d.%d", major_version, minor_version); - err = LFS_ERR_INVAL; - goto cleanup; + int err = lfs1_dir_fetch(lfs, dir, dir->d.tail); + if (err) { + return err; } - return 0; + dir->off = sizeof(dir->d); + dir->pos += sizeof(dir->d) + 4; } -cleanup: - - lfs_deinit(lfs); - return err; -} + int err = lfs1_bd_read(lfs, dir->pair[0], dir->off, + &entry->d, sizeof(entry->d)); + lfs1_entry_fromle32(&entry->d); + if (err) { + return err; + } -int lfs_unmount(lfs_t *lfs) { - lfs_deinit(lfs); + entry->off = dir->off; + dir->off += lfs1_entry_size(entry); + dir->pos += lfs1_entry_size(entry); return 0; } - -/// Littlefs specific operations /// -int lfs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data) { - if (lfs_pairisnull(lfs->root)) { +/// littlefs v1 specific operations /// +int lfs1_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data) { + if (lfs_pair_isnull(lfs->lfs1->root)) { return 0; } // iterate over metadata pairs - lfs_dir_t dir; - lfs_entry_t entry; + lfs1_dir_t dir; + lfs1_entry_t entry; lfs_block_t cwd[2] = {0, 1}; while (true) { @@ -2267,23 +4088,23 @@ int lfs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data) { } } - int err = lfs_dir_fetch(lfs, &dir, cwd); + int err = lfs1_dir_fetch(lfs, &dir, cwd); if (err) { return err; } 
// iterate over contents while (dir.off + sizeof(entry.d) <= (0x7fffffff & dir.d.size)-4) { - err = lfs_bd_read(lfs, dir.pair[0], dir.off, + err = lfs1_bd_read(lfs, dir.pair[0], dir.off, &entry.d, sizeof(entry.d)); - lfs_entry_fromle32(&entry.d); + lfs1_entry_fromle32(&entry.d); if (err) { return err; } - dir.off += lfs_entry_size(&entry); - if ((0x70 & entry.d.type) == (0x70 & LFS_TYPE_REG)) { - err = lfs_ctz_traverse(lfs, &lfs->rcache, NULL, + dir.off += lfs1_entry_size(&entry); + if ((0x70 & entry.d.type) == (0x70 & LFS1_TYPE_REG)) { + err = lfs_ctz_traverse(lfs, NULL, &lfs->rcache, entry.d.u.file.head, entry.d.u.file.size, cb, data); if (err) { return err; @@ -2291,79 +4112,55 @@ int lfs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data) { } } - cwd[0] = dir.d.tail[0]; - cwd[1] = dir.d.tail[1]; - - if (lfs_pairisnull(cwd)) { - break; - } - } - - // iterate over any open files - for (lfs_file_t *f = lfs->files; f; f = f->next) { - if (f->flags & LFS_F_DIRTY) { - int err = lfs_ctz_traverse(lfs, &lfs->rcache, &f->cache, - f->head, f->size, cb, data); + // we also need to check if we contain a threaded v2 directory + lfs_mdir_t dir2 = {.split=true, .tail={cwd[0], cwd[1]}}; + while (dir2.split) { + err = lfs_dir_fetch(lfs, &dir2, dir2.tail); if (err) { - return err; + break; } - } - if (f->flags & LFS_F_WRITING) { - int err = lfs_ctz_traverse(lfs, &lfs->rcache, &f->cache, - f->block, f->pos, cb, data); - if (err) { - return err; + for (int i = 0; i < 2; i++) { + err = cb(data, dir2.pair[i]); + if (err) { + return err; + } } } + + cwd[0] = dir.d.tail[0]; + cwd[1] = dir.d.tail[1]; + + if (lfs_pair_isnull(cwd)) { + break; + } } return 0; } -static int lfs_pred(lfs_t *lfs, const lfs_block_t dir[2], lfs_dir_t *pdir) { - if (lfs_pairisnull(lfs->root)) { +static int lfs1_moved(lfs_t *lfs, const void *e) { + if (lfs_pair_isnull(lfs->lfs1->root)) { return 0; } - // iterate over all directory directory entries - int err = lfs_dir_fetch(lfs, pdir, (const 
lfs_block_t[2]){0, 1}); + // skip superblock + lfs1_dir_t cwd; + int err = lfs1_dir_fetch(lfs, &cwd, (const lfs_block_t[2]){0, 1}); if (err) { return err; } - while (!lfs_pairisnull(pdir->d.tail)) { - if (lfs_paircmp(pdir->d.tail, dir) == 0) { - return true; - } - - err = lfs_dir_fetch(lfs, pdir, pdir->d.tail); - if (err) { - return err; - } - } - - return false; -} - -static int lfs_parent(lfs_t *lfs, const lfs_block_t dir[2], - lfs_dir_t *parent, lfs_entry_t *entry) { - if (lfs_pairisnull(lfs->root)) { - return 0; - } - - parent->d.tail[0] = 0; - parent->d.tail[1] = 1; - // iterate over all directory directory entries - while (!lfs_pairisnull(parent->d.tail)) { - int err = lfs_dir_fetch(lfs, parent, parent->d.tail); + lfs1_entry_t entry; + while (!lfs_pair_isnull(cwd.d.tail)) { + err = lfs1_dir_fetch(lfs, &cwd, cwd.d.tail); if (err) { return err; } while (true) { - err = lfs_dir_next(lfs, parent, entry); + err = lfs1_dir_next(lfs, &cwd, &entry); if (err && err != LFS_ERR_NOENT) { return err; } @@ -2372,8 +4169,8 @@ static int lfs_parent(lfs_t *lfs, const lfs_block_t dir[2], break; } - if (((0x70 & entry->d.type) == (0x70 & LFS_TYPE_DIR)) && - lfs_paircmp(entry->d.u.dir, dir) == 0) { + if (!(0x80 & entry.d.type) && + memcmp(&entry.d.u, e, sizeof(entry.d.u)) == 0) { return true; } } @@ -2382,202 +4179,281 @@ static int lfs_parent(lfs_t *lfs, const lfs_block_t dir[2], return false; } -static int lfs_moved(lfs_t *lfs, const void *e) { - if (lfs_pairisnull(lfs->root)) { - return 0; - } - - // skip superblock - lfs_dir_t cwd; - int err = lfs_dir_fetch(lfs, &cwd, (const lfs_block_t[2]){0, 1}); - if (err) { - return err; - } - - // iterate over all directory directory entries - lfs_entry_t entry; - while (!lfs_pairisnull(cwd.d.tail)) { - err = lfs_dir_fetch(lfs, &cwd, cwd.d.tail); +/// Filesystem operations /// +static int lfs1_mount(lfs_t *lfs, struct lfs1 *lfs1, + const struct lfs_config *cfg) { + int err = 0; + { + err = lfs_init(lfs, cfg); if (err) { return err; } - 
while (true) { - err = lfs_dir_next(lfs, &cwd, &entry); - if (err && err != LFS_ERR_NOENT) { - return err; - } + lfs->lfs1 = lfs1; + lfs->lfs1->root[0] = 0xffffffff; + lfs->lfs1->root[1] = 0xffffffff; - if (err == LFS_ERR_NOENT) { - break; - } + // setup free lookahead + lfs->free.off = 0; + lfs->free.size = 0; + lfs->free.i = 0; + lfs_alloc_ack(lfs); - if (!(0x80 & entry.d.type) && - memcmp(&entry.d.u, e, sizeof(entry.d.u)) == 0) { - return true; - } + // load superblock + lfs1_dir_t dir; + lfs1_superblock_t superblock; + err = lfs1_dir_fetch(lfs, &dir, (const lfs_block_t[2]){0, 1}); + if (err && err != LFS_ERR_CORRUPT) { + goto cleanup; } - } - return false; -} - -static int lfs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], const lfs_block_t newpair[2]) { - // find parent - lfs_dir_t parent; - lfs_entry_t entry; - int res = lfs_parent(lfs, oldpair, &parent, &entry); - if (res < 0) { - return res; - } + if (!err) { + err = lfs1_bd_read(lfs, dir.pair[0], sizeof(dir.d), + &superblock.d, sizeof(superblock.d)); + lfs1_superblock_fromle32(&superblock.d); + if (err) { + goto cleanup; + } - if (res) { - // update disk, this creates a desync - entry.d.u.dir[0] = newpair[0]; - entry.d.u.dir[1] = newpair[1]; + lfs->lfs1->root[0] = superblock.d.root[0]; + lfs->lfs1->root[1] = superblock.d.root[1]; + } - int err = lfs_dir_update(lfs, &parent, &entry, NULL); - if (err) { - return err; + if (err || memcmp(superblock.d.magic, "littlefs", 8) != 0) { + LFS_ERROR("Invalid superblock at %d %d", 0, 1); + err = LFS_ERR_CORRUPT; + goto cleanup; } - // update internal root - if (lfs_paircmp(oldpair, lfs->root) == 0) { - LFS_DEBUG("Relocating root %" PRIu32 " %" PRIu32, - newpair[0], newpair[1]); - lfs->root[0] = newpair[0]; - lfs->root[1] = newpair[1]; + uint16_t major_version = (0xffff & (superblock.d.version >> 16)); + uint16_t minor_version = (0xffff & (superblock.d.version >> 0)); + if ((major_version != LFS1_DISK_VERSION_MAJOR || + minor_version > LFS1_DISK_VERSION_MINOR)) { 
+ LFS_ERROR("Invalid version %d.%d", major_version, minor_version); + err = LFS_ERR_INVAL; + goto cleanup; } - // clean up bad block, which should now be a desync - return lfs_deorphan(lfs); + return 0; } - // find pred - res = lfs_pred(lfs, oldpair, &parent); - if (res < 0) { - return res; - } +cleanup: + lfs_deinit(lfs); + return err; +} - if (res) { - // just replace bad pair, no desync can occur - parent.d.tail[0] = newpair[0]; - parent.d.tail[1] = newpair[1]; +static int lfs1_unmount(lfs_t *lfs) { + return lfs_deinit(lfs); +} - return lfs_dir_commit(lfs, &parent, NULL, 0); +/// v1 migration /// +int lfs_migrate(lfs_t *lfs, const struct lfs_config *cfg) { + struct lfs1 lfs1; + int err = lfs1_mount(lfs, &lfs1, cfg); + if (err) { + return err; } - // couldn't find dir, must be new - return 0; -} + { + // iterate through each directory, copying over entries + // into new directory + lfs1_dir_t dir1; + lfs_mdir_t dir2; + dir1.d.tail[0] = lfs->lfs1->root[0]; + dir1.d.tail[1] = lfs->lfs1->root[1]; + while (!lfs_pair_isnull(dir1.d.tail)) { + // iterate old dir + err = lfs1_dir_fetch(lfs, &dir1, dir1.d.tail); + if (err) { + goto cleanup; + } -int lfs_deorphan(lfs_t *lfs) { - lfs->deorphaned = true; + // create new dir and bind as temporary pretend root + err = lfs_dir_alloc(lfs, &dir2); + if (err) { + goto cleanup; + } - if (lfs_pairisnull(lfs->root)) { - return 0; - } + dir2.rev = dir1.d.rev; + dir1.head[0] = dir1.pair[0]; + dir1.head[1] = dir1.pair[1]; + lfs->root[0] = dir2.pair[0]; + lfs->root[1] = dir2.pair[1]; - lfs_dir_t pdir = {.d.size = 0x80000000}; - lfs_dir_t cwd = {.d.tail[0] = 0, .d.tail[1] = 1}; + err = lfs_dir_commit(lfs, &dir2, NULL, 0); + if (err) { + goto cleanup; + } - // iterate over all directory directory entries - for (lfs_size_t i = 0; i < lfs->cfg->block_count; i++) { - if (lfs_pairisnull(cwd.d.tail)) { - return 0; - } + while (true) { + lfs1_entry_t entry1; + err = lfs1_dir_next(lfs, &dir1, &entry1); + if (err && err != LFS_ERR_NOENT) { + goto 
cleanup; + } - int err = lfs_dir_fetch(lfs, &cwd, cwd.d.tail); - if (err) { - return err; - } + if (err == LFS_ERR_NOENT) { + break; + } - // check head blocks for orphans - if (!(0x80000000 & pdir.d.size)) { - // check if we have a parent - lfs_dir_t parent; - lfs_entry_t entry; - int res = lfs_parent(lfs, pdir.d.tail, &parent, &entry); - if (res < 0) { - return res; - } + // check that entry has not been moved + if (entry1.d.type & 0x80) { + int moved = lfs1_moved(lfs, &entry1.d.u); + if (moved < 0) { + err = moved; + goto cleanup; + } - if (!res) { - // we are an orphan - LFS_DEBUG("Found orphan %" PRIu32 " %" PRIu32, - pdir.d.tail[0], pdir.d.tail[1]); + if (moved) { + continue; + } + + entry1.d.type &= ~0x80; + } + + // also fetch name + char name[LFS_NAME_MAX+1]; + memset(name, 0, sizeof(name)); + err = lfs1_bd_read(lfs, dir1.pair[0], + entry1.off + 4+entry1.d.elen+entry1.d.alen, + name, entry1.d.nlen); + if (err) { + goto cleanup; + } - pdir.d.tail[0] = cwd.d.tail[0]; - pdir.d.tail[1] = cwd.d.tail[1]; + bool isdir = (entry1.d.type == LFS1_TYPE_DIR); - err = lfs_dir_commit(lfs, &pdir, NULL, 0); + // create entry in new dir + err = lfs_dir_fetch(lfs, &dir2, lfs->root); if (err) { - return err; + goto cleanup; } - return 0; + uint16_t id; + err = lfs_dir_find(lfs, &dir2, &(const char*){name}, &id); + if (!(err == LFS_ERR_NOENT && id != 0x3ff)) { + err = (err < 0) ? err : LFS_ERR_EXIST; + goto cleanup; + } + + lfs1_entry_tole32(&entry1.d); + err = lfs_dir_commit(lfs, &dir2, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_CREATE, id, 0), NULL}, + {LFS_MKTAG( + isdir ? LFS_TYPE_DIR : LFS_TYPE_REG, + id, entry1.d.nlen), name}, + {LFS_MKTAG( + isdir ? 
LFS_TYPE_DIRSTRUCT : LFS_TYPE_CTZSTRUCT, + id, sizeof(entry1.d.u)), &entry1.d.u})); // length of the union itself, not of a pointer to it + lfs1_entry_fromle32(&entry1.d); + if (err) { + goto cleanup; + } } - if (!lfs_pairsync(entry.d.u.dir, pdir.d.tail)) { - // we have desynced - LFS_DEBUG("Found desync %" PRIu32 " %" PRIu32, - entry.d.u.dir[0], entry.d.u.dir[1]); + if (!lfs_pair_isnull(dir1.d.tail)) { + // find last block and update tail to thread into fs + err = lfs_dir_fetch(lfs, &dir2, lfs->root); + if (err) { + goto cleanup; + } - pdir.d.tail[0] = entry.d.u.dir[0]; - pdir.d.tail[1] = entry.d.u.dir[1]; + while (dir2.split) { + err = lfs_dir_fetch(lfs, &dir2, dir2.tail); + if (err) { + goto cleanup; + } + } - err = lfs_dir_commit(lfs, &pdir, NULL, 0); + lfs_pair_tole32(dir2.pair); + err = lfs_dir_commit(lfs, &dir2, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), + dir1.d.tail})); // tail payload is the two-block pair, 8 bytes + lfs_pair_fromle32(dir2.pair); if (err) { - return err; + goto cleanup; } - - return 0; } - } - // check entries for moves - lfs_entry_t entry; - while (true) { - err = lfs_dir_next(lfs, &cwd, &entry); - if (err && err != LFS_ERR_NOENT) { - return err; + // Copy over first block to thread into fs. Unfortunately + // if this fails there is not much we can do.
+ LFS_DEBUG("Migrating %"PRIu32" %"PRIu32" -> %"PRIu32" %"PRIu32, + lfs->root[0], lfs->root[1], dir1.head[0], dir1.head[1]); + + err = lfs_bd_erase(lfs, dir1.head[1]); + if (err) { + goto cleanup; } - if (err == LFS_ERR_NOENT) { - break; + err = lfs_dir_fetch(lfs, &dir2, lfs->root); + if (err) { + goto cleanup; } - // found moved entry - if (entry.d.type & 0x80) { - int moved = lfs_moved(lfs, &entry.d.u); - if (moved < 0) { - return moved; + for (lfs_off_t i = 0; i < dir2.off; i++) { + uint8_t dat; + err = lfs_bd_read(lfs, + NULL, &lfs->rcache, dir2.off, + dir2.pair[0], i, &dat, 1); + if (err) { + goto cleanup; } - if (moved) { - LFS_DEBUG("Found move %" PRIu32 " %" PRIu32, - entry.d.u.dir[0], entry.d.u.dir[1]); - err = lfs_dir_remove(lfs, &cwd, &entry); - if (err) { - return err; - } - } else { - LFS_DEBUG("Found partial move %" PRIu32 " %" PRIu32, - entry.d.u.dir[0], entry.d.u.dir[1]); - entry.d.type &= ~0x80; - err = lfs_dir_update(lfs, &cwd, &entry, NULL); - if (err) { - return err; - } + err = lfs_bd_prog(lfs, + &lfs->pcache, &lfs->rcache, true, + dir1.head[1], i, &dat, 1); + if (err) { + goto cleanup; } } } - memcpy(&pdir, &cwd, sizeof(pdir)); + // Create new superblock. This marks a successful migration! 
+ err = lfs1_dir_fetch(lfs, &dir1, (const lfs_block_t[2]){0, 1}); + if (err) { + goto cleanup; + } + + dir2.pair[0] = dir1.pair[0]; + dir2.pair[1] = dir1.pair[1]; + dir2.rev = dir1.d.rev; + dir2.off = sizeof(dir2.rev); + dir2.etag = 0xffffffff; + dir2.count = 0; + dir2.tail[0] = lfs->lfs1->root[0]; + dir2.tail[1] = lfs->lfs1->root[1]; + dir2.erased = false; + dir2.split = true; + + lfs_superblock_t superblock = { + .version = LFS_DISK_VERSION, + .block_size = lfs->cfg->block_size, + .block_count = lfs->cfg->block_count, + .name_max = lfs->name_max, + .file_max = lfs->file_max, + .attr_max = lfs->attr_max, + }; + + lfs_superblock_tole32(&superblock); + err = lfs_dir_commit(lfs, &dir2, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_CREATE, 0, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_SUPERBLOCK, 0, 8), "littlefs"}, + {LFS_MKTAG(LFS_TYPE_INLINESTRUCT, 0, sizeof(superblock)), + &superblock})); + if (err) { + goto cleanup; + } + + // sanity check that fetch works + err = lfs_dir_fetch(lfs, &dir2, (const lfs_block_t[2]){0, 1}); + if (err) { + goto cleanup; + } } - // If we reached here, we have more directory pairs than blocks in the - // filesystem... 
So something must be horribly wrong - return LFS_ERR_CORRUPT; +cleanup: + lfs1_unmount(lfs); + return err; } + +#endif diff --git a/lfs.h b/lfs.h index 9c3174e7..ae991e07 100644 --- a/lfs.h +++ b/lfs.h @@ -21,14 +21,14 @@ extern "C" // Software library version // Major (top-nibble), incremented on backwards incompatible changes // Minor (bottom-nibble), incremented on feature additions -#define LFS_VERSION 0x00010007 +#define LFS_VERSION 0x00020000 #define LFS_VERSION_MAJOR (0xffff & (LFS_VERSION >> 16)) #define LFS_VERSION_MINOR (0xffff & (LFS_VERSION >> 0)) // Version of On-disk data structures // Major (top-nibble), incremented on backwards incompatible changes // Minor (bottom-nibble), incremented on feature additions -#define LFS_DISK_VERSION 0x00010001 +#define LFS_DISK_VERSION 0x00020000 #define LFS_DISK_VERSION_MAJOR (0xffff & (LFS_DISK_VERSION >> 16)) #define LFS_DISK_VERSION_MINOR (0xffff & (LFS_DISK_VERSION >> 0)) @@ -44,57 +44,98 @@ typedef int32_t lfs_soff_t; typedef uint32_t lfs_block_t; -// Max name size in bytes +// Maximum name size in bytes, may be redefined to reduce the size of the +// info struct. Limited to <= 1022. Stored in superblock and must be +// respected by other littlefs drivers. #ifndef LFS_NAME_MAX #define LFS_NAME_MAX 255 #endif -// Max file size in bytes +// Maximum size of a file in bytes, may be redefined to limit to support other +// drivers. Limited on disk to <= 4294967296. However, above 2147483647 the +// functions lfs_file_seek, lfs_file_size, and lfs_file_tell will return +// incorrect values due to using signed integers. Stored in superblock and +// must be respected by other littlefs drivers. #ifndef LFS_FILE_MAX #define LFS_FILE_MAX 2147483647 #endif +// Maximum size of custom attributes in bytes, may be redefined, but there is +// no real benefit to using a smaller LFS_ATTR_MAX. Limited to <= 1022. 
+#ifndef LFS_ATTR_MAX +#define LFS_ATTR_MAX 1022 +#endif + // Possible error codes, these are negative to allow // valid positive return values enum lfs_error { - LFS_ERR_OK = 0, // No error - LFS_ERR_IO = -5, // Error during device operation - LFS_ERR_CORRUPT = -52, // Corrupted - LFS_ERR_NOENT = -2, // No directory entry - LFS_ERR_EXIST = -17, // Entry already exists - LFS_ERR_NOTDIR = -20, // Entry is not a dir - LFS_ERR_ISDIR = -21, // Entry is a dir - LFS_ERR_NOTEMPTY = -39, // Dir is not empty - LFS_ERR_BADF = -9, // Bad file number - LFS_ERR_FBIG = -27, // File too large - LFS_ERR_INVAL = -22, // Invalid parameter - LFS_ERR_NOSPC = -28, // No space left on device - LFS_ERR_NOMEM = -12, // No more memory available + LFS_ERR_OK = 0, // No error + LFS_ERR_IO = -5, // Error during device operation + LFS_ERR_CORRUPT = -84, // Corrupted + LFS_ERR_NOENT = -2, // No directory entry + LFS_ERR_EXIST = -17, // Entry already exists + LFS_ERR_NOTDIR = -20, // Entry is not a dir + LFS_ERR_ISDIR = -21, // Entry is a dir + LFS_ERR_NOTEMPTY = -39, // Dir is not empty + LFS_ERR_BADF = -9, // Bad file number + LFS_ERR_FBIG = -27, // File too large + LFS_ERR_INVAL = -22, // Invalid parameter + LFS_ERR_NOSPC = -28, // No space left on device + LFS_ERR_NOMEM = -12, // No more memory available + LFS_ERR_NOATTR = -61, // No data/attr available + LFS_ERR_NAMETOOLONG = -36, // File name too long }; // File types enum lfs_type { - LFS_TYPE_REG = 0x11, - LFS_TYPE_DIR = 0x22, - LFS_TYPE_SUPERBLOCK = 0x2e, + // file types + LFS_TYPE_REG = 0x001, + LFS_TYPE_DIR = 0x002, + + // internally used types + LFS_TYPE_SPLICE = 0x400, + LFS_TYPE_NAME = 0x000, + LFS_TYPE_STRUCT = 0x200, + LFS_TYPE_USERATTR = 0x300, + LFS_TYPE_FROM = 0x100, + LFS_TYPE_TAIL = 0x600, + LFS_TYPE_GLOBALS = 0x700, + LFS_TYPE_CRC = 0x500, + + // internally used type specializations + LFS_TYPE_CREATE = 0x401, + LFS_TYPE_DELETE = 0x4ff, + LFS_TYPE_SUPERBLOCK = 0x0ff, + LFS_TYPE_DIRSTRUCT = 0x200, + LFS_TYPE_CTZSTRUCT = 
0x202, + LFS_TYPE_INLINESTRUCT = 0x201, + LFS_TYPE_SOFTTAIL = 0x600, + LFS_TYPE_HARDTAIL = 0x601, + LFS_TYPE_MOVESTATE = 0x7ff, + + // internal chip sources + LFS_FROM_NOOP = 0x000, + LFS_FROM_MOVE = 0x101, + LFS_FROM_USERATTRS = 0x102, }; // File open flags enum lfs_open_flags { // open flags - LFS_O_RDONLY = 1, // Open a file as read only - LFS_O_WRONLY = 2, // Open a file as write only - LFS_O_RDWR = 3, // Open a file as read and write - LFS_O_CREAT = 0x0100, // Create a file if it does not exist - LFS_O_EXCL = 0x0200, // Fail if a file already exists - LFS_O_TRUNC = 0x0400, // Truncate the existing file to zero size - LFS_O_APPEND = 0x0800, // Move to end of file on every write + LFS_O_RDONLY = 1, // Open a file as read only + LFS_O_WRONLY = 2, // Open a file as write only + LFS_O_RDWR = 3, // Open a file as read and write + LFS_O_CREAT = 0x0100, // Create a file if it does not exist + LFS_O_EXCL = 0x0200, // Fail if a file already exists + LFS_O_TRUNC = 0x0400, // Truncate the existing file to zero size + LFS_O_APPEND = 0x0800, // Move to end of file on every write // internally used flags - LFS_F_DIRTY = 0x10000, // File does not match storage - LFS_F_WRITING = 0x20000, // File has been written since last flush - LFS_F_READING = 0x40000, // File has been read since last flush - LFS_F_ERRED = 0x80000, // An error occured during write + LFS_F_DIRTY = 0x010000, // File does not match storage + LFS_F_WRITING = 0x020000, // File has been written since last flush + LFS_F_READING = 0x040000, // File has been read since last flush + LFS_F_ERRED = 0x080000, // An error occured during write + LFS_F_INLINE = 0x100000, // Currently inlined in directory entry }; // File seek flags @@ -132,52 +173,68 @@ struct lfs_config { // are propogated to the user. int (*sync)(const struct lfs_config *c); - // Minimum size of a block read. This determines the size of read buffers. 
- // This may be larger than the physical read size to improve performance - // by caching more of the block device. + // Minimum size of a block read. All read operations will be a + // multiple of this value. lfs_size_t read_size; - // Minimum size of a block program. This determines the size of program - // buffers. This may be larger than the physical program size to improve - // performance by caching more of the block device. - // Must be a multiple of the read size. + // Minimum size of a block program. All program operations will be a + // multiple of this value. lfs_size_t prog_size; // Size of an erasable block. This does not impact ram consumption and - // may be larger than the physical erase size. However, this should be - // kept small as each file currently takes up an entire block. - // Must be a multiple of the program size. + // may be larger than the physical erase size. However, non-inlined files + // take up at minimum one block. Must be a multiple of the read + // and program sizes. lfs_size_t block_size; // Number of erasable blocks on the device. lfs_size_t block_count; - // Number of blocks to lookahead during block allocation. A larger - // lookahead reduces the number of passes required to allocate a block. - // The lookahead buffer requires only 1 bit per block so it can be quite - // large with little ram impact. Should be a multiple of 32. - lfs_size_t lookahead; - - // Optional, statically allocated read buffer. Must be read sized. + // Number of erase cycles before we should move data to another block. + // May be zero, in which case no block-level wear-leveling is performed. + uint32_t block_cycles; + + // Size of block caches. Each cache buffers a portion of a block in RAM. + // The littlefs needs a read cache, a program cache, and one additional + // cache per file. Larger caches can improve performance by storing more + // data and reducing the number of disk accesses. 
Must be a multiple of + // the read and program sizes, and a factor of the block size. + lfs_size_t cache_size; + + // Size of the lookahead buffer in bytes. A larger lookahead buffer + // increases the number of blocks found during an allocation pass. The + // lookahead buffer is stored as a compact bitmap, so each byte of RAM + // can track 8 blocks. Must be a multiple of 4. + lfs_size_t lookahead_size; + + // Optional statically allocated read buffer. Must be cache_size. + // By default lfs_malloc is used to allocate this buffer. void *read_buffer; - // Optional, statically allocated program buffer. Must be program sized. + // Optional statically allocated program buffer. Must be cache_size. + // By default lfs_malloc is used to allocate this buffer. void *prog_buffer; - // Optional, statically allocated lookahead buffer. Must be 1 bit per - // lookahead block. + // Optional statically allocated lookahead buffer. Must be lookahead_size + // and aligned to a 64-bit boundary. By default lfs_malloc is used to + // allocate this buffer. void *lookahead_buffer; - // Optional, statically allocated buffer for files. Must be program sized. - // If enabled, only one file may be opened at a time. - void *file_buffer; -}; - -// Optional configuration provided during lfs_file_opencfg -struct lfs_file_config { - // Optional, statically allocated buffer for files. Must be program sized. - // If NULL, malloc will be used by default. - void *buffer; + // Optional upper limit on length of file names in bytes. No downside for + // larger names except the size of the info struct which is controlled by + // the LFS_NAME_MAX define. Defaults to LFS_NAME_MAX when zero. Stored in + // superblock and must be respected by other littlefs drivers. + lfs_size_t name_max; + + // Optional upper limit on files in bytes. No downside for larger files + // but must be <= LFS_FILE_MAX. Defaults to LFS_FILE_MAX when zero. Stored + // in superblock and must be respected by other littlefs drivers. 
+ lfs_size_t file_max; + + // Optional upper limit on custom attributes in bytes. No downside for + // larger attributes size but must be <= LFS_ATTR_MAX. Defaults to + // LFS_ATTR_MAX when zero. + lfs_size_t attr_max; }; // File info structure @@ -185,108 +242,149 @@ struct lfs_info { // Type of the file, either LFS_TYPE_REG or LFS_TYPE_DIR uint8_t type; - // Size of the file, only valid for REG files + // Size of the file, only valid for REG files. Limited to 32-bits. lfs_size_t size; - // Name of the file stored as a null-terminated string + // Name of the file stored as a null-terminated string. Limited to + // LFS_NAME_MAX+1, which can be changed by redefining LFS_NAME_MAX to + // reduce RAM. LFS_NAME_MAX is stored in superblock and must be + // respected by other littlefs drivers. char name[LFS_NAME_MAX+1]; }; +// Custom attribute structure, used to describe custom attributes +// committed atomically during file writes. +struct lfs_attr { + // 8-bit type of attribute, provided by user and used to + // identify the attribute + uint8_t type; -/// littlefs data structures /// -typedef struct lfs_entry { - lfs_off_t off; + // Pointer to buffer containing the attribute + void *buffer; + + // Size of attribute in bytes, limited to LFS_ATTR_MAX + lfs_size_t size; +}; + +// Optional configuration provided during lfs_file_opencfg +struct lfs_file_config { + // Optional statically allocated file buffer. Must be cache_size. + // By default lfs_malloc is used to allocate this buffer. + void *buffer; + + // Optional list of custom attributes related to the file. If the file + // is opened with read access, these attributes will be read from disk + // during the open call. If the file is opened with write access, the + // attributes will be written to disk every file sync or close. This + // write occurs atomically with update to the file's contents. + // + // Custom attributes are uniquely identified by an 8-bit type and limited + // to LFS_ATTR_MAX bytes. 
When read, if the stored attribute is smaller + // than the buffer, it will be padded with zeros. If the stored attribute + // is larger, then it will be silently truncated. If the attribute is not + // found, it will be created implicitly. + struct lfs_attr *attrs; + + // Number of custom attributes in the list + lfs_size_t attr_count; +}; - struct lfs_disk_entry { - uint8_t type; - uint8_t elen; - uint8_t alen; - uint8_t nlen; - union { - struct { - lfs_block_t head; - lfs_size_t size; - } file; - lfs_block_t dir[2]; - } u; - } d; -} lfs_entry_t; +/// internal littlefs data structures /// typedef struct lfs_cache { lfs_block_t block; lfs_off_t off; + lfs_size_t size; uint8_t *buffer; } lfs_cache_t; +typedef struct lfs_mdir { + lfs_block_t pair[2]; + uint32_t rev; + lfs_off_t off; + uint32_t etag; + uint16_t count; + bool erased; + bool split; + lfs_block_t tail[2]; +} lfs_mdir_t; + +// littlefs directory type +typedef struct lfs_dir { + struct lfs_dir *next; + uint16_t id; + uint8_t type; + lfs_mdir_t m; + + lfs_off_t pos; + lfs_block_t head[2]; +} lfs_dir_t; + +// littlefs file type typedef struct lfs_file { struct lfs_file *next; - lfs_block_t pair[2]; - lfs_off_t poff; + uint16_t id; + uint8_t type; + lfs_mdir_t m; - lfs_block_t head; - lfs_size_t size; + struct lfs_ctz { + lfs_block_t head; + lfs_size_t size; + } ctz; - const struct lfs_file_config *cfg; uint32_t flags; lfs_off_t pos; lfs_block_t block; lfs_off_t off; lfs_cache_t cache; -} lfs_file_t; -typedef struct lfs_dir { - struct lfs_dir *next; - lfs_block_t pair[2]; - lfs_off_t off; - - lfs_block_t head[2]; - lfs_off_t pos; - - struct lfs_disk_dir { - uint32_t rev; - lfs_size_t size; - lfs_block_t tail[2]; - } d; -} lfs_dir_t; + const struct lfs_file_config *cfg; +} lfs_file_t; typedef struct lfs_superblock { - lfs_off_t off; - - struct lfs_disk_superblock { - uint8_t type; - uint8_t elen; - uint8_t alen; - uint8_t nlen; - lfs_block_t root[2]; - uint32_t block_size; - uint32_t block_count; - uint32_t 
version; - char magic[8]; - } d; + uint32_t version; + lfs_size_t block_size; + lfs_size_t block_count; + lfs_size_t name_max; + lfs_size_t file_max; + lfs_size_t attr_max; } lfs_superblock_t; -typedef struct lfs_free { - lfs_block_t off; - lfs_block_t size; - lfs_block_t i; - lfs_block_t ack; - uint32_t *buffer; -} lfs_free_t; - -// The littlefs type +// The littlefs filesystem type typedef struct lfs { - const struct lfs_config *cfg; + lfs_cache_t rcache; + lfs_cache_t pcache; lfs_block_t root[2]; - lfs_file_t *files; - lfs_dir_t *dirs; + struct lfs_mlist { + struct lfs_mlist *next; + uint16_t id; + uint8_t type; + lfs_mdir_t m; + } *mlist; + uint32_t seed; + + struct lfs_gstate { + uint32_t tag; + lfs_block_t pair[2]; + } gstate, gpending, gdelta; + + struct lfs_free { + lfs_block_t off; + lfs_block_t size; + lfs_block_t i; + lfs_block_t ack; + uint32_t *buffer; + } free; - lfs_cache_t rcache; - lfs_cache_t pcache; + const struct lfs_config *cfg; + lfs_size_t name_max; + lfs_size_t file_max; + lfs_size_t attr_max; - lfs_free_t free; - bool deorphaned; - bool moving; +#ifdef LFS_MIGRATE + struct lfs1 *lfs1; +#endif } lfs_t; @@ -339,6 +437,38 @@ int lfs_rename(lfs_t *lfs, const char *oldpath, const char *newpath); // Returns a negative error code on failure. int lfs_stat(lfs_t *lfs, const char *path, struct lfs_info *info); +// Get a custom attribute +// +// Custom attributes are uniquely identified by an 8-bit type and limited +// to LFS_ATTR_MAX bytes. When read, if the stored attribute is smaller than +// the buffer, it will be padded with zeros. If the stored attribute is larger, +// then it will be silently truncated. If no attribute is found, the error +// LFS_ERR_NOATTR is returned and the buffer is filled with zeros. +// +// Returns the size of the attribute, or a negative error code on failure. +// Note, the returned size is the size of the attribute on disk, irrespective +// of the size of the buffer. 
This can be used to dynamically allocate a buffer +// or check for existence. +lfs_ssize_t lfs_getattr(lfs_t *lfs, const char *path, + uint8_t type, void *buffer, lfs_size_t size); + +// Set custom attributes +// +// Custom attributes are uniquely identified by an 8-bit type and limited +// to LFS_ATTR_MAX bytes. If an attribute is not found, it will be +// implicitly created. +// +// Returns a negative error code on failure. +int lfs_setattr(lfs_t *lfs, const char *path, + uint8_t type, const void *buffer, lfs_size_t size); + +// Removes a custom attribute +// +// If an attribute is not found, nothing happens. +// +// Returns a negative error code on failure. +int lfs_removeattr(lfs_t *lfs, const char *path, uint8_t type); + /// File operations /// @@ -473,7 +603,15 @@ lfs_soff_t lfs_dir_tell(lfs_t *lfs, lfs_dir_t *dir); int lfs_dir_rewind(lfs_t *lfs, lfs_dir_t *dir); -/// Miscellaneous littlefs specific operations /// +/// Filesystem-level filesystem operations + +// Finds the current size of the filesystem +// +// Note: Result is best effort. If files share COW structures, the returned +// size may be larger than the filesystem actually is. +// +// Returns the number of allocated blocks, or a negative error code on failure. +lfs_ssize_t lfs_fs_size(lfs_t *lfs); // Traverse through all blocks in use by the filesystem // @@ -482,16 +620,22 @@ int lfs_dir_rewind(lfs_t *lfs, lfs_dir_t *dir); // blocks are in use or how much of the storage is available. // // Returns a negative error code on failure. -int lfs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data); +int lfs_fs_traverse(lfs_t *lfs, int (*cb)(void*, lfs_block_t), void *data); -// Prunes any recoverable errors that may have occured in the filesystem +#ifdef LFS_MIGRATE +// Attempts to migrate a previous version of littlefs // -// Not needed to be called by user unless an operation is interrupted -// but the filesystem is still mounted. This is already called on first -// allocation.
+// Behaves similarly to the lfs_format function. Attempts to mount +// the previous version of littlefs and update the filesystem so it can be +// mounted with the current version of littlefs. +// +// Requires a littlefs object and config struct. This clobbers the littlefs +// object, and does not leave the filesystem mounted. The config struct must +// be zeroed for defaults and backwards compatibility. // // Returns a negative error code on failure. -int lfs_deorphan(lfs_t *lfs); +int lfs_migrate(lfs_t *lfs, const struct lfs_config *cfg); +#endif #ifdef __cplusplus diff --git a/lfs_util.c b/lfs_util.c index 9ca0756d..0b60e3b4 100644 --- a/lfs_util.c +++ b/lfs_util.c @@ -11,7 +11,7 @@ // Software CRC implementation with small lookup table -void lfs_crc(uint32_t *restrict crc, const void *buffer, size_t size) { +uint32_t lfs_crc(uint32_t crc, const void *buffer, size_t size) { static const uint32_t rtable[16] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, @@ -22,9 +22,11 @@ void lfs_crc(uint32_t *restrict crc, const void *buffer, size_t size) { const uint8_t *data = buffer; for (size_t i = 0; i < size; i++) { - *crc = (*crc >> 4) ^ rtable[(*crc ^ (data[i] >> 0)) & 0xf]; - *crc = (*crc >> 4) ^ rtable[(*crc ^ (data[i] >> 4)) & 0xf]; + crc = (crc >> 4) ^ rtable[(crc ^ (data[i] >> 0)) & 0xf]; + crc = (crc >> 4) ^ rtable[(crc ^ (data[i] >> 4)) & 0xf]; } + + return crc; } diff --git a/lfs_util.h b/lfs_util.h index b2dc2371..28b14005 100644 --- a/lfs_util.h +++ b/lfs_util.h @@ -11,8 +11,8 @@ // LFS_CONFIG as a header file to include (-DLFS_CONFIG=lfs_config.h). // // If LFS_CONFIG is used, none of the default utils will be emitted and must be -// provided by the config file. To start I would suggest copying lfs_util.h and -// modifying as needed. +// provided by the config file. To start, I would suggest copying lfs_util.h +// and modifying as needed. 
#ifdef LFS_CONFIG #define LFS_STRINGIZE(x) LFS_STRINGIZE2(x) #define LFS_STRINGIZE2(x) #x @@ -23,6 +23,7 @@ #include #include #include +#include #ifndef LFS_NO_MALLOC #include @@ -87,6 +88,15 @@ static inline uint32_t lfs_min(uint32_t a, uint32_t b) { return (a < b) ? a : b; } +// Align to nearest multiple of a size +static inline uint32_t lfs_aligndown(uint32_t a, uint32_t alignment) { + return a - (a % alignment); +} + +static inline uint32_t lfs_alignup(uint32_t a, uint32_t alignment) { + return lfs_aligndown(a + alignment-1, alignment); +} + // Find the next smallest power of 2 less than or equal to a static inline uint32_t lfs_npw2(uint32_t a) { #if !defined(LFS_NO_INTRINSICS) && (defined(__GNUC__) || defined(__CC_ARM)) @@ -130,7 +140,7 @@ static inline int lfs_scmp(uint32_t a, uint32_t b) { return (int)(unsigned)(a - b); } -// Convert from 32-bit little-endian to native order +// Convert between 32-bit little-endian and native order static inline uint32_t lfs_fromle32(uint32_t a) { #if !defined(LFS_NO_INTRINSICS) && ( \ (defined( BYTE_ORDER ) && BYTE_ORDER == ORDER_LITTLE_ENDIAN ) || \ @@ -150,15 +160,39 @@ static inline uint32_t lfs_fromle32(uint32_t a) { #endif } -// Convert to 32-bit little-endian from native order static inline uint32_t lfs_tole32(uint32_t a) { return lfs_fromle32(a); } +// Convert between 32-bit big-endian and native order +static inline uint32_t lfs_frombe32(uint32_t a) { +#if !defined(LFS_NO_INTRINSICS) && ( \ + (defined( BYTE_ORDER ) && BYTE_ORDER == ORDER_LITTLE_ENDIAN ) || \ + (defined(__BYTE_ORDER ) && __BYTE_ORDER == __ORDER_LITTLE_ENDIAN ) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + return __builtin_bswap32(a); +#elif !defined(LFS_NO_INTRINSICS) && ( \ + (defined( BYTE_ORDER ) && BYTE_ORDER == ORDER_BIG_ENDIAN ) || \ + (defined(__BYTE_ORDER ) && __BYTE_ORDER == __ORDER_BIG_ENDIAN ) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + return a; +#else + return 
(((uint8_t*)&a)[0] << 24) | + (((uint8_t*)&a)[1] << 16) | + (((uint8_t*)&a)[2] << 8) | + (((uint8_t*)&a)[3] << 0); +#endif +} + +static inline uint32_t lfs_tobe32(uint32_t a) { + return lfs_frombe32(a); +} + // Calculate CRC-32 with polynomial = 0x04c11db7 -void lfs_crc(uint32_t *crc, const void *buffer, size_t size); +uint32_t lfs_crc(uint32_t crc, const void *buffer, size_t size); // Allocate memory, only used if buffers are not provided to littlefs +// Note, memory must be 64-bit aligned static inline void *lfs_malloc(size_t size) { #ifndef LFS_NO_MALLOC return malloc(size); diff --git a/tests/corrupt.py b/tests/corrupt.py new file mode 100755 index 00000000..44f4f667 --- /dev/null +++ b/tests/corrupt.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import struct +import sys +import os +import argparse + +def corrupt(block): + with open(block, 'r+b') as file: + # skip rev + file.read(4) + + # go to last commit + tag = 0xffffffff + while True: + try: + ntag, = struct.unpack('>I', file.read(4)) + except struct.error: + break + + tag ^= ntag + size = (tag & 0x3ff) if (tag & 0x3ff) != 0x3ff else 0 + file.seek(size, os.SEEK_CUR) + + # lob off last 3 bytes + file.seek(-(size + 3), os.SEEK_CUR) + file.truncate() + +def main(args): + if args.n or not args.blocks: + with open('blocks/.history', 'rb') as file: + for i in range(int(args.n or 1)): + last, = struct.unpack('I', data) + except struct.error: + break + + tag ^= ntag + off += 4 + + type = (tag & 0x7ff00000) >> 20 + id = (tag & 0x000ffc00) >> 10 + size = (tag & 0x000003ff) >> 0 + iscrc = (type & 0x700) == 0x500 + + data = file.read(size if size != 0x3ff else 0) + if iscrc: + crc = binascii.crc32(data[:4], crc) + else: + crc = binascii.crc32(data, crc) + + print '%04x: %08x %-15s %3s %4s %-23s %-8s' % ( + off, tag, + typeof(type) + (' bad!' 
if iscrc and ~crc else ''), + id if id != 0x3ff else '.', + size if size != 0x3ff else 'x', + ' '.join('%02x' % ord(c) for c in data[:8]), + ''.join(c if c >= ' ' and c <= '~' else '.' for c in data[:8])) + + off += size if size != 0x3ff else 0 + if iscrc: + crc = 0 + tag ^= (type & 1) << 31 + + return 0 + +if __name__ == "__main__": + import sys + sys.exit(main(*sys.argv[1:])) diff --git a/tests/stats.py b/tests/stats.py index 2ba1fb65..ab21b59a 100755 --- a/tests/stats.py +++ b/tests/stats.py @@ -7,7 +7,7 @@ import re def main(): - with open('blocks/config') as file: + with open('blocks/.config') as file: s = struct.unpack('(.*);', line, re.DOTALL | re.MULTILINE) if match: tab, test, expect = match.groups() diff --git a/tests/test_alloc.sh b/tests/test_alloc.sh index 6b3b181f..3f993e9f 100755 --- a/tests/test_alloc.sh +++ b/tests/test_alloc.sh @@ -194,76 +194,125 @@ tests/test.py << TEST lfs_file_read(&lfs, &file[0], buffer, size) => size; memcmp(buffer, "exhaustion", size) => 0; lfs_file_close(&lfs, &file[0]) => 0; + lfs_remove(&lfs, "exhaustion") => 0; lfs_unmount(&lfs) => 0; TEST echo "--- Dir exhaustion test ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_remove(&lfs, "exhaustion") => 0; - lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + // find out max file size + lfs_mkdir(&lfs, "exhaustiondir") => 0; size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); - for (lfs_size_t i = 0; - i < (cfg.block_count-6)*(cfg.block_size-8); - i += size) { + lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + int count = 0; + int err; + while (true) { + err = lfs_file_write(&lfs, &file[0], buffer, size); + if (err < 0) { + break; + } + + count += 1; + } + err => LFS_ERR_NOSPC; + lfs_file_close(&lfs, &file[0]) => 0; + + lfs_remove(&lfs, "exhaustion") => 0; + lfs_remove(&lfs, "exhaustiondir") => 0; + + // see if dir fits with max file size + lfs_file_open(&lfs, &file[0], "exhaustion", 
LFS_O_WRONLY | LFS_O_CREAT); + for (int i = 0; i < count; i++) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } lfs_file_close(&lfs, &file[0]) => 0; lfs_mkdir(&lfs, "exhaustiondir") => 0; lfs_remove(&lfs, "exhaustiondir") => 0; + lfs_remove(&lfs, "exhaustion") => 0; - lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_APPEND); - size = strlen("blahblahblahblah"); - memcpy(buffer, "blahblahblahblah", size); - for (lfs_size_t i = 0; - i < (cfg.block_size-8); - i += size) { + // see if dir fits with > max file size + lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + for (int i = 0; i < count+1; i++) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } lfs_file_close(&lfs, &file[0]) => 0; lfs_mkdir(&lfs, "exhaustiondir") => LFS_ERR_NOSPC; + + lfs_remove(&lfs, "exhaustion") => 0; lfs_unmount(&lfs) => 0; TEST echo "--- Chained dir exhaustion test ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_remove(&lfs, "exhaustion") => 0; - lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + // find out max file size + lfs_mkdir(&lfs, "exhaustiondir") => 0; + for (int i = 0; i < 10; i++) { + sprintf((char*)buffer, "dirwithanexhaustivelylongnameforpadding%d", i); + lfs_mkdir(&lfs, (char*)buffer) => 0; + } size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); - for (lfs_size_t i = 0; - i < (cfg.block_count-24)*(cfg.block_size-8); - i += size) { - lfs_file_write(&lfs, &file[0], buffer, size) => size; + lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + int count = 0; + int err; + while (true) { + err = lfs_file_write(&lfs, &file[0], buffer, size); + if (err < 0) { + break; + } + + count += 1; } + err => LFS_ERR_NOSPC; lfs_file_close(&lfs, &file[0]) => 0; - for (int i = 0; i < 9; i++) { + lfs_remove(&lfs, "exhaustion") => 0; + lfs_remove(&lfs, "exhaustiondir") => 0; + for (int i = 0; i < 10; i++) { + sprintf((char*)buffer, 
"dirwithanexhaustivelylongnameforpadding%d", i); + lfs_remove(&lfs, (char*)buffer) => 0; + } + + // see that chained dir fails + lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); + for (int i = 0; i < count+1; i++) { + lfs_file_write(&lfs, &file[0], buffer, size) => size; + } + lfs_file_sync(&lfs, &file[0]) => 0; + + for (int i = 0; i < 10; i++) { sprintf((char*)buffer, "dirwithanexhaustivelylongnameforpadding%d", i); lfs_mkdir(&lfs, (char*)buffer) => 0; } lfs_mkdir(&lfs, "exhaustiondir") => LFS_ERR_NOSPC; - lfs_remove(&lfs, "exhaustion") => 0; - lfs_file_open(&lfs, &file[0], "exhaustion", LFS_O_WRONLY | LFS_O_CREAT); - size = strlen("blahblahblahblah"); - memcpy(buffer, "blahblahblahblah", size); - for (lfs_size_t i = 0; - i < (cfg.block_count-26)*(cfg.block_size-8); - i += size) { - lfs_file_write(&lfs, &file[0], buffer, size) => size; + // shorten file to try a second chained dir + while (true) { + err = lfs_mkdir(&lfs, "exhaustiondir"); + if (err != LFS_ERR_NOSPC) { + break; + } + + lfs_ssize_t filesize = lfs_file_size(&lfs, &file[0]); + filesize > 0 => true; + + lfs_file_truncate(&lfs, &file[0], filesize - size) => 0; + lfs_file_sync(&lfs, &file[0]) => 0; } - lfs_file_close(&lfs, &file[0]) => 0; + err => 0; - lfs_mkdir(&lfs, "exhaustiondir") => 0; lfs_mkdir(&lfs, "exhaustiondir2") => LFS_ERR_NOSPC; + + lfs_file_close(&lfs, &file[0]) => 0; + lfs_unmount(&lfs) => 0; TEST echo "--- Split dir test ---" @@ -274,28 +323,38 @@ TEST tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - // create one block whole for half a directory + // create one block hole for half a directory lfs_file_open(&lfs, &file[0], "bump", LFS_O_WRONLY | LFS_O_CREAT) => 0; - lfs_file_write(&lfs, &file[0], (void*)"hi", 2) => 2; + for (lfs_size_t i = 0; i < cfg.block_size; i += 2) { + memcpy(&buffer[i], "hi", 2); + } + lfs_file_write(&lfs, &file[0], buffer, cfg.block_size) => cfg.block_size; lfs_file_close(&lfs, &file[0]) => 0; lfs_file_open(&lfs, &file[0], "exhaustion", 
LFS_O_WRONLY | LFS_O_CREAT); size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < (cfg.block_count-6)*(cfg.block_size-8); + i < (cfg.block_count-4)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } lfs_file_close(&lfs, &file[0]) => 0; + // remount to force reset of lookahead + lfs_unmount(&lfs) => 0; + lfs_mount(&lfs, &cfg) => 0; + // open hole lfs_remove(&lfs, "bump") => 0; lfs_mkdir(&lfs, "splitdir") => 0; lfs_file_open(&lfs, &file[0], "splitdir/bump", LFS_O_WRONLY | LFS_O_CREAT) => 0; - lfs_file_write(&lfs, &file[0], buffer, size) => LFS_ERR_NOSPC; + for (lfs_size_t i = 0; i < cfg.block_size; i += 2) { + memcpy(&buffer[i], "hi", 2); + } + lfs_file_write(&lfs, &file[0], buffer, 2*cfg.block_size) => LFS_ERR_NOSPC; lfs_file_close(&lfs, &file[0]) => 0; lfs_unmount(&lfs) => 0; @@ -314,7 +373,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -325,7 +384,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4+1)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2+1)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -333,7 +392,6 @@ tests/test.py << TEST // remount to force reset of lookahead lfs_unmount(&lfs) => 0; - lfs_mount(&lfs, &cfg) => 0; // rewrite one file @@ -343,7 +401,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -357,7 +415,7 @@ 
tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4+1)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2+1)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -377,7 +435,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -388,7 +446,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4+1)/2)*(cfg.block_size-8); + i < ((cfg.block_count-2+1)/2)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } @@ -396,7 +454,6 @@ tests/test.py << TEST // remount to force reset of lookahead lfs_unmount(&lfs) => 0; - lfs_mount(&lfs, &cfg) => 0; // rewrite one file with a hole of one block @@ -406,7 +463,7 @@ tests/test.py << TEST size = strlen("blahblahblahblah"); memcpy(buffer, "blahblahblahblah", size); for (lfs_size_t i = 0; - i < ((cfg.block_count-4)/2 - 1)*(cfg.block_size-8); + i < ((cfg.block_count-2)/2 - 1)*(cfg.block_size-8); i += size) { lfs_file_write(&lfs, &file[0], buffer, size) => size; } diff --git a/tests/test_attrs.sh b/tests/test_attrs.sh new file mode 100755 index 00000000..e4ff4ce6 --- /dev/null +++ b/tests/test_attrs.sh @@ -0,0 +1,286 @@ +#!/bin/bash +set -eu + +echo "=== Attr tests ===" +rm -rf blocks +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + lfs_mkdir(&lfs, "hello") => 0; + lfs_file_open(&lfs, &file[0], "hello/hello", + LFS_O_WRONLY | LFS_O_CREAT) => 0; + lfs_file_write(&lfs, &file[0], "hello", strlen("hello")) + => strlen("hello"); + lfs_file_close(&lfs, &file[0]); + lfs_unmount(&lfs) => 0; +TEST + 
+echo "--- Set/get attribute ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_setattr(&lfs, "hello", 'A', "aaaa", 4) => 0; + lfs_setattr(&lfs, "hello", 'B', "bbbbbb", 6) => 0; + lfs_setattr(&lfs, "hello", 'C', "ccccc", 5) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => 6; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "bbbbbb", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "hello", 'B', "", 0) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => 0; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "\0\0\0\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_removeattr(&lfs, "hello", 'B') => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => LFS_ERR_NOATTR; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "\0\0\0\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "hello", 'B', "dddddd", 6) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => 6; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "dddddd", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "hello", 'B', "eee", 3) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => 3; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "eee\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "hello", 'A', buffer, LFS_ATTR_MAX+1) => LFS_ERR_NOSPC; + lfs_setattr(&lfs, "hello", 'B', "fffffffff", 9) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + 
lfs_getattr(&lfs, "hello", 'B', buffer+4, 6) => 9; + lfs_getattr(&lfs, "hello", 'C', buffer+10, 5) => 5; + + lfs_unmount(&lfs) => 0; +TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_getattr(&lfs, "hello", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "hello", 'B', buffer+4, 9) => 9; + lfs_getattr(&lfs, "hello", 'C', buffer+13, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "fffffffff", 9) => 0; + memcmp(buffer+13, "ccccc", 5) => 0; + + lfs_file_open(&lfs, &file[0], "hello/hello", LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], buffer, sizeof(buffer)) => strlen("hello"); + memcmp(buffer, "hello", strlen("hello")) => 0; + lfs_file_close(&lfs, &file[0]); + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Set/get root attribute ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_setattr(&lfs, "/", 'A', "aaaa", 4) => 0; + lfs_setattr(&lfs, "/", 'B', "bbbbbb", 6) => 0; + lfs_setattr(&lfs, "/", 'C', "ccccc", 5) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => 6; + lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "bbbbbb", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "/", 'B', "", 0) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => 0; + lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "\0\0\0\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_removeattr(&lfs, "/", 'B') => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => LFS_ERR_NOATTR; + lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "\0\0\0\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "/", 'B', "dddddd", 6) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => 6; + 
lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "dddddd", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "/", 'B', "eee", 3) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => 3; + lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "eee\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + lfs_setattr(&lfs, "/", 'A', buffer, LFS_ATTR_MAX+1) => LFS_ERR_NOSPC; + lfs_setattr(&lfs, "/", 'B', "fffffffff", 9) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 6) => 9; + lfs_getattr(&lfs, "/", 'C', buffer+10, 5) => 5; + lfs_unmount(&lfs) => 0; +TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_getattr(&lfs, "/", 'A', buffer, 4) => 4; + lfs_getattr(&lfs, "/", 'B', buffer+4, 9) => 9; + lfs_getattr(&lfs, "/", 'C', buffer+13, 5) => 5; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "fffffffff", 9) => 0; + memcmp(buffer+13, "ccccc", 5) => 0; + + lfs_file_open(&lfs, &file[0], "hello/hello", LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], buffer, sizeof(buffer)) => strlen("hello"); + memcmp(buffer, "hello", strlen("hello")) => 0; + lfs_file_close(&lfs, &file[0]); + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Set/get file attribute ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + struct lfs_attr attrs1[] = { + {'A', buffer, 4}, + {'B', buffer+4, 6}, + {'C', buffer+10, 5}, + }; + struct lfs_file_config cfg1 = {.attrs=attrs1, .attr_count=3}; + + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) => 0; + memcpy(buffer, "aaaa", 4); + memcpy(buffer+4, "bbbbbb", 6); + memcpy(buffer+10, "ccccc", 5); + lfs_file_close(&lfs, &file[0]) => 0; + memset(buffer, 0, 15); + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memcmp(buffer, "aaaa", 4) => 0; + 
memcmp(buffer+4, "bbbbbb", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + attrs1[1].size = 0; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memset(buffer, 0, 15); + attrs1[1].size = 6; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "\0\0\0\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + attrs1[1].size = 6; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) => 0; + memcpy(buffer+4, "dddddd", 6); + lfs_file_close(&lfs, &file[0]) => 0; + memset(buffer, 0, 15); + attrs1[1].size = 6; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "dddddd", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + attrs1[1].size = 3; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) => 0; + memcpy(buffer+4, "eee", 3); + lfs_file_close(&lfs, &file[0]) => 0; + memset(buffer, 0, 15); + attrs1[1].size = 6; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "eee\0\0\0", 6) => 0; + memcmp(buffer+10, "ccccc", 5) => 0; + + attrs1[0].size = LFS_ATTR_MAX+1; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) + => LFS_ERR_NOSPC; + + struct lfs_attr attrs2[] = { + {'A', buffer, 4}, + {'B', buffer+4, 9}, + {'C', buffer+13, 5}, + }; + struct lfs_file_config cfg2 = {.attrs=attrs2, .attr_count=3}; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDWR, &cfg2) => 0; + memcpy(buffer+4, "fffffffff", 9); + lfs_file_close(&lfs, &file[0]) => 0; + attrs1[0].size = 4; + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg1) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + + lfs_unmount(&lfs) => 0; 
+TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + struct lfs_attr attrs2[] = { + {'A', buffer, 4}, + {'B', buffer+4, 9}, + {'C', buffer+13, 5}, + }; + struct lfs_file_config cfg2 = {.attrs=attrs2, .attr_count=3}; + + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_RDONLY, &cfg2) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + memcmp(buffer, "aaaa", 4) => 0; + memcmp(buffer+4, "fffffffff", 9) => 0; + memcmp(buffer+13, "ccccc", 5) => 0; + + lfs_file_open(&lfs, &file[0], "hello/hello", LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], buffer, sizeof(buffer)) => strlen("hello"); + memcmp(buffer, "hello", strlen("hello")) => 0; + lfs_file_close(&lfs, &file[0]); + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Deferred file attributes ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + struct lfs_attr attrs1[] = { + {'B', "gggg", 4}, + {'C', "", 0}, + {'D', "hhhh", 4}, + }; + struct lfs_file_config cfg1 = {.attrs=attrs1, .attr_count=3}; + + lfs_file_opencfg(&lfs, &file[0], "hello/hello", LFS_O_WRONLY, &cfg1) => 0; + + lfs_getattr(&lfs, "hello/hello", 'B', buffer, 9) => 9; + lfs_getattr(&lfs, "hello/hello", 'C', buffer+9, 9) => 5; + lfs_getattr(&lfs, "hello/hello", 'D', buffer+18, 9) => LFS_ERR_NOATTR; + memcmp(buffer, "fffffffff", 9) => 0; + memcmp(buffer+9, "ccccc\0\0\0\0", 9) => 0; + memcmp(buffer+18, "\0\0\0\0\0\0\0\0\0", 9) => 0; + + lfs_file_sync(&lfs, &file[0]) => 0; + lfs_getattr(&lfs, "hello/hello", 'B', buffer, 9) => 4; + lfs_getattr(&lfs, "hello/hello", 'C', buffer+9, 9) => 0; + lfs_getattr(&lfs, "hello/hello", 'D', buffer+18, 9) => 4; + memcmp(buffer, "gggg\0\0\0\0\0", 9) => 0; + memcmp(buffer+9, "\0\0\0\0\0\0\0\0\0", 9) => 0; + memcmp(buffer+18, "hhhh\0\0\0\0\0", 9) => 0; + + lfs_file_close(&lfs, &file[0]) => 0; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Results ---" +tests/stats.py diff --git a/tests/test_corrupt.sh b/tests/test_corrupt.sh index 44f1caee..81b06747 100755 --- a/tests/test_corrupt.sh +++ b/tests/test_corrupt.sh @@ 
-71,24 +71,25 @@ echo "--- Sanity check ---" rm -rf blocks lfs_mktree lfs_chktree +BLOCKS="$(ls blocks | grep -vw '[01]')" echo "--- Block corruption ---" -for i in {0..33} +for b in $BLOCKS do rm -rf blocks mkdir blocks - ln -s /dev/zero blocks/$(printf '%x' $i) + ln -s /dev/zero blocks/$b lfs_mktree lfs_chktree done echo "--- Block persistance ---" -for i in {0..33} +for b in $BLOCKS do rm -rf blocks mkdir blocks lfs_mktree - chmod a-w blocks/$(printf '%x' $i) + chmod a-w blocks/$b || true lfs_mktree lfs_chktree done @@ -96,7 +97,7 @@ done echo "--- Big region corruption ---" rm -rf blocks mkdir blocks -for i in {2..255} +for i in {2..512} do ln -s /dev/zero blocks/$(printf '%x' $i) done @@ -106,7 +107,7 @@ lfs_chktree echo "--- Alternating corruption ---" rm -rf blocks mkdir blocks -for i in {2..511..2} +for i in {2..1024..2} do ln -s /dev/zero blocks/$(printf '%x' $i) done diff --git a/tests/test_dirs.sh b/tests/test_dirs.sh index 874808d4..5f2020f0 100755 --- a/tests/test_dirs.sh +++ b/tests/test_dirs.sh @@ -43,11 +43,11 @@ tests/test.py << TEST strcmp(info.name, "..") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "potato") => 0; - info.type => LFS_TYPE_DIR; - lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "burito") => 0; info.type => LFS_TYPE_REG; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "potato") => 0; + info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; lfs_unmount(&lfs) => 0; @@ -85,10 +85,10 @@ tests/test.py << TEST strcmp(info.name, "baked") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "sweet") => 0; + strcmp(info.name, "fried") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "fried") => 0; + strcmp(info.name, "sweet") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; @@ 
-100,7 +100,7 @@ tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_mkdir(&lfs, "cactus") => 0; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "cactus/test%d", i); + sprintf((char*)buffer, "cactus/test%03d", i); lfs_mkdir(&lfs, (char*)buffer) => 0; } lfs_unmount(&lfs) => 0; @@ -115,7 +115,7 @@ tests/test.py << TEST strcmp(info.name, "..") => 0; info.type => LFS_TYPE_DIR; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "test%d", i); + sprintf((char*)buffer, "test%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; info.type => LFS_TYPE_DIR; @@ -208,10 +208,10 @@ tests/test.py << TEST strcmp(info.name, "baked") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "sweet") => 0; + strcmp(info.name, "fried") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "fried") => 0; + strcmp(info.name, "sweet") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; @@ -241,10 +241,10 @@ tests/test.py << TEST strcmp(info.name, "baked") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "sweet") => 0; + strcmp(info.name, "fried") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "fried") => 0; + strcmp(info.name, "sweet") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; @@ -273,10 +273,10 @@ tests/test.py << TEST strcmp(info.name, "baked") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "sweet") => 0; + strcmp(info.name, "fried") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "fried") => 0; + strcmp(info.name, "sweet") => 0; info.type => LFS_TYPE_DIR; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; @@ -330,8 
+330,8 @@ echo "--- Multi-block rename ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "cactus/test%d", i); - sprintf((char*)wbuffer, "cactus/tedd%d", i); + sprintf((char*)buffer, "cactus/test%03d", i); + sprintf((char*)wbuffer, "cactus/tedd%03d", i); lfs_rename(&lfs, (char*)buffer, (char*)wbuffer) => 0; } lfs_unmount(&lfs) => 0; @@ -346,7 +346,7 @@ tests/test.py << TEST strcmp(info.name, "..") => 0; info.type => LFS_TYPE_DIR; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "tedd%d", i); + sprintf((char*)buffer, "tedd%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; info.type => LFS_TYPE_DIR; @@ -361,7 +361,7 @@ tests/test.py << TEST lfs_remove(&lfs, "cactus") => LFS_ERR_NOTEMPTY; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "cactus/tedd%d", i); + sprintf((char*)buffer, "cactus/tedd%03d", i); lfs_remove(&lfs, (char*)buffer) => 0; } @@ -390,7 +390,7 @@ tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_mkdir(&lfs, "prickly-pear") => 0; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "prickly-pear/test%d", i); + sprintf((char*)buffer, "prickly-pear/test%03d", i); lfs_file_open(&lfs, &file[0], (char*)buffer, LFS_O_WRONLY | LFS_O_CREAT) => 0; size = 6; @@ -410,7 +410,7 @@ tests/test.py << TEST strcmp(info.name, "..") => 0; info.type => LFS_TYPE_DIR; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "test%d", i); + sprintf((char*)buffer, "test%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; info.type => LFS_TYPE_REG; @@ -424,8 +424,8 @@ echo "--- Multi-block rename with files ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "prickly-pear/test%d", i); - sprintf((char*)wbuffer, "prickly-pear/tedd%d", i); + sprintf((char*)buffer, "prickly-pear/test%03d", i); + sprintf((char*)wbuffer, 
"prickly-pear/tedd%03d", i); lfs_rename(&lfs, (char*)buffer, (char*)wbuffer) => 0; } lfs_unmount(&lfs) => 0; @@ -440,7 +440,7 @@ tests/test.py << TEST strcmp(info.name, "..") => 0; info.type => LFS_TYPE_DIR; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "tedd%d", i); + sprintf((char*)buffer, "tedd%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; info.type => LFS_TYPE_REG; @@ -456,7 +456,7 @@ tests/test.py << TEST lfs_remove(&lfs, "prickly-pear") => LFS_ERR_NOTEMPTY; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "prickly-pear/tedd%d", i); + sprintf((char*)buffer, "prickly-pear/tedd%03d", i); lfs_remove(&lfs, (char*)buffer) => 0; } diff --git a/tests/test_entries.sh b/tests/test_entries.sh new file mode 100755 index 00000000..4728b7f3 --- /dev/null +++ b/tests/test_entries.sh @@ -0,0 +1,221 @@ +#!/bin/bash +set -eu + +# Note: These tests are intended for 512 byte inline size at different +# inline sizes they should still pass, but won't be testing anything + +echo "=== Entry tests ===" +rm -rf blocks +function read_file { +cat << TEST + + size = $2; + lfs_file_open(&lfs, &file[0], "$1", LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], rbuffer, size) => size; + memcmp(rbuffer, wbuffer, size) => 0; + lfs_file_close(&lfs, &file[0]) => 0; +TEST +} + +function write_file { +cat << TEST + + size = $2; + lfs_file_open(&lfs, &file[0], "$1", + LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0; + memset(wbuffer, 'c', size); + lfs_file_write(&lfs, &file[0], wbuffer, size) => size; + lfs_file_close(&lfs, &file[0]) => 0; +TEST +} + +echo "--- Entry grow test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 20) + $(write_file "hi1" 20) + $(write_file "hi2" 20) + $(write_file "hi3" 20) + + $(read_file "hi1" 20) + $(write_file "hi1" 200) + + $(read_file "hi0" 20) + $(read_file "hi1" 200) + $(read_file "hi2" 20) + $(read_file "hi3" 20) + 
lfs_unmount(&lfs) => 0; +TEST + +echo "--- Entry shrink test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 20) + $(write_file "hi1" 200) + $(write_file "hi2" 20) + $(write_file "hi3" 20) + + $(read_file "hi1" 200) + $(write_file "hi1" 20) + + $(read_file "hi0" 20) + $(read_file "hi1" 20) + $(read_file "hi2" 20) + $(read_file "hi3" 20) + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Entry spill test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 200) + $(write_file "hi1" 200) + $(write_file "hi2" 200) + $(write_file "hi3" 200) + + $(read_file "hi0" 200) + $(read_file "hi1" 200) + $(read_file "hi2" 200) + $(read_file "hi3" 200) + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Entry push spill test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 200) + $(write_file "hi1" 20) + $(write_file "hi2" 200) + $(write_file "hi3" 200) + + $(read_file "hi1" 20) + $(write_file "hi1" 200) + + $(read_file "hi0" 200) + $(read_file "hi1" 200) + $(read_file "hi2" 200) + $(read_file "hi3" 200) + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Entry push spill two test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 200) + $(write_file "hi1" 20) + $(write_file "hi2" 200) + $(write_file "hi3" 200) + $(write_file "hi4" 200) + + $(read_file "hi1" 20) + $(write_file "hi1" 200) + + $(read_file "hi0" 200) + $(read_file "hi1" 200) + $(read_file "hi2" 200) + $(read_file "hi3" 200) + $(read_file "hi4" 200) + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Entry drop test ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + $(write_file "hi0" 200) + $(write_file "hi1" 200) + $(write_file "hi2" 200) + $(write_file "hi3" 200) + + lfs_remove(&lfs, "hi1") => 0; + lfs_stat(&lfs, "hi1", &info) => LFS_ERR_NOENT; + $(read_file 
"hi0" 200) + $(read_file "hi2" 200) + $(read_file "hi3" 200) + + lfs_remove(&lfs, "hi2") => 0; + lfs_stat(&lfs, "hi2", &info) => LFS_ERR_NOENT; + $(read_file "hi0" 200) + $(read_file "hi3" 200) + + lfs_remove(&lfs, "hi3") => 0; + lfs_stat(&lfs, "hi3", &info) => LFS_ERR_NOENT; + $(read_file "hi0" 200) + + lfs_remove(&lfs, "hi0") => 0; + lfs_stat(&lfs, "hi0", &info) => LFS_ERR_NOENT; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Create too big ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + memset(buffer, 'm', 200); + buffer[200] = '\0'; + + size = 400; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0; + memset(wbuffer, 'c', size); + lfs_file_write(&lfs, &file[0], wbuffer, size) => size; + lfs_file_close(&lfs, &file[0]) => 0; + + size = 400; + lfs_file_open(&lfs, &file[0], (char*)buffer, LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], rbuffer, size) => size; + memcmp(rbuffer, wbuffer, size) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Resize too big ---" +tests/test.py << TEST + lfs_format(&lfs, &cfg) => 0; + + lfs_mount(&lfs, &cfg) => 0; + memset(buffer, 'm', 200); + buffer[200] = '\0'; + + size = 40; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0; + memset(wbuffer, 'c', size); + lfs_file_write(&lfs, &file[0], wbuffer, size) => size; + lfs_file_close(&lfs, &file[0]) => 0; + + size = 40; + lfs_file_open(&lfs, &file[0], (char*)buffer, LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], rbuffer, size) => size; + memcmp(rbuffer, wbuffer, size) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + + size = 400; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0; + memset(wbuffer, 'c', size); + lfs_file_write(&lfs, &file[0], wbuffer, size) => size; + lfs_file_close(&lfs, &file[0]) => 0; + + size = 400; + lfs_file_open(&lfs, &file[0], (char*)buffer, 
LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], rbuffer, size) => size; + memcmp(rbuffer, wbuffer, size) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Results ---" +tests/stats.py diff --git a/tests/test_files.sh b/tests/test_files.sh index bbecea92..5251c61f 100755 --- a/tests/test_files.sh +++ b/tests/test_files.sh @@ -29,7 +29,7 @@ tests/test.py << TEST TEST w_test() { -tests/test.py << TEST +tests/test.py ${4:-} << TEST size = $1; lfs_size_t chunk = 31; srand(0); @@ -115,21 +115,21 @@ tests/test.py << TEST info.type => LFS_TYPE_REG; info.size => strlen("Hello World!\n"); lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "smallavacado") => 0; + strcmp(info.name, "largeavacado") => 0; info.type => LFS_TYPE_REG; - info.size => $SMALLSIZE; + info.size => $LARGESIZE; lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "mediumavacado") => 0; info.type => LFS_TYPE_REG; info.size => $MEDIUMSIZE; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "largeavacado") => 0; - info.type => LFS_TYPE_REG; - info.size => $LARGESIZE; - lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "noavacado") => 0; info.type => LFS_TYPE_REG; info.size => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "smallavacado") => 0; + info.type => LFS_TYPE_REG; + info.size => $SMALLSIZE; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; lfs_unmount(&lfs) => 0; diff --git a/tests/test_format.sh b/tests/test_format.sh index b9071015..5a115359 100755 --- a/tests/test_format.sh +++ b/tests/test_format.sh @@ -9,39 +9,40 @@ tests/test.py << TEST lfs_format(&lfs, &cfg) => 0; TEST -echo "--- Invalid superblocks ---" -ln -f -s /dev/zero blocks/0 -ln -f -s /dev/zero blocks/1 -tests/test.py << TEST - lfs_format(&lfs, &cfg) => LFS_ERR_CORRUPT; -TEST -rm blocks/0 blocks/1 - echo "--- Basic mounting ---" tests/test.py << TEST lfs_format(&lfs, &cfg) => 0; -TEST -tests/test.py << TEST + 
lfs_mount(&lfs, &cfg) => 0; lfs_unmount(&lfs) => 0; TEST -echo "--- Invalid mount ---" +echo "--- Invalid superblocks ---" +ln -f -s /dev/zero blocks/0 +ln -f -s /dev/zero blocks/1 tests/test.py << TEST - lfs_format(&lfs, &cfg) => 0; + lfs_format(&lfs, &cfg) => LFS_ERR_NOSPC; TEST rm blocks/0 blocks/1 + +echo "--- Invalid mount ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => LFS_ERR_CORRUPT; TEST -echo "--- Valid corrupt mount ---" +echo "--- Expanding superblock ---" tests/test.py << TEST lfs_format(&lfs, &cfg) => 0; + lfs_mount(&lfs, &cfg) => 0; + for (int i = 0; i < 100; i++) { + lfs_mkdir(&lfs, "dummy") => 0; + lfs_remove(&lfs, "dummy") => 0; + } + lfs_unmount(&lfs) => 0; TEST -rm blocks/0 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; + lfs_mkdir(&lfs, "dummy") => 0; lfs_unmount(&lfs) => 0; TEST diff --git a/tests/test_move.sh b/tests/test_move.sh index 9e5ababf..458ca1ed 100755 --- a/tests/test_move.sh +++ b/tests/test_move.sh @@ -59,7 +59,7 @@ tests/test.py << TEST lfs_rename(&lfs, "b/hello", "c/hello") => 0; lfs_unmount(&lfs) => 0; TEST -rm -v blocks/7 +tests/corrupt.py -n 1 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0; @@ -86,8 +86,7 @@ tests/test.py << TEST lfs_rename(&lfs, "c/hello", "d/hello") => 0; lfs_unmount(&lfs) => 0; TEST -rm -v blocks/8 -rm -v blocks/a +tests/corrupt.py -n 2 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0; @@ -108,6 +107,32 @@ tests/test.py << TEST lfs_unmount(&lfs) => 0; TEST +echo "--- Move file after corrupt ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_rename(&lfs, "c/hello", "d/hello") => 0; + lfs_unmount(&lfs) => 0; +TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_dir_open(&lfs, &dir[0], "c") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, ".") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "..") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 0; + 
lfs_dir_close(&lfs, &dir[0]) => 0; + lfs_dir_open(&lfs, &dir[0], "d") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, ".") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "..") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "hello") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 0; + lfs_unmount(&lfs) => 0; +TEST + echo "--- Move dir ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; @@ -140,7 +165,7 @@ tests/test.py << TEST lfs_rename(&lfs, "b/hi", "c/hi") => 0; lfs_unmount(&lfs) => 0; TEST -rm -v blocks/7 +tests/corrupt.py -n 1 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0; @@ -156,8 +181,6 @@ tests/test.py << TEST lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "..") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "hello") => 0; - lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "hi") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_unmount(&lfs) => 0; @@ -169,8 +192,7 @@ tests/test.py << TEST lfs_rename(&lfs, "c/hi", "d/hi") => 0; lfs_unmount(&lfs) => 0; TEST -rm -v blocks/9 -rm -v blocks/a +tests/corrupt.py -n 2 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0; @@ -179,9 +201,33 @@ tests/test.py << TEST lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "..") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "hi") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 0; + lfs_dir_close(&lfs, &dir[0]) => 0; + lfs_dir_open(&lfs, &dir[0], "d") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, ".") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "..") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "hello") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 0; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Move dir after corrupt ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_rename(&lfs, 
"c/hi", "d/hi") => 0; + lfs_unmount(&lfs) => 0; +TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_dir_open(&lfs, &dir[0], "c") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; - strcmp(info.name, "hi") => 0; + strcmp(info.name, ".") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "..") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; lfs_dir_open(&lfs, &dir[0], "d") => 0; @@ -189,6 +235,10 @@ tests/test.py << TEST strcmp(info.name, ".") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "..") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "hello") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "hi") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_unmount(&lfs) => 0; TEST @@ -199,27 +249,73 @@ tests/test.py << TEST lfs_dir_open(&lfs, &dir[0], "a/hi") => LFS_ERR_NOENT; lfs_dir_open(&lfs, &dir[0], "b/hi") => LFS_ERR_NOENT; - lfs_dir_open(&lfs, &dir[0], "d/hi") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "c/hi") => LFS_ERR_NOENT; - lfs_dir_open(&lfs, &dir[0], "c/hi") => 0; + lfs_dir_open(&lfs, &dir[0], "d/hi") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, ".") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "..") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "bonjour") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "hola") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "ohayo") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 0; + lfs_dir_close(&lfs, &dir[0]) => 0; + + lfs_dir_open(&lfs, &dir[0], "a/hello") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "b/hello") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "c/hello") => LFS_ERR_NOENT; + + lfs_file_open(&lfs, &file[0], "d/hello", LFS_O_RDONLY) => 0; + lfs_file_read(&lfs, &file[0], buffer, 5) => 5; + memcmp(buffer, "hola\n", 5) => 0; + lfs_file_read(&lfs, &file[0], buffer, 8) => 8; + 
memcmp(buffer, "bonjour\n", 8) => 0; + lfs_file_read(&lfs, &file[0], buffer, 6) => 6; + memcmp(buffer, "ohayo\n", 6) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Move state stealing ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + + lfs_remove(&lfs, "b") => 0; + lfs_remove(&lfs, "c") => 0; + + lfs_unmount(&lfs) => 0; +TEST +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + + lfs_dir_open(&lfs, &dir[0], "a/hi") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "b") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "c") => LFS_ERR_NOENT; + + lfs_dir_open(&lfs, &dir[0], "d/hi") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, ".") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "..") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "bonjour") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; + strcmp(info.name, "hola") => 0; + lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, "ohayo") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 0; lfs_dir_close(&lfs, &dir[0]) => 0; lfs_dir_open(&lfs, &dir[0], "a/hello") => LFS_ERR_NOENT; - lfs_dir_open(&lfs, &dir[0], "b/hello") => LFS_ERR_NOENT; - lfs_dir_open(&lfs, &dir[0], "d/hello") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "b") => LFS_ERR_NOENT; + lfs_dir_open(&lfs, &dir[0], "c") => LFS_ERR_NOENT; - lfs_file_open(&lfs, &file[0], "c/hello", LFS_O_RDONLY) => 0; + lfs_file_open(&lfs, &file[0], "d/hello", LFS_O_RDONLY) => 0; lfs_file_read(&lfs, &file[0], buffer, 5) => 5; memcmp(buffer, "hola\n", 5) => 0; lfs_file_read(&lfs, &file[0], buffer, 8) => 8; diff --git a/tests/test_orphan.sh b/tests/test_orphan.sh index 71d6d4fc..9c2cb7b6 100755 --- a/tests/test_orphan.sh +++ b/tests/test_orphan.sh @@ -15,25 +15,29 @@ tests/test.py << TEST lfs_mkdir(&lfs, "parent/child") => 0; lfs_remove(&lfs, "parent/orphan") => 0; TEST -# remove most recent file, this should be the update to the previous +# corrupt most recent 
commit, this should be the update to the previous # linked-list entry and should orphan the child -rm -v blocks/8 +tests/corrupt.py tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; + + lfs_stat(&lfs, "parent/orphan", &info) => LFS_ERR_NOENT; + lfs_ssize_t before = lfs_fs_size(&lfs); + before => 8; + + lfs_unmount(&lfs) => 0; + lfs_mount(&lfs, &cfg) => 0; + lfs_stat(&lfs, "parent/orphan", &info) => LFS_ERR_NOENT; - unsigned before = 0; - lfs_traverse(&lfs, test_count, &before) => 0; - test_log("before", before); + lfs_ssize_t orphaned = lfs_fs_size(&lfs); + orphaned => 8; - lfs_deorphan(&lfs) => 0; + lfs_mkdir(&lfs, "parent/otherchild") => 0; lfs_stat(&lfs, "parent/orphan", &info) => LFS_ERR_NOENT; - unsigned after = 0; - lfs_traverse(&lfs, test_count, &after) => 0; - test_log("after", after); + lfs_ssize_t deorphaned = lfs_fs_size(&lfs); + deorphaned => 8; - int diff = before - after; - diff => 2; lfs_unmount(&lfs) => 0; TEST diff --git a/tests/test_paths.sh b/tests/test_paths.sh index 79c4e665..3cffcfea 100755 --- a/tests/test_paths.sh +++ b/tests/test_paths.sh @@ -128,6 +128,14 @@ tests/test.py << TEST lfs_mkdir(&lfs, "/") => LFS_ERR_EXIST; lfs_file_open(&lfs, &file[0], "/", LFS_O_WRONLY | LFS_O_CREAT) => LFS_ERR_ISDIR; + + // more corner cases + lfs_remove(&lfs, "") => LFS_ERR_INVAL; + lfs_remove(&lfs, ".") => LFS_ERR_INVAL; + lfs_remove(&lfs, "..") => LFS_ERR_INVAL; + lfs_remove(&lfs, "/") => LFS_ERR_INVAL; + lfs_remove(&lfs, "//") => LFS_ERR_INVAL; + lfs_remove(&lfs, "./") => LFS_ERR_INVAL; lfs_unmount(&lfs) => 0; TEST @@ -139,5 +147,55 @@ tests/test.py << TEST lfs_unmount(&lfs) => 0; TEST +echo "--- Superblock conflict test ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + lfs_mkdir(&lfs, "littlefs") => 0; + lfs_remove(&lfs, "littlefs") => 0; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Max path test ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + memset(buffer, 'w', LFS_NAME_MAX+1); + buffer[LFS_NAME_MAX+1] = '\0'; + lfs_mkdir(&lfs, 
(char*)buffer) => LFS_ERR_NAMETOOLONG; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT) => LFS_ERR_NAMETOOLONG; + + memcpy(buffer, "coffee/", strlen("coffee/")); + memset(buffer+strlen("coffee/"), 'w', LFS_NAME_MAX+1); + buffer[strlen("coffee/")+LFS_NAME_MAX+1] = '\0'; + lfs_mkdir(&lfs, (char*)buffer) => LFS_ERR_NAMETOOLONG; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT) => LFS_ERR_NAMETOOLONG; + lfs_unmount(&lfs) => 0; +TEST + +echo "--- Really big path test ---" +tests/test.py << TEST + lfs_mount(&lfs, &cfg) => 0; + memset(buffer, 'w', LFS_NAME_MAX); + buffer[LFS_NAME_MAX] = '\0'; + lfs_mkdir(&lfs, (char*)buffer) => 0; + lfs_remove(&lfs, (char*)buffer) => 0; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + lfs_remove(&lfs, (char*)buffer) => 0; + + memcpy(buffer, "coffee/", strlen("coffee/")); + memset(buffer+strlen("coffee/"), 'w', LFS_NAME_MAX); + buffer[strlen("coffee/")+LFS_NAME_MAX] = '\0'; + lfs_mkdir(&lfs, (char*)buffer) => 0; + lfs_remove(&lfs, (char*)buffer) => 0; + lfs_file_open(&lfs, &file[0], (char*)buffer, + LFS_O_WRONLY | LFS_O_CREAT) => 0; + lfs_file_close(&lfs, &file[0]) => 0; + lfs_remove(&lfs, (char*)buffer) => 0; + lfs_unmount(&lfs) => 0; +TEST + echo "--- Results ---" tests/stats.py diff --git a/tests/test_seek.sh b/tests/test_seek.sh index aa8e6432..2cd711ad 100755 --- a/tests/test_seek.sh +++ b/tests/test_seek.sh @@ -12,7 +12,7 @@ tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_mkdir(&lfs, "hello") => 0; for (int i = 0; i < $LARGESIZE; i++) { - sprintf((char*)buffer, "hello/kitty%d", i); + sprintf((char*)buffer, "hello/kitty%03d", i); lfs_file_open(&lfs, &file[0], (char*)buffer, LFS_O_WRONLY | LFS_O_CREAT | LFS_O_APPEND) => 0; @@ -39,7 +39,7 @@ tests/test.py << TEST lfs_soff_t pos; int i; for (i = 0; i < $SMALLSIZE; i++) { - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", 
i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; pos = lfs_dir_tell(&lfs, &dir[0]); @@ -47,12 +47,12 @@ tests/test.py << TEST pos >= 0 => 1; lfs_dir_seek(&lfs, &dir[0], pos) => 0; - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; lfs_dir_rewind(&lfs, &dir[0]) => 0; - sprintf((char*)buffer, "kitty%d", 0); + sprintf((char*)buffer, "kitty%03d", 0); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, ".") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; @@ -61,7 +61,7 @@ tests/test.py << TEST strcmp(info.name, (char*)buffer) => 0; lfs_dir_seek(&lfs, &dir[0], pos) => 0; - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; @@ -81,7 +81,7 @@ tests/test.py << TEST lfs_soff_t pos; int i; for (i = 0; i < $MEDIUMSIZE; i++) { - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; pos = lfs_dir_tell(&lfs, &dir[0]); @@ -89,12 +89,12 @@ tests/test.py << TEST pos >= 0 => 1; lfs_dir_seek(&lfs, &dir[0], pos) => 0; - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; lfs_dir_rewind(&lfs, &dir[0]) => 0; - sprintf((char*)buffer, "kitty%d", 0); + sprintf((char*)buffer, "kitty%03d", 0); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, ".") => 0; lfs_dir_read(&lfs, &dir[0], &info) => 1; @@ -103,7 +103,7 @@ tests/test.py << TEST strcmp(info.name, (char*)buffer) => 0; lfs_dir_seek(&lfs, &dir[0], pos) => 0; - sprintf((char*)buffer, "kitty%d", i); + sprintf((char*)buffer, "kitty%03d", i); lfs_dir_read(&lfs, &dir[0], &info) => 1; strcmp(info.name, (char*)buffer) => 0; @@ -114,7 +114,7 @@ TEST echo "--- Simple file 
seek ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDONLY) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDONLY) => 0; lfs_soff_t pos; size = strlen("kittycatcat"); @@ -163,7 +163,7 @@ TEST echo "--- Large file seek ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDONLY) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDONLY) => 0; lfs_soff_t pos; size = strlen("kittycatcat"); @@ -212,7 +212,7 @@ TEST echo "--- Simple file seek and write ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDWR) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDWR) => 0; lfs_soff_t pos; size = strlen("kittycatcat"); @@ -253,7 +253,7 @@ TEST echo "--- Large file seek and write ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDWR) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDWR) => 0; lfs_soff_t pos; size = strlen("kittycatcat"); @@ -296,7 +296,7 @@ TEST echo "--- Boundary seek and write ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDWR) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDWR) => 0; size = strlen("hedgehoghog"); const lfs_soff_t offsets[] = {512, 1020, 513, 1021, 511, 1019}; @@ -324,7 +324,7 @@ TEST echo "--- Out-of-bounds seek ---" tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; - lfs_file_open(&lfs, &file[0], "hello/kitty42", LFS_O_RDWR) => 0; + lfs_file_open(&lfs, &file[0], "hello/kitty042", LFS_O_RDWR) => 0; size = strlen("kittycatcat"); lfs_file_size(&lfs, &file[0]) => $LARGESIZE*size;