From fdf516b177a40119216be231d813edde59c583f1 Mon Sep 17 00:00:00 2001 From: Allison Karlitskaya Date: Tue, 15 Oct 2024 10:55:25 +0200 Subject: [PATCH] FileSystem: optimize lookup for last DirEnt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do directory lookups in the context of iterating over tarballs, which are usually sorted. We store our DirEnts sorted too. It's therefore very likely that if we're inserting an item, it'll be at the end of the array, and if we're looking for a directory to perform operations inside of, it's probably going to be the last DirEnt. That means that instead of starting our binary search on the middle element, we want to start with the last one. This is roughly a 10% improvement on the time it takes to compose the merged filesystem tree — in-memory — and brings the 'create a dumpfile for a ~250k files multi-layer container image' use case from consistently slightly more than 0.5s to consistently slightly less than 0.5s. --- src/image.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/image.rs b/src/image.rs index 02ebb0c..766b0cb 100644 --- a/src/image.rs +++ b/src/image.rs @@ -1,4 +1,8 @@ use std::{ + cmp::{ + Ord, + Ordering, + }, ffi::{ OsStr, OsString, @@ -60,10 +64,20 @@ pub struct DirEnt { impl Directory { pub fn find_entry(&self, name: &OsStr) -> Result<usize, usize> { - // performance TODO: on the first pass through we'll almost always want the last entry - // (since the layer is sorted and we're always inserting into the directory that we just - // created) maybe add a special case for that? - self.entries.binary_search_by_key(&name, |e| &e.name) + // OCI layer tarballs are typically sorted, with the entries for a particular directory + // written out immediately after that directory was created. That means that it's very + // likely that the thing we're looking for is either the last entry or the insertion point + // immediately following it. 
Fast-path those cases by essentially unrolling the first + // iteration of the binary search. + if let Some(last_entry) = self.entries.last() { + match name.cmp(&last_entry.name) { + Ordering::Equal => return Ok(self.entries.len() - 1), // the last item, indeed + Ordering::Greater => return Err(self.entries.len()), // need to append + Ordering::Less => self.entries.binary_search_by_key(&name, |e| &e.name) + } + } else { + Err(0) + } } pub fn recurse<'a>(&'a mut self, name: &OsStr) -> Result<&'a mut Directory> {