From 5161ab0f2d08d705cb6b42a0542720dfa789f3b2 Mon Sep 17 00:00:00 2001 From: Mark Juggurnauth-Thomas Date: Thu, 19 Oct 2023 09:22:09 +0100 Subject: [PATCH 1/2] Add a test demonstrating underread of underlying buffers When reading from a decompressor that was constructed using `Decoder::with_buffer`, the decoder may not consume all of the input by the time it returns all of the decompressed output. This means that when you call `.finish()` on the decoder to get the underlying stream back, that stream is not positioned just past the end of the compressed data. This commit adds a test that demonstrates the issue. --- assets/zeros64.zst | Bin 0 -> 21 bytes tests/issue_251.rs | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 assets/zeros64.zst create mode 100644 tests/issue_251.rs diff --git a/assets/zeros64.zst b/assets/zeros64.zst new file mode 100644 index 0000000000000000000000000000000000000000..37cb6753c20948e0013f7f3223570491f0fcaf45 GIT binary patch literal 21 ccmdPcs{dET!IgnQfPsOLVKRf5q}C31067c Date: Thu, 19 Oct 2023 09:22:13 +0100 Subject: [PATCH 2/2] Ensure reader buffer is flushed when extracting reader After the decoder stream has yielded all of the uncompressed data, it is possible for the input stream to still not be fully consumed. This means that if we extract the inner stream at this point, it will not be positioned at the end of the compressed data. From the [zstd documentation](https://facebook.github.io/zstd/zstd_manual.html#Chapter9) for `ZSTD_decompressStream`: > But if `output.pos == output.size`, there might be some data left within internal buffers. > In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. This is only necessary if the caller wants the stream back, so at that point we can force an additional call to `ZSTD_decompressStream` by reading into a zero-length buffer. 
--- src/stream/read/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/stream/read/mod.rs b/src/stream/read/mod.rs index a3a947b0..8a82cc86 100644 --- a/src/stream/read/mod.rs +++ b/src/stream/read/mod.rs @@ -93,7 +93,9 @@ impl<'a, R: BufRead> Decoder<'a, R> { /// /// Calling `finish()` is not *required* after reading a stream - /// just use it if you need to get the `Read` back. - pub fn finish(self) -> R { + pub fn finish(mut self) -> R { + // Ensure the input buffers have been flushed by reading to a zero-length buffer. + let _ = self.reader.read(&mut [0; 0]); self.reader.into_inner() }