-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gz by definition is not able to preserve the sparse nature of files. Using some code from the crc project and gluing it together with our decompression code, we can re-create the sparseness of a file. One downside is that the operation is a little bit slower, but I think the gains from the sparse file are well worth it in IO alone. There are a number of TODOs in this PR that would be ripe for quick-hitting fixes. [NO NEW TESTS NEEDED] Signed-off-by: Brent Baude <[email protected]>
- Loading branch information
Brent Baude
committed
Feb 5, 2024
1 parent
33bda05
commit 5059b84
Showing
5 changed files
with
237 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package compression | ||
|
||
import ( | ||
"bytes" | ||
"io" | ||
"os" | ||
) | ||
|
||
// TODO: vendor this in; the pkg/os directory is small and the added code should be negligible.
/*
	NOTE: copy.go and copy_test.go were lifted from github.com/crc-org/crc because
	I was having trouble getting Go to vendor it properly. All credit to them.
*/
|
||
func copyFile(src, dst string, sparse bool) error { | ||
in, err := os.Open(src) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
defer in.Close() | ||
|
||
out, err := os.Create(dst) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
defer out.Close() | ||
|
||
if sparse { | ||
if _, err = CopySparse(out, in); err != nil { | ||
return err | ||
} | ||
} else { | ||
if _, err = io.Copy(out, in); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
fi, err := os.Stat(src) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if err = os.Chmod(dst, fi.Mode()); err != nil { | ||
return err | ||
} | ||
|
||
return out.Close() | ||
} | ||
|
||
func CopyFile(src, dst string) error { | ||
return copyFile(src, dst, false) | ||
} | ||
|
||
func CopyFileSparse(src, dst string) error { | ||
return copyFile(src, dst, true) | ||
} | ||
|
||
func CopySparse(dst io.WriteSeeker, src io.Reader) (int64, error) { | ||
copyBuf := make([]byte, copyChunkSize) | ||
sparseWriter := newSparseWriter(dst) | ||
|
||
bytesWritten, err := io.CopyBuffer(sparseWriter, src, copyBuf) | ||
if err != nil { | ||
return bytesWritten, err | ||
} | ||
err = sparseWriter.Close() | ||
return bytesWritten, err | ||
} | ||
|
||
// sparseWriter wraps an io.WriteSeeker so that all-zero chunks passed to
// Write are skipped with a relative Seek instead of being written.
type sparseWriter struct {
	// writer is the destination that receives the data writes and the
	// seeks used to skip zero chunks.
	writer io.WriteSeeker
	// lastChunkSparse records whether the most recent Write was skipped
	// with a seek; Close uses it to decide whether a final byte must be
	// written.
	lastChunkSparse bool
}
|
||
func newSparseWriter(writer io.WriteSeeker) *sparseWriter { | ||
return &sparseWriter{writer: writer} | ||
} | ||
|
||
// copyChunkSize is the size, in bytes, of the buffer CopySparse allocates for
// the copy and of the emptyChunk reference slice.
const copyChunkSize = 4096

// emptyChunk is a copyChunkSize-long slice of zero bytes that isEmptyChunk
// compares incoming data against. Within this file it is only read.
var emptyChunk = make([]byte, copyChunkSize)
|
||
func isEmptyChunk(p []byte) bool { | ||
// HasPrefix instead of bytes.Equal in order to handle the last chunk | ||
// of the file, which may be shorter than len(emptyChunk), and would | ||
// fail bytes.Equal() | ||
return bytes.HasPrefix(emptyChunk, p) | ||
} | ||
|
||
func (w *sparseWriter) Write(p []byte) (n int, err error) { | ||
if isEmptyChunk(p) { | ||
offset, err := w.writer.Seek(int64(len(p)), io.SeekCurrent) | ||
if err != nil { | ||
w.lastChunkSparse = false | ||
return 0, err | ||
} | ||
_ = offset | ||
w.lastChunkSparse = true | ||
return len(p), nil | ||
} | ||
w.lastChunkSparse = false | ||
return w.writer.Write(p) | ||
} | ||
|
||
func (w *sparseWriter) Close() error { | ||
if w.lastChunkSparse { | ||
if _, err := w.writer.Seek(-1, io.SeekCurrent); err != nil { | ||
return err | ||
} | ||
if _, err := w.writer.Write([]byte{0}); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package compression | ||
|
||
import ( | ||
"os" | ||
"path/filepath" | ||
"testing" | ||
) | ||
|
||
func TestCopyFile(t *testing.T) { | ||
testStr := "test-machine" | ||
|
||
srcFile, err := os.CreateTemp("", "machine-test-") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
srcFi, err := srcFile.Stat() | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
_, _ = srcFile.Write([]byte(testStr)) //nolint:mirror | ||
srcFile.Close() | ||
|
||
srcFilePath := filepath.Join(os.TempDir(), srcFi.Name()) | ||
|
||
destFile, err := os.CreateTemp("", "machine-copy-test-") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
destFi, err := destFile.Stat() | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
destFile.Close() | ||
|
||
destFilePath := filepath.Join(os.TempDir(), destFi.Name()) | ||
|
||
if err := CopyFile(srcFilePath, destFilePath); err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
data, err := os.ReadFile(destFilePath) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
if string(data) != testStr { | ||
t.Fatalf("expected data \"%s\"; received \"%s\"", testStr, string(data)) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters