AppleHV - make gz ops sparse
Gz by definition is not able to preserve the sparse nature of files. Using some code from the crc project and gluing it together with our decompression code, we can re-create the sparseness of a file. One downside is that the operation is a little slower, but I think the gains from the sparse file are well worth it in I/O alone.

There are a number of TODOs in this PR that would be ripe for quick-hitting fixes.

[NO NEW TESTS NEEDED]

Signed-off-by: Brent Baude <[email protected]>
Brent Baude committed Feb 5, 2024
1 parent 33bda05 commit 5059b84
Showing 5 changed files with 237 additions and 11 deletions.
11 changes: 1 addition & 10 deletions pkg/machine/applehv/machine.go
@@ -5,7 +5,6 @@ package applehv
import (
"fmt"
"os"
"os/exec"
"syscall"

"github.com/containers/podman/v4/pkg/machine"
@@ -101,15 +100,7 @@ func checkProcessRunning(processName string, pid int) error {
// is assumed GiB
func resizeDisk(mc *vmconfigs.MachineConfig, newSize strongunits.GiB) error {
logrus.Debugf("resizing %s to %d bytes", mc.ImagePath.GetPath(), newSize.ToBytes())
// seems like os.truncate() is not very performant with really large files
// so exec'ing out to the command truncate
size := fmt.Sprintf("%dG", newSize)
c := exec.Command("truncate", "-s", size, mc.ImagePath.GetPath())
if logrus.IsLevelEnabled(logrus.DebugLevel) {
c.Stderr = os.Stderr
c.Stdout = os.Stdout
}
return c.Run()
return os.Truncate(mc.ImagePath.GetPath(), int64(newSize.ToBytes()))
}

func generateSystemDFilesForVirtiofsMounts(mounts []machine.VirtIoFs) []ignition.Unit {
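For reference, a minimal sketch (not part of the diff; the temp-file path is generated) showing that os.Truncate grows a file by extending only its apparent size; on filesystems with hole support (APFS, ext4) no data blocks are allocated until something is actually written.

package main

import (
	"fmt"
	"os"
)

func main() {
	// Create an empty placeholder image file.
	f, err := os.CreateTemp("", "disk-*.raw")
	if err != nil {
		panic(err)
	}
	path := f.Name()
	f.Close()
	defer os.Remove(path)

	// Grow it to 20 GiB; the logical size changes, the on-disk usage does not.
	const gib = int64(1) << 30
	if err := os.Truncate(path, 20*gib); err != nil {
		panic(err)
	}

	fi, err := os.Stat(path)
	if err != nil {
		panic(err)
	}
	fmt.Printf("apparent size: %d bytes\n", fi.Size())
}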
1 change: 0 additions & 1 deletion pkg/machine/applehv/stubber.go
@@ -295,7 +295,6 @@ func (a AppleHVStubber) VMType() define.VMType {
return define.AppleHvVirt
}


func waitForGvProxy(gvproxySocket *define.VMFile) error {
backoffWait := gvProxyWaitBackoff
logrus.Debug("checking that gvproxy is running")
117 changes: 117 additions & 0 deletions pkg/machine/compression/copy.go
@@ -0,0 +1,117 @@
package compression

import (
"bytes"
"io"
"os"
)

// TODO vendor this in ... pkg/os directory is small and code should be negligible
/*
NOTE: copy.go and copy_test.go were lifted from github.com/crc-org/crc because
I was having trouble getting Go to vendor it properly. All credit to them.
*/

func copyFile(src, dst string, sparse bool) error {
in, err := os.Open(src)
if err != nil {
return err
}

defer in.Close()

out, err := os.Create(dst)
if err != nil {
return err
}

defer out.Close()

if sparse {
if _, err = CopySparse(out, in); err != nil {
return err
}
} else {
if _, err = io.Copy(out, in); err != nil {
return err
}
}

fi, err := os.Stat(src)
if err != nil {
return err
}

if err = os.Chmod(dst, fi.Mode()); err != nil {
return err
}

return out.Close()
}

func CopyFile(src, dst string) error {
return copyFile(src, dst, false)
}

func CopyFileSparse(src, dst string) error {
return copyFile(src, dst, true)
}

func CopySparse(dst io.WriteSeeker, src io.Reader) (int64, error) {
copyBuf := make([]byte, copyChunkSize)
sparseWriter := newSparseWriter(dst)

bytesWritten, err := io.CopyBuffer(sparseWriter, src, copyBuf)
if err != nil {
return bytesWritten, err
}
err = sparseWriter.Close()
return bytesWritten, err
}

type sparseWriter struct {
writer io.WriteSeeker
lastChunkSparse bool
}

func newSparseWriter(writer io.WriteSeeker) *sparseWriter {
return &sparseWriter{writer: writer}
}

const copyChunkSize = 4096

var emptyChunk = make([]byte, copyChunkSize)

func isEmptyChunk(p []byte) bool {
// HasPrefix instead of bytes.Equal in order to handle the last chunk
// of the file, which may be shorter than len(emptyChunk), and would
// fail bytes.Equal()
return bytes.HasPrefix(emptyChunk, p)
}

func (w *sparseWriter) Write(p []byte) (n int, err error) {
if isEmptyChunk(p) {
offset, err := w.writer.Seek(int64(len(p)), io.SeekCurrent)
if err != nil {
w.lastChunkSparse = false
return 0, err
}
_ = offset
w.lastChunkSparse = true
return len(p), nil
}
w.lastChunkSparse = false
return w.writer.Write(p)
}

func (w *sparseWriter) Close() error {
if w.lastChunkSparse {
if _, err := w.writer.Seek(-1, io.SeekCurrent); err != nil {
return err
}
if _, err := w.writer.Write([]byte{0}); err != nil {
return err
}
}
return nil
}
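A minimal caller sketch, not part of the diff; the import path matches this repository and the file paths are placeholders. CopyFileSparse behaves like CopyFile except that every aligned 4 KiB chunk of zeros in the source is skipped with a Seek, leaving a hole in the destination, and Close writes a single trailing zero byte when the final chunk was sparse so the destination ends at the right length.

package main

import (
	"log"

	"github.com/containers/podman/v4/pkg/machine/compression"
)

func main() {
	// Zero-filled regions of src.raw become holes in dst.raw instead of
	// allocated blocks, saving both disk space and write I/O.
	if err := compression.CopyFileSparse("/tmp/src.raw", "/tmp/dst.raw"); err != nil {
		log.Fatal(err)
	}
}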
52 changes: 52 additions & 0 deletions pkg/machine/compression/copy_test.go
@@ -0,0 +1,52 @@
package compression

import (
"os"
"path/filepath"
"testing"
)

func TestCopyFile(t *testing.T) {
testStr := "test-machine"

srcFile, err := os.CreateTemp("", "machine-test-")
if err != nil {
t.Fatal(err)
}
srcFi, err := srcFile.Stat()
if err != nil {
t.Fatal(err)
}

_, _ = srcFile.Write([]byte(testStr)) //nolint:mirror
srcFile.Close()

srcFilePath := filepath.Join(os.TempDir(), srcFi.Name())

destFile, err := os.CreateTemp("", "machine-copy-test-")
if err != nil {
t.Fatal(err)
}

destFi, err := destFile.Stat()
if err != nil {
t.Fatal(err)
}

destFile.Close()

destFilePath := filepath.Join(os.TempDir(), destFi.Name())

if err := CopyFile(srcFilePath, destFilePath); err != nil {
t.Fatal(err)
}

data, err := os.ReadFile(destFilePath)
if err != nil {
t.Fatal(err)
}

if string(data) != testStr {
t.Fatalf("expected data \"%s\"; received \"%s\"", testStr, string(data))
}
}
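One of the quick-hitting follow-ups the commit message mentions could be a test for the sparse path. A hypothetical sketch, not part of this commit, that feeds CopyFileSparse an all-zero file and checks that far fewer blocks are allocated than the apparent size suggests; it assumes an added syscall import, a Unix build constraint, and a filesystem that supports holes.

func TestCopyFileSparseAllocation(t *testing.T) {
	// A 1 MiB run of zeros: every 4 KiB chunk should be skipped by the
	// sparse writer and turned into a hole.
	src := filepath.Join(t.TempDir(), "src.raw")
	if err := os.WriteFile(src, make([]byte, 1<<20), 0600); err != nil {
		t.Fatal(err)
	}

	dst := filepath.Join(t.TempDir(), "dst.raw")
	if err := CopyFileSparse(src, dst); err != nil {
		t.Fatal(err)
	}

	fi, err := os.Stat(dst)
	if err != nil {
		t.Fatal(err)
	}
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok {
		t.Skip("no syscall.Stat_t on this platform")
	}
	// Stat_t.Blocks counts 512-byte sectors actually allocated on disk; a
	// sparse destination should use far fewer than its apparent size implies.
	if st.Blocks*512 >= fi.Size() {
		t.Fatalf("expected a sparse destination; %d bytes allocated for apparent size %d", st.Blocks*512, fi.Size())
	}
}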
67 changes: 67 additions & 0 deletions pkg/machine/compression/decompress.go
@@ -3,6 +3,7 @@ package compression
import (
"archive/zip"
"bufio"
"compress/gzip"
"errors"
"io"
"os"
@@ -19,12 +20,20 @@ import (
"github.com/ulikunitz/xz"
)

// Decompress is a generic wrapper for various decompression algos
// TODO this needs some love. In the various decompression functions that are
// called, the same uncompressed path is being opened multiple times.
func Decompress(localPath *define.VMFile, uncompressedPath string) error {
var isZip bool
uncompressedFileWriter, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_RDWR, 0600)
if err != nil {
return err
}
defer func() {
if err := uncompressedFileWriter.Close(); err != nil {
logrus.Errorf("unable to to close decompressed file %s: %q", uncompressedPath, err)
}
}()
sourceFile, err := localPath.Read()
if err != nil {
return err
@@ -44,6 +53,11 @@ func Decompress(localPath *define.VMFile, uncompressedPath string) error {
if isZip && runtime.GOOS == "windows" {
return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter)
}

// Unfortunately GZ is not sparse capable. Let's handle it differently
if compressionType == archive.Gzip && runtime.GOOS == "darwin" {
return decompressGzWithSparse(prefix, localPath, uncompressedPath)
}
return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter)
}

@@ -182,3 +196,56 @@ func decompressZip(prefix string, src string, output io.WriteCloser) error {
p.Wait()
return err
}

func decompressGzWithSparse(prefix string, compressedPath *define.VMFile, uncompressedPath string) error {
stat, err := os.Stat(compressedPath.GetPath())
if err != nil {
return err
}

dstFile, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode())
if err != nil {
return err
}
defer func() {
if err := dstFile.Close(); err != nil {
logrus.Errorf("unable to close uncompressed file %s: %q", uncompressedPath, err)
}
}()

f, err := os.Open(compressedPath.GetPath())
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
logrus.Errorf("unable to close on compressed file %s: %q", compressedPath.GetPath(), err)
}
}()

gzReader, err := gzip.NewReader(f)
if err != nil {
return err
}
defer func() {
if err := gzReader.Close(); err != nil {
logrus.Errorf("unable to close gzreader: %q", err)
}
}()

// TODO remove the following line when progress bars work
_ = prefix
// p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
// proxyReader := bar.ProxyReader(f)
// defer func() {
// if err := proxyReader.Close(); err != nil {
// logrus.Error(err)
// }
// }()

logrus.Debugf("decompressing %s", compressedPath.GetPath())
_, err = CopySparse(dstFile, gzReader)
logrus.Debug("decompression complete")
// p.Wait()
return err
}
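For context, a self-contained sketch of the same composition decompressGzWithSparse relies on: gzip has no notion of holes and hands zeros back as ordinary bytes, so it is the write side, CopySparse, that seeks over them to re-create the sparseness. Paths are placeholders and error handling is trimmed to log.Fatal.

package main

import (
	"compress/gzip"
	"log"
	"os"

	"github.com/containers/podman/v4/pkg/machine/compression"
)

func main() {
	in, err := os.Open("/tmp/disk.raw.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	gz, err := gzip.NewReader(in)
	if err != nil {
		log.Fatal(err)
	}
	defer gz.Close()

	out, err := os.OpenFile("/tmp/disk.raw", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	// CopySparse reads 4 KiB chunks from the gzip stream and seeks instead
	// of writing whenever a chunk is all zeros, so the image lands sparse.
	if _, err := compression.CopySparse(out, gz); err != nil {
		log.Fatal(err)
	}
}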
