Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Discard support #333

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bindings/genode/bindings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,11 @@ struct Solo5::Platform
return pkt.succeeded() ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
}

/*
 * Discard is not implemented by the Genode bindings: every request is
 * rejected with SOLO5_R_EOPNOTSUPP, whatever (offset) and (size) are.
 */
solo5_result_t block_discard(solo5_off_t offset, size_t size)
{
    /* Both parameters are intentionally unused. */
    (void)offset;
    (void)size;

    return SOLO5_R_EOPNOTSUPP;
}

solo5_result_t block_read(solo5_off_t offset, uint8_t *buf, size_t size)
{
if ((offset|size) % blk_size)
Expand Down
1 change: 1 addition & 0 deletions bindings/genode/stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ solo5_result_t solo5_net_read(uint8_t *buf, size_t size, size_t *read_size) { re

/* Stub: leaves (*info) untouched. */
void solo5_block_info(struct solo5_block_info *info)
{
    (void)info;
}

/* Stub: always fails with SOLO5_R_EUNSPEC. */
solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf,
        size_t size)
{
    (void)offset;
    (void)buf;
    (void)size;
    return SOLO5_R_EUNSPEC;
}

/* Stub: always fails with SOLO5_R_EUNSPEC. */
solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
{
    (void)offset;
    (void)size;
    return SOLO5_R_EUNSPEC;
}

/* Stub: always fails with SOLO5_R_EUNSPEC. */
solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
{
    (void)offset;
    (void)buf;
    (void)size;
    return SOLO5_R_EUNSPEC;
}

uintptr_t __stack_chk_guard;
Expand Down
22 changes: 22 additions & 0 deletions bindings/hvt/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,28 @@ solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
return (rd.ret == 0 && rd.len == size) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
}

/*
 * Discards (size) bytes of the block device starting at byte (offset) by
 * issuing HVT_HYPERCALL_BLKDISCARD.
 *
 * Returns:
 *   SOLO5_R_EINVAL     if (offset) or (size) is not a multiple of the block
 *                      size, or the range does not lie within the device.
 *   SOLO5_R_OK         if the range was discarded, or (size) is zero.
 *   SOLO5_R_EOPNOTSUPP if the hypercall reports -2 (discard unsupported).
 *   SOLO5_R_EUNSPEC    for any other hypercall result.
 */
solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
{
    /* A zero capacity is taken to mean block_info is not yet populated. */
    if (!block_info.capacity)
        init_block_info();

    if ((offset % block_info.block_size != 0) ||
        (size % block_info.block_size != 0) ||
        (offset >= block_info.capacity) ||
        (offset + size < offset) || /* checks for overflow */
        (offset + size > block_info.capacity))
        return SOLO5_R_EINVAL;

    /*
     * A valid but empty range is a no-op. Answer it here: fallocate(2)
     * rejects a zero length with EINVAL, which would otherwise surface as
     * SOLO5_R_EUNSPEC. The spt binding also returns SOLO5_R_OK for this case.
     */
    if (size == 0)
        return SOLO5_R_OK;

    volatile struct hvt_blkdiscard di;
    di.sector = offset / block_info.block_size;
    di.len_sectors = size / block_info.block_size;
    di.ret = 0;

    hvt_do_hypercall(HVT_HYPERCALL_BLKDISCARD, &di);

    switch (di.ret) {
    case 0:
        return SOLO5_R_OK;
    case -2: /* discard not supported */
        return SOLO5_R_EOPNOTSUPP;
    default:
        return SOLO5_R_EUNSPEC;
    }
}

void solo5_block_info(struct solo5_block_info *info)
{
if (!block_info.capacity)
Expand Down
6 changes: 6 additions & 0 deletions bindings/spt/bindings.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ long sys_write(long fd, const void *buf, long size);
long sys_pread64(long fd, void *buf, long size, long pos);
long sys_pwrite64(long fd, const void *buf, long size, long pos);

#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */

/*
 * Raw fallocate(2) system call. The third argument is the byte offset of the
 * range and the fourth is its length in bytes, in the kernel's argument
 * order. Returns the raw system call result.
 */
long sys_fallocate(long fd, long mode, long offset, long len);

void sys_exit_group(long status) __attribute__((noreturn));

struct sys_timespec {
Expand All @@ -59,6 +64,7 @@ struct sys_pollfd {
#define SYS_POLLIN 1
#define SYS_EINTR -4
#define SYS_EAGAIN -11
#define SYS_EOPNOTSUPP -95

long sys_ppoll(void *fds, long nfds, void *ts);

Expand Down
21 changes: 21 additions & 0 deletions bindings/spt/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
return (nbytes == (int)size) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
}

/*
 * Discards (size) bytes of the block device starting at byte (offset) by
 * punching a hole in the backing file descriptor with sys_fallocate().
 *
 * Both (offset) and (size) must be multiples of block_size and the range must
 * lie inside the device, otherwise SOLO5_R_EINVAL is returned. A valid range
 * of zero length succeeds without making a system call.
 */
solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
{
    if ((size % block_size != 0) ||
        (offset % block_size != 0) ||
        (offset + size < offset) || /* wrapped around */
        (offset >= block_capacity) ||
        (offset + size > block_capacity))
        return SOLO5_R_EINVAL;

    if (size == 0)
        return SOLO5_R_OK;

    int rv = sys_fallocate(block_fd,
            FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size);

    if (rv == SYS_EOPNOTSUPP)
        return SOLO5_R_EOPNOTSUPP;
    if (rv == 0)
        return SOLO5_R_OK;
    return SOLO5_R_EUNSPEC;
}

solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf,
size_t size)
{
Expand Down
19 changes: 19 additions & 0 deletions bindings/spt/sys_linux_aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* also consider explicitly inlining these functions.
*/

#define SYS_fallocate 47
#define SYS_read 63
#define SYS_write 64
#define SYS_pread64 67
Expand Down Expand Up @@ -108,6 +109,24 @@ long sys_pwrite64(long fd, const void *buf, long size, long pos)
return x0;
}

/*
 * Raw fallocate(2) system call.
 *
 * (offset) is the byte offset of the range and (len) its length in bytes;
 * they travel in x2 and x3, the third and fourth system call arguments.
 * Returns whatever the kernel leaves in x0.
 */
long sys_fallocate(long fd, long mode, long offset, long len)
{
    register long x8 __asm__("x8") = SYS_fallocate;
    register long x0 __asm__("x0") = fd;
    register long x1 __asm__("x1") = mode;
    register long x2 __asm__("x2") = offset;
    register long x3 __asm__("x3") = len;

    __asm__ __volatile__ (
            "svc 0"
            : "=r" (x0)
            : "r" (x8), "r" (x0), "r" (x1), "r" (x2), "r" (x3)
            : "memory", "cc"
    );

    return x0;
}

void sys_exit_group(long status)
{
register long x8 __asm__("x8") = SYS_exit_group;
Expand Down
16 changes: 16 additions & 0 deletions bindings/spt/sys_linux_x86_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#define SYS_clock_gettime 228
#define SYS_exit_group 231
#define SYS_ppoll 271
#define SYS_fallocate 285

long sys_read(long fd, void *buf, long size)
{
Expand Down Expand Up @@ -96,6 +97,21 @@ long sys_pwrite64(long fd, const void *buf, long size, long pos)
return ret;
}

/*
 * Raw fallocate(2) system call.
 *
 * (offset) is the byte offset of the range and (len) its length in bytes;
 * they travel in rdx and r10, the third and fourth system call arguments.
 * Returns whatever the kernel leaves in rax.
 */
long sys_fallocate(long fd, long mode, long offset, long len)
{
    long ret;
    /* No constraint letter exists for r10, so pin it explicitly. */
    register long r10 __asm__("r10") = len;

    __asm__ __volatile__ (
            "syscall"
            : "=a" (ret)
            : "a" (SYS_fallocate), "D" (fd), "S" (mode), "d" (offset),
              "r" (r10)
            : "rcx", "r11", "memory"
    );

    return ret;
}

void sys_exit_group(long status)
{
__asm__ __volatile__ (
Expand Down
33 changes: 30 additions & 3 deletions bindings/virtio/virtio_blk.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,19 @@
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_FLUSH_OUT 5
#define VIRTIO_BLK_T_GET_ID 8
#define VIRTIO_BLK_T_DISCARD 11
#define VIRTIO_BLK_T_WRITE_ZEROES 13
#define VIRTIO_BLK_T_BARRIER 0x80000000

#define VIRTIO_BLK_S_OK 0
#define VIRTIO_BLK_S_IOERR 1
#define VIRTIO_BLK_S_UNSUPP 2

#define VIRTIO_BLK_F_DISCARD 13
#define VIRTIO_BLK_F_WRITE_ZEROES 14

static uint64_t virtio_blk_sectors;
static uint32_t guest_features;

#define VIRTIO_BLK_SECTOR_SIZE 512

Expand Down Expand Up @@ -147,18 +153,19 @@ static int virtio_blk_op_sync(uint32_t type,
void virtio_config_block(struct pci_config_info *pci)
{
uint8_t ready_for_init = VIRTIO_PCI_STATUS_ACK | VIRTIO_PCI_STATUS_DRIVER;
uint32_t host_features, guest_features;
uint32_t host_features;
size_t pgs;

outb(pci->base + VIRTIO_PCI_STATUS, ready_for_init);

host_features = inl(pci->base + VIRTIO_PCI_HOST_FEATURES);

/* don't negotiate anything for now */
guest_features = 0;
/* negotiate enough for discard */
guest_features = host_features & (VIRTIO_BLK_F_DISCARD | VIRTIO_BLK_F_WRITE_ZEROES);
outl(pci->base + VIRTIO_PCI_GUEST_FEATURES, guest_features);

virtio_blk_sectors = inq(pci->base + VIRTIO_PCI_CONFIG_OFF);

log(INFO, "Solo5: PCI:%02x:%02x: configured, capacity=%llu sectors, "
"features=0x%x\n",
pci->bus, pci->dev, (unsigned long long)virtio_blk_sectors,
Expand Down Expand Up @@ -209,6 +216,26 @@ solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf,
return (rv == 0) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
}

/*
 * Discard entry point for the virtio block driver.
 *
 * The range is validated (sector alignment, bounds, wrap-around) and
 * SOLO5_R_EINVAL is returned for a bad range. No discard request is issued
 * to the device yet, so every valid range is answered with
 * SOLO5_R_EOPNOTSUPP.
 */
solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
{
    assert(blk_configured);

    uint64_t sector = offset / VIRTIO_BLK_SECTOR_SIZE;
    uint64_t size_sectors = size / VIRTIO_BLK_SECTOR_SIZE;
    if ((offset % VIRTIO_BLK_SECTOR_SIZE != 0) ||
        (size % VIRTIO_BLK_SECTOR_SIZE != 0) ||
        (sector >= virtio_blk_sectors) ||
        (sector + size_sectors < sector) ||
        (sector + size_sectors > virtio_blk_sectors))
        return SOLO5_R_EINVAL;

    /*
     * The outcome does not depend on the negotiated features while discard
     * is unimplemented. NOTE(review): VIRTIO_BLK_F_DISCARD and
     * VIRTIO_BLK_F_WRITE_ZEROES are feature bit numbers, so a future
     * feature test must use (1u << VIRTIO_BLK_F_DISCARD), not the bare
     * constant, as the mask.
     */
    return SOLO5_R_EOPNOTSUPP;
}

solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
{
assert(blk_configured);
Expand Down
12 changes: 12 additions & 0 deletions include/solo5/hvt_abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ enum hvt_hypercall {
HVT_HYPERCALL_NETWRITE,
HVT_HYPERCALL_NETREAD,
HVT_HYPERCALL_HALT,
HVT_HYPERCALL_BLKDISCARD,
HVT_HYPERCALL_MAX
};

Expand Down Expand Up @@ -284,4 +285,15 @@ struct hvt_halt {
int exit_status;
};

/*
 * HVT_HYPERCALL_BLKDISCARD: argument block for discarding a range of sectors
 * on the block device.
 */
struct hvt_blkdiscard {
/* IN: first sector of the range to discard. */
size_t sector;

/* IN: length of the range, in sectors. */
size_t len_sectors;

/*
 * OUT: 0 on success, -2 if discard is not supported,
 * -1 on any other failure (including an out-of-range request).
 */
int ret;
};

#endif /* HVT_GUEST_H */
16 changes: 15 additions & 1 deletion include/solo5/solo5.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ typedef enum {
/*
* The operation failed due to an unspecified error.
*/
SOLO5_R_EUNSPEC
SOLO5_R_EUNSPEC,
/*
* The operation failed because the backing device does
* not support it. Retrying will not succeed.
*/
SOLO5_R_EOPNOTSUPP
} solo5_result_t;

/*
Expand Down Expand Up @@ -250,6 +255,15 @@ struct solo5_block_info {
*/
void solo5_block_info(struct solo5_block_info *info);

/*
 * Discards (size) bytes of data on the block device, starting at byte
 * (offset). The operation is not atomic.
 *
 * Both (offset) and (size) must be multiples of the device block_size, and
 * the range must lie within the device; otherwise SOLO5_R_EINVAL is
 * returned.
 *
 * If the current backend or backing device does not support discard,
 * SOLO5_R_EOPNOTSUPP is returned and the data is unchanged. Any other
 * failure is reported as SOLO5_R_EUNSPEC.
 */
solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size);

/*
* Writes data of (size) bytes from the buffer (*buf) to the block device,
 * starting at byte (offset). Data is either written in its entirety or not at
Expand Down
40 changes: 40 additions & 0 deletions tenders/hvt/hvt_module_blk.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define _GNU_SOURCE
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
Expand Down Expand Up @@ -72,6 +73,43 @@ static void hypercall_blkwrite(struct hvt *hvt, hvt_gpa_t gpa)
wr->ret = 0;
}

/*
 * HVT_HYPERCALL_BLKDISCARD handler: discards a range of sectors on the
 * backing disk.
 *
 * The request is read from the struct hvt_blkdiscard at guest address (gpa)
 * and the result is written to its (ret) field:
 *    0  the range was discarded, or the request was empty;
 *   -2  discard is not supported (non-Linux host, or fallocate() failed
 *       with EOPNOTSUPP);
 *   -1  the range is out of bounds, or any other failure.
 */
static void hypercall_blkdiscard(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_blkdiscard *di =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_blkdiscard));
    /* Snapshot the request so validation and use see the same values. */
    size_t sector = di->sector;
    size_t len_sectors = di->len_sectors;
    off_t pos, len, end;

    /*
     * Bound the request in units of sectors first. (len_sectors) comes from
     * the guest, and multiplying an unchecked value into a signed off_t
     * could overflow.
     */
    if (sector >= blkinfo.num_sectors
            || len_sectors > blkinfo.num_sectors - sector) {
        di->ret = -1;
        return;
    }
    /* fallocate() rejects a zero length; an empty range is a no-op. */
    if (len_sectors == 0) {
        di->ret = 0;
        return;
    }
    pos = (off_t)blkinfo.sector_size * (off_t)sector;
    len = (off_t)blkinfo.sector_size * (off_t)len_sectors;
    if (add_overflow(pos, len, end)
            || (end > blkinfo.num_sectors * blkinfo.sector_size))
    {
        di->ret = -1;
        return;
    }

#if defined(__linux__)
    /* Punch a hole without changing the size of the backing file. */
    if (fallocate(diskfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                pos, len) == 0)
        di->ret = 0;
    else if (errno == EOPNOTSUPP)
        di->ret = -2;
    else
        di->ret = -1;
#else
    di->ret = -2;
#endif
}

static void hypercall_blkread(struct hvt *hvt, hvt_gpa_t gpa)
{
struct hvt_blkread *rd =
Expand Down Expand Up @@ -126,6 +164,8 @@ static int setup(struct hvt *hvt)
hypercall_blkwrite) == 0);
assert(hvt_core_register_hypercall(HVT_HYPERCALL_BLKREAD,
hypercall_blkread) == 0);
assert(hvt_core_register_hypercall(HVT_HYPERCALL_BLKDISCARD,
hypercall_blkdiscard) == 0);

return 0;
}
Expand Down
9 changes: 9 additions & 0 deletions tenders/spt/spt_module_block.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ static int setup(struct spt *spt)
if (rc != 0)
errx(1, "seccomp_rule_add(pwrite64, fd=%d) failed: %s", diskfd,
strerror(-rc));
rc = seccomp_rule_add(spt->sc_ctx, SCMP_ACT_ALLOW, SCMP_SYS(fallocate), 3,
SCMP_A0(SCMP_CMP_EQ, diskfd),
SCMP_A1(SCMP_CMP_EQ, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE),
SCMP_A2(SCMP_CMP_LE,
(spt->bi->blocki.capacity - spt->bi->blocki.block_size)),
SCMP_A3(SCMP_CMP_GE, spt->bi->blocki.block_size));
if (rc != 0)
errx(1, "seccomp_rule_add(fallocate, fd=%d) failed: %s", diskfd,
strerror(-rc));

return 0;
}
Expand Down
Loading