diff --git a/bindings/genode/bindings.cc b/bindings/genode/bindings.cc
index b4675b5d..6fabcb0a 100644
--- a/bindings/genode/bindings.cc
+++ b/bindings/genode/bindings.cc
@@ -402,6 +402,11 @@ struct Solo5::Platform
 		return pkt.succeeded() ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
 	}
 
+	solo5_result_t block_discard(solo5_off_t offset, size_t size)
+	{
+		return SOLO5_R_EOPNOTSUPP;
+	}
+
 	solo5_result_t block_read(solo5_off_t offset, uint8_t *buf, size_t size)
 	{
 		if ((offset|size) % blk_size)
diff --git a/bindings/genode/stubs.c b/bindings/genode/stubs.c
index 5f34cbf8..a3ae55f1 100644
--- a/bindings/genode/stubs.c
+++ b/bindings/genode/stubs.c
@@ -14,6 +14,7 @@ solo5_result_t solo5_net_read(uint8_t *buf, size_t size, size_t *read_size) { re
 
 void solo5_block_info(struct solo5_block_info *info) { }
 solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf, size_t size) { return SOLO5_R_EUNSPEC; }
+solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size) { return SOLO5_R_EUNSPEC; }
 solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size) { return SOLO5_R_EUNSPEC; }
 
 uintptr_t __stack_chk_guard;
diff --git a/bindings/hvt/block.c b/bindings/hvt/block.c
index 511721c4..51bcb223 100644
--- a/bindings/hvt/block.c
+++ b/bindings/hvt/block.c
@@ -90,6 +90,35 @@ solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
     return (rd.ret == 0 && rd.len == size) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
 }
 
+solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
+{
+    if (!block_info.capacity)
+        init_block_info();
+
+    if ((offset % block_info.block_size != 0) ||
+        (size % block_info.block_size != 0) ||
+        (offset >= block_info.capacity) ||
+        (offset + size < offset) || /* checks for overflow */
+        (offset + size > block_info.capacity))
+        return SOLO5_R_EINVAL;
+
+    /* An empty range is a no-op; do not send it to the tender. */
+    if (size == 0)
+        return SOLO5_R_OK;
+
+    volatile struct hvt_blkdiscard di;
+    di.sector = offset / block_info.block_size;
+    di.len_sectors = size / block_info.block_size;
+    di.ret = 0;
+
+    hvt_do_hypercall(HVT_HYPERCALL_BLKDISCARD, &di);
+
+    /* The tender sets ret to -2 when fallocate() fails with EOPNOTSUPP. */
+    if (di.ret == -2)
+        return SOLO5_R_EOPNOTSUPP;
+    return (di.ret == 0) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
+}
+
 void solo5_block_info(struct solo5_block_info *info)
 {
     if (!block_info.capacity)
diff --git a/bindings/spt/bindings.h b/bindings/spt/bindings.h
index 7121cb55..750f75db 100644
--- a/bindings/spt/bindings.h
+++ b/bindings/spt/bindings.h
@@ -38,6 +38,11 @@ long sys_write(long fd, const void *buf, long size);
 long sys_pread64(long fd, void *buf, long size, long pos);
 long sys_pwrite64(long fd, const void *buf, long size, long pos);
 
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+
+long sys_fallocate(long fd, long mode, long offset, long len);
+
 void sys_exit_group(long status) __attribute__((noreturn));
 
 struct sys_timespec {
@@ -59,6 +64,7 @@ struct sys_pollfd {
 #define SYS_POLLIN 1
 #define SYS_EINTR -4
 #define SYS_EAGAIN -11
+#define SYS_EOPNOTSUPP -95
 
 long sys_ppoll(void *fds, long nfds, void *ts);
 
diff --git a/bindings/spt/block.c b/bindings/spt/block.c
index 525b8031..184e4542 100644
--- a/bindings/spt/block.c
+++ b/bindings/spt/block.c
@@ -63,6 +63,28 @@ solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
     return (nbytes == (int)size) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
 }
 
+solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
+{
+    if ((offset % block_size != 0) ||
+        (size % block_size != 0) ||
+        (offset >= block_capacity) ||
+        (offset + size < offset) || /* checks for overflow */
+        (offset + size > block_capacity))
+        return SOLO5_R_EINVAL;
+
+    /* fallocate() rejects len == 0, so an empty range is handled here. */
+    if (size == 0)
+        return SOLO5_R_OK;
+
+    /* Kernel argument order is (fd, mode, offset, len). */
+    long rv = sys_fallocate(block_fd,
+            FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size);
+    if (rv == SYS_EOPNOTSUPP)
+        return SOLO5_R_EOPNOTSUPP;
+
+    return (rv == 0) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
+}
+
 solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf,
         size_t size)
 {
diff --git a/bindings/spt/sys_linux_aarch64.c b/bindings/spt/sys_linux_aarch64.c
index d678764b..bec911e4 100644
--- a/bindings/spt/sys_linux_aarch64.c
+++ b/bindings/spt/sys_linux_aarch64.c
@@ -30,6 +30,7 @@
  * also consider explicitly inlining these functions.
  */
 
+#define SYS_fallocate 47
 #define SYS_read 63
 #define SYS_write 64
 #define SYS_pread64 67
@@ -108,6 +109,24 @@ long sys_pwrite64(long fd, const void *buf, long size, long pos)
     return x0;
 }
 
+long sys_fallocate(long fd, long mode, long offset, long len)
+{
+    register long x8 __asm__("x8") = SYS_fallocate;
+    register long x0 __asm__("x0") = fd;
+    register long x1 __asm__("x1") = mode;
+    register long x2 __asm__("x2") = offset;
+    register long x3 __asm__("x3") = len;
+
+    __asm__ __volatile__ (
+            "svc 0"
+            : "=r" (x0)
+            : "r" (x8), "r" (x0), "r" (x1), "r" (x2), "r" (x3)
+            : "memory", "cc"
+    );
+
+    return x0;
+}
+
 void sys_exit_group(long status)
 {
     register long x8 __asm__("x8") = SYS_exit_group;
diff --git a/bindings/spt/sys_linux_x86_64.c b/bindings/spt/sys_linux_x86_64.c
index 64f08e8a..457f8a34 100644
--- a/bindings/spt/sys_linux_x86_64.c
+++ b/bindings/spt/sys_linux_x86_64.c
@@ -37,6 +37,7 @@
 #define SYS_clock_gettime 228
 #define SYS_exit_group 231
 #define SYS_ppoll 271
+#define SYS_fallocate 285
 
 long sys_read(long fd, void *buf, long size)
 {
@@ -96,6 +97,21 @@ long sys_pwrite64(long fd, const void *buf, long size, long pos)
     return ret;
 }
 
+long sys_fallocate(long fd, long mode, long offset, long len)
+{
+    long ret;
+    register long r10 __asm__("r10") = len;
+
+    __asm__ __volatile__ (
+            "syscall"
+            : "=a" (ret)
+            : "a" (SYS_fallocate), "D" (fd), "S" (mode), "d" (offset), "r" (r10)
+            : "rcx", "r11", "memory"
+    );
+
+    return ret;
+}
+
 void sys_exit_group(long status)
 {
     __asm__ __volatile__ (
diff --git a/bindings/virtio/virtio_blk.c b/bindings/virtio/virtio_blk.c
index aa9f44c2..2a9be575 100644
--- a/bindings/virtio/virtio_blk.c
+++ b/bindings/virtio/virtio_blk.c
@@ -30,13 +30,19 @@
 #define VIRTIO_BLK_T_FLUSH 4
 #define VIRTIO_BLK_T_FLUSH_OUT 5
 #define VIRTIO_BLK_T_GET_ID 8
+#define VIRTIO_BLK_T_DISCARD 11
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
 #define VIRTIO_BLK_T_BARRIER 0x80000000
 
 #define VIRTIO_BLK_S_OK 0
 #define VIRTIO_BLK_S_IOERR 1
 #define VIRTIO_BLK_S_UNSUPP 2
 
+#define VIRTIO_BLK_F_DISCARD 13 /* feature bit number, not a mask */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* feature bit number, not a mask */
+
 static uint64_t virtio_blk_sectors;
+static uint32_t guest_features;
 
 #define VIRTIO_BLK_SECTOR_SIZE 512
 
@@ -147,18 +153,19 @@ static int virtio_blk_op_sync(uint32_t type,
 void virtio_config_block(struct pci_config_info *pci)
 {
     uint8_t ready_for_init = VIRTIO_PCI_STATUS_ACK | VIRTIO_PCI_STATUS_DRIVER;
-    uint32_t host_features, guest_features;
+    uint32_t host_features;
     size_t pgs;
 
     outb(pci->base + VIRTIO_PCI_STATUS, ready_for_init);
 
     host_features = inl(pci->base + VIRTIO_PCI_HOST_FEATURES);
 
-    /* don't negotiate anything for now */
-    guest_features = 0;
+    /* negotiate enough for discard; shift the bit numbers into a mask */
+    guest_features = host_features &
+        ((1U << VIRTIO_BLK_F_DISCARD) | (1U << VIRTIO_BLK_F_WRITE_ZEROES));
     outl(pci->base + VIRTIO_PCI_GUEST_FEATURES, guest_features);
 
     virtio_blk_sectors = inq(pci->base + VIRTIO_PCI_CONFIG_OFF);
     log(INFO, "Solo5: PCI:%02x:%02x: configured, capacity=%llu sectors, "
         "features=0x%x\n",
         pci->bus, pci->dev, (unsigned long long)virtio_blk_sectors,
@@ -209,6 +216,26 @@ solo5_result_t solo5_block_write(solo5_off_t offset, const uint8_t *buf,
     return (rv == 0) ? SOLO5_R_OK : SOLO5_R_EUNSPEC;
 }
 
+solo5_result_t solo5_block_discard(solo5_off_t offset, size_t size)
+{
+    assert(blk_configured);
+
+    uint64_t sector = offset / VIRTIO_BLK_SECTOR_SIZE;
+    uint64_t size_sectors = size / VIRTIO_BLK_SECTOR_SIZE;
+    if ((offset % VIRTIO_BLK_SECTOR_SIZE != 0) ||
+        (size % VIRTIO_BLK_SECTOR_SIZE != 0) ||
+        (sector >= virtio_blk_sectors) ||
+        (sector + size_sectors < sector) ||
+        (sector + size_sectors > virtio_blk_sectors))
+        return SOLO5_R_EINVAL;
+
+    /*
+     * No discard request is issued to the device yet, so report the
+     * operation as unsupported whatever features were negotiated.
+     */
+    return SOLO5_R_EOPNOTSUPP;
+}
+
 solo5_result_t solo5_block_read(solo5_off_t offset, uint8_t *buf, size_t size)
 {
     assert(blk_configured);
diff --git a/include/solo5/hvt_abi.h b/include/solo5/hvt_abi.h
index 5f459960..7bb933f3 100644
--- a/include/solo5/hvt_abi.h
+++ b/include/solo5/hvt_abi.h
@@ -170,6 +170,7 @@ enum hvt_hypercall {
     HVT_HYPERCALL_NETWRITE,
     HVT_HYPERCALL_NETREAD,
     HVT_HYPERCALL_HALT,
+    HVT_HYPERCALL_BLKDISCARD,
     HVT_HYPERCALL_MAX
 };
 
@@ -284,4 +285,15 @@ struct hvt_halt {
     int exit_status;
 };
 
+struct hvt_blkdiscard {
+    /* IN */
+    size_t sector;
+
+    /* IN */
+    size_t len_sectors;
+
+    /* OUT: 0 on success, -2 if discard is unsupported, -1 on other errors */
+    int ret;
+};
+
 #endif /* HVT_GUEST_H */
diff --git a/tenders/hvt/hvt_module_blk.c b/tenders/hvt/hvt_module_blk.c
index 4cd568c5..354e1bba 100644
--- a/tenders/hvt/hvt_module_blk.c
+++ b/tenders/hvt/hvt_module_blk.c
@@ -25,6 +25,7 @@
 #define _GNU_SOURCE
 #include <assert.h>
 #include <err.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -72,6 +73,39 @@ static void hypercall_blkwrite(struct hvt *hvt, hvt_gpa_t gpa)
     wr->ret = 0;
 }
 
+static void hypercall_blkdiscard(struct hvt *hvt, hvt_gpa_t gpa)
+{
+    struct hvt_blkdiscard *di =
+        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_blkdiscard));
+    off_t pos, len;
+
+    /*
+     * Validate in units of sectors before converting to bytes: len_sectors
+     * is guest-controlled, so multiplying it first could overflow off_t.
+     */
+    if (di->sector >= blkinfo.num_sectors
+            || di->len_sectors > blkinfo.num_sectors - di->sector) {
+        di->ret = -1;
+        return;
+    }
+
+    /* fallocate() fails with EINVAL for len == 0; nothing to do anyway. */
+    if (di->len_sectors == 0) {
+        di->ret = 0;
+        return;
+    }
+
+    pos = (off_t)blkinfo.sector_size * (off_t)di->sector;
+    len = (off_t)blkinfo.sector_size * (off_t)di->len_sectors;
+    if (fallocate(diskfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                pos, len) == 0)
+        di->ret = 0;
+    else if (errno == EOPNOTSUPP)
+        di->ret = -2; /* the hvt binding maps -2 to SOLO5_R_EOPNOTSUPP */
+    else
+        di->ret = -1;
+}
+
 static void hypercall_blkread(struct hvt *hvt, hvt_gpa_t gpa)
 {
     struct hvt_blkread *rd =
@@ -126,6 +160,8 @@ static int setup(struct hvt *hvt)
                 hypercall_blkwrite) == 0);
     assert(hvt_core_register_hypercall(HVT_HYPERCALL_BLKREAD,
                 hypercall_blkread) == 0);
+    assert(hvt_core_register_hypercall(HVT_HYPERCALL_BLKDISCARD,
+                hypercall_blkdiscard) == 0);
 
     return 0;
 }
diff --git a/tenders/spt/spt_module_block.c b/tenders/spt/spt_module_block.c
index 4018aa93..63399254 100644
--- a/tenders/spt/spt_module_block.c
+++ b/tenders/spt/spt_module_block.c
@@ -96,6 +96,15 @@ static int setup(struct spt *spt)
     if (rc != 0)
         errx(1, "seccomp_rule_add(pwrite64, fd=%d) failed: %s", diskfd,
                 strerror(-rc));
+    rc = seccomp_rule_add(spt->sc_ctx, SCMP_ACT_ALLOW, SCMP_SYS(fallocate), 4,
+            SCMP_A0(SCMP_CMP_EQ, diskfd),
+            SCMP_A1(SCMP_CMP_EQ, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE),
+            SCMP_A2(SCMP_CMP_LE,
+                (spt->bi->blocki.capacity - spt->bi->blocki.block_size)),
+            SCMP_A3(SCMP_CMP_GE, spt->bi->blocki.block_size));
+    if (rc != 0)
+        errx(1, "seccomp_rule_add(fallocate, fd=%d) failed: %s", diskfd,
+                strerror(-rc));
 
     return 0;
 }
diff --git a/tests/test_blk/test_blk.c b/tests/test_blk/test_blk.c
index 8902e574..e7accf6e 100644
--- a/tests/test_blk/test_blk.c
+++ b/tests/test_blk/test_blk.c
@@ -51,6 +51,32 @@ bool check_one_block(solo5_off_t offset, size_t block_size)
     return true;
 }
 
+bool check_one_block_discard(solo5_off_t offset, size_t block_size)
+{
+    size_t i;
+    uint8_t rbuf[block_size];
+
+    for (i = 0; i < block_size; i++) {
+        rbuf[i] = '0' + i % 10;
+    }
+
+    solo5_result_t rv = solo5_block_discard(offset, block_size);
+    if (rv == SOLO5_R_EOPNOTSUPP)
+        return true;
+    if (rv != SOLO5_R_OK)
+        return false;
+    if (solo5_block_read(offset, rbuf, block_size) != SOLO5_R_OK)
+        return false;
+
+    for (i = 0; i < block_size; i++) {
+        if (rbuf[i] != 0)
+            /* Check failed */
+            return false;
+    }
+
+    return true;
+}
+
 int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
 {
     puts("\n**** Solo5 standalone test_blk ****\n\n");
@@ -66,10 +92,15 @@ int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
         if (!check_one_block(offset, bi.block_size))
             /* Check failed */
             return 1;
+        if (!check_one_block_discard(offset, bi.block_size))
+            /* Check failed */
+            return 12;
     }
 
     uint8_t buf[bi.block_size * 2];
 
+    solo5_result_t rv;
+
     /*
      * Check edge case: read/write of last sector on the device.
      */
@@ -78,6 +109,9 @@ int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
         return 2;
     if (solo5_block_read(last_block, buf, bi.block_size) != SOLO5_R_OK)
         return 3;
+    rv = solo5_block_discard(last_block, bi.block_size);
+    if ((rv != SOLO5_R_OK) && (rv != SOLO5_R_EOPNOTSUPP))
+        return 13;
 
     /*
      * Check edge cases: should not be able to read or write beyond end
@@ -90,6 +124,8 @@ int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
         return 4;
     if (solo5_block_read(bi.capacity, buf, bi.block_size) == SOLO5_R_OK)
         return 5;
+    if (solo5_block_discard(bi.capacity, bi.block_size) != SOLO5_R_EINVAL)
+        return 14;
 
     /*
      * Check invalid arguments: Should not be able to read or write less than
@@ -102,10 +138,14 @@ int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
         return 6;
     if (solo5_block_read(0, buf, bi.block_size - 1) == SOLO5_R_OK)
         return 7;
+    if (solo5_block_discard(0, bi.block_size - 1) != SOLO5_R_EINVAL)
+        return 15;
     if (solo5_block_write(0, buf, bi.block_size + 1) == SOLO5_R_OK)
         return 8;
     if (solo5_block_read(0, buf, bi.block_size + 1) == SOLO5_R_OK)
         return 9;
+    if (solo5_block_discard(0, bi.block_size + 1) != SOLO5_R_EINVAL)
+        return 16;
 
     /*
      * Check invalid arguments: Should not be able to read or write at offsets
@@ -118,6 +158,8 @@ int solo5_app_main(const struct solo5_start_info *si __attribute__((unused)))
         return 10;
     if (solo5_block_read(bi.block_size - 1, buf, bi.block_size) == SOLO5_R_OK)
         return 11;
+    if (solo5_block_discard(bi.block_size - 1, bi.block_size) != SOLO5_R_EINVAL)
+        return 17;
 
     puts("SUCCESS\n");
 