Skip to content
This repository has been archived by the owner on Nov 24, 2021. It is now read-only.

Uhyve file read/write should not assume target buffer is contiguous in physical memory #86

Open
olivierpierre opened this issue Apr 5, 2018 · 4 comments

Comments

@olivierpierre
Copy link
Contributor

Hello,

With Uhyve, when performing file read/write operations in the host, the target buffer is fully read or written from the guest physical memory. Such a design assumes that the buffer is contiguous in physical memory, which is not always the case. When using a buffer allocated with malloc, example of bad things that can happen are:

  • Reading from a file in a buffer that is not or partially mapped to physical memory (because of on-demand mapping)
  • Reading from a file in a buffer which virtual pages are not mapped to contiguous physical pages
  • Writing in a file from a buffer which virtual pages are not mapped to contiguous physical pages

You can confirm these issues with these two sample programs and the corresponding Makefile:

/* read_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_TO_READ	"fread.bin"
#define FILE_SIZE		PAGE_SIZE * 4

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	int fd, verify_fd, i;
	char *buf;

	printf("Read test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc!\n");
		return -1;
	}

	verify_fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	if(fd == -1 || verify_fd == -1) {
		printf("error open\n");
		return -1;
	}

	if(read(verify_fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}

	if(read(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}
	printf("Read in malloc'd buffer done\n");

	for(i=0; i<FILE_SIZE; i++)
		if(buf[i] != verify_buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Read verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
/* write_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_SIZE		PAGE_SIZE * 4
#define FILE_TO_WRITE	"fwrite.bin"

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	char *buf;
	int fd, i;

	printf("write test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc\n");
		return -1;
	}

	fd = open(FILE_TO_WRITE, O_RDWR | O_CREAT | O_TRUNC, 0666);
	if(fd == -1) {
		fprintf(stderr, "error opening %s\n", FILE_TO_WRITE);
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		buf[i] = (i/4096) + '0';

	if(write(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error writing in %s", FILE_TO_WRITE);
		return -1;
	}
	printf("Write from malloc'd buffer done\n");

	if(lseek(fd, 0x0, SEEK_SET) != 0) {
		fprintf(stderr, "error lseek\n");
		return -1;
	}

	if(read(fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error read\n");
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		if(verify_buf[i] != buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Write verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
# Makefile

HERMIT_LOCAL_INSTALL=/home/pierre/Desktop/HermitCore/prefix/
HERMIT_TOOLCHAIN_INSTALL=/opt/hermit/
READ_TEST=read_test
WRITE_TEST=write_test
READ_SAMPLE_FILE=fread.bin
WRITE_SAMPLE_FILE=fwrite.bin
READ_TEST_SRC=$(READ_TEST).c
WRITE_TEST_SRC=$(WRITE_TEST).c
CFLAGS=-Wall -Werror
LDFLAGS=-L$(HERMIT_LOCAL_INSTALL)/x86_64-hermit/lib
CC=$(HERMIT_TOOLCHAIN_INSTALL)/bin/x86_64-hermit-gcc
PROXY=$(HERMIT_LOCAL_INSTALL)/bin/proxy
VERBOSE?=0

all: $(READ_TEST) $(WRITE_TEST)

$(READ_TEST): $(READ_TEST_SRC) $(READ_SAMPLE_FILE)
	$(CC) $(CFLAGS) $(READ_TEST_SRC) -o $(READ_TEST) $(LDFLAGS)

$(WRITE_TEST): $(WRITE_TEST_SRC)
	$(CC) $(CFLAGS) $(WRITE_TEST_SRC) -o $(WRITE_TEST) $(LDFLAGS)

$(READ_SAMPLE_FILE):
	dd if=/dev/urandom of=$(READ_SAMPLE_FILE) bs=4K count=4

test: $(READ_TEST) $(WRITE_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(READ_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(WRITE_TEST)

clean:
	rm -rf $(READ_TEST) $(WRITE_TEST) *.o $(READ_SAMPLE_FILE) \
		$(WRITE_SAMPLE_FILE)

Here is my proposed solution: a patch that forces the file read and write operations to be performed on a page-by-page basis, i.e. Uhyve file read or write operation is called for each page composing the buffer. I did not extensively tested it, nor did I evaluated the performance impact (I guess it slows things down a bit).

diff --git a/kernel/syscall.c b/kernel/syscall.c
index d10691d..f7b7b00 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -159,6 +159,47 @@ typedef struct {
 	ssize_t ret;
 } __attribute__((packed)) uhyve_read_t;
 
+/* Pages belonging to the heap are mapped on demand, and are not always
+ * contiguous in physical memory. Thus, we need to force allocation and call
+ * the hypervisor file read function page by page.
+ */
+ssize_t uhyve_noncontiguous_read(int fd, char *buf, size_t len) {
+	ssize_t bytes_read = 0;
+	size_t cur_len = 0;
+	uhyve_read_t arg = {fd, 0x0, 0x0, -1};
+
+	while(len) {
+
+		if(!((size_t)buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = PAGE_CEIL((size_t)buf) - (size_t)buf;
+			cur_len = (((size_t)buf + len) > PAGE_CEIL((size_t)buf))
+				? (PAGE_CEIL((size_t)buf) - (size_t)buf) : len;
+		}
+
+		/* Force mapping */
+		memset(buf, 0x0, 1);
+
+		arg.buf = (char *)virt_to_phys((size_t)buf);
+		arg.len = cur_len;
+		arg.ret = -1;
+		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.ret < 0)
+			return arg.ret;
+
+		bytes_read += arg.ret;
+		if(arg.ret != cur_len)
+			break;
+
+		buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_read;
+}
+
 ssize_t sys_read(int fd, char* buf, size_t len)
 {
 	sys_read_t sysargs = {__NR_read, fd, len};
@@ -174,11 +215,16 @@ ssize_t sys_read(int fd, char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_read(fd, buf, len);
+
+#if 0
 		uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};
 
 		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.ret;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);
@@ -222,6 +268,39 @@ typedef struct {
 	size_t len;
 } __attribute__((packed)) uhyve_write_t;
 
+ssize_t uhyve_noncontiguous_write(int fd, const char *buf, size_t len) {
+	char *cur_buf = (char *)buf;
+	ssize_t bytes_written = 0;
+	size_t cur_len = 0;
+	uhyve_write_t arg = {fd, 0x0, 0x0};
+
+	while(len) {
+
+		if(!((size_t)cur_buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = (((size_t)cur_buf + len) > PAGE_CEIL((size_t)cur_buf))
+				? (PAGE_CEIL((size_t)cur_buf) - (size_t)cur_buf) : len;
+		}
+
+		arg.buf = (char *)virt_to_phys((size_t)cur_buf);
+		arg.len = cur_len;
+		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.len == (size_t)(-1))
+			return -1;
+
+		bytes_written += arg.len;
+		if(arg.len != cur_len)
+			break;
+
+		cur_buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_written;
+}
+
 ssize_t sys_write(int fd, const char* buf, size_t len)
 {
 	if (BUILTIN_EXPECT(!buf, 0))
@@ -240,11 +319,16 @@ ssize_t sys_write(int fd, const char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_write(fd, buf, len);
+
+#if 0
 		uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};
 
 		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.len;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);
@stlankes
Copy link
Contributor

stlankes commented Apr 5, 2018

Hello Pierre,

you are right. I will create patch to solve this issue.

Best

Stefan

@ColinFinck
Copy link
Member

@olivierpierre: I got to know about your problem, however not about your proposed solution.
Anyway, I've committed a fix for HermitCore-rs (the Rust implementation of HermitCore), which lets uhyve perform the virtual-to-physical address translation for the buffers of read/write operations.
This way, the physical memory addresses of the buffer pages can be arbitrary.

The fix commit is hermit-os/hermit-caves@6b9e4be and seems to have also been merged to the HermitCore C version in the meantime.

@stlankes
Copy link
Contributor

@olivierpierre : Since yesterday I added a patch to the C version (see pull request 97). Could you test it on your system? By the way, I add also gdb support for aarch64...

@olivierpierre
Copy link
Contributor Author

@ColinFinck: I believe my initial post includes a description of the problem as well as two sample programs that trigger the issue. Please let me know if you need something specific.

@stlankes: thank! I will try the patch on our version and let you know

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants