Copy just used pages from huge ANONYMOUS mappings.
ASan creates a "shadow" of the used memory.
This is done in a mapping of around 20 TB of address space,
but most of it is not yet used.
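As an illustration (not from the commit): ASan's documented x86_64 scheme maps every 8 bytes of application memory to one shadow byte, which is why a huge mapping implies a proportionally huge shadow. A minimal sketch of the translation, using the documented default shadow offset:

  #include <stdint.h>

  /* Sketch of ASan's address-to-shadow translation; 0x7fff8000 is the
   * documented default shadow offset on x86_64 Linux. */
  static inline uint8_t* asan_shadow_byte(uintptr_t addr) {
    return (uint8_t*)((addr >> 3) + 0x7fff8000); /* one shadow byte per 8 bytes */
  }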

This patch helps an ASan-enabled rr build pass the following tests:
  nested_detach
  nested_detach_kill
  nested_detach_kill_stuck
  nested_detach_wait
  nested_release

It avoids this error message:
  ERROR: AddressSanitizer: requested allocation size 0x20000000000 (0x20000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)
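(Decoded: the requested 0x20000000000 bytes are 2 TiB of shadow in a single allocation, double ASan's maximum supported allocation size of 0x10000000000 bytes = 1 TiB.)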

Changes in V5:
- Apply more of the suggested readability improvements.
- Avoid a possible underflow issue.
bernhardu committed Apr 29, 2023
1 parent a28cc29 commit 5c1c898
Showing 3 changed files with 145 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -934,6 +934,7 @@ set(BASIC_TESTS
daemon
desched_blocking_poll
desched_sigkill
detach_huge_mmap
detach_state
detach_threads
detach_sigkill
70 changes: 70 additions & 0 deletions src/Task.cc
@@ -3696,6 +3696,66 @@ static void copy_mem_mapping(Task* from, Task* to, const KernelMapping& km) {
}
}

// https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/fs/proc/task_mmu.c?h=v6.3#n1352
#define PM_PRESENT (1ULL << 63)
#define PM_SWAP (1ULL << 62)

static bool copy_mem_mapping_just_used(Task* from, Task* to,
                                       const KernelMapping& km) {
  ScopedFd& fd = from->pagemap_fd();
  if (!fd.is_open()) {
    LOG(debug) << "Failed to open " << from->proc_pagemap_path();
    return false;
  }

  size_t pagesize = page_size();
  uint64_t pages_present = 0; // Just for logging

  const int max_buf_size = 65536;
  vector<uint64_t> buf;

  for (uintptr_t page_offset = 0; page_offset < km.size() / pagesize;
       page_offset += max_buf_size) {
    auto page_read_offset = km.start().as_int() / pagesize + page_offset;
    size_t page_read_count = min<size_t>(max_buf_size, km.size() / pagesize - page_offset);
    buf.resize(page_read_count);
    size_t bytes_read = pread(fd, buf.data(), page_read_count * sizeof(uint64_t),
                              page_read_offset * sizeof(uint64_t));
    ASSERT(from, bytes_read == page_read_count * sizeof(uint64_t));

    // A chunk of pagemap was read above; now iterate through it to detect
    // whether memory is physically present (bit 63, PM_PRESENT) or in swap
    // (bit 62, PM_SWAP) in Task "from". If so, transfer just those pages to
    // the new Task "to", coalescing runs of consecutive used pages into a
    // single copy operation. The file /proc/PID/pagemap consists of 64-bit
    // values, each describing the state of one page.
    // See https://www.kernel.org/doc/Documentation/vm/pagemap.txt
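    // The entry for the page containing virtual address V sits at byte offset
    // (V / pagesize) * sizeof(uint64_t) in that file, which is why the pread()
    // above scales both its offset and its count by sizeof(uint64_t).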

    for (size_t page = 0; page < page_read_count; ++page) {
      if (buf[page] & (PM_PRESENT | PM_SWAP)) {
        auto start = km.start() + (page_offset + page) * pagesize;
        if (start >= km.end()) {
          break;
        }
        ++pages_present;

        // Check for consecutive used pages
        while (page + 1 < page_read_count &&
               buf[page + 1] & (PM_PRESENT | PM_SWAP)) {
          ++page;
          ++pages_present;
        }

        auto end = km.start() + (page_offset + page + 1) * pagesize;
        LOG(debug) << km << " copying start: 0x" << hex << start << " end: 0x" << end
                   << dec << " pages: " << (end - start) / pagesize;
        auto pages = km.subrange(start, end);
        copy_mem_mapping(from, to, pages);
      }
    }
  }
  LOG(debug) << km << " pages_present: " << pages_present
             << " pages_total: " << km.size() / pagesize;
  return true;
}

static void move_vdso_mapping(AutoRemoteSyscalls &remote, const KernelMapping &km) {
  for (const auto& m : remote.task()->vm()->maps()) {
    if (m.map.is_vdso() && m.map.start() != km.start()) {
@@ -3783,6 +3843,16 @@ void Task::dup_from(Task *other) {
    create_mapping(this, remote_this, km);
    LOG(debug) << "Copying mapping into " << tid;
    if (!(km.flags() & MAP_SHARED)) {
      // Make the effort only for bigger mappings; copy smaller ones as a whole.
      if ((km.flags() & MAP_ANONYMOUS) &&
          km.size() >= 0x400000 /*4MB*/) {
        LOG(debug) << "Using copy_mem_mapping_just_used";
        if (copy_mem_mapping_just_used(other, this, km)) {
          continue;
        }
        LOG(debug) << "Fallback to copy_mem_mapping";
      }
      copy_mem_mapping(other, this, km);
    }
  }
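For reference, a minimal self-contained sketch (not part of this commit) of the same pagemap query for a single address in the current process; the helper name is illustrative:

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  #define PM_PRESENT (1ULL << 63)
  #define PM_SWAP (1ULL << 62)

  /* Returns 1 if the page containing addr is present in RAM or swap,
   * 0 if it was never touched, -1 on error. */
  static int page_in_use(uintptr_t addr) {
    uint64_t entry;
    long pagesize = sysconf(_SC_PAGESIZE);
    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0)
      return -1;
    ssize_t n = pread(fd, &entry, sizeof(entry),
                      (off_t)(addr / pagesize) * sizeof(entry));
    close(fd);
    if (n != (ssize_t)sizeof(entry))
      return -1;
    return (entry & (PM_PRESENT | PM_SWAP)) ? 1 : 0;
  }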
74 changes: 74 additions & 0 deletions src/test/detach_huge_mmap.c
@@ -0,0 +1,74 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "util_internal.h"

static const int magic = 0xab;
static uint64_t size = 0x400000; /* 4 MB, at least the threshold in Task::dup_from */
static size_t page_size;
static void* pages[10];
static unsigned int idx; /* next free index into pages */

void test_alloc(char* mem, unsigned int count, off_t offset) {
  test_assert(0 == munmap(mem + size, page_size));

  /* one page near the start */
  test_assert(idx < sizeof(pages)/sizeof(pages[0]));
  pages[idx] = mem + page_size;
  memset(pages[idx], magic, page_size);
  idx++;

  /* one or more pages near or at the end */
  for (unsigned int i = 0; i < count; i++) {
    test_assert(idx < sizeof(pages)/sizeof(pages[0]));
    pages[idx] = mem + offset + i * page_size;
    memset(pages[idx], magic, page_size);
    idx++;
  }
}

int main(void) {
  page_size = sysconf(_SC_PAGESIZE);

  /* Create one big mapping, then break it up by munmap
   * into smaller ones, to better test the handling at
   * the ends of mappings. */

  void* mem1 = mmap(NULL, 4 * (size + page_size), PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  test_assert(mem1 != MAP_FAILED);

  void* mem2 = mem1 + size + page_size;
  void* mem3 = mem2 + size + page_size;
  void* mem4 = mem3 + size + page_size;

  test_alloc(mem1, 1, size - page_size);     /* one page used at the last page */
  test_alloc(mem2, 1, size - page_size * 2); /* one page used before the last page */
  test_alloc(mem3, 2, size - page_size * 2); /* two consecutive pages at the last two pages */
  test_alloc(mem4, 2, size - page_size * 3); /* two consecutive pages ending before the last page */

  pid_t pid = fork();
  if (pid == 0) {
    if (running_under_rr()) {
      rr_detach_teleport();
    }

    /* create a reference buffer for easier comparison */
    char* cmp = malloc(page_size * 3);
    test_assert(cmp != NULL);
    memset(cmp, magic, page_size * 3);

    /* check that the saved pages have the expected value */
    for (unsigned int i = 0; i < idx; i++) {
      test_assert(memcmp(pages[i], cmp, page_size) == 0);
    }

    return 0;
  }

  int status;
  wait(&status);
  test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
  atomic_puts("EXIT-SUCCESS");
  return 0;
}
