Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ ifeq (,$(filter $(CONFIG_LABEL_MEMORY),true false))
$(error CONFIG_LABEL_MEMORY must be true or false)
endif

# Abort the build unless CONFIG_GUARD_PAGES_USE_MADVISE is exactly "true" or
# "false": $(filter ...) expands to the empty string for any other value,
# which makes the ifeq comparison against the empty string succeed.
ifeq (,$(filter $(CONFIG_GUARD_PAGES_USE_MADVISE),true false))
$(error CONFIG_GUARD_PAGES_USE_MADVISE must be true or false)
endif

CPPFLAGS += \
-DCONFIG_SEAL_METADATA=$(CONFIG_SEAL_METADATA) \
-DZERO_ON_FREE=$(CONFIG_ZERO_ON_FREE) \
Expand All @@ -113,7 +117,8 @@ CPPFLAGS += \
-DN_ARENA=$(CONFIG_N_ARENA) \
-DCONFIG_STATS=$(CONFIG_STATS) \
-DCONFIG_SELF_INIT=$(CONFIG_SELF_INIT) \
-DCONFIG_LABEL_MEMORY=$(CONFIG_LABEL_MEMORY)
-DCONFIG_LABEL_MEMORY=$(CONFIG_LABEL_MEMORY) \
-DGUARD_PAGES_USE_MADVISE=$(CONFIG_GUARD_PAGES_USE_MADVISE)

$(OUT)/libhardened_malloc$(SUFFIX).so: $(OBJECTS) | $(OUT)
$(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@
Expand All @@ -125,7 +130,7 @@ $(OUT)/chacha.o: chacha.c chacha.h util.h $(CONFIG_FILE) | $(OUT)
$(COMPILE.c) $(OUTPUT_OPTION) $<
$(OUT)/h_malloc.o: h_malloc.c include/h_malloc.h mutex.h memory.h pages.h random.h util.h $(CONFIG_FILE) | $(OUT)
$(COMPILE.c) $(OUTPUT_OPTION) $<
$(OUT)/memory.o: memory.c memory.h util.h $(CONFIG_FILE) | $(OUT)
$(OUT)/memory.o: memory.c memory.h pages.h util.h $(CONFIG_FILE) | $(OUT)
$(COMPILE.c) $(OUTPUT_OPTION) $<
$(OUT)/new.o: new.cc include/h_malloc.h util.h $(CONFIG_FILE) | $(OUT)
$(COMPILE.cc) $(OUTPUT_OPTION) $<
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,19 @@ The following integer configuration options are available:
allocations use the slab allocation size class scheme instead of page size
granularity. See the [section on size classes](#size-classes) below for
details.
* `CONFIG_GUARD_PAGES_USE_MADVISE`: `false` (default) or `true` to control whether the
guard regions for large allocations are created with `MADV_GUARD_INSTALL`
(Linux 6.13+) inside a single read-write mapping instead of separate
`PROT_NONE` mappings. This keeps each large allocation to a single VMA instead
of three, which substantially reduces VMA pressure on systems making many
large allocations. The kernel feature is probed at runtime with a fallback to
the `PROT_NONE` scheme when it is unavailable; the probe trusts the `madvise`
return value, so the feature must be validated on a real kernel (qemu-user, for
example, silently ignores the advice). It is off by default because
the guarded bytes are then accounted as committed memory (resident memory and
the total address space are unchanged, since the guard pages are never backed
and the same amount of address space is reserved either way), which can be a
problem under strict overcommit (`vm.overcommit_memory=2`).

There will be more control over enabled features in the future along with
control over fairly arbitrarily chosen values like the size of empty slab
Expand Down
1 change: 1 addition & 0 deletions config/default.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true
CONFIG_LABEL_MEMORY := false
CONFIG_GUARD_PAGES_USE_MADVISE := false
1 change: 1 addition & 0 deletions config/light.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true
CONFIG_LABEL_MEMORY := false
CONFIG_GUARD_PAGES_USE_MADVISE := false
8 changes: 6 additions & 2 deletions h_malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1019,10 +1019,13 @@ static void regions_quarantine_deallocate_pages(void *p, size_t size, size_t gua
return;
}

if (unlikely(memory_map_fixed(p, size))) {
if (unlikely(memory_guard_or_protnone(p, size))) {
if (unlikely(memory_purge(p, size))) {
memset(p, 0, size);
}
} else if (GUARD_PAGES_USE_MADVISE) {
// name the whole region so labeling (PR_SET_VMA_ANON_NAME) keeps it a single VMA
memory_set_name((char *)p - guard_size, size + guard_size * 2, "malloc large quarantine");
} else {
memory_set_name(p, size, "malloc large quarantine");
}
Expand Down Expand Up @@ -1596,7 +1599,7 @@ EXPORT void *h_realloc(void *old, size_t size) {
// in-place shrink
if (size < old_size) {
void *new_end = (char *)old + size;
if (memory_map_fixed(new_end, old_guard_size)) {
if (memory_guard_or_protnone(new_end, old_guard_size)) {
thread_seal_metadata();
return NULL;
}
Expand All @@ -1619,6 +1622,7 @@ EXPORT void *h_realloc(void *old, size_t size) {

#ifdef HAVE_COMPATIBLE_MREMAP
static const bool vma_merging_reliable = false;
// not updated for the madvise guard scheme; revisit guard handling before enabling
if (vma_merging_reliable) {
// in-place growth
void *guard_end = (char *)old + old_size + old_guard_size;
Expand Down
60 changes: 60 additions & 0 deletions memory.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
#include <errno.h>
#include <stdatomic.h>

#include <sys/mman.h>

#include <sys/prctl.h>

#ifndef MADV_GUARD_INSTALL
#define MADV_GUARD_INSTALL 102
#endif

#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41
#endif
Expand All @@ -13,6 +18,7 @@
#endif

#include "memory.h"
#include "pages.h"
#include "util.h"

static void *memory_map_prot(size_t size, int prot) {
Expand All @@ -30,6 +36,10 @@ void *memory_map(size_t size) {
return memory_map_prot(size, PROT_NONE);
}

// Create a mapping of `size` bytes that is readable and writable from the
// start. The result is whatever memory_map_prot() returns for that protection.
void *memory_map_rw(size_t size) {
    const int prot = PROT_READ | PROT_WRITE;
    return memory_map_prot(size, prot);
}

#ifdef HAS_ARM_MTE
// Note that PROT_MTE can't be cleared via mprotect
void *memory_map_mte(size_t size) {
Expand Down Expand Up @@ -117,6 +127,56 @@ bool memory_purge(void *ptr, size_t size) {
return ret;
}

// Cached, process-wide answer to "does MADV_GUARD_INSTALL work here?".
// 0 = unknown, 1 = supported, -1 = unsupported/disabled
// Static storage zero-initializes it, so it starts out as "unknown". It is set
// to 1 or -1 by the probe in memory_guard_install_supported() and forced to -1
// by memory_guard_install() when madvise fails with EINVAL. All accesses use
// relaxed atomics.
static atomic_int guard_install_state;

// Turn [ptr, ptr + size) into a guard region with madvise(MADV_GUARD_INSTALL).
//
// Returns false on success and true on failure. On failure the caller's errno
// is restored to the value it had on entry.
//
// An EINVAL failure means the mapping can't be guarded (VM_LOCKED via
// mlockall(MCL_FUTURE), which locks all future mappings), so the feature is
// latched off for the rest of the process. ENOMEM/EINTR are transient and
// leave the cached state untouched.
bool memory_guard_install(void *ptr, size_t size) {
    const int entry_errno = errno;
    const int rc = madvise(ptr, size, MADV_GUARD_INSTALL);
    if (likely(rc == 0)) {
        return false;
    }

    // read errno before restoring it; the atomic store below does not touch errno
    const bool permanent = errno == EINVAL;
    errno = entry_errno;
    if (permanent) {
        atomic_store_explicit(&guard_install_state, -1, memory_order_relaxed);
    }
    return true;
}

// Report whether MADV_GUARD_INSTALL can be used, probing the kernel on first
// use and caching the answer in guard_install_state.
//
// The probe maps one scratch page, tries to install a guard on it and unmaps
// it again. errno is restored to its entry value on every path that probes.
//
// Returns true only when the cached or freshly probed state is "supported".
// Note that any failure of the probe madvise, whatever its errno, is cached
// as "unsupported".
bool memory_guard_install_supported(void) {
    const int cached = atomic_load_explicit(&guard_install_state, memory_order_relaxed);
    if (likely(cached != 0)) {
        return cached > 0;
    }

    const int entry_errno = errno;
    void *probe = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    if (probe == MAP_FAILED) {
        // a transient mmap failure must not be cached as "unsupported";
        // leave the state unknown so a later call probes again
        errno = entry_errno;
        return false;
    }

    const bool installed = madvise(probe, PAGE_SIZE, MADV_GUARD_INSTALL) == 0;
    munmap(probe, PAGE_SIZE);
    errno = entry_errno;

    // publish the verdict only if the state is still unknown, so a concurrent
    // EINVAL latch from memory_guard_install is not clobbered
    const int verdict = installed ? 1 : -1;
    int observed = 0;
    if (atomic_compare_exchange_strong_explicit(&guard_install_state, &observed, verdict,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return installed;
    }
    // lost the race: report the state that was already stored
    return observed > 0;
}

// Make [ptr, ptr + size) inaccessible.
//
// When the madvise scheme is compiled in and reported as supported, the range
// is guarded in place so it stays within a single VMA. If that is not possible
// (scheme off, unsupported, or the guard install fails) the range is handed to
// memory_map_fixed() instead, i.e. the PROT_NONE remap.
//
// Returns false on success and true on failure.
bool memory_guard_or_protnone(void *ptr, size_t size) {
    const bool try_madvise = GUARD_PAGES_USE_MADVISE && memory_guard_install_supported();
    if (try_madvise && !memory_guard_install(ptr, size)) {
        return false;
    }
    return memory_map_fixed(ptr, size);
}

bool memory_set_name(UNUSED void *ptr, UNUSED size_t size, UNUSED const char *name) {
if (CONFIG_LABEL_MEMORY) {
return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, name);
Expand Down
8 changes: 8 additions & 0 deletions memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,17 @@
#define CONFIG_LABEL_MEMORY false
#endif

#ifndef GUARD_PAGES_USE_MADVISE
#define GUARD_PAGES_USE_MADVISE false
#endif

int get_metadata_key(void);

void *memory_map(size_t size);
// Map `size` bytes with PROT_READ|PROT_WRITE.
void *memory_map_rw(size_t size);
// Install a guard over [ptr, ptr + size) with madvise(MADV_GUARD_INSTALL).
// Returns false on success, true on failure; errno is preserved on failure.
bool memory_guard_install(void *ptr, size_t size);
// Runtime probe (cached after the first conclusive result) for whether
// MADV_GUARD_INSTALL is usable.
bool memory_guard_install_supported(void);
// Guard the range in place when the madvise scheme is enabled and supported,
// otherwise (or if that fails) fall back to memory_map_fixed().
// Returns false on success, true on failure.
bool memory_guard_or_protnone(void *ptr, size_t size);
#ifdef HAS_ARM_MTE
void *memory_map_mte(size_t size);
#endif
Expand Down
72 changes: 50 additions & 22 deletions pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@ void *allocate_pages(size_t usable_size, size_t guard_size, bool unprotect, cons
errno = ENOMEM;
return NULL;
}
// MADV_GUARD_INSTALL needs page-aligned ranges
if (GUARD_PAGES_USE_MADVISE && unprotect && (usable_size & (PAGE_SIZE - 1)) == 0 &&
memory_guard_install_supported()) {
void *guarded = memory_map_rw(real_size);
if (likely(guarded != NULL)) {
memory_set_name(guarded, real_size, name);
void *usable = (char *)guarded + guard_size;
// on guard-install failure, fall back to the PROT_NONE scheme below
if (!guard_size || (!memory_guard_install(guarded, guard_size) &&
!memory_guard_install((char *)usable + usable_size, guard_size))) {
return usable;
}
memory_unmap(guarded, real_size);
}
}

void *real = memory_map(real_size);
if (unlikely(real == NULL)) {
return NULL;
Expand Down Expand Up @@ -48,38 +64,50 @@ void *allocate_pages_aligned(size_t usable_size, size_t alignment, size_t guard_
return NULL;
}

void *real = memory_map(real_alloc_size);
if (unlikely(real == NULL)) {
return NULL;
}
memory_set_name(real, real_alloc_size, name);
bool use_madvise = GUARD_PAGES_USE_MADVISE && memory_guard_install_supported();

void *usable = (char *)real + guard_size;
for (;;) {
void *real = use_madvise ? memory_map_rw(real_alloc_size) : memory_map(real_alloc_size);
if (unlikely(real == NULL)) {
return NULL;
}
memory_set_name(real, real_alloc_size, name);

size_t lead_size = align((uintptr_t)usable, alignment) - (uintptr_t)usable;
size_t trail_size = alloc_size - lead_size - usable_size;
void *base = (char *)usable + lead_size;
void *usable = (char *)real + guard_size;

if (unlikely(memory_protect_rw(base, usable_size))) {
memory_unmap(real, real_alloc_size);
return NULL;
}
size_t lead_size = align((uintptr_t)usable, alignment) - (uintptr_t)usable;
size_t trail_size = alloc_size - lead_size - usable_size;
void *base = (char *)usable + lead_size;

if (lead_size) {
if (unlikely(memory_unmap(real, lead_size))) {
if (!use_madvise && unlikely(memory_protect_rw(base, usable_size))) {
memory_unmap(real, real_alloc_size);
return NULL;
}
}

if (trail_size) {
if (unlikely(memory_unmap((char *)base + usable_size + guard_size, trail_size))) {
memory_unmap(real, real_alloc_size);
return NULL;
if (lead_size) {
if (unlikely(memory_unmap(real, lead_size))) {
memory_unmap(real, real_alloc_size);
return NULL;
}
}
}

return base;
if (trail_size) {
if (unlikely(memory_unmap((char *)base + usable_size + guard_size, trail_size))) {
memory_unmap(real, real_alloc_size);
return NULL;
}
}

// on guard-install failure, retry with the PROT_NONE scheme
if (use_madvise && guard_size && (unlikely(memory_guard_install((char *)base - guard_size, guard_size)) ||
unlikely(memory_guard_install((char *)base + usable_size, guard_size)))) {
memory_unmap((char *)base - guard_size, usable_size + guard_size * 2);
use_madvise = false;
continue;
}

return base;
}
}

void deallocate_pages(void *usable, size_t usable_size, size_t guard_size) {
Expand Down