add initial implementation of arenas

pull/87/head
Daniel Micay 2019-03-25 14:49:50 -04:00
parent 55769496dc
commit c5e911419d
4 changed files with 99 additions and 56 deletions

@@ -24,6 +24,7 @@ common_cflags = [
     "-DREGION_QUARANTINE_SKIP_THRESHOLD=33554432", // 32MiB
     "-DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=32",
     "-DCONFIG_CLASS_REGION_SIZE=1073741824", // 1GiB
+    "-DN_ARENA=1",
 ]

 cc_defaults {

@@ -15,6 +15,7 @@ CONFIG_REGION_QUARANTINE_QUEUE_LENGTH := 1024
 CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD := 33554432 # 32MiB
 CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH := 32
 CONFIG_CLASS_REGION_SIZE := 137438953472 # 128GiB
+CONFIG_N_ARENA = 1

 define safe_flag
 $(shell $(CC) -E $1 - </dev/null >/dev/null 2>&1 && echo $1 || echo $2)
@@ -79,7 +80,8 @@ CPPFLAGS += \
 	-DREGION_QUARANTINE_QUEUE_LENGTH=$(CONFIG_REGION_QUARANTINE_QUEUE_LENGTH) \
 	-DREGION_QUARANTINE_SKIP_THRESHOLD=$(CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD) \
 	-DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=$(CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH) \
-	-DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE)
+	-DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE) \
+	-DN_ARENA=$(CONFIG_N_ARENA)

 libhardened_malloc.so: $(OBJECTS)
 	$(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@

@@ -171,6 +171,7 @@ for the chosen values are not written yet, so use them at your own peril:
   number of slots in the random array used to randomize free slab reuse
 * `CONFIG_CLASS_REGION_SIZE`: `34359738368` (default) to control the size of
   the size class regions
+* `CONFIG_N_ARENA`: `1` (default) to control the number of arenas

 There will be more control over enabled features in the future along with
 control over fairly arbitrarily chosen values like the size of empty slab

@@ -4,6 +4,7 @@
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
+#include <threads.h>

 #include <malloc.h>
 #include <pthread.h>
@@ -56,13 +57,22 @@ static_assert(MREMAP_MOVE_THRESHOLD >= REGION_QUARANTINE_SKIP_THRESHOLD,
 // either sizeof(u64) or 0
 static const size_t canary_size = SLAB_CANARY ? sizeof(u64) : 0;

+static_assert(N_ARENA >= 1, "must have at least 1 arena");
+static_assert(N_ARENA <= 4, "currently only support up to 4 arenas (as an initial arbitrary limitation)");
+
 #define CACHELINE_SIZE 64

+#if N_ARENA > 1
+__attribute__((tls_model("initial-exec")))
+static thread_local unsigned thread_arena = N_ARENA;
+#else
+static const unsigned thread_arena = 0;
+#endif
+
 static union {
     struct {
         void *_Atomic slab_region_start;
         void *slab_region_end;
-        struct size_class *size_class_metadata;
+        struct size_class *size_class_metadata[N_ARENA];
         struct region_allocator *region_allocator;
         struct region_metadata *regions[2];
 #ifdef USE_PKEY
@@ -246,7 +256,8 @@ struct __attribute__((aligned(CACHELINE_SIZE))) size_class {
 #define CLASS_REGION_SIZE (size_t)CONFIG_CLASS_REGION_SIZE
 #define REAL_CLASS_REGION_SIZE (CLASS_REGION_SIZE * 2)
-static const size_t slab_region_size = REAL_CLASS_REGION_SIZE * N_SIZE_CLASSES;
+#define ARENA_SIZE (REAL_CLASS_REGION_SIZE * N_SIZE_CLASSES)
+static const size_t slab_region_size = ARENA_SIZE * N_ARENA;

 static_assert(PAGE_SIZE == 4096, "bitmap handling will need adjustment for other page sizes");

 static void *get_slab(struct size_class *c, size_t slab_size, struct slab_metadata *metadata) {
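
The new ARENA_SIZE macro makes the reserved slab region scale with the arena count. A rough sizing sketch follows; every constant in it is an example value chosen for illustration (the size-class count in particular is assumed here, not taken from this commit):

    // Rough sizing arithmetic only: all constants below are example values
    // for this sketch, not the build configuration above.
    #include <stdio.h>

    #define EXAMPLE_CLASS_REGION_SIZE (1ULL << 30)                          // 1GiB usable per size class
    #define EXAMPLE_REAL_CLASS_REGION_SIZE (EXAMPLE_CLASS_REGION_SIZE * 2)  // each class gets a slot twice that size
    #define EXAMPLE_N_SIZE_CLASSES 33                                       // assumed class count, for illustration
    #define EXAMPLE_N_ARENA 4

    int main(void) {
        unsigned long long arena_size = EXAMPLE_REAL_CLASS_REGION_SIZE * EXAMPLE_N_SIZE_CLASSES;
        unsigned long long slab_region_size = arena_size * EXAMPLE_N_ARENA;
        printf("per-arena reservation: %llu GiB\n", arena_size >> 30);
        printf("total slab region:     %llu GiB\n", slab_region_size >> 30);
        return 0;
    }

Compiled as-is it prints the per-arena and total reservation for those example values; the point is only that the address-space reservation grows linearly with N_ARENA.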
@@ -442,7 +453,14 @@ static u64 get_random_canary(struct random_state *rng) {
 static inline void *allocate_small(size_t requested_size) {
     struct size_info info = get_size_info(requested_size);
     size_t size = info.size ? info.size : 16;

-    struct size_class *c = &ro.size_class_metadata[info.class];
+#if N_ARENA > 1
+    if (unlikely(thread_arena == N_ARENA)) {
+        thread_arena = hash_page(&thread_arena) % N_ARENA;
+    }
+#endif
+
+    struct size_class *c = &ro.size_class_metadata[thread_arena][info.class];
     size_t slots = size_class_slots[info.class];
     size_t slab_size = get_slab_size(slots, size);
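
The hunk above assigns a thread to an arena lazily, on its first small allocation, with the value N_ARENA acting as a "not yet assigned" sentinel. The standalone sketch below illustrates that scheme; SKETCH_N_ARENA, pick_arena, worker and the mixer are stand-ins for this example only (the commit derives the index from hash_page(&thread_arena), whose definition is outside this diff):

    #include <stdint.h>
    #include <stdio.h>
    #include <threads.h>

    #define SKETCH_N_ARENA 4

    // SKETCH_N_ARENA doubles as the "not yet assigned" sentinel, as in the hunk above.
    static thread_local unsigned sketch_thread_arena = SKETCH_N_ARENA;

    static unsigned pick_arena(void) {
        if (sketch_thread_arena == SKETCH_N_ARENA) {
            // Derive a stable per-thread value from the address of the
            // thread-local itself; any cheap mixer is enough for illustration.
            uint64_t x = (uint64_t)(uintptr_t)&sketch_thread_arena;
            x ^= x >> 33;
            x *= 0xff51afd7ed558ccdULL;
            x ^= x >> 33;
            sketch_thread_arena = (unsigned)(x % SKETCH_N_ARENA);
        }
        return sketch_thread_arena;
    }

    static int worker(void *arg) {
        (void)arg;
        printf("assigned to arena %u\n", pick_arena());
        return 0;
    }

    int main(void) {
        thrd_t threads[3];
        for (int i = 0; i < 3; i++) {
            thrd_create(&threads[i], worker, NULL);
        }
        for (int i = 0; i < 3; i++) {
            thrd_join(threads[i], NULL);
        }
        return 0;
    }

Every thread after the first allocation keeps hitting the same arena, so its size class locks and caches stay separate from threads mapped to other arenas.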
@@ -545,13 +563,23 @@ static inline void *allocate_small(size_t requested_size) {
     return p;
 }

-static size_t slab_size_class(const void *p) {
+struct slab_size_class_info {
+    unsigned arena;
+    size_t class;
+};
+
+static struct slab_size_class_info slab_size_class(const void *p) {
     size_t offset = (const char *)p - (const char *)ro.slab_region_start;
-    return offset / REAL_CLASS_REGION_SIZE;
+    unsigned arena = 0;
+    if (N_ARENA > 1) {
+        arena = offset / ARENA_SIZE;
+        offset -= arena * ARENA_SIZE;
+    }
+    return (struct slab_size_class_info){arena, offset / REAL_CLASS_REGION_SIZE};
 }

 static size_t slab_usable_size(const void *p) {
-    return size_classes[slab_size_class(p)];
+    return size_classes[slab_size_class(p).class];
 }

 static void enqueue_free_slab(struct size_class *c, struct slab_metadata *metadata) {
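
slab_size_class() now recovers both the arena and the size class from a pointer's offset into the slab region, dividing first by ARENA_SIZE and then by REAL_CLASS_REGION_SIZE. A worked example of that arithmetic, with illustrative region sizes rather than the configured values:

    #include <stdio.h>

    #define SKETCH_REAL_CLASS_REGION_SIZE (2ULL << 30)   // example: 2GiB slot per size class
    #define SKETCH_N_SIZE_CLASSES 4                      // example count
    #define SKETCH_ARENA_SIZE (SKETCH_REAL_CLASS_REGION_SIZE * SKETCH_N_SIZE_CLASSES)

    int main(void) {
        // Pretend an allocation landed 1GiB into class 2 of arena 3.
        unsigned long long offset = 3 * SKETCH_ARENA_SIZE + 2 * SKETCH_REAL_CLASS_REGION_SIZE + (1ULL << 30);

        unsigned arena = (unsigned)(offset / SKETCH_ARENA_SIZE);
        offset -= arena * SKETCH_ARENA_SIZE;
        unsigned long long class = offset / SKETCH_REAL_CLASS_REGION_SIZE;

        printf("arena %u, size class %llu\n", arena, class); // prints "arena 3, size class 2"
        return 0;
    }

With N_ARENA == 1 the arena division is skipped and the function reduces to the previous single-arena behaviour.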
@@ -575,9 +603,10 @@ static void enqueue_free_slab(struct size_class *c, struct slab_metadata *metadata) {
 }

 static inline void deallocate_small(void *p, const size_t *expected_size) {
-    size_t class = slab_size_class(p);
-    struct size_class *c = &ro.size_class_metadata[class];
+    struct slab_size_class_info size_class_info = slab_size_class(p);
+    size_t class = size_class_info.class;
+    struct size_class *c = &ro.size_class_metadata[size_class_info.arena][class];
     size_t size = size_classes[class];
     if (expected_size && size != *expected_size) {
         fatal_error("sized deallocation mismatch (small)");
@@ -735,14 +764,14 @@ struct __attribute__((aligned(PAGE_SIZE))) slab_info_mapping {
 };

 struct __attribute__((aligned(PAGE_SIZE))) allocator_state {
-    struct size_class size_class_metadata[N_SIZE_CLASSES];
+    struct size_class size_class_metadata[N_ARENA][N_SIZE_CLASSES];
     struct region_allocator region_allocator;
     // padding until next page boundary for mprotect
     struct region_metadata regions_a[MAX_REGION_TABLE_SIZE] __attribute__((aligned(PAGE_SIZE)));
     // padding until next page boundary for mprotect
     struct region_metadata regions_b[MAX_REGION_TABLE_SIZE] __attribute__((aligned(PAGE_SIZE)));
     // padding until next page boundary for mprotect
-    struct slab_info_mapping slab_info_mapping[N_SIZE_CLASSES];
+    struct slab_info_mapping slab_info_mapping[N_ARENA][N_SIZE_CLASSES];
     // padding until next page boundary for mprotect
 };
@@ -891,8 +920,10 @@ static void regions_delete(struct region_metadata *region) {
 static void full_lock(void) {
     thread_unseal_metadata();
     mutex_lock(&ro.region_allocator->lock);
+    for (unsigned arena = 0; arena < N_ARENA; arena++) {
         for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
-            mutex_lock(&ro.size_class_metadata[class].lock);
+            mutex_lock(&ro.size_class_metadata[arena][class].lock);
+        }
     }
     thread_seal_metadata();
 }
@@ -900,8 +931,10 @@ static void full_lock(void) {
 static void full_unlock(void) {
     thread_unseal_metadata();
     mutex_unlock(&ro.region_allocator->lock);
+    for (unsigned arena = 0; arena < N_ARENA; arena++) {
         for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
-            mutex_unlock(&ro.size_class_metadata[class].lock);
+            mutex_unlock(&ro.size_class_metadata[arena][class].lock);
+        }
     }
     thread_seal_metadata();
 }
@@ -920,11 +953,13 @@ static void post_fork_child(void) {
     mutex_init(&ro.region_allocator->lock);
     random_state_init(&ro.region_allocator->rng);

+    for (unsigned arena = 0; arena < N_ARENA; arena++) {
         for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
-            struct size_class *c = &ro.size_class_metadata[class];
+            struct size_class *c = &ro.size_class_metadata[arena][class];
             mutex_init(&c->lock);
             random_state_init(&c->rng);
         }
+    }

     thread_seal_metadata();
 }
@@ -1004,16 +1039,17 @@ COLD static void init_slow_path(void) {
     ro.slab_region_end = (char *)slab_region_start + slab_region_size;
     memory_set_name(slab_region_start, slab_region_size, "malloc slab region gap");

-    ro.size_class_metadata = allocator_state->size_class_metadata;
+    for (unsigned arena = 0; arena < N_ARENA; arena++) {
+        ro.size_class_metadata[arena] = allocator_state->size_class_metadata[arena];
         for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
-            struct size_class *c = &ro.size_class_metadata[class];
+            struct size_class *c = &ro.size_class_metadata[arena][class];
             mutex_init(&c->lock);
             random_state_init(&c->rng);

             size_t bound = (REAL_CLASS_REGION_SIZE - CLASS_REGION_SIZE) / PAGE_SIZE - 1;
             size_t gap = (get_random_u64_uniform(rng, bound) + 1) * PAGE_SIZE;
-            c->class_region_start = (char *)slab_region_start + REAL_CLASS_REGION_SIZE * class + gap;
+            c->class_region_start = (char *)slab_region_start + ARENA_SIZE * arena + REAL_CLASS_REGION_SIZE * class + gap;
             memory_set_name(c->class_region_start, CLASS_REGION_SIZE, size_class_labels[class]);

             size_t size = size_classes[class];
@@ -1023,7 +1059,8 @@ COLD static void init_slow_path(void) {
             c->size_divisor = libdivide_u32_gen(size);
             size_t slab_size = get_slab_size(size_class_slots[class], size);
             c->slab_size_divisor = libdivide_u64_gen(slab_size);
-            c->slab_info = allocator_state->slab_info_mapping[class].slab_info;
+            c->slab_info = allocator_state->slab_info_mapping[arena][class].slab_info;
+        }
     }

     deallocate_pages(rng, sizeof(struct random_state), PAGE_SIZE);
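
Each class region is now placed at slab_region_start + ARENA_SIZE * arena + REAL_CLASS_REGION_SIZE * class, plus a random page-aligned gap bounded by REAL_CLASS_REGION_SIZE - CLASS_REGION_SIZE. The sketch below uses example sizes and a fixed stand-in for the random value to show that the gap plus the usable region always stays inside its slot, which is what keeps the division-based lookup in slab_size_class() valid:

    #include <assert.h>
    #include <stdio.h>

    #define SKETCH_PAGE_SIZE 4096ULL
    #define SKETCH_CLASS_REGION_SIZE (1ULL << 30)                          // example: 1GiB usable
    #define SKETCH_REAL_CLASS_REGION_SIZE (SKETCH_CLASS_REGION_SIZE * 2)   // slot is twice the usable size
    #define SKETCH_N_SIZE_CLASSES 4
    #define SKETCH_ARENA_SIZE (SKETCH_REAL_CLASS_REGION_SIZE * SKETCH_N_SIZE_CLASSES)

    int main(void) {
        unsigned long long base = 0; // stand-in for slab_region_start
        unsigned arena = 1, class = 2;

        unsigned long long bound = (SKETCH_REAL_CLASS_REGION_SIZE - SKETCH_CLASS_REGION_SIZE) / SKETCH_PAGE_SIZE - 1;
        unsigned long long gap = (12345 % bound + 1) * SKETCH_PAGE_SIZE; // drawn from the CSPRNG in the real code

        unsigned long long start = base + SKETCH_ARENA_SIZE * arena + SKETCH_REAL_CLASS_REGION_SIZE * class + gap;
        unsigned long long end = start + SKETCH_CLASS_REGION_SIZE;

        // gap + usable size never exceeds the slot, so dividing an offset by
        // ARENA_SIZE and REAL_CLASS_REGION_SIZE still finds the right arena and class.
        assert(end <= base + SKETCH_ARENA_SIZE * arena + SKETCH_REAL_CLASS_REGION_SIZE * (class + 1));
        printf("class region: [%llu, %llu)\n", start, end);
        return 0;
    }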
@@ -1490,9 +1527,10 @@ EXPORT int h_malloc_trim(UNUSED size_t pad) {
     bool is_trimmed = false;

+    for (unsigned arena = 0; arena < N_ARENA; arena++) {
         // skip zero byte size class since there's nothing to change
         for (unsigned class = 1; class < N_SIZE_CLASSES; class++) {
-            struct size_class *c = &ro.size_class_metadata[class];
+            struct size_class *c = &ro.size_class_metadata[arena][class];
             size_t slab_size = get_slab_size(size_class_slots[class], size_classes[class]);

             mutex_lock(&c->lock);
@@ -1515,6 +1553,7 @@ EXPORT int h_malloc_trim(UNUSED size_t pad) {
             c->empty_slabs = iterator;
             mutex_unlock(&c->lock);
         }
+    }

     thread_seal_metadata();