add initial implementation of arenas

pull/87/head
Daniel Micay 2019-03-25 14:49:50 -04:00
parent 55769496dc
commit c5e911419d
4 changed files with 99 additions and 56 deletions

View File

@ -24,6 +24,7 @@ common_cflags = [
"-DREGION_QUARANTINE_SKIP_THRESHOLD=33554432", // 32MiB
"-DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=32",
"-DCONFIG_CLASS_REGION_SIZE=1073741824", // 1GiB
"-DN_ARENA=1",
]
cc_defaults {

View File

@ -15,6 +15,7 @@ CONFIG_REGION_QUARANTINE_QUEUE_LENGTH := 1024
CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD := 33554432 # 32MiB
CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH := 32
CONFIG_CLASS_REGION_SIZE := 137438953472 # 128GiB
CONFIG_N_ARENA = 1
define safe_flag
$(shell $(CC) -E $1 - </dev/null >/dev/null 2>&1 && echo $1 || echo $2)
@ -79,7 +80,8 @@ CPPFLAGS += \
-DREGION_QUARANTINE_QUEUE_LENGTH=$(CONFIG_REGION_QUARANTINE_QUEUE_LENGTH) \
-DREGION_QUARANTINE_SKIP_THRESHOLD=$(CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD) \
-DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=$(CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH) \
-DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE)
-DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE) \
-DN_ARENA=$(CONFIG_N_ARENA)
libhardened_malloc.so: $(OBJECTS)
$(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@

View File

@ -171,6 +171,7 @@ for the chosen values are not written yet, so use them at your own peril:
number of slots in the random array used to randomize free slab reuse
* `CONFIG_CLASS_REGION_SIZE`: `34359738368` (default) to control the size of
the size class regions
* `CONFIG_N_ARENA`: `1` (default) to control the number of arenas
There will be more control over enabled features in the future along with
control over fairly arbitrarily chosen values like the size of empty slab

View File

@ -4,6 +4,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>
#include <malloc.h>
#include <pthread.h>
@ -56,13 +57,22 @@ static_assert(MREMAP_MOVE_THRESHOLD >= REGION_QUARANTINE_SKIP_THRESHOLD,
// either sizeof(u64) or 0
static const size_t canary_size = SLAB_CANARY ? sizeof(u64) : 0;
static_assert(N_ARENA >= 1, "must have at least 1 arena");
static_assert(N_ARENA <= 4, "currently only support up to 4 arenas (as an initial arbitrary limitation)");
#define CACHELINE_SIZE 64
#if N_ARENA > 1
__attribute__((tls_model("initial-exec")))
static thread_local unsigned thread_arena = N_ARENA;
#else
static const unsigned thread_arena = 0;
#endif
static union {
struct {
void *_Atomic slab_region_start;
void *slab_region_end;
struct size_class *size_class_metadata;
struct size_class *size_class_metadata[N_ARENA];
struct region_allocator *region_allocator;
struct region_metadata *regions[2];
#ifdef USE_PKEY
@ -246,7 +256,8 @@ struct __attribute__((aligned(CACHELINE_SIZE))) size_class {
#define CLASS_REGION_SIZE (size_t)CONFIG_CLASS_REGION_SIZE
#define REAL_CLASS_REGION_SIZE (CLASS_REGION_SIZE * 2)
static const size_t slab_region_size = REAL_CLASS_REGION_SIZE * N_SIZE_CLASSES;
#define ARENA_SIZE (REAL_CLASS_REGION_SIZE * N_SIZE_CLASSES)
static const size_t slab_region_size = ARENA_SIZE * N_ARENA;
static_assert(PAGE_SIZE == 4096, "bitmap handling will need adjustment for other page sizes");
static void *get_slab(struct size_class *c, size_t slab_size, struct slab_metadata *metadata) {
@ -442,7 +453,14 @@ static u64 get_random_canary(struct random_state *rng) {
static inline void *allocate_small(size_t requested_size) {
struct size_info info = get_size_info(requested_size);
size_t size = info.size ? info.size : 16;
struct size_class *c = &ro.size_class_metadata[info.class];
#if N_ARENA > 1
if (unlikely(thread_arena == N_ARENA)) {
thread_arena = hash_page(&thread_arena) % N_ARENA;
}
#endif
struct size_class *c = &ro.size_class_metadata[thread_arena][info.class];
size_t slots = size_class_slots[info.class];
size_t slab_size = get_slab_size(slots, size);
@ -545,13 +563,23 @@ static inline void *allocate_small(size_t requested_size) {
return p;
}
static size_t slab_size_class(const void *p) {
struct slab_size_class_info {
unsigned arena;
size_t class;
};
static struct slab_size_class_info slab_size_class(const void *p) {
size_t offset = (const char *)p - (const char *)ro.slab_region_start;
return offset / REAL_CLASS_REGION_SIZE;
unsigned arena = 0;
if (N_ARENA > 1) {
arena = offset / ARENA_SIZE;
offset -= arena * ARENA_SIZE;
}
return (struct slab_size_class_info){arena, offset / REAL_CLASS_REGION_SIZE};
}
static size_t slab_usable_size(const void *p) {
return size_classes[slab_size_class(p)];
return size_classes[slab_size_class(p).class];
}
static void enqueue_free_slab(struct size_class *c, struct slab_metadata *metadata) {
@ -575,9 +603,10 @@ static void enqueue_free_slab(struct size_class *c, struct slab_metadata *metada
}
static inline void deallocate_small(void *p, const size_t *expected_size) {
size_t class = slab_size_class(p);
struct slab_size_class_info size_class_info = slab_size_class(p);
size_t class = size_class_info.class;
struct size_class *c = &ro.size_class_metadata[class];
struct size_class *c = &ro.size_class_metadata[size_class_info.arena][class];
size_t size = size_classes[class];
if (expected_size && size != *expected_size) {
fatal_error("sized deallocation mismatch (small)");
@ -735,14 +764,14 @@ struct __attribute__((aligned(PAGE_SIZE))) slab_info_mapping {
};
struct __attribute__((aligned(PAGE_SIZE))) allocator_state {
struct size_class size_class_metadata[N_SIZE_CLASSES];
struct size_class size_class_metadata[N_ARENA][N_SIZE_CLASSES];
struct region_allocator region_allocator;
// padding until next page boundary for mprotect
struct region_metadata regions_a[MAX_REGION_TABLE_SIZE] __attribute__((aligned(PAGE_SIZE)));
// padding until next page boundary for mprotect
struct region_metadata regions_b[MAX_REGION_TABLE_SIZE] __attribute__((aligned(PAGE_SIZE)));
// padding until next page boundary for mprotect
struct slab_info_mapping slab_info_mapping[N_SIZE_CLASSES];
struct slab_info_mapping slab_info_mapping[N_ARENA][N_SIZE_CLASSES];
// padding until next page boundary for mprotect
};
@ -891,8 +920,10 @@ static void regions_delete(struct region_metadata *region) {
static void full_lock(void) {
thread_unseal_metadata();
mutex_lock(&ro.region_allocator->lock);
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
mutex_lock(&ro.size_class_metadata[class].lock);
for (unsigned arena = 0; arena < N_ARENA; arena++) {
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
mutex_lock(&ro.size_class_metadata[arena][class].lock);
}
}
thread_seal_metadata();
}
@ -900,8 +931,10 @@ static void full_lock(void) {
static void full_unlock(void) {
thread_unseal_metadata();
mutex_unlock(&ro.region_allocator->lock);
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
mutex_unlock(&ro.size_class_metadata[class].lock);
for (unsigned arena = 0; arena < N_ARENA; arena++) {
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
mutex_unlock(&ro.size_class_metadata[arena][class].lock);
}
}
thread_seal_metadata();
}
@ -920,10 +953,12 @@ static void post_fork_child(void) {
mutex_init(&ro.region_allocator->lock);
random_state_init(&ro.region_allocator->rng);
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[class];
mutex_init(&c->lock);
random_state_init(&c->rng);
for (unsigned arena = 0; arena < N_ARENA; arena++) {
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[arena][class];
mutex_init(&c->lock);
random_state_init(&c->rng);
}
}
thread_seal_metadata();
}
@ -1004,26 +1039,28 @@ COLD static void init_slow_path(void) {
ro.slab_region_end = (char *)slab_region_start + slab_region_size;
memory_set_name(slab_region_start, slab_region_size, "malloc slab region gap");
ro.size_class_metadata = allocator_state->size_class_metadata;
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[class];
for (unsigned arena = 0; arena < N_ARENA; arena++) {
ro.size_class_metadata[arena] = allocator_state->size_class_metadata[arena];
for (unsigned class = 0; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[arena][class];
mutex_init(&c->lock);
random_state_init(&c->rng);
mutex_init(&c->lock);
random_state_init(&c->rng);
size_t bound = (REAL_CLASS_REGION_SIZE - CLASS_REGION_SIZE) / PAGE_SIZE - 1;
size_t gap = (get_random_u64_uniform(rng, bound) + 1) * PAGE_SIZE;
c->class_region_start = (char *)slab_region_start + REAL_CLASS_REGION_SIZE * class + gap;
memory_set_name(c->class_region_start, CLASS_REGION_SIZE, size_class_labels[class]);
size_t bound = (REAL_CLASS_REGION_SIZE - CLASS_REGION_SIZE) / PAGE_SIZE - 1;
size_t gap = (get_random_u64_uniform(rng, bound) + 1) * PAGE_SIZE;
c->class_region_start = (char *)slab_region_start + ARENA_SIZE * arena + REAL_CLASS_REGION_SIZE * class + gap;
memory_set_name(c->class_region_start, CLASS_REGION_SIZE, size_class_labels[class]);
size_t size = size_classes[class];
if (size == 0) {
size = 16;
size_t size = size_classes[class];
if (size == 0) {
size = 16;
}
c->size_divisor = libdivide_u32_gen(size);
size_t slab_size = get_slab_size(size_class_slots[class], size);
c->slab_size_divisor = libdivide_u64_gen(slab_size);
c->slab_info = allocator_state->slab_info_mapping[arena][class].slab_info;
}
c->size_divisor = libdivide_u32_gen(size);
size_t slab_size = get_slab_size(size_class_slots[class], size);
c->slab_size_divisor = libdivide_u64_gen(slab_size);
c->slab_info = allocator_state->slab_info_mapping[class].slab_info;
}
deallocate_pages(rng, sizeof(struct random_state), PAGE_SIZE);
@ -1490,30 +1527,32 @@ EXPORT int h_malloc_trim(UNUSED size_t pad) {
bool is_trimmed = false;
// skip zero byte size class since there's nothing to change
for (unsigned class = 1; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[class];
size_t slab_size = get_slab_size(size_class_slots[class], size_classes[class]);
for (unsigned arena = 0; arena < N_ARENA; arena++) {
// skip zero byte size class since there's nothing to change
for (unsigned class = 1; class < N_SIZE_CLASSES; class++) {
struct size_class *c = &ro.size_class_metadata[arena][class];
size_t slab_size = get_slab_size(size_class_slots[class], size_classes[class]);
mutex_lock(&c->lock);
struct slab_metadata *iterator = c->empty_slabs;
while (iterator) {
void *slab = get_slab(c, slab_size, iterator);
if (memory_map_fixed(slab, slab_size)) {
break;
mutex_lock(&c->lock);
struct slab_metadata *iterator = c->empty_slabs;
while (iterator) {
void *slab = get_slab(c, slab_size, iterator);
if (memory_map_fixed(slab, slab_size)) {
break;
}
memory_set_name(slab, slab_size, size_class_labels[class]);
struct slab_metadata *trimmed = iterator;
iterator = iterator->next;
c->empty_slabs_total -= slab_size;
enqueue_free_slab(c, trimmed);
is_trimmed = true;
}
memory_set_name(slab, slab_size, size_class_labels[class]);
struct slab_metadata *trimmed = iterator;
iterator = iterator->next;
c->empty_slabs_total -= slab_size;
enqueue_free_slab(c, trimmed);
is_trimmed = true;
c->empty_slabs = iterator;
mutex_unlock(&c->lock);
}
c->empty_slabs = iterator;
mutex_unlock(&c->lock);
}
thread_seal_metadata();