implement the option of large size classes

This extends the size class scheme used for slab allocations to large
allocations. This drastically improves performance for many real world
programs using incremental realloc growth instead of using proper growth
factors. There are 4 size classes for every doubling in size, resulting
in a worst case of ~20% extra virtual memory being reserved and a huge
increase in performance for pathological cases. For example, growing
from 4MiB to 8MiB by calling realloc in increments of 32 bytes will only
need to do work beyond looking up the size 4 times instead of 1024 times
with 4096 byte granularity.
pull/87/head
Daniel Micay 2019-04-07 08:04:06 -04:00
parent 7a7126e780
commit e0891c8cfc
4 changed files with 64 additions and 11 deletions

View File

@ -17,6 +17,7 @@ common_cflags = [
"-DSLAB_CANARY=true",
"-DSLAB_QUARANTINE_RANDOM_LENGTH=1",
"-DSLAB_QUARANTINE_QUEUE_LENGTH=1",
"-DCONFIG_LARGE_SIZE_CLASSES=true",
"-DGUARD_SLABS_INTERVAL=1",
"-DGUARD_SIZE_DIVISOR=2",
"-DREGION_QUARANTINE_RANDOM_LENGTH=128",

View File

@ -8,6 +8,7 @@ CONFIG_SLOT_RANDOMIZE := true
CONFIG_SLAB_CANARY := true
CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH := 1
CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH := 1
CONFIG_LARGE_SIZE_CLASSES := true
CONFIG_GUARD_SLABS_INTERVAL := 1
CONFIG_GUARD_SIZE_DIVISOR := 2
CONFIG_REGION_QUARANTINE_RANDOM_LENGTH := 128
@ -68,6 +69,10 @@ ifeq (,$(filter $(CONFIG_SLAB_CANARY),true false))
$(error CONFIG_SLAB_CANARY must be true or false)
endif
ifeq (,$(filter $(CONFIG_LARGE_SIZE_CLASSES),true false))
$(error CONFIG_LARGE_SIZE_CLASSES must be true or false)
endif
CPPFLAGS += \
-DZERO_ON_FREE=$(CONFIG_ZERO_ON_FREE) \
-DWRITE_AFTER_FREE_CHECK=$(CONFIG_WRITE_AFTER_FREE_CHECK) \
@ -75,6 +80,7 @@ CPPFLAGS += \
-DSLAB_CANARY=$(CONFIG_SLAB_CANARY) \
-DSLAB_QUARANTINE_RANDOM_LENGTH=$(CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH) \
-DSLAB_QUARANTINE_QUEUE_LENGTH=$(CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH) \
-DCONFIG_LARGE_SIZE_CLASSES=$(CONFIG_LARGE_SIZE_CLASSES) \
-DGUARD_SLABS_INTERVAL=$(CONFIG_GUARD_SLABS_INTERVAL) \
-DGUARD_SIZE_DIVISOR=$(CONFIG_GUARD_SIZE_DIVISOR) \
-DREGION_QUARANTINE_RANDOM_LENGTH=$(CONFIG_REGION_QUARANTINE_RANDOM_LENGTH) \

View File

@ -167,6 +167,9 @@ for the chosen values are not written yet, so use them at your own peril:
* `CONFIG_STATS`: `false` (default) to control whether stats on allocation /
deallocation count and active allocations are tracked. This is currently only
exposed via the mallinfo APIs on Android.
* `CONFIG_LARGE_SIZE_CLASSES`: `true` (default) to control whether large
allocations use the slab allocation size class scheme instead of page size
granularity (see the section on size classes below).
There will be more control over enabled features in the future along with
control over fairly arbitrarily chosen values like the size of empty slab
@ -400,6 +403,14 @@ size for 2048 byte spacing and the next spacing class matches the page size of
classes required to avoid substantial waste from rounding. Further slab
allocation size classes may be offered as an option in the future.
The `CONFIG_LARGE_SIZE_CLASSES` option controls whether large allocations use
the same size class scheme providing 4 size classes for every doubling of size.
It increases virtual memory consumption but drastically improves performance
where realloc is used without proper growth factors, which is fairly common and
destroys performance in some commonly used programs. If large size classes are
disabled, the granularity is instead the page size, which is currently always
4096 bytes on supported platforms.
## Scalability
### Small (slab) allocations

View File

@ -1123,11 +1123,37 @@ COLD __attribute__((constructor(101))) static void trigger_early_init(void) {
h_free(h_malloc(16));
}
// Returns 0 on overflow.
static size_t get_large_size_class(size_t size) {
if (CONFIG_LARGE_SIZE_CLASSES) {
// Continue small size class growth pattern of power of 2 spacing classes:
//
// 4 KiB [20 KiB, 24 KiB, 28 KiB, 32 KiB]
// 8 KiB [40 KiB, 48 KiB, 54 KiB, 64 KiB]
// 16 KiB [80 KiB, 96 KiB, 112 KiB, 128 KiB]
// 32 KiB [160 KiB, 192 KiB, 224 KiB, 256 KiB]
// 512 KiB [2560 KiB, 3 MiB, 3584 KiB, 4 MiB]
// 1 MiB [5 MiB, 6 MiB, 7 MiB, 8 MiB]
// etc.
size_t spacing_shift = 64 - __builtin_clzl(size - 1) - 3;
size_t spacing_class = 1ULL << spacing_shift;
return (size + (spacing_class - 1)) & ~(spacing_class - 1);
} else {
return PAGE_CEILING(size);
}
}
// Chooses the size in bytes of the guard region for an allocation of `size`
// bytes, drawing randomness from `state`.
//
// The result is always a whole number of pages and at least one page. The upper
// bound handed to the random number generator is the allocation's page count
// divided by GUARD_SIZE_DIVISOR.
// NOTE(review): presumably get_random_u64_uniform returns a value below the
// bound it is given, which would cap the guard at that many pages - confirm.
static size_t get_guard_size(struct random_state *state, size_t size) {
    size_t page_bound = size / PAGE_SIZE / GUARD_SIZE_DIVISOR;
    size_t guard_pages = get_random_u64_uniform(state, page_bound) + 1;
    return guard_pages * PAGE_SIZE;
}
static void *allocate_large(size_t size) {
size = get_large_size_class(size);
if (unlikely(!size)) {
errno = ENOMEM;
return NULL;
}
struct region_allocator *ra = ro.region_allocator;
mutex_lock(&ra->lock);
@ -1200,6 +1226,11 @@ static int alloc_aligned(void **memptr, size_t alignment, size_t size, size_t mi
return 0;
}
size = get_large_size_class(size);
if (unlikely(!size)) {
return ENOMEM;
}
struct region_allocator *ra = ro.region_allocator;
mutex_lock(&ra->lock);
@ -1277,6 +1308,14 @@ EXPORT void *h_realloc(void *old, size_t size) {
size = adjust_size_for_canaries(size);
if (size > max_slab_size_class) {
size = get_large_size_class(size);
if (unlikely(!size)) {
errno = ENOMEM;
return NULL;
}
}
size_t old_size;
if (old >= get_slab_region_start() && old < ro.slab_region_end) {
old_size = slab_usable_size(old);
@ -1297,28 +1336,24 @@ EXPORT void *h_realloc(void *old, size_t size) {
}
old_size = region->size;
size_t old_guard_size = region->guard_size;
if (PAGE_CEILING(old_size) == PAGE_CEILING(size)) {
region->size = size;
if (old_size == size) {
mutex_unlock(&ra->lock);
thread_seal_metadata();
return old;
}
mutex_unlock(&ra->lock);
size_t old_rounded_size = PAGE_CEILING(old_size);
size_t rounded_size = PAGE_CEILING(size);
if (size > max_slab_size_class) {
// in-place shrink
if (size < old_size) {
void *new_end = (char *)old + rounded_size;
void *new_end = (char *)old + size;
if (memory_map_fixed(new_end, old_guard_size)) {
thread_seal_metadata();
return NULL;
}
memory_set_name(new_end, old_guard_size, "malloc large");
void *new_guard_end = (char *)new_end + old_guard_size;
regions_quarantine_deallocate_pages(new_guard_end, old_rounded_size - rounded_size, 0);
regions_quarantine_deallocate_pages(new_guard_end, old_size - size, 0);
mutex_lock(&ra->lock);
struct region_metadata *region = regions_find(old);
@ -1333,10 +1368,10 @@ EXPORT void *h_realloc(void *old, size_t size) {
}
// in-place growth
void *guard_end = (char *)old + old_rounded_size + old_guard_size;
size_t extra = rounded_size - old_rounded_size;
if (!memory_remap((char *)old + old_rounded_size, old_guard_size, old_guard_size + extra)) {
if (memory_protect_rw((char *)old + old_rounded_size, extra)) {
void *guard_end = (char *)old + old_size + old_guard_size;
size_t extra = size - old_size;
if (!memory_remap((char *)old + old_size, old_guard_size, old_guard_size + extra)) {
if (memory_protect_rw((char *)old + old_size, extra)) {
memory_unmap(guard_end, extra);
} else {
mutex_lock(&ra->lock);