diff --git a/Android.bp b/Android.bp
index 9abe604..0b2768c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -17,6 +17,7 @@ common_cflags = [
     "-DSLAB_CANARY=true",
     "-DSLAB_QUARANTINE_RANDOM_LENGTH=1",
     "-DSLAB_QUARANTINE_QUEUE_LENGTH=1",
+    "-DCONFIG_LARGE_SIZE_CLASSES=true",
     "-DGUARD_SLABS_INTERVAL=1",
     "-DGUARD_SIZE_DIVISOR=2",
     "-DREGION_QUARANTINE_RANDOM_LENGTH=128",
diff --git a/Makefile b/Makefile
index 26e1a87..9ef4c5b 100644
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,7 @@ CONFIG_SLOT_RANDOMIZE := true
 CONFIG_SLAB_CANARY := true
 CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH := 1
 CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH := 1
+CONFIG_LARGE_SIZE_CLASSES := true
 CONFIG_GUARD_SLABS_INTERVAL := 1
 CONFIG_GUARD_SIZE_DIVISOR := 2
 CONFIG_REGION_QUARANTINE_RANDOM_LENGTH := 128
@@ -68,6 +69,10 @@ ifeq (,$(filter $(CONFIG_SLAB_CANARY),true false))
     $(error CONFIG_SLAB_CANARY must be true or false)
 endif
 
+ifeq (,$(filter $(CONFIG_LARGE_SIZE_CLASSES),true false))
+    $(error CONFIG_LARGE_SIZE_CLASSES must be true or false)
+endif
+
 CPPFLAGS += \
     -DZERO_ON_FREE=$(CONFIG_ZERO_ON_FREE) \
     -DWRITE_AFTER_FREE_CHECK=$(CONFIG_WRITE_AFTER_FREE_CHECK) \
@@ -75,6 +80,7 @@ CPPFLAGS += \
     -DSLAB_CANARY=$(CONFIG_SLAB_CANARY) \
     -DSLAB_QUARANTINE_RANDOM_LENGTH=$(CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH) \
     -DSLAB_QUARANTINE_QUEUE_LENGTH=$(CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH) \
+    -DCONFIG_LARGE_SIZE_CLASSES=$(CONFIG_LARGE_SIZE_CLASSES) \
     -DGUARD_SLABS_INTERVAL=$(CONFIG_GUARD_SLABS_INTERVAL) \
     -DGUARD_SIZE_DIVISOR=$(CONFIG_GUARD_SIZE_DIVISOR) \
     -DREGION_QUARANTINE_RANDOM_LENGTH=$(CONFIG_REGION_QUARANTINE_RANDOM_LENGTH) \
diff --git a/README.md b/README.md
index 10d5c1a..6ade93b 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,9 @@ for the chosen values are not written yet, so use them at your own peril:
 * `CONFIG_STATS`: `false` (default) to control whether stats on allocation /
   deallocation count and active allocations are tracked. This is currently only
   exposed via the mallinfo APIs on Android.
+* `CONFIG_LARGE_SIZE_CLASSES`: `true` (default) to control whether large
+  allocations use the slab allocation size class scheme instead of page size
+  granularity (see the section on size classes below)
 
 There will be more control over enabled features in the future along with
 control over fairly arbitrarily chosen values like the size of empty slab
@@ -400,6 +403,14 @@ size for 2048 byte spacing and the next spacing class matches the page size of
 classes required to avoid substantial waste from rounding. Further slab
 allocation size classes may be offered as an option in the future.
 
+The `CONFIG_LARGE_SIZE_CLASSES` option controls whether large allocations use
+the same size class scheme providing 4 size classes for every doubling of size.
+It increases virtual memory consumption but drastically improves performance
+where realloc is used without proper growth factors, which is fairly common and
+destroys performance in some commonly used programs. If large size classes are
+disabled, the granularity is instead the page size, which is currently always
+4096 bytes on supported platforms.
+
 ## Scalability
 
 ### Small (slab) allocations
diff --git a/h_malloc.c b/h_malloc.c
index 8cf70cb..0336b27 100644
--- a/h_malloc.c
+++ b/h_malloc.c
@@ -1123,11 +1123,37 @@ COLD __attribute__((constructor(101))) static void trigger_early_init(void) {
     h_free(h_malloc(16));
 }
 
+// Returns 0 on overflow.
+static size_t get_large_size_class(size_t size) {
+    if (CONFIG_LARGE_SIZE_CLASSES) {
+        // Continue small size class growth pattern of power of 2 spacing classes:
+        //
+        // 4 KiB [20 KiB, 24 KiB, 28 KiB, 32 KiB]
+        // 8 KiB [40 KiB, 48 KiB, 56 KiB, 64 KiB]
+        // 16 KiB [80 KiB, 96 KiB, 112 KiB, 128 KiB]
+        // 32 KiB [160 KiB, 192 KiB, 224 KiB, 256 KiB]
+        // 512 KiB [2560 KiB, 3 MiB, 3584 KiB, 4 MiB]
+        // 1 MiB [5 MiB, 6 MiB, 7 MiB, 8 MiB]
+        // etc.
+        size_t spacing_shift = 64 - __builtin_clzl(size - 1) - 3;
+        size_t spacing_class = 1ULL << spacing_shift;
+        return (size + (spacing_class - 1)) & ~(spacing_class - 1);
+    } else {
+        return PAGE_CEILING(size);
+    }
+}
+
 static size_t get_guard_size(struct random_state *state, size_t size) {
     return (get_random_u64_uniform(state, size / PAGE_SIZE / GUARD_SIZE_DIVISOR) + 1) * PAGE_SIZE;
 }
 
 static void *allocate_large(size_t size) {
+    size = get_large_size_class(size);
+    if (unlikely(!size)) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
     struct region_allocator *ra = ro.region_allocator;
     mutex_lock(&ra->lock);
 
@@ -1200,6 +1226,11 @@ static int alloc_aligned(void **memptr, size_t alignment, size_t size, size_t mi
         return 0;
     }
 
+    size = get_large_size_class(size);
+    if (unlikely(!size)) {
+        return ENOMEM;
+    }
+
     struct region_allocator *ra = ro.region_allocator;
     mutex_lock(&ra->lock);
 
@@ -1277,6 +1308,14 @@ EXPORT void *h_realloc(void *old, size_t size) {
     size = adjust_size_for_canaries(size);
 
+    if (size > max_slab_size_class) {
+        size = get_large_size_class(size);
+        if (unlikely(!size)) {
+            errno = ENOMEM;
+            return NULL;
+        }
+    }
+
     size_t old_size;
     if (old >= get_slab_region_start() && old < ro.slab_region_end) {
         old_size = slab_usable_size(old);
@@ -1297,28 +1336,24 @@ EXPORT void *h_realloc(void *old, size_t size) {
         }
         old_size = region->size;
         size_t old_guard_size = region->guard_size;
-        if (PAGE_CEILING(old_size) == PAGE_CEILING(size)) {
-            region->size = size;
+        if (old_size == size) {
             mutex_unlock(&ra->lock);
             thread_seal_metadata();
             return old;
         }
         mutex_unlock(&ra->lock);
 
-        size_t old_rounded_size = PAGE_CEILING(old_size);
-        size_t rounded_size = PAGE_CEILING(size);
-
         if (size > max_slab_size_class) {
             // in-place shrink
             if (size < old_size) {
-                void *new_end = (char *)old + rounded_size;
+                void *new_end = (char *)old + size;
                 if (memory_map_fixed(new_end, old_guard_size)) {
                     thread_seal_metadata();
                     return NULL;
                 }
                 memory_set_name(new_end, old_guard_size, "malloc large");
                 void *new_guard_end = (char *)new_end + old_guard_size;
-                regions_quarantine_deallocate_pages(new_guard_end, old_rounded_size - rounded_size, 0);
+                regions_quarantine_deallocate_pages(new_guard_end, old_size - size, 0);
 
                 mutex_lock(&ra->lock);
                 struct region_metadata *region = regions_find(old);
@@ -1333,10 +1368,10 @@ EXPORT void *h_realloc(void *old, size_t size) {
             }
 
             // in-place growth
-            void *guard_end = (char *)old + old_rounded_size + old_guard_size;
-            size_t extra = rounded_size - old_rounded_size;
-            if (!memory_remap((char *)old + old_rounded_size, old_guard_size, old_guard_size + extra)) {
-                if (memory_protect_rw((char *)old + old_rounded_size, extra)) {
+            void *guard_end = (char *)old + old_size + old_guard_size;
+            size_t extra = size - old_size;
+            if (!memory_remap((char *)old + old_size, old_guard_size, old_guard_size + extra)) {
+                if (memory_protect_rw((char *)old + old_size, extra)) {
                     memory_unmap(guard_end, extra);
                 } else {
                     mutex_lock(&ra->lock);
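
For reference, a small standalone sketch (not part of the patch) of the rounding performed by the new get_large_size_class(). PAGE_SIZE, PAGE_CEILING and the CONFIG_LARGE_SIZE_CLASSES toggle are local stand-ins for the allocator's own definitions, and a 64-bit unsigned long is assumed for __builtin_clzl.

// Standalone sketch: reproduces the size class rounding from the patch so the
// table in the comment can be checked by hand.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE ((size_t)4096)
#define PAGE_CEILING(s) (((s) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define CONFIG_LARGE_SIZE_CLASSES true

// Returns 0 on overflow, matching the contract in the patch.
static size_t get_large_size_class(size_t size) {
    if (CONFIG_LARGE_SIZE_CLASSES) {
        // 4 size classes per doubling: the spacing is 1/8 of the next power of 2 above size - 1
        size_t spacing_shift = 64 - __builtin_clzl(size - 1) - 3;
        size_t spacing_class = 1ULL << spacing_shift;
        return (size + (spacing_class - 1)) & ~(spacing_class - 1);
    }
    return PAGE_CEILING(size);
}

int main(void) {
    // (128 KiB, 256 KiB] uses 32 KiB spacing, so a 129 KiB request becomes 160 KiB
    printf("%zu KiB\n", get_large_size_class(129 * 1024) / 1024);
    // (32 KiB, 64 KiB] uses 8 KiB spacing (40, 48, 56, 64 KiB), so 50 KiB becomes 56 KiB
    printf("%zu KiB\n", get_large_size_class(50 * 1024) / 1024);
    // near SIZE_MAX the rounding wraps to 0, which the callers report as ENOMEM
    printf("%zu\n", get_large_size_class(SIZE_MAX - 1));
    return 0;
}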