From 96668b20e37bc4eb13eba8733dce27d39964a5b9 Mon Sep 17 00:00:00 2001
From: Thor Preimesberger
Date: Mon, 22 Sep 2025 17:01:59 -0400
Subject: [PATCH] Shorten x86_64 random slab path with pext asm

Only applies on platforms with BMI2, i.e. Haswell and newer.
---
 h_malloc.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/h_malloc.c b/h_malloc.c
index 6221d0b..c4855cd 100644
--- a/h_malloc.c
+++ b/h_malloc.c
@@ -405,8 +405,22 @@ static size_t get_free_slot(struct random_state *rng, size_t slots, const struct
     // randomize start location for linear search (uniform random choice is too slow)
     size_t random_index = get_random_u16_uniform(rng, slots);
     size_t first_bitmap = random_index / U64_WIDTH;
-    u64 random_split = ~(~0UL << (random_index - first_bitmap * U64_WIDTH));
+#if defined(__x86_64__) && defined(__BMI2__)
+    u64 tmp;
+    __asm__ (
+        // set up the mask: the low 6 bits select random_index % U64_WIDTH
+        "mov $0x3f, %%rdx\n\t"
+        // tmp is now the same shift amount as in the portable case
+        "pext %%rdx, %[random_index], %[tmp]\n\t"
+        : [tmp] "=r" (tmp)
+        : [random_index] "r" (random_index)
+        : "rdx");
+    // gcc/clang is smart enough to generate code with no spills here
+    u64 random_split = ~(~0UL << tmp);
+#else
+    u64 random_split = ~(~0UL << (random_index - first_bitmap * U64_WIDTH));
+#endif
     size_t i = first_bitmap;
     u64 masked = metadata->bitmap[i];
     masked |= random_split;
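
Below is a minimal standalone sanity check, separate from the patch itself, that the
pext-derived shift amount matches the portable expression for every value
get_random_u16_uniform() can return. It is a sketch under two assumptions: it uses the
_pext_u64 intrinsic instead of inline asm, and it assumes U64_WIDTH is 64 as in
h_malloc.c. Build with -mbmi2.

/* Sanity check: pext-based shift amount vs. the portable expression.
 * Assumes U64_WIDTH == 64 as in h_malloc.c. Build with: cc -O2 -mbmi2 check.c */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <immintrin.h>

#define U64_WIDTH 64

int main(void) {
    /* get_random_u16_uniform() returns a value below slots and is itself a
     * u16, so checking all 16-bit values covers every possible input */
    for (uint64_t random_index = 0; random_index < (1 << 16); random_index++) {
        uint64_t first_bitmap = random_index / U64_WIDTH;
        /* shift amount used by the portable path: random_index % 64 */
        uint64_t portable = random_index - first_bitmap * U64_WIDTH;
        /* pext with mask 0x3f packs the low 6 bits into the low 6 bits,
         * which is exactly random_index % 64 */
        uint64_t tmp = _pext_u64(random_index, 0x3f);
        assert(tmp == portable);
    }
    puts("pext shift amount matches portable path for all u16 inputs");
    return 0;
}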