Compare commits

..

168 commits

Author SHA1 Message Date
Віктор Дуйко
7481c8857f docs: updated the license date 2025-04-05 13:13:18 -04:00
Christian Göttsche
1d7fc7ffe0 support GCC15
GCC 15 starts warning about non-NUL-terminated string literals:

    chacha.c:44:31: error: initializer-string for array of ‘char’ truncates NUL terminator but destination lacks ‘nonstring’ attribute (17 chars into 16 available) [-Werror=unterminated-string-initialization]
       44 | static const char sigma[16] = "expand 32-byte k";
          |                               ^~~~~~~~~~~~~~~~~~
2025-04-03 18:31:55 -04:00
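As context for this warning, here is a minimal sketch of two common ways to satisfy GCC 15 in this situation (hedged: the actual change in 1d7fc7ffe0 may differ):

```c
/* Hypothetical sketch; the real fix in 1d7fc7ffe0 may differ. */

/* Option 1: mark the array as deliberately non-NUL-terminated; the
   diagnostic itself points at the GCC `nonstring` attribute. */
static const char sigma[16] __attribute__((nonstring)) = "expand 32-byte k";

/* Option 2: avoid string-literal initialization entirely. */
static const char sigma2[16] = {
    'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
    '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
```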
Daniel Micay
4fe9018b6f rename calculate_waste.py to calculate-waste 2025-02-17 12:47:30 -05:00
Daniel Micay
3ab23f7ebf update libdivide to 5.2.0 2025-01-25 16:13:22 -05:00
Daniel Micay
c894f3ec1d add newer compiler versions for GitHub workflow 2024-12-15 22:20:01 -05:00
Daniel Micay
c97263ef0c handle GitHub runner image updates
clang-14 and clang-15 are no longer installed by default.
2024-12-15 22:18:40 -05:00
Daniel Micay
a7302add63 update outdated branch in README 2024-10-23 06:36:02 -04:00
Daniel Micay
b1d9571fec remove trailing whitespace 2024-10-12 03:23:52 -04:00
Daniel Micay
e03579253a preserve PROT_MTE when releasing memory 2024-10-12 03:19:16 -04:00
Daniel Micay
9739cb4690 use wrapper for calling memory_map_mte 2024-10-12 03:19:03 -04:00
Daniel Micay
aa950244f8 reuse code for memory_map_mte
This drops the separate error message since that doesn't seem useful.
2024-10-12 03:18:36 -04:00
Daniel Micay
6402e2b0d4 reduce probability hint for is_memtag_enabled 2024-10-12 03:17:44 -04:00
Daniel Micay
e86192e7fe remove redundant warning switches for Android
Android already enables -Wall and -Wextra in the global soong build
settings.
2024-10-09 19:57:15 -04:00
Julien Voisin
6ce663a8bd Fix -Wimplicit-function-declaration error with gcc 14.
```
malloc_info.c: In function 'leak_memory':
malloc_info.c:12:12: error: implicit declaration of function 'malloc' [-Wimplicit-function-declaration]
   12 |     (void)!malloc(1024 * 1024 * 1024);
      |            ^~~~~~
malloc_info.c:10:1: note: include '<stdlib.h>' or provide a declaration of 'malloc'
    9 | #include "../util.h"
  +++ |+#include <stdlib.h>
   10 |
malloc_info.c:12:12: warning: incompatible implicit declaration of built-in function 'malloc' [-Wbuiltin-declaration-mismatch]
   12 |     (void)!malloc(1024 * 1024 * 1024);
      |            ^~~~~~
```

Taken from https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/72971/

Co-authored-by: @mio
2024-10-03 23:44:15 -04:00
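The fix follows the compiler's own suggestion. A minimal sketch of the corrected test, abridged from the error output above:

```c
#include <stdlib.h> // declares malloc(), fixing the implicit declaration

#include "../util.h"

static void leak_memory(void) {
    (void)!malloc(1024 * 1024 * 1024);
}
```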
maade93791
9ca9d2d925 android: use more basic CPU target for memtag
This is required for hardened_malloc to work in microdroid on MTE-enabled devices (currently, 8th
and 9th generation Pixels) since PVMFW only supports ARMv8 cores.

https://android.googlesource.com/platform/packages/modules/Virtualization/+/refs/tags/android-15.0.0_r1/pvmfw/platform.dts#100
2024-09-09 19:22:23 -04:00
Daniel Micay
3f07acfab1 update libdivide to 5.1 2024-08-05 02:25:55 -04:00
Daniel Micay
749640c274 update copyright notice 2024-02-15 02:57:33 -05:00
Dmitry Muhomor
7268189933 mte: use tag 0 for freed slots, stop reserving tag 15 2024-01-23 12:56:54 -05:00
Dmitry Muhomor
3c1f40aff0 amend memory tagging README section
Memory tagging is enabled by default in bionic, but can be disabled at any point.
Memory tagging can't be re-enabled after it's disabled.
2024-01-23 12:56:54 -05:00
Dmitry Muhomor
5fbbdc2ef8 memtag_test: add test for MADV_DONTNEED behavior 2024-01-23 12:56:54 -05:00
Dmitry Muhomor
7d2151e40c mte: remove util.h dependency from arm_mte.h
This removal is needed for including arm_mte.h into memtag_test.cc.
2024-01-23 12:56:54 -05:00
Dmitry Muhomor
4756716904 memtag_test: move SEGV code checks to device-side binary 2024-01-23 12:56:54 -05:00
Daniel Micay
a3bf742c3e remove trailing whitespace 2024-01-03 14:44:08 -05:00
Julien Voisin
53a45b4661 Improve a bit the formulation of the MTE documentation 2024-01-03 13:40:42 -05:00
Daniel Micay
abe54dba27 update memory tagging documentation 2024-01-03 12:22:56 -05:00
Dmitry Muhomor
365ee6900d android: restore the default SIGABRT handler in fatal_error()
async_safe_fatal() calls abort() at the end, which can be intercepted by a custom SIGABRT handler.

In particular, crashlytics installs such a handler and tries to fork() after catching SIGABRT.

hardened_malloc uses pthread_atfork() to register fork handlers. These handlers try to lock internal
hardened_malloc mutexes. If at least one of those mutexes is already locked, which is usually the
case, the thread that called fatal_error() deadlocks, while the other threads (if there are any)
continue to run.
2023-12-31 11:21:28 -05:00
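A hedged sketch of what restoring the default SIGABRT disposition can look like; the actual code in 365ee6900d may differ in detail:

```c
#include <signal.h>
#include <stdlib.h>

/* Illustrative only: drop any custom SIGABRT handler (e.g. one installed
   by crashlytics) so abort() terminates directly instead of entering a
   handler that may fork() into hardened_malloc's fork-handler mutexes. */
static void fatal_error_sketch(const char *msg) {
    (void)msg; /* the real code logs msg via async_safe_fatal() machinery */
    struct sigaction sa;
    sa.sa_handler = SIG_DFL;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    sigaction(SIGABRT, &sa, NULL);
    abort();
}
```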
Christian Göttsche
7093fdc482 README: add note about AppArmor constraint on Debian 2023-12-14 09:06:32 -05:00
jvoisin
61821b02c8 Clarify a bit why a particular magic number was chosen 2023-11-16 14:25:54 -05:00
Daniel Micay
3c274731ba Revert "use safe_flag for -fstack-clash-protection"
This reverts commit 4171bd164e.
2023-11-14 16:19:33 -05:00
Daniel Micay
4171bd164e use safe_flag for -fstack-clash-protection 2023-11-08 14:21:04 -05:00
jvoisin
352c083f65 Run the testsuite on multiple compiler versions 2023-11-05 17:58:32 -05:00
Dmitry Muhomor
88b3c1acf9 memtag_test: fix sporadic failures of overflow/underflow tests 2023-11-01 17:33:20 -04:00
Daniel Micay
f793a3edf6 update README now that MTE is implemented 2023-10-30 14:23:48 -04:00
Dmitry Muhomor
fd75fc1ba8 mte: add scudo to CREDITS file 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
72dc236d5f mte: add untag_pointer() variant for const pointers 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
be08eeee2d mte: update comment about skipped tag array update in deallocate_small() 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
25f0fe9c69 remove an always-true sizeof(u8) assert 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
c75cb4c3f3 mte: refactor tag_and_clear_slab_slot()
Explicitly call is_memtag_enabled() before calling tag_and_clear_slab_slot() to make it clearer that
memory is not zeroed when MTE is disabled.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
b560431c01 mte: note why 0 tag is excluded 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
009f2dad76 mte: note alignment requirements of arm_mte_tag_and_clear_mem() 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
03883eb2ce mte: rename arm_mte_store_tags_and_clear() to arm_mte_tag_and_clear_mem() 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
7a6dbd8152 mte: add comment about the reserved slab canary value 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
f16ef601d4 memtag_test: improve capturing of test results
Using debuggerd + logcat parsing is unreliable and slow; print the SEGV signal code to stderr instead.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
155800526a memtag_test: improve tag_distinctness test
- check that tag distinctness checks are actually reached (this was previously verified manually by
looking at the now-removed printf output)
- check that only non-reserved tags are used
- check that all non-reserved tags are used
- print tag usage statistics at the end of the run
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
28d5d394cf memtag_test: remove usages of rand()
It didn't work correctly since it was never seeded, and its usage wasn't necessary.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
577d9583eb mte: add licensing info for code that was copied from scudo 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
93aa9eefe4 mte: make h_malloc_disable_memory_tagging() thread-safe 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
01a199e19e mte: move is_memtag_enabled to read-only allocator data 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
576328b1b4 android: add MTE tests
To run them, connect an MTE-enabled device via adb and execute `atest HMallocTest:MemtagTest`.

Since these tests are not deterministic (and neither is hardened_malloc itself), it's better to run
them multiple times, e.g. `atest --iterations 30 HMallocTest:MemtagTest`.

There are also CTS tests that are useful for checking correctness of the Android integration:
`atest CtsTaggingHostTestCases`
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
5137d2da4d android: enable MTE on devices that declare having it 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
f042a6b9b0 android: add function for disabling MTE at runtime
On Android, MTE is always enabled in Zygote, and is disabled after fork for apps that didn't opt in
to MTE.

Depends on the slab canary adjustments in the previous commit.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
001fc86585 mte: disable slab canaries when MTE is on
The "0" canary value is now reserved to support re-enabling slab canaries if MTE is turned off
at runtime.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
70c91f4c3e mte: disable write-after-free check for slab allocations when MTE is on
Freed slab memory is tagged with a reserved tag value that is never used for live allocations.
2023-10-30 14:20:53 -04:00
Dmitry Muhomor
e3686ae457 add support for Arm MTE memory tagging
- tag slab allocations with [1..14] tags
- tag freed slab allocations with the "15" tag value to detect accesses to freed slab memory
- when generating a tag value for a slab slot, always exclude the most recent tag value for that
slot (to make use-after-free detection more reliable) and the most recent tag values of its
immediate neighbors (to detect linear overflows and underflows); a sketch of this rule follows
below
2023-10-30 14:20:53 -04:00
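The promised sketch of the exclusion rule is hedged and illustrative; identifiers are not the actual hardened_malloc ones:

```c
#include <stdint.h>

uint8_t get_random_u8(void); /* assumed helper; any uniform random byte source works */

static uint8_t pick_slot_tag(uint8_t prev_tag, uint8_t left_tag, uint8_t right_tag) {
    for (;;) {
        /* tags 1..14: 0 is skipped, 15 is reserved for freed memory */
        uint8_t tag = (uint8_t)(1 + get_random_u8() % 14);
        if (tag != prev_tag && tag != left_tag && tag != right_tag) {
            return tag;
        }
    }
}
```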
Dmitry Muhomor
19a46e0f96 add helper functions for using u8 array as u4 array 2023-10-30 14:20:53 -04:00
Dmitry Muhomor
8d5c631224 android: implement fatal_error() via async_safe_fatal()
async_safe_fatal() performs the following steps:
- logs the error message to stderr and logcat
- passes the error message to debuggerd via android_set_abort_message(). debuggerd then saves the error
message in the crash report file ("tombstone")
- calls abort()
2023-09-28 13:47:11 -04:00
Christian Göttsche
903cba5a84 test: add regression test for missing init() in realloc() 2023-09-27 19:19:19 -04:00
Christian Göttsche
9cb4e6daf6 do not skip init() in realloc()
If N_ARENA is greater than 1, `thread_arena` is initially set to N_ARENA,
which is an invalid index into `ro.size_class_metadata[]`.

The arena actually used is computed in init().

Ensure init() is called if a new thread is only using realloc() to avoid
UB, e.g. pthread_mutex_lock() might crash due to the memory not holding an
initialized mutex.

Affects mesa 23.2.0~rc4.

Example back trace using glmark2 (note `arena=4` with the default
N_ARENA being 4):

    Program terminated with signal SIGSEGV, Segmentation fault.
    #0  ___pthread_mutex_lock (mutex=0x7edff8d3f200) at ./nptl/pthread_mutex_lock.c:80
            type = <optimized out>
            __PRETTY_FUNCTION__ = "___pthread_mutex_lock"
            id = <optimized out>
    #1  0x00007f0ab62091a6 in mutex_lock (m=0x7edff8d3f200) at ./mutex.h:21
    No locals.
    #2  0x00007f0ab620c9b5 in allocate_small (arena=4, requested_size=24) at h_malloc.c:517
            info = {size = 32, class = 2}
            size = 32
            c = 0x7edff8d3f200
            slots = 128
            slab_size = 4096
            metadata = 0x0
            slot = 0
            slab = 0x0
            p = 0x0
    #3  0x00007f0ab6209809 in allocate (arena=4, size=24) at h_malloc.c:1252
    No locals.
    #4  0x00007f0ab6208e26 in realloc (old=0x72b138199120, size=24) at h_malloc.c:1499
            vma_merging_reliable = false
            old_size = 16
            new = 0x0
            copy_size = 139683981990973
    #5  0x00007299f919e556 in attach_shader (ctx=0x7299e9ef9000, shProg=0x7370c9277d30, sh=0x7370c9278230) at ../src/mesa/main/shaderapi.c:336
            n = 1
    #6  0x00007299f904223e in _mesa_unmarshal_AttachShader (ctx=<optimized out>, cmd=<optimized out>) at src/mapi/glapi/gen/marshal_generated2.c:1539
            program = <optimized out>
            shader = <optimized out>
            cmd_size = 2
    #7  0x00007299f8f2e3b2 in glthread_unmarshal_batch (job=job@entry=0x7299e9ef9168, gdata=gdata@entry=0x0, thread_index=thread_index@entry=0) at ../src/mesa/main/glthread.c:139
            cmd = 0x7299e9ef9180
            batch = 0x7299e9ef9168
            ctx = 0x7299e9ef9000
            pos = 0
            used = 3
            buffer = 0x7299e9ef9180
            shared = <optimized out>
            lock_mutexes = <optimized out>
            batch_index = <optimized out>
    #8  0x00007299f8ecc2d9 in util_queue_thread_func (input=input@entry=0x72c1160e5580) at ../src/util/u_queue.c:309
            job = {job = 0x7299e9ef9168, global_data = 0x0, job_size = 0, fence = 0x7299e9ef9168, execute = <optimized out>, cleanup = <optimized out>}
            queue = 0x7299e9ef9058
            thread_index = 0
    #9  0x00007299f8f1bcbb in impl_thrd_routine (p=<optimized out>) at ../src/c11/impl/threads_posix.c:67
            pack = {func = 0x7299f8ecc190 <util_queue_thread_func>, arg = 0x72c1160e5580}
    #10 0x00007f0ab5aa63ec in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:444
            ret = <optimized out>
            pd = <optimized out>
            out = <optimized out>
            unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139683974242608, 2767510063778797177, -168, 11, 140727286820160, 126005371879424, -4369625917767903623, -2847048016936659335}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0,
              0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
            not_first_call = <optimized out>
    #11 0x00007f0ab5b26a2c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
2023-09-26 20:03:02 -04:00
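A hedged sketch of the fix; names and structure are illustrative rather than the actual h_malloc.c code:

```c
#include <stddef.h>

#define N_ARENA 4

static _Thread_local unsigned thread_arena = N_ARENA; /* sentinel: uninitialized */

static unsigned pick_arena(void) {
    return 0; /* stand-in for the real arena-selection logic */
}

static void init(void) {
    /* Before the fix, a thread whose first allocator call was realloc()
       could reach allocate_small() with thread_arena still equal to
       N_ARENA, indexing one past the end of ro.size_class_metadata[] and
       locking an uninitialized mutex, as in the backtrace above. */
    if (thread_arena >= N_ARENA) {
        thread_arena = pick_arena();
    }
}
```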
dependabot[bot]
8696431b88 Bump actions/checkout from 3 to 4
Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-09-04 15:37:49 -04:00
Daniel Micay
2d302f7d85 enable -Wundef 2023-06-10 14:58:33 -04:00
Daniel Micay
d5f9909eca add missing include 2023-06-10 14:58:20 -04:00
Daniel Micay
5e1901e85d silence unwanted tidy warning 2023-06-10 14:52:08 -04:00
Daniel Micay
462c2c5293 conditionally include bits/functexcept.h 2023-06-10 14:20:20 -04:00
Daniel Micay
8f3281ed6a enable strict prototypes warning 2023-06-10 14:18:27 -04:00
Christian Göttsche
7d75acc62a use strict prototype
h_malloc.c:83:21: error: function declaration isn’t a prototype [-Werror=strict-prototypes]
       83 | static inline void *get_slab_region_end() {
          |                     ^~~~~~~~~~~~~~~~~~~
2023-06-10 14:18:27 -04:00
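The change implied by the warning is an explicit `(void)` parameter list:

```c
/* In C, () declares a function with unspecified parameters, which
   -Wstrict-prototypes rejects; (void) is a full prototype. The body
   here is a placeholder, not the real function. */
static inline void *get_slab_region_end(void) {
    return NULL; /* placeholder; the real function returns the slab region end */
}
```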
Christian Göttsche
af866a7faa support versioned Clang
make CC=clang-14
    clang-14  -std=c17 -O3 -flto -fPIC -fvisibility=hidden -fno-plt -fstack-clash-protection -fstack-protector-strong -pipe -Wall -Wcast-qual -Wextra -Wfloat-equal -Wformat=2 -Winit-self -Wmissing-format-attribute -Wmissing-noreturn -Wmissing-prototypes -Wnull-dereference -Wpointer-arith -Wshadow -Wstrict-prototypes -Wundef -Wunused -Wwrite-strings -Wcast-align=strict -Wcast-qual -Wwrite-strings -Werror -march=native -Wmissing-prototypes  -D_GNU_SOURCE -I include -DCONFIG_SEAL_METADATA=false -DZERO_ON_FREE=true -DWRITE_AFTER_FREE_CHECK=true -DSLOT_RANDOMIZE=true -DSLAB_CANARY=true -DSLAB_QUARANTINE_RANDOM_LENGTH=1 -DSLAB_QUARANTINE_QUEUE_LENGTH=1 -DCONFIG_EXTENDED_SIZE_CLASSES=true -DCONFIG_LARGE_SIZE_CLASSES=true -DGUARD_SLABS_INTERVAL=1 -DGUARD_SIZE_DIVISOR=2 -DREGION_QUARANTINE_RANDOM_LENGTH=256 -DREGION_QUARANTINE_QUEUE_LENGTH=1024 -DREGION_QUARANTINE_SKIP_THRESHOLD=33554432  -DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=32 -DCONFIG_CLASS_REGION_SIZE=34359738368  -DN_ARENA=4 -DCONFIG_STATS=false  -c -o out/chacha.o chacha.c
    error: unknown warning option '-Wcast-align=strict'; did you mean '-Wcast-align'? [-Werror,-Wunknown-warning-option]
    make: *** [Makefile:114: out/chacha.o] Error 1
2023-06-10 14:18:27 -04:00
Daniel Micay
64dad0a69f drop legacy glibc support 2023-06-10 14:04:46 -04:00
Daniel Micay
95c4b40caf update minimum dependency version list 2023-06-10 14:02:55 -04:00
Daniel Micay
cc70583beb drop info on MPK with unsupported kernels 2023-06-10 13:59:56 -04:00
Daniel Micay
62a98efb13 update supported Android branch 2023-06-10 13:59:36 -04:00
Daniel Micay
d3152b8e8f preserve errno for free calls
This is a future POSIX requirement recently implemented by musl and
glibc.
2023-02-17 13:07:26 -05:00
Daniel Micay
2e9daf3122 merge fprintf/fputs calls in malloc_info 2023-02-17 13:07:26 -05:00
Daniel Micay
6038030d0b no need to check for -fstack-clash-protection
This is supported by the compiler versions listed as minimum
requirements in the README.
2023-02-17 13:07:26 -05:00
Daniel Micay
4d23fa37ad enable Intel CET support 2023-02-17 13:07:26 -05:00
Daniel Micay
6d36e758f5 update copyright notice 2023-02-17 13:07:26 -05:00
Daniel Micay
cd9b875297 reorganize compiler switches 2023-02-17 13:07:24 -05:00
Daniel Micay
2250130c53 remove unnecessary UNUSED marker 2022-09-16 01:03:47 -04:00
Daniel Micay
72dba6765f disable tidy identifier length lint 2022-09-16 00:57:08 -04:00
Daniel Micay
8f38bbdee6 add configuration for self-init
This needs to be disabled for compatibility with the exploit protection
compatibility mode on GrapheneOS. hardened_malloc shouldn't be trying to
initialize itself when exploit protection compatibility mode is enabled.
This has to be handled in our Bionic integration instead.
2022-09-14 03:41:31 -04:00
Daniel Micay
dd427cb3b8 arm64 page table / page size docs 2022-09-08 23:17:25 -04:00
Daniel Micay
b5dd9d11d9 raise class region size to 32GB for arm64 Android 2022-09-08 23:13:15 -04:00
Daniel Micay
72fb3576f5 Android 13 is now all we'll be supporting 2022-08-16 07:48:47 -04:00
Dmitry Muhomor
f8fec401c7 update Android.bp for Android 13 2022-08-16 07:46:44 -04:00
Daniel Micay
0d6d63cbe7 improve package installation for CI 2022-03-11 22:09:13 -05:00
dependabot[bot]
8fd31e4bc1 Bump actions/checkout from 2 to 3
Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v2...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-03-02 13:44:16 -05:00
jvoisin
b511696c55 clean up includes and remove non-portable includes
This marginally increases the portability of hardened_malloc,
e.g. on OSX.
2022-02-07 07:14:51 -05:00
jvoisin
943704de7c Remove __pycache__ as well in make clean 2022-02-06 18:56:56 -05:00
jvoisin
04a86566c3 Don't depend on gcc_s 2022-01-28 14:59:58 -05:00
Daniel Micay
448170a412 fix case for non-macro constant 2022-01-21 23:59:37 -05:00
Daniel Micay
995ce07d45 add is_init likely/unlikely markers 2022-01-21 19:46:49 -05:00
Daniel Micay
c9d1abcd7e explicitly mark fatal error conditions unlikely 2022-01-21 19:45:05 -05:00
Daniel Micay
8f0b252c33 mark more out-of-memory conditions as unlikely 2022-01-21 19:03:02 -05:00
Daniel Micay
3cffc1e1af treat zero size malloc as unlikely
Calls to malloc with a zero size are extremely rare relative to normal
usage of the API. They're generally only made by inefficient C code with
open-coded dynamic array implementations that don't handle zero size as a
special case in their usage of malloc/realloc. Efficient code wouldn't be
making these allocations, and it doesn't make sense to optimize for the
performance of rare edge cases caused by inefficient code.
2022-01-21 18:27:04 -05:00
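A hedged sketch of such a branch-probability hint (not the actual h_malloc.c code):

```c
#include <stddef.h>

#define unlikely(x) __builtin_expect(!!(x), 0)

void *sketch_malloc(size_t size) {
    if (unlikely(size == 0)) {
        size = 1; /* rare edge case: served, but not optimized for */
    }
    /* ... normal allocation path ... */
    return NULL; /* placeholder */
}
```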
Daniel Micay
ae2524bf88 ignore environment for test Makefile variables 2022-01-21 16:24:49 -05:00
Daniel Micay
e28addda19 add back gitignore entries 2022-01-21 15:07:13 -05:00
Daniel Micay
9d89712386 remove extra newline 2022-01-21 15:06:29 -05:00
jvoisin
84eadd8568 Move memory corruption tests up a directory 2022-01-21 15:00:18 -05:00
Daniel Micay
0bbcc5d610 malloc.c was renamed to h_malloc.c 2022-01-19 16:42:12 -05:00
jvoisin
3fa30842ed Use $(MAKE) instead of make in Makefiles
This will pass the correct flags to the make
invocations.
2022-01-17 16:21:00 -05:00
Daniel Micay
b3d78bd5f6 use static const for local constants 2022-01-16 21:02:17 -05:00
Daniel Micay
8d61e63274 add comment about special small size classes 2022-01-16 20:50:49 -05:00
Daniel Micay
422ee78b3e reorganize pages.h header 2022-01-16 16:57:22 -05:00
Daniel Micay
3e312695e1 document clz64/log2u64 not being able to handle 0 2022-01-16 16:28:49 -05:00
Daniel Micay
81cf2f27a0 calculate slab size class instead of array loop 2022-01-16 16:18:14 -05:00
Daniel Micay
d8cb2d9f7a use consistent wrappers around clz/ffs 2022-01-16 15:39:59 -05:00
Daniel Micay
86f9c739ee define constant for u64 bit width 2022-01-16 15:06:36 -05:00
Daniel Micay
536f852538 reuse a single size alignment implementation 2022-01-16 14:44:28 -05:00
Daniel Micay
e814cf4f5c enable linking optimization for GNU linker 2022-01-16 12:18:00 -05:00
Daniel Micay
705211ef49 define UBSan flags for SHARED_FLAGS to reuse it 2022-01-16 11:50:55 -05:00
Daniel Micay
189d3362d5 enable sized deallocation ABI for consistency 2022-01-16 11:49:51 -05:00
Daniel Micay
e2bcf4a356 stop silencing constant logical operand warning
This was resolved by 3696f071a4.
2022-01-13 14:51:22 -05:00
Daniel Micay
d470ae56a5 switch Android build to C17 from C11 2022-01-13 14:48:56 -05:00
Daniel Micay
42b097f3b0 CONFIG_SEAL_METADATA is regularly tested now 2022-01-13 14:25:41 -05:00
Daniel Micay
17891d743e switch from c11 to c17 standard 2022-01-12 10:20:47 -05:00
Daniel Micay
efd71e70c7 update documentation based on light configuration 2022-01-12 08:58:00 -05:00
Daniel Micay
a6d27848af wrap overly long line 2022-01-12 08:44:39 -05:00
Daniel Micay
110126d7f0 README: fix path to configuration templates 2022-01-12 08:43:36 -05:00
Daniel Micay
a2bdb4da27 update gitignore for renamed / added tests 2022-01-12 08:41:21 -05:00
Daniel Micay
0c0561e563 update gitignore for config template system 2022-01-12 08:41:12 -05:00
Daniel Micay
5a577e9ee0 document configuration template system 2022-01-12 08:38:33 -05:00
Daniel Micay
b3372e1576 add configuration template system 2022-01-10 04:47:01 -05:00
jvoisin
052b756840 Fix two warnings 2022-01-09 08:50:46 -05:00
jvoisin
001eb0687b Fix an unused parameter warning 2022-01-04 12:16:53 -05:00
Daniel Micay
2a5662948e rename bitmap manipulation functions 2022-01-04 12:14:55 -05:00
Daniel Micay
d1c39edc9b use const for malloc_object_size API 2022-01-04 10:14:41 -05:00
Daniel Micay
aa1746a90d alloc_size attribute for legacy valloc function 2022-01-04 10:04:26 -05:00
Daniel Micay
f3efc26638 add malloc attribute where appropriate 2022-01-04 09:56:29 -05:00
jvoisin
78cbb964d4 Add alloc_size and alloc_align attributes
This should help the compiler emit better diagnostics and improve
the correctness of `__builtin_object_size`.

See https://clang.llvm.org/docs/AttributeReference.html#alloc-size
and https://clang.llvm.org/docs/AttributeReference.html#alloc-align
2022-01-04 09:45:20 -05:00
jvoisin
36dfed3354 Add aarch64 to the CI 2022-01-04 09:45:00 -05:00
Daniel Micay
8a500088c6 add missing include for overflow tests 2022-01-03 21:24:31 -05:00
Daniel Micay
c50d06bc6a comment explaining XOR for 8 byte overflow test 2022-01-03 21:23:14 -05:00
Daniel Micay
645414cc9f add 1 byte overflow tests 2022-01-03 21:20:15 -05:00
Daniel Micay
13a1f578cb use calculated size for overflow tests
This greatly reduces how much these tests depend on hard-wired knowledge
about the size classes.
2022-01-03 21:11:31 -05:00
Daniel Micay
acda766e2c fix small allocation canary overwrite test
Overwriting one byte of a canary with 0 has a 1/256 chance of not
triggering the expected failure.
2022-01-03 21:08:14 -05:00
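A hedged illustration of the problem: XOR with a nonzero constant always changes the byte, unlike writing a 0 that may match an existing 0 with probability 1/256:

```c
#include <stdint.h>

/* Illustrative only, not the actual test code. */
static void corrupt_canary_byte(uint8_t *byte) {
    *byte ^= 0x80; /* guaranteed to differ from the original value */
}
```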
Daniel Micay
5f32942263 get rid of canary_value when canaries are disabled 2022-01-03 20:39:30 -05:00
Daniel Micay
346529574d check whole allocation for uninit read large test 2022-01-03 17:55:05 -05:00
Daniel Micay
16c991b8f7 use 256k for large allocation tests 2022-01-03 16:11:16 -05:00
jvoisin
5f59ee3935 Add two tests to check that uninitialized reads are zeroed 2022-01-03 16:10:01 -05:00
Daniel Micay
3696f071a4 use SLAB_CANARY for conditional checks 2022-01-03 02:17:04 -05:00
Daniel Micay
7d6663ed80 update copyright notice 2022-01-03 01:41:27 -05:00
Daniel Micay
c6af50d088 use unsigned for ffzl definition
This makes more sense and avoids clang tidy conversion warnings.
2022-01-03 01:29:12 -05:00
Daniel Micay
8ae78237ae avoid unnecessarily mixing 32-bit and 64-bit ints
It's ever so slightly faster to stick to 64-bit arithmetic, and it
avoids clang-tidy being unhappy about the implicit widening.
2022-01-03 00:54:43 -05:00
Daniel Micay
3f8e9d3184 make MREMAP_MOVE_THRESHOLD into size_t constant
This avoids a clang-tidy warning and is a bit cleaner.
2022-01-03 00:32:06 -05:00
Daniel Micay
1e526fc36b disable incredibly impractical clang-tidy check
bugprone-easily-swappable-parameters is completely impractical for real
world usage. It's a reasonable thing to consider as part of API design
but it mostly applies to having APIs taking a lot of parameters. It's
unreasonable to disallow APIs simply taking 2 integer parameters even as
a loose guideline.
2022-01-03 00:27:49 -05:00
jvoisin
c5be4b1888 Fix two mundane clang warnings in the testsuite 2022-01-02 08:27:46 -05:00
jvoisin
ffdf7b1ee1 Make the testsuite work for read-after-free
This commit makes the testsuite fail if
the read-after-free tests are failing, instead
of simply printing some info.
2022-01-02 08:25:08 -05:00
jvoisin
2d56c1de01 Fix a couple of mundane typos in the readme 2022-01-02 08:20:13 -05:00
jvoisin
3878f4a5f4 Don't ignore the return value of the testsuite 2022-01-02 00:55:21 -05:00
Daniel Micay
de7a3b6e5a enable sized deallocation for sized deletion test
Clang doesn't currently enable sized deallocation by default like GCC.
2022-01-01 23:18:52 -05:00
jvoisin
9142a9376b Add a bunch of const qualifiers 2021-12-30 21:25:16 -05:00
Daniel Micay
75e26afdb6 remove legacy safe_flag check for -fno-plt
This is supported by the minimum versions of the dependencies.
2021-12-30 19:17:33 -05:00
jvoisin
cff1d6d4b5 Add a test to prove that hardened_malloc handles too-large-to-be-true allocations
This pattern, used by https://github.com/kaist-hacking/HardsHeap,
uncovered bugs in other memory allocators.
2021-12-28 19:47:05 -05:00
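A hedged sketch in the spirit of such a test; the actual test file may differ:

```c
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    /* An absurd, too-large-to-be-true size: the allocator must fail it. */
    void *p = malloc((size_t)-8);
    if (p != NULL) {
        fprintf(stderr, "allocator bug: huge allocation succeeded\n");
        return 1;
    }
    return 0;
}
```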
jvoisin
75952581ee Silence a GCC warning
As suggested in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425#c34
2021-12-27 06:22:32 -05:00
jvoisin
a84d3f5310 Run the testsuite on musl as well in the CI 2021-12-27 06:22:32 -05:00
jvoisin
0655c1d024 Add a missing const 2021-12-26 18:19:59 -05:00
jvoisin
2b25c791ee Run the CI every day at 2am UTC
This should help to catch issues in newer versions
of distributions/packages.
2021-12-26 17:02:51 -05:00
jvoisin
e816c545ea Run the CI in clang 2021-12-26 16:29:18 -05:00
jvoisin
06192ae499 make clean is now thorough 2021-12-26 16:28:03 -05:00
Daniel Micay
4ccd6f16df always enable C++17
The safe_flag check doesn't work properly for C++ flags with Clang, so
this wasn't getting enabled despite the conditional compilation being
removed from the code, breaking Clang builds.
2021-12-26 16:26:38 -05:00
jvoisin
9966adbdad Add another ifdef for GNU extension 2021-12-23 14:45:43 -05:00
jvoisin
769e01fc4b Don't use mallinfo on non-android and non-glibc 2021-12-23 14:38:29 -05:00
Daniel Micay
460fef456d only Android 12 is supported 2021-12-13 19:42:40 -05:00
Daniel Micay
1a650b0317 update copyright notice 2021-12-13 19:42:33 -05:00
Lelmister101
fa46a7a85d small typo fix
“expanded cover” changed to “expanded to cover”
2021-12-05 09:52:50 -05:00
Daniel Micay
d8817417cc use compiler extension instead of C11 noreturn
C11 noreturn isn't available in C++.
2021-11-23 16:00:06 -05:00
Daniel Micay
7106bff27f update required dependencies 2021-11-23 15:53:03 -05:00
Lelmister101
1bdbb2d3f7 minor typo fix
“entirely independently arenas” changed to “entirely independent arenas”
2021-11-23 15:39:53 -05:00
Thibaut Sautereau
a33d2ca97d Fix CPPFLAGS in test Makefile
In particular, the _GNU_SOURCE feature test macro needs to be set in
order to correctly define mmap(2) flags such as MAP_ANONYMOUS.
Otherwise, compilation of some test files fails when CPPFLAGS is not
defined in the initial user environment, as Make then does not export it
from the root Makefile to the sub-make.
2021-11-02 16:13:09 -04:00
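A hedged illustration of why the feature test macro matters here:

```c
/* Without _GNU_SOURCE (normally supplied via CPPFLAGS), strict -std
   modes can hide MAP_ANONYMOUS from <sys/mman.h>, breaking compilation
   of the test files. Illustrative only. */
#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>

void *map_anonymous(size_t size) {
    return mmap(NULL, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}
```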
77 changed files with 2256 additions and 724 deletions


@@ -1,2 +1,2 @@
-Checks: 'bugprone-*,-bugprone-macro-parentheses,-bugprone-too-small-loop-variable,cert-*,clang-analyzer-*,-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,-clang-diagnostic-constant-logical-operand,readability-*,-readability-function-cognitive-complexity,-readability-inconsistent-declaration-parameter-name,-readability-magic-numbers,-readability-named-parameter,llvm-include-order,misc-*'
+Checks: 'bugprone-*,-bugprone-easily-swappable-parameters,-bugprone-macro-parentheses,-bugprone-too-small-loop-variable,cert-*,-cert-err33-c,clang-analyzer-*,-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,-clang-diagnostic-constant-logical-operand,readability-*,-readability-function-cognitive-complexity,-readability-identifier-length,-readability-inconsistent-declaration-parameter-name,-readability-magic-numbers,-readability-named-parameter,llvm-include-order,misc-*'
 WarningsAsErrors: '*'


@@ -1,12 +1,55 @@
 name: Build and run tests
-on: [pull_request, push]
+on:
+  push:
+  pull_request:
+  schedule:
+    - cron: '0 2 * * *'
 jobs:
-  build:
+  build-ubuntu-gcc:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        version: [12, 13, 14]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
+      - name: Setting up gcc version
+        run: |
+          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${{ matrix.version }} 100
+          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${{ matrix.version }} 100
       - name: Build
         run: make test
+  build-ubuntu-clang:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        version: [14, 15, 16, 17, 18]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install dependencies
+        run: sudo apt-get update && sudo apt-get install -y --no-install-recommends clang-14 clang-15
+      - name: Setting up clang version
+        run: |
+          sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-${{ matrix.version }} 100
+          sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-${{ matrix.version }} 100
+      - name: Build
+        run: CC=clang CXX=clang++ make test
+  build-musl:
+    runs-on: ubuntu-latest
+    container:
+      image: alpine:latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install dependencies
+        run: apk update && apk add build-base python3
+      - name: Build
+        run: make test
+  build-ubuntu-gcc-aarch64:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install dependencies
+        run: sudo apt-get update && sudo apt-get install -y --no-install-recommends gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgcc-s1-arm64-cross cpp-aarch64-linux-gnu
+      - name: Build
+        run: CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-gcc++ make CONFIG_NATIVE=false

.gitignore (4 changed lines)

@@ -1,2 +1,2 @@
-*.o
-*.so
+out/
+out-light/


@@ -1,16 +1,13 @@
 common_cflags = [
-    "-pipe",
     "-O3",
     //"-flto",
     "-fPIC",
     "-fvisibility=hidden",
     //"-fno-plt",
-    "-Wall",
-    "-Wextra",
+    "-pipe",
     "-Wcast-align",
     "-Wcast-qual",
     "-Wwrite-strings",
-    "-Wno-constant-logical-operand",
     "-Werror",
     "-DH_MALLOC_PREFIX",
     "-DZERO_ON_FREE=true",
@@ -27,23 +24,17 @@ common_cflags = [
     "-DREGION_QUARANTINE_QUEUE_LENGTH=1024",
     "-DREGION_QUARANTINE_SKIP_THRESHOLD=33554432", // 32MiB
     "-DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=32",
+    "-DCONFIG_CLASS_REGION_SIZE=34359738368", // 32GiB
     "-DN_ARENA=1",
     "-DCONFIG_STATS=true",
+    "-DCONFIG_SELF_INIT=false",
 ]

 cc_defaults {
     name: "hardened_malloc_defaults",
     defaults: ["linux_bionic_supported"],
     cflags: common_cflags,
-    arch: {
-        arm64: {
-            cflags: ["-DCONFIG_CLASS_REGION_SIZE=2147483648"] // 2GiB
-        },
-        x86_64: {
-            cflags: ["-DCONFIG_CLASS_REGION_SIZE=34359738368"] // 32GiB
-        },
-    },
-    conlyflags: ["-std=c11", "-Wmissing-prototypes"],
+    conlyflags: ["-std=c17", "-Wmissing-prototypes"],
     stl: "none",
 }
@@ -80,5 +71,11 @@ cc_library {
         debuggable: {
             cflags: ["-DLABEL_MEMORY"],
         },
+        device_has_arm_mte: {
+            cflags: ["-DHAS_ARM_MTE", "-march=armv8-a+dotprod+memtag"]
+        },
     },
+    apex_available: [
+        "com.android.runtime",
+    ],
 }

CREDITS (229 changed lines)

@@ -4,7 +4,7 @@ chacha.c is a simple conversion of chacha-merged.c to a keystream-only implement
 D. J. Bernstein
 Public domain.

-malloc.c open-addressed hash table (regions_grow, regions_insert, regions_find, regions_delete):
+h_malloc.c open-addressed hash table (regions_grow, regions_insert, regions_find, regions_delete):

 Copyright (c) 2008, 2010, 2011, 2016 Otto Moerbeek <otto@drijf.net>
 Copyright (c) 2012 Matthew Dempsky <matthew@openbsd.org>
@@ -54,3 +54,230 @@ libdivide:
 random.c get_random_{type}_uniform functions are based on Fast Random Integer
 Generation in an Interval by Daniel Lemire

The following section was added:

arm_mte.h arm_mte_tag_and_clear_mem function contents were copied from storeTags function in scudo:
==============================================================================
The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
==============================================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---- LLVM Exceptions to the Apache 2.0 License ----
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.
==============================================================================


@@ -1,4 +1,4 @@
-Copyright © 2018-2021 Daniel Micay
+Copyright © 2018-2025 GrapheneOS

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

Makefile (110 changed lines)

@@ -1,38 +1,27 @@
-CONFIG_WERROR := true
-CONFIG_NATIVE := true
-CONFIG_CXX_ALLOCATOR := true
-CONFIG_UBSAN := false
-CONFIG_SEAL_METADATA := false
-CONFIG_ZERO_ON_FREE := true
-CONFIG_WRITE_AFTER_FREE_CHECK := true
-CONFIG_SLOT_RANDOMIZE := true
-CONFIG_SLAB_CANARY := true
-CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH := 1
-CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH := 1
-CONFIG_EXTENDED_SIZE_CLASSES := true
-CONFIG_LARGE_SIZE_CLASSES := true
-CONFIG_GUARD_SLABS_INTERVAL := 1
-CONFIG_GUARD_SIZE_DIVISOR := 2
-CONFIG_REGION_QUARANTINE_RANDOM_LENGTH := 256
-CONFIG_REGION_QUARANTINE_QUEUE_LENGTH := 1024
-CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD := 33554432 # 32MiB
-CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH := 32
-CONFIG_CLASS_REGION_SIZE := 34359738368 # 32GiB
-CONFIG_N_ARENA := 4
-CONFIG_STATS := false
+VARIANT := default
+
+ifneq ($(VARIANT),)
+    CONFIG_FILE := config/$(VARIANT).mk
+    include config/$(VARIANT).mk
+endif
+
+ifeq ($(VARIANT),default)
+    SUFFIX :=
+else
+    SUFFIX := -$(VARIANT)
+endif
+
+OUT := out$(SUFFIX)

 define safe_flag
-$(shell $(CC) $(if $(filter clang,$(CC)),-Werror=unknown-warning-option) -E $1 - </dev/null >/dev/null 2>&1 && echo $1 || echo $2)
+$(shell $(CC) $(if $(filter clang%,$(CC)),-Werror=unknown-warning-option) -E $1 - </dev/null >/dev/null 2>&1 && echo $1 || echo $2)
 endef

 CPPFLAGS := $(CPPFLAGS) -D_GNU_SOURCE -I include
-SHARED_FLAGS := -O3 -flto -fPIC -fvisibility=hidden $(call safe_flag,-fno-plt) \
-    $(call safe_flag,-fstack-clash-protection) -fstack-protector-strong -pipe -Wall -Wextra \
-    $(call safe_flag,-Wcast-align=strict,-Wcast-align) -Wcast-qual -Wwrite-strings
-
-ifeq ($(CC),clang)
-    SHARED_FLAGS += -Wno-constant-logical-operand
-endif
+SHARED_FLAGS := -pipe -O3 -flto -fPIC -fvisibility=hidden -fno-plt \
+    -fstack-clash-protection $(call safe_flag,-fcf-protection) -fstack-protector-strong \
+    -Wall -Wextra $(call safe_flag,-Wcast-align=strict,-Wcast-align) -Wcast-qual -Wwrite-strings \
+    -Wundef

 ifeq ($(CONFIG_WERROR),true)
     SHARED_FLAGS += -Werror
@@ -42,9 +31,13 @@ ifeq ($(CONFIG_NATIVE),true)
     SHARED_FLAGS += -march=native
 endif

-CFLAGS := $(CFLAGS) -std=c11 $(SHARED_FLAGS) -Wmissing-prototypes
-CXXFLAGS := $(CXXFLAGS) $(call safe_flag,-std=c++17,-std=c++14) $(SHARED_FLAGS)
-LDFLAGS := $(LDFLAGS) -Wl,--as-needed,-z,defs,-z,relro,-z,now,-z,nodlopen,-z,text
+ifeq ($(CONFIG_UBSAN),true)
+    SHARED_FLAGS += -fsanitize=undefined -fno-sanitize-recover=undefined
+endif
+
+CFLAGS := $(CFLAGS) -std=c17 $(SHARED_FLAGS) -Wmissing-prototypes -Wstrict-prototypes
+CXXFLAGS := $(CXXFLAGS) -std=c++17 -fsized-deallocation $(SHARED_FLAGS)
+LDFLAGS := $(LDFLAGS) -Wl,-O1,--as-needed,-z,defs,-z,relro,-z,now,-z,nodlopen,-z,text

 SOURCES := chacha.c h_malloc.c memory.c pages.c random.c util.c
 OBJECTS := $(SOURCES:.c=.o)
@@ -52,16 +45,13 @@ OBJECTS := $(SOURCES:.c=.o)
 ifeq ($(CONFIG_CXX_ALLOCATOR),true)
     # make sure LTO is compatible in case CC and CXX don't match (such as clang and g++)
     CXX := $(CC)
-    LDLIBS += -lstdc++ -lgcc_s
+    LDLIBS += -lstdc++
     SOURCES += new.cc
     OBJECTS += new.o
 endif

-ifeq ($(CONFIG_UBSAN),true)
-    CFLAGS += -fsanitize=undefined -fno-sanitize-recover=undefined
-    CXXFLAGS += -fsanitize=undefined -fno-sanitize-recover=undefined
-endif
+OBJECTS := $(addprefix $(OUT)/,$(OBJECTS))

 ifeq (,$(filter $(CONFIG_SEAL_METADATA),true false))
     $(error CONFIG_SEAL_METADATA must be true or false)
@@ -95,6 +85,10 @@ ifeq (,$(filter $(CONFIG_STATS),true false))
     $(error CONFIG_STATS must be true or false)
 endif

+ifeq (,$(filter $(CONFIG_SELF_INIT),true false))
+    $(error CONFIG_SELF_INIT must be true or false)
+endif
+
 CPPFLAGS += \
     -DCONFIG_SEAL_METADATA=$(CONFIG_SEAL_METADATA) \
     -DZERO_ON_FREE=$(CONFIG_ZERO_ON_FREE) \
@@ -113,30 +107,42 @@ CPPFLAGS += \
     -DFREE_SLABS_QUARANTINE_RANDOM_LENGTH=$(CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH) \
     -DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE) \
     -DN_ARENA=$(CONFIG_N_ARENA) \
-    -DCONFIG_STATS=$(CONFIG_STATS)
+    -DCONFIG_STATS=$(CONFIG_STATS) \
+    -DCONFIG_SELF_INIT=$(CONFIG_SELF_INIT)

-libhardened_malloc.so: $(OBJECTS)
+$(OUT)/libhardened_malloc$(SUFFIX).so: $(OBJECTS) | $(OUT)
 	$(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@

-chacha.o: chacha.c chacha.h util.h
-h_malloc.o: h_malloc.c include/h_malloc.h mutex.h memory.h pages.h random.h util.h
-memory.o: memory.c memory.h util.h
-new.o: new.cc include/h_malloc.h util.h
-pages.o: pages.c pages.h memory.h util.h
-random.o: random.c random.h chacha.h util.h
-util.o: util.c util.h
+$(OUT):
+	mkdir -p $(OUT)
+
+$(OUT)/chacha.o: chacha.c chacha.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(OUT)/h_malloc.o: h_malloc.c include/h_malloc.h mutex.h memory.h pages.h random.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(OUT)/memory.o: memory.c memory.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(OUT)/new.o: new.cc include/h_malloc.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.cc) $(OUTPUT_OPTION) $<
+$(OUT)/pages.o: pages.c pages.h memory.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(OUT)/random.o: random.c random.h chacha.h util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(OUT)/util.o: util.c util.h $(CONFIG_FILE) | $(OUT)
+	$(COMPILE.c) $(OUTPUT_OPTION) $<

 check: tidy

 tidy:
-	clang-tidy --extra-arg=-std=c11 $(filter %.c,$(SOURCES)) -- $(CPPFLAGS)
+	clang-tidy --extra-arg=-std=c17 $(filter %.c,$(SOURCES)) -- $(CPPFLAGS)
 	clang-tidy --extra-arg=-std=c++17 $(filter %.cc,$(SOURCES)) -- $(CPPFLAGS)

 clean:
-	rm -f libhardened_malloc.so $(OBJECTS)
+	rm -f $(OUT)/libhardened_malloc.so $(OBJECTS)
+	$(MAKE) -C test/ clean

-test: libhardened_malloc.so
-	make -C test/
-	-python3 -m unittest discover --start-directory test/
+test: $(OUT)/libhardened_malloc$(SUFFIX).so
+	$(MAKE) -C test/
+	python3 -m unittest discover --start-directory test/

 .PHONY: check clean tidy test

README.md (197 changed lines)

@@ -32,7 +32,7 @@ to much less metadata overhead and memory waste from fragmentation than a more
 traditional allocator design. It aims to provide decent overall performance
 with a focus on long-term performance and memory usage rather than allocator
 micro-benchmarks. It offers scalability via a configurable number of entirely
-independently arenas, with the internal locking within arenas further divided
+independent arenas, with the internal locking within arenas further divided
 up per size class.

 This project currently supports Bionic (Android), musl and glibc. It may
@@ -65,12 +65,14 @@ used instead as this allocator fundamentally doesn't support that environment.
 ## Dependencies

-Debian stable (currently Debian 10) determines the most ancient set of
+Debian stable (currently Debian 12) determines the most ancient set of
 supported dependencies:

-* glibc 2.28
-* Linux 4.19
-* Clang 7.0 or GCC 8.3.0
+* glibc 2.36
+* Linux 6.1
+* Clang 14.0.6 or GCC 12.2.0
+
+For Android, the Linux GKI 5.10, 5.15 and 6.1 branches are supported.

 However, using more recent releases is highly recommended. Older versions of
 the dependencies may be compatible at the moment but are not tested and will
along with other hardening for the C standard library implementation. along with other hardening for the C standard library implementation.
For Android, only the current generation, actively developed maintenance branch of the Android For Android, only the current generation, actively developed maintenance branch of the Android
Open Source Project will be supported, which currently means `android11-qpr3-release` and Open Source Project will be supported, which currently means `android15-release`.
`android12-release`.
The Linux kernel's implementation of Memory Protection Keys was severely broken
before Linux 5.0. The `CONFIG_SEAL_METADATA` feature should only be enabled for
use on kernels newer than 5.0 or longterm branches with a backport of the [fix
for the
issue](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a31e184e4f69965c99c04cc5eb8a4920e0c63737).
This issue was discovered and reported by the hardened\_malloc project.
## Testing ## Testing
@@ -101,7 +95,7 @@ executables using glibc or musl:
     ./preload.sh krita --new-image RGBA,U8,500,500

 It can be necessary to substantially increase the `vm.max_map_count` sysctl to
-accomodate the large number of mappings caused by guard slabs and large
+accommodate the large number of mappings caused by guard slabs and large
 allocation guard regions. The number of mappings can also be drastically
 reduced via a significant increase to `CONFIG_GUARD_SLABS_INTERVAL` but the
 feature has a low performance and memory usage cost so that isn't recommended.
@ -123,7 +117,8 @@ make command as follows:

## Compatibility

-OpenSSH 8.1 or higher is required to allow the mprotect PROT_READ|PROT_WRITE system calls in the seccomp-bpf filter rather than killing the process.
+OpenSSH 8.1 or higher is required to allow the mprotect `PROT_READ|PROT_WRITE`
+system calls in the seccomp-bpf filter rather than killing the process.
## OS integration

@ -139,7 +134,7 @@ between performance and security. However, this reduces security for threat

models where persistent state is untrusted, i.e. verified boot and attestation
(see the [attestation sister project](https://attestation.app/about)).

-Make sure to raise `vm.max_map_count` substantially too to accomodate the very
+Make sure to raise `vm.max_map_count` substantially too to accommodate the very
large number of guard pages created by hardened\_malloc. This can be done in
`init.rc` (`system/core/rootdir/init.rc`) near the other virtual memory
configuration:
@ -164,13 +159,16 @@ line to the `/etc/ld.so.preload` configuration file:

The format of this configuration file is a whitespace-separated list, so it's
good practice to put each library on a separate line.
On Debian systems `libhardened_malloc.so` should be installed into `/usr/lib/`
to avoid preload failures caused by AppArmor profile restrictions.
Using the `LD_PRELOAD` environment variable to load it on a case-by-case basis
will not work when `AT_SECURE` is set such as with setuid binaries. It's also
generally not a recommended approach for production usage. The recommendation
is to enable it globally and make exceptions for performance critical cases by
running the application in a container / namespace without it enabled.

-Make sure to raise `vm.max_map_count` substantially too to accomodate the very
+Make sure to raise `vm.max_map_count` substantially too to accommodate the very
large number of guard pages created by hardened\_malloc. As an example, in
`/etc/sysctl.d/hardened_malloc.conf`:
@ -179,6 +177,13 @@ large number of guard pages created by hardened\_malloc. As an example, in

This is unnecessary if you set `CONFIG_GUARD_SLABS_INTERVAL` to a very large
value in the build configuration.
On arm64, make sure your kernel is configured to use 4k pages since we haven't
yet added support for 16k and 64k pages. The kernel also has to be configured
to use 4 level page tables for the full 48 bit address space instead of only
having a 39 bit address space for the default hardened\_malloc configuration.
It's possible to reduce the class region size substantially to make a 39 bit
address space workable but the defaults won't work.
## Configuration

You can set some configuration options at compile-time via arguments to the

@ -191,6 +196,30 @@ between portability, performance, memory usage or security. The core design

choices are not configurable and the allocator remains very security-focused
even with all the optional features disabled.
The configuration system supports a configuration template system with two
standard presets: the default configuration (`config/default.mk`) and a light
configuration (`config/light.mk`). Packagers are strongly encouraged to ship
both the standard `default` and `light` configuration. You can choose the
configuration to build using `make VARIANT=light` where `make VARIANT=default`
is the same as `make`. Non-default configuration templates will build a library
with the suffix `-variant` such as `libhardened_malloc-light.so` and will use
an `out-variant` directory instead of `out` for the build.
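As a usage illustration (not part of the README itself), building the light
variant and locating the suffixed library would look like this:

    make VARIANT=light
    ls out-light/libhardened_malloc-light.so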
The `default` configuration template has all normal optional security features
enabled (just not the niche `CONFIG_SEAL_METADATA`) and is quite aggressive in
terms of sacrificing performance and memory usage for security. The `light`
configuration template disables the slab quarantines, write after free check,
slot randomization and raises the guard slab interval from 1 to 8 but leaves
zero-on-free and slab canaries enabled. The `light` configuration has solid
performance and memory usage while still being far more secure than mainstream
allocators with much better security properties. Disabling zero-on-free would
gain more performance but doesn't make much difference for small allocations
without also disabling slab canaries. Slab canaries slightly raise memory use
and slightly slow down performance but are quite important to mitigate small
overflows and C string overflows. Disabling slab canaries is not recommended
in most cases since it would no longer be a strict upgrade over traditional
allocators with headers on allocations and basic consistency checks for them.
For reduced memory usage at the expense of performance (this will also reduce
the size of the empty slab caches and quarantines, saving a lot of memory,
since those are currently based on the size of the largest size class):

@ -199,24 +228,6 @@ since those are currently based on the size of the largest size class):

N_ARENA=1 \
CONFIG_EXTENDED_SIZE_CLASSES=false

-The default configuration has all normal security features enabled (just not
-the niche `CONFIG_SEAL_METADATA`) and is quite aggressive in terms of
-sacrificing performance and memory usage for security. An example of a leaner
-configuration disabling expensive security features other than zero-on-free /
-slab canaries along with using far fewer guard slabs:
-
-make \
-CONFIG_WRITE_AFTER_FREE_CHECK=false \
-CONFIG_SLOT_RANDOMIZE=false \
-CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH=0 \
-CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH=0 \
-CONFIG_GUARD_SLABS_INTERVAL=8
-
-This is a more appropriate configuration for a more mainstream OS choosing to
-use hardened\_malloc while making a smaller memory and performance sacrifice.
-The slot randomization isn't particularly expensive but it's low value and is
-one of the first things to disable when aiming for higher performance.
The following boolean configuration options are available:

* `CONFIG_WERROR`: `true` (default) or `false` to control whether compiler

@ -261,11 +272,10 @@ The following boolean configuration options are available:

* `CONFIG_SEAL_METADATA`: `true` or `false` (default) to control whether Memory
Protection Keys are used to disable access to all writable allocator state
outside of the memory allocator code. It's currently disabled by default due
-to lack of regular testing and a significant performance cost for this use
-case on current generation hardware, which may become drastically lower in
-the future. Whether or not this feature is enabled, the metadata is all
-contained within an isolated memory region with high entropy random guard
-regions around it.
+to a significant performance cost for this use case on current generation
+hardware, which may become drastically lower in the future. Whether or not
+this feature is enabled, the metadata is all contained within an isolated
+memory region with high entropy random guard regions around it.

The following integer configuration options are available:
@ -463,16 +473,16 @@ was a bit less important and if a core goal was finding latent bugs.

* Errors other than ENOMEM from mmap, munmap, mprotect and mremap treated
as fatal, which can help to detect memory management gone wrong elsewhere
in the process.
-* [future] Memory tagging for slab allocations via MTE on ARMv8.5+
+* Memory tagging for slab allocations via MTE on ARMv8.5+
* random memory tags as the baseline, providing probabilistic protection
against various forms of memory corruption
* dedicated tag for free slots, set on free, for deterministic protection
against accessing freed memory
* store previous random tag within freed slab allocations, and increment it
to get the next tag for that slot to provide deterministic use-after-free
detection through multiple cycles of memory reuse
* guarantee distinct tags for adjacent memory allocations by incrementing
past matching values for deterministic detection of linear overflows
-* [future] store previous random tag and increment it to get the next tag
-for that slot to provide deterministic use-after-free detection through
-multiple cycles of memory reuse
## Randomness

@ -495,7 +505,7 @@ ChaCha8 is a great fit because it's extremely fast across platforms without

relying on hardware support or complex platform-specific code. The security
margins of ChaCha20 would be completely overkill for the use case. Using
ChaCha8 avoids needing to resort to a non-cryptographically secure PRNG or
-something without a lot of scrunity. The current implementation is simply the
+something without a lot of scrutiny. The current implementation is simply the
reference implementation of ChaCha8 converted into a pure keystream by ripping
out the XOR of the message into the keystream.
@ -714,77 +724,46 @@ freeing as there would be if the kernel supported these features directly.

## Memory tagging

-Integrating extensive support for ARMv8.5 memory tagging is planned and this
-section will be expanded to cover the details on the chosen design. The approach
-for slab allocations is currently covered, but it can also be used for the
-allocator metadata region and large allocations.
-
-Memory allocations are already always multiples of naturally aligned 16 byte
-units, so memory tags are a natural fit into a malloc implementation due to the
-16 byte alignment requirement. The only extra memory consumption will come from
-the hardware supported storage for the tag values (4 bits per 16 bytes).
-
-The baseline policy will be to generate random tags for each slab allocation
-slot on first use. The highest value will be reserved for marking freed memory
-allocations to detect any accesses to freed memory so it won't be part of the
-generated range. Adjacent slots will be guaranteed to have distinct memory tags
-in order to guarantee that linear overflows are detected. There are a few ways
-of implementing this and it will end up depending on the performance costs of
-different approaches. If there's an efficient way to fetch the adjacent tag
-values without wasting extra memory, it will be possible to check for them and
-skip them either by generating a new random value in a loop or incrementing
-past them since the tiny bit of bias wouldn't matter. Another approach would be
-alternating odd and even tag values but that would substantially reduce the
-overall randomness of the tags and there's very little entropy from the start.
-
-Once a slab allocation has been freed, the tag will be set to the reserved
-value for free memory and the previous tag value will be stored inside the
-allocation itself. The next time the slot is allocated, the chosen tag value
-will be the previous value incremented by one to provide use-after-free
-detection between generations of allocations. The stored tag will be wiped
-before retagging the memory, to avoid leaking it and as part of preserving the
-security property of newly allocated memory being zeroed due to zero-on-free.
-It will eventually wrap all the way around, but this ends up providing a strong
-guarantee for many allocation cycles due to the combination of 4 bit tags with
-the FIFO quarantine feature providing delayed free. It also benefits from
-random slot allocation and the randomized portion of delayed free, which result
-in a further delay along with preventing a deterministic bypass by forcing a
-reuse after a certain number of allocation cycles. Similarly to the initial tag
-generation, tag values for adjacent allocations will be skipped by incrementing
-past them.
-
-For example, consider this slab of allocations that are not yet used with 15
-representing the tag for free memory. For the sake of simplicity, there will be
-no quarantine or other slabs for this example:
-
-| 15 | 15 | 15 | 15 | 15 | 15 |
-
-Three slots are randomly chosen for allocations, with random tags assigned (2,
-7, 14) since these slots haven't ever been used and don't have saved values:
-
-| 15 | 2 | 15 | 7 | 14 | 15 |
-
-The 2nd allocation slot is freed, and is set back to the tag for free memory
-(15), but with the previous tag value stored in the freed space:
-
-| 15 | 15 | 15 | 7 | 14 | 15 |
-
-The first slot is allocated for the first time, receiving the random value 3:
-
-| 3 | 15 | 15 | 7 | 14 | 15 |
-
-The 2nd slot is randomly chosen again, so the previous tag (2) is retrieved and
-incremented to 3 as part of the use-after-free mitigation. An adjacent
-allocation already uses the tag 3, so the tag is further incremented to 4 (it
-would be incremented to 5 if one of the adjacent tags was 4):
-
-| 3 | 4 | 15 | 7 | 14 | 15 |
-
-The last slot is randomly chosen for the next allocation, and is assigned the
-random value 14. However, it's placed next to an allocation with the tag 14 so
-the tag is incremented and wraps around to 0:
-
-| 3 | 4 | 15 | 7 | 14 | 0 |

+Random tags are set for all slab allocations when allocated, with 4 excluded values:
+
+1. the reserved `0` tag
+2. the previous tag used for the slot
+3. the current (or previous) tag used for the slot to the left
+4. the current (or previous) tag used for the slot to the right
+
+When a slab allocation is freed, the reserved `0` tag is set for the slot.
+Slab allocation slots are cleared before reuse when memory tagging is enabled.
+
+This ensures the following properties:
+
+- Linear overflows are deterministically detected.
+- Use-after-free is deterministically detected until the freed slot goes through
+both the random and FIFO quarantines, gets allocated again, goes through both
+quarantines again and then finally gets allocated again for a 2nd time.
+- Since the default `0` tag is reserved, untagged pointers can't access slab
+allocations and vice versa.
+
+Slab allocations are done in a statically reserved region for each size class
+and all metadata is in a statically reserved region, so interactions between
+different uses of the same address space are not applicable.
+
+Large allocations beyond the largest slab allocation size class (128k by
+default) are guaranteed to have randomly sized guard regions to the left and
+right. Random and FIFO address space quarantines provide use-after-free
+detection. We need to test whether the cost of random tags is acceptable to
+enable them by default, since they would be useful for:
+
+- probabilistic detection of overflows
+- probabilistic detection of use-after-free once the address space is
+out of the quarantine and reused for another allocation
+- deterministic detection of use-after-free for reuse by another allocator.
+
+When memory tagging is enabled, checking for write-after-free at allocation
+time and checking canaries are both disabled. Canaries will be more thoroughly
+disabled when using memory tagging in the future, but Android currently has
+[very dynamic memory tagging support](https://source.android.com/docs/security/test/memory-safety/arm-mte)
+where it can be disabled at any time, which creates a barrier to optimizing
+by disabling redundant features.
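A minimal sketch of the tag-choice policy described above (a hypothetical
helper, not the actual h_malloc.c code; it assumes the
`arm_mte_create_random_tag` wrapper from `arm_mte.h`, which appears later in
this diff):

    #include <stdint.h>

    #include "arm_mte.h" // wraps the IRG instruction

    #define RESERVED_TAG 0

    // Each set bit in the IRG exclusion mask removes that tag value from the
    // candidates, so excluding the reserved tag, the slot's previous tag and
    // both neighbors' tags yields the four guarantees listed above.
    static void *tag_new_slot(void *slot, uint8_t prev_tag, uint8_t left_tag,
                              uint8_t right_tag) {
        uint64_t exclusion_mask = (1ull << RESERVED_TAG)
                                | (1ull << prev_tag)
                                | (1ull << left_tag)
                                | (1ull << right_tag);
        return arm_mte_create_random_tag(slot, exclusion_mask);
    }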
## API extensions ## API extensions

androidtest/Android.bp Normal file
@ -0,0 +1,25 @@
java_test_host {
name: "HMallocTest",
srcs: [
"src/**/*.java",
],
libs: [
"tradefed",
"compatibility-tradefed",
"compatibility-host-util",
],
static_libs: [
"cts-host-utils",
"frameworks-base-hostutils",
],
test_suites: [
"general-tests",
],
data_device_bins_64: [
"memtag_test",
],
}


@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration description="hardened_malloc test">
<target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
<option name="cleanup" value="true" />
<option name="push" value="memtag_test->/data/local/tmp/memtag_test" />
</target_preparer>
<test class="com.android.compatibility.common.tradefed.testtype.JarHostTest" >
<option name="jar" value="HMallocTest.jar" />
</test>
</configuration>


@ -0,0 +1,17 @@
cc_test {
name: "memtag_test",
srcs: ["memtag_test.cc"],
cflags: [
"-Wall",
"-Werror",
"-Wextra",
"-O0",
"-march=armv9-a+memtag",
],
compile_multilib: "64",
sanitize: {
memtag_heap: true,
},
}


@ -0,0 +1,351 @@
// needed to unconditionally enable assertions
#undef NDEBUG
#include <assert.h>
#include <limits.h> // INT_MAX (was missing)
#include <malloc.h>
#include <math.h> // log()/exp() (was missing)
#include <signal.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h> // memset() (was missing)
#include <sys/mman.h>
#include <sys/utsname.h>
#include <unistd.h>
#include <algorithm> // std::min/std::max (was missing)
#include <functional> // std::function (was missing)
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include "../../arm_mte.h"
using namespace std;
using u8 = uint8_t;
using uptr = uintptr_t;
using u64 = uint64_t;
const size_t DEFAULT_ALLOC_SIZE = 8;
const size_t CANARY_SIZE = 8;
void do_context_switch() {
utsname s;
uname(&s);
}
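// MTE tags occupy bits [59:56] of the pointer, i.e. the low nibble of the top byte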
u8 get_pointer_tag(void *ptr) {
return (((uptr) ptr) >> 56) & 0xf;
}
void *untag_pointer(void *ptr) {
const uintptr_t mask = UINTPTR_MAX >> 8;
return (void *) ((uintptr_t) ptr & mask);
}
void *set_pointer_tag(void *ptr, u8 tag) {
return (void *) (((uintptr_t) tag << 56) | (uintptr_t) untag_pointer(ptr));
}
// This test checks that slab slot allocation uses a tag that is distinct from the tags of its
// neighbors and from the tag of the previous allocation that used the same slot
void tag_distinctness() {
// tag 0 is reserved
const int min_tag = 1;
const int max_tag = 0xf;
struct SizeClass {
int size;
int slot_cnt;
};
// values from size_classes[] and size_class_slots[] in h_malloc.c
SizeClass size_classes[] = {
{ .size = 16, .slot_cnt = 256, },
{ .size = 32, .slot_cnt = 128, },
// this size class is used by allocations that are made by the addr_tag_map, which breaks
// tag distinctness checks
// { .size = 48, .slot_cnt = 85, },
{ .size = 64, .slot_cnt = 64, },
{ .size = 80, .slot_cnt = 51, },
{ .size = 96, .slot_cnt = 42, },
{ .size = 112, .slot_cnt = 36, },
{ .size = 128, .slot_cnt = 64, },
{ .size = 160, .slot_cnt = 51, },
{ .size = 192, .slot_cnt = 64, },
{ .size = 224, .slot_cnt = 54, },
{ .size = 10240, .slot_cnt = 6, },
{ .size = 20480, .slot_cnt = 1, },
};
int tag_usage[max_tag + 1] = {}; // zero-initialize; the counters were previously read uninitialized
for (size_t sc_idx = 0; sc_idx < sizeof(size_classes) / sizeof(SizeClass); ++sc_idx) {
SizeClass &sc = size_classes[sc_idx];
const size_t full_alloc_size = sc.size;
const size_t alloc_size = full_alloc_size - CANARY_SIZE;
// "tdc" is short for "tag distinctness check"
int left_neighbor_tdc_cnt = 0;
int right_neighbor_tdc_cnt = 0;
int prev_alloc_tdc_cnt = 0;
int iter_cnt = 600;
unordered_map<uptr, u8> addr_tag_map;
addr_tag_map.reserve(iter_cnt * sc.slot_cnt);
u64 seen_tags = 0;
for (int iter = 0; iter < iter_cnt; ++iter) {
uptr allocations[256]; // 256 is max slot count
for (int i = 0; i < sc.slot_cnt; ++i) {
u8 *p = (u8 *) malloc(alloc_size);
assert(p);
uptr addr = (uptr) untag_pointer(p);
u8 tag = get_pointer_tag(p);
assert(tag >= min_tag && tag <= max_tag);
seen_tags |= 1 << tag;
++tag_usage[tag];
// check most recent tags of left and right neighbors
auto left = addr_tag_map.find(addr - full_alloc_size);
if (left != addr_tag_map.end()) {
assert(left->second != tag);
++left_neighbor_tdc_cnt;
}
auto right = addr_tag_map.find(addr + full_alloc_size);
if (right != addr_tag_map.end()) {
assert(right->second != tag);
++right_neighbor_tdc_cnt;
}
// check previous tag of this slot
auto prev = addr_tag_map.find(addr);
if (prev != addr_tag_map.end()) {
assert(prev->second != tag);
++prev_alloc_tdc_cnt;
addr_tag_map.erase(addr);
}
addr_tag_map.emplace(addr, tag);
for (size_t j = 0; j < alloc_size; ++j) {
// check that slot is zeroed
assert(p[j] == 0);
// check that slot is readable and writable
p[j]++;
}
allocations[i] = addr;
}
// free some of allocations to allow their slots to be reused
for (int i = sc.slot_cnt - 1; i >= 0; i -= 2) {
free((void *) allocations[i]);
}
}
// check that all of the tags were used, except for the reserved tag 0
assert(seen_tags == (0xffff & ~(1 << 0)));
printf("size_class\t%i\t" "tdc_left %i\t" "tdc_right %i\t" "tdc_prev_alloc %i\n",
sc.size, left_neighbor_tdc_cnt, right_neighbor_tdc_cnt, prev_alloc_tdc_cnt);
// make sure tag distinctness checks were actually performed
int min_tdc_cnt = sc.slot_cnt * iter_cnt / 5;
assert(prev_alloc_tdc_cnt > min_tdc_cnt);
if (sc.slot_cnt > 1) {
assert(left_neighbor_tdc_cnt > min_tdc_cnt);
assert(right_neighbor_tdc_cnt > min_tdc_cnt);
}
// async tag check failures are reported on context switch
do_context_switch();
}
printf("\nTag use counters:\n");
int min = INT_MAX;
int max = 0;
double geomean = 0.0;
for (int i = min_tag; i <= max_tag; ++i) {
int v = tag_usage[i];
geomean += log(v);
min = std::min(min, v);
max = std::max(max, v);
printf("%i\t%i\n", i, tag_usage[i]);
}
int tag_cnt = 1 + max_tag - min_tag;
geomean = exp(geomean / tag_cnt);
double max_deviation = std::max((double) max - geomean, geomean - min);
printf("geomean: %.2f, max deviation from geomean: %.2f%%\n", geomean, (100.0 * max_deviation) / geomean);
}
u8* alloc_default() {
const size_t full_alloc_size = DEFAULT_ALLOC_SIZE + CANARY_SIZE;
set<uptr> addrs;
// make sure allocation has both left and right neighbors, otherwise overflow/underflow tests
// will fail when allocation is at the end/beginning of slab
for (;;) {
u8 *p = (u8 *) malloc(DEFAULT_ALLOC_SIZE);
assert(p);
uptr addr = (uptr) untag_pointer(p);
uptr left = addr - full_alloc_size;
if (addrs.find(left) != addrs.end()) {
uptr right = addr + full_alloc_size;
if (addrs.find(right) != addrs.end()) {
return p;
}
}
addrs.emplace(addr);
}
}
int expected_segv_code;
#define expect_segv(exp, segv_code) ({\
expected_segv_code = segv_code; \
volatile auto val = exp; \
(void) val; \
do_context_switch(); \
fprintf(stderr, "didn't receive SEGV code %i", segv_code); \
exit(1); })
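// expect_segv evaluates `exp`, then forces a context switch so asynchronous
// tag-check faults are delivered; if execution reaches the fprintf, the
// expected fault never arrived and the test fails. segv_handler (below) exits
// with status 0 when si_code matches expected_segv_code.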
// it's expected that the device is configured to use asymm MTE tag checking mode (sync read checks,
// async write checks)
#define expect_read_segv(exp) expect_segv(exp, SEGV_MTESERR)
#define expect_write_segv(exp) expect_segv(exp, SEGV_MTEAERR)
void read_after_free() {
u8 *p = alloc_default();
free(p);
expect_read_segv(p[0]);
}
void write_after_free() {
u8 *p = alloc_default();
free(p);
expect_write_segv(p[0] = 1);
}
void underflow_read() {
u8 *p = alloc_default();
expect_read_segv(p[-1]);
}
void underflow_write() {
u8 *p = alloc_default();
expect_write_segv(p[-1] = 1);
}
void overflow_read() {
u8 *p = alloc_default();
expect_read_segv(p[DEFAULT_ALLOC_SIZE + CANARY_SIZE]);
}
void overflow_write() {
u8 *p = alloc_default();
expect_write_segv(p[DEFAULT_ALLOC_SIZE + CANARY_SIZE] = 1);
}
void untagged_read() {
u8 *p = alloc_default();
p = (u8 *) untag_pointer(p);
expect_read_segv(p[0]);
}
void untagged_write() {
u8 *p = alloc_default();
p = (u8 *) untag_pointer(p);
expect_write_segv(p[0] = 1);
}
// checks that each of memory locations inside the buffer is tagged with expected_tag
void check_tag(void *buf, size_t len, u8 expected_tag) {
for (size_t i = 0; i < len; ++i) {
assert(get_pointer_tag(__arm_mte_get_tag((void *) ((uintptr_t) buf + i))) == expected_tag);
}
}
void madvise_dontneed() {
const size_t len = 100'000;
void *ptr = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_MTE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
assert(ptr != MAP_FAILED);
// check that 0 is the initial tag
check_tag(ptr, len, 0);
arm_mte_tag_and_clear_mem(set_pointer_tag(ptr, 1), len);
check_tag(ptr, len, 1);
memset(set_pointer_tag(ptr, 1), 1, len);
assert(madvise(ptr, len, MADV_DONTNEED) == 0);
// check that MADV_DONTNEED resets the tag
check_tag(ptr, len, 0);
// check that MADV_DONTNEED clears the memory
for (size_t i = 0; i < len; ++i) {
assert(((u8 *) ptr)[i] == 0);
}
// check that mistagged read after MADV_DONTNEED fails
expect_read_segv(*((u8 *) set_pointer_tag(ptr, 1)));
}
map<string, function<void()>> tests = {
#define TEST(s) { #s, s }
TEST(tag_distinctness),
TEST(read_after_free),
TEST(write_after_free),
TEST(overflow_read),
TEST(overflow_write),
TEST(underflow_read),
TEST(underflow_write),
TEST(untagged_read),
TEST(untagged_write),
TEST(madvise_dontneed),
#undef TEST
};
void segv_handler(int, siginfo_t *si, void *) {
if (expected_segv_code == 0 || expected_segv_code != si->si_code) {
fprintf(stderr, "received unexpected SEGV_CODE %i", si->si_code);
exit(139); // standard exit code for SIGSEGV
}
exit(0);
}
int main(int argc, char **argv) {
setbuf(stdout, NULL);
assert(argc == 2);
auto test_name = string(argv[1]);
auto test_fn = tests[test_name];
assert(test_fn != nullptr);
assert(mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL, M_HEAP_TAGGING_LEVEL_ASYNC) == 1);
struct sigaction sa = {
.sa_sigaction = segv_handler,
.sa_flags = SA_SIGINFO,
};
assert(sigaction(SIGSEGV, &sa, nullptr) == 0);
test_fn();
do_context_switch();
return 0;
}


@ -0,0 +1,79 @@
package grapheneos.hmalloc;
import com.android.tradefed.device.DeviceNotAvailableException;
import com.android.tradefed.testtype.DeviceJUnit4ClassRunner;
import com.android.tradefed.testtype.junit4.BaseHostJUnit4Test;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.ArrayList;
import static org.junit.Assert.assertEquals;
@RunWith(DeviceJUnit4ClassRunner.class)
public class MemtagTest extends BaseHostJUnit4Test {
private static final String TEST_BINARY = "/data/local/tmp/memtag_test";
private void runTest(String name) throws DeviceNotAvailableException {
var args = new ArrayList<String>();
args.add(TEST_BINARY);
args.add(name);
String cmdLine = String.join(" ", args);
var result = getDevice().executeShellV2Command(cmdLine);
assertEquals("stderr", "", result.getStderr());
assertEquals("process exit code", 0, result.getExitCode().intValue());
}
@Test
public void tag_distinctness() throws DeviceNotAvailableException {
runTest("tag_distinctness");
}
@Test
public void read_after_free() throws DeviceNotAvailableException {
runTest("read_after_free");
}
@Test
public void write_after_free() throws DeviceNotAvailableException {
runTest("write_after_free");
}
@Test
public void underflow_read() throws DeviceNotAvailableException {
runTest("underflow_read");
}
@Test
public void underflow_write() throws DeviceNotAvailableException {
runTest("underflow_write");
}
@Test
public void overflow_read() throws DeviceNotAvailableException {
runTest("overflow_read");
}
@Test
public void overflow_write() throws DeviceNotAvailableException {
runTest("overflow_write");
}
@Test
public void untagged_read() throws DeviceNotAvailableException {
runTest("untagged_read");
}
@Test
public void untagged_write() throws DeviceNotAvailableException {
runTest("untagged_write");
}
@Test
public void madvise_dontneed() throws DeviceNotAvailableException {
runTest("madvise_dontneed");
}
}

arm_mte.h Normal file
@ -0,0 +1,91 @@
#ifndef ARM_MTE_H
#define ARM_MTE_H
#include <arm_acle.h>
#include <stdint.h>
// Returns a tagged pointer.
// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions/IRG--Insert-Random-Tag-
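// Each bit set in exclusion_mask excludes the corresponding tag value from the generated tags.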
static inline void *arm_mte_create_random_tag(void *p, uint64_t exclusion_mask) {
return __arm_mte_create_random_tag(p, exclusion_mask);
}
// Tag the memory region with the tag specified in tag bits of tagged_ptr. Memory region itself is
// zeroed.
// tagged_ptr has to be aligned by 16, and len has to be a multiple of 16 (tag granule size).
//
// Arm's software optimization guide says:
// "it is recommended to use STZGM (or DCZGVA) to set tag if data is not a concern." (STZGM and
// DCGZVA are zeroing variants of tagging instructions).
//
// Contents of this function were copied from scudo:
// https://android.googlesource.com/platform/external/scudo/+/refs/tags/android-14.0.0_r1/standalone/memtag.h#167
//
// scudo is licensed under the Apache License v2.0 with LLVM Exceptions, which is compatible with
// hardened_malloc's MIT license
static inline void arm_mte_tag_and_clear_mem(void *tagged_ptr, size_t len) {
uintptr_t Begin = (uintptr_t) tagged_ptr;
uintptr_t End = Begin + len;
uintptr_t LineSize, Next, Tmp;
__asm__ __volatile__(
".arch_extension memtag \n\t"
// Compute the cache line size in bytes (DCZID_EL0 stores it as the log2
// of the number of 4-byte words) and bail out to the slow path if DCZID_EL0
// indicates that the DC instructions are unavailable.
"DCZID .req %[Tmp] \n\t"
"mrs DCZID, dczid_el0 \n\t"
"tbnz DCZID, #4, 3f \n\t"
"and DCZID, DCZID, #15 \n\t"
"mov %[LineSize], #4 \n\t"
"lsl %[LineSize], %[LineSize], DCZID \n\t"
".unreq DCZID \n\t"
// Our main loop doesn't handle the case where we don't need to perform any
// DC GZVA operations. If the size of our tagged region is less than
// twice the cache line size, bail out to the slow path since it's not
// guaranteed that we'll be able to do a DC GZVA.
"Size .req %[Tmp] \n\t"
"sub Size, %[End], %[Cur] \n\t"
"cmp Size, %[LineSize], lsl #1 \n\t"
"b.lt 3f \n\t"
".unreq Size \n\t"
"LineMask .req %[Tmp] \n\t"
"sub LineMask, %[LineSize], #1 \n\t"
// STZG until the start of the next cache line.
"orr %[Next], %[Cur], LineMask \n\t"
"1:\n\t"
"stzg %[Cur], [%[Cur]], #16 \n\t"
"cmp %[Cur], %[Next] \n\t"
"b.lt 1b \n\t"
// DC GZVA cache lines until we have no more full cache lines.
"bic %[Next], %[End], LineMask \n\t"
".unreq LineMask \n\t"
"2: \n\t"
"dc gzva, %[Cur] \n\t"
"add %[Cur], %[Cur], %[LineSize] \n\t"
"cmp %[Cur], %[Next] \n\t"
"b.lt 2b \n\t"
// STZG until the end of the tagged region. This loop is also used to handle
// slow path cases.
"3: \n\t"
"cmp %[Cur], %[End] \n\t"
"b.ge 4f \n\t"
"stzg %[Cur], [%[Cur]], #16 \n\t"
"b 3b \n\t"
"4: \n\t"
: [Cur] "+&r"(Begin), [LineSize] "=&r"(LineSize), [Next] "=&r"(Next), [Tmp] "=&r"(Tmp)
: [End] "r"(End)
: "memory"
);
}
#endif


@ -41,7 +41,7 @@ static const unsigned rounds = 8;

a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);

-static const char sigma[16] = "expand 32-byte k";
+static const char sigma[16] NONSTRING = "expand 32-byte k";

void chacha_keysetup(chacha_ctx *x, const u8 *k) {
x->input[0] = U8TO32_LITTLE(sigma + 0);
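The `NONSTRING` macro isn't shown in this diff; presumably (an assumption,
since util.h isn't included here) it expands to GCC's `nonstring` attribute
when available, along these lines:

    // hypothetical definition for illustration only
    #ifndef __has_attribute
    #define __has_attribute(x) 0
    #endif
    #if __has_attribute(nonstring)
    #define NONSTRING __attribute__((nonstring))
    #else
    #define NONSTRING
    #endif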

config/default.mk Normal file
@ -0,0 +1,23 @@
CONFIG_WERROR := true
CONFIG_NATIVE := true
CONFIG_CXX_ALLOCATOR := true
CONFIG_UBSAN := false
CONFIG_SEAL_METADATA := false
CONFIG_ZERO_ON_FREE := true
CONFIG_WRITE_AFTER_FREE_CHECK := true
CONFIG_SLOT_RANDOMIZE := true
CONFIG_SLAB_CANARY := true
CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH := 1
CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH := 1
CONFIG_EXTENDED_SIZE_CLASSES := true
CONFIG_LARGE_SIZE_CLASSES := true
CONFIG_GUARD_SLABS_INTERVAL := 1
CONFIG_GUARD_SIZE_DIVISOR := 2
CONFIG_REGION_QUARANTINE_RANDOM_LENGTH := 256
CONFIG_REGION_QUARANTINE_QUEUE_LENGTH := 1024
CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD := 33554432 # 32MiB
CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH := 32
CONFIG_CLASS_REGION_SIZE := 34359738368 # 32GiB
CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true

config/light.mk Normal file
@ -0,0 +1,23 @@
CONFIG_WERROR := true
CONFIG_NATIVE := true
CONFIG_CXX_ALLOCATOR := true
CONFIG_UBSAN := false
CONFIG_SEAL_METADATA := false
CONFIG_ZERO_ON_FREE := true
CONFIG_WRITE_AFTER_FREE_CHECK := false
CONFIG_SLOT_RANDOMIZE := false
CONFIG_SLAB_CANARY := true
CONFIG_SLAB_QUARANTINE_RANDOM_LENGTH := 0
CONFIG_SLAB_QUARANTINE_QUEUE_LENGTH := 0
CONFIG_EXTENDED_SIZE_CLASSES := true
CONFIG_LARGE_SIZE_CLASSES := true
CONFIG_GUARD_SLABS_INTERVAL := 8
CONFIG_GUARD_SIZE_DIVISOR := 2
CONFIG_REGION_QUARANTINE_RANDOM_LENGTH := 256
CONFIG_REGION_QUARANTINE_QUEUE_LENGTH := 1024
CONFIG_REGION_QUARANTINE_SKIP_THRESHOLD := 33554432 # 32MiB
CONFIG_FREE_SLABS_QUARANTINE_RANDOM_LENGTH := 32
CONFIG_CLASS_REGION_SIZE := 34359738368 # 32GiB
CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true

File diff suppressed because it is too large

@ -48,9 +48,10 @@ extern "C" {

#endif

// C standard
-void *h_malloc(size_t size);
-void *h_calloc(size_t nmemb, size_t size);
-void *h_realloc(void *ptr, size_t size);
+__attribute__((malloc)) __attribute__((alloc_size(1))) void *h_malloc(size_t size);
+__attribute__((malloc)) __attribute__((alloc_size(1, 2))) void *h_calloc(size_t nmemb, size_t size);
+__attribute__((alloc_size(2))) void *h_realloc(void *ptr, size_t size);
+__attribute__((malloc)) __attribute__((alloc_size(2))) __attribute__((alloc_align(1)))
void *h_aligned_alloc(size_t alignment, size_t size);
void h_free(void *ptr);

@ -76,10 +77,11 @@ int h_malloc_info(int options, FILE *fp);

#endif

// obsolete glibc extensions
+__attribute__((malloc)) __attribute__((alloc_size(2))) __attribute__((alloc_align(1)))
void *h_memalign(size_t alignment, size_t size);
#ifndef __ANDROID__
-void *h_valloc(size_t size);
-void *h_pvalloc(size_t size);
+__attribute__((malloc)) __attribute__((alloc_size(1))) void *h_valloc(size_t size);
+__attribute__((malloc)) void *h_pvalloc(size_t size);
#endif
#ifdef __GLIBC__
void h_cfree(void *ptr) __THROW;

@ -97,15 +99,16 @@ int h_malloc_iterate(uintptr_t base, size_t size, void (*callback)(uintptr_t ptr

void *arg);
void h_malloc_disable(void);
void h_malloc_enable(void);
+void h_malloc_disable_memory_tagging(void);
#endif

// hardened_malloc extensions

// return an upper bound on object size for any pointer based on malloc metadata
-size_t h_malloc_object_size(void *ptr);
+size_t h_malloc_object_size(const void *ptr);
// similar to malloc_object_size, but avoiding locking so the results are much more limited
-size_t h_malloc_object_size_fast(void *ptr);
+size_t h_malloc_object_size_fast(const void *ptr);

// The free function with an extra parameter for passing the size requested at
// allocation time.
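For illustration (not code from this repository), `alloc_size` lets the
compiler track the allocation size through the call, which feeds
`__builtin_object_size` and fortified string functions:

    #include <stdio.h>
    #include <stdlib.h>

    // hypothetical wrapper carrying the same attributes as h_malloc above
    __attribute__((malloc)) __attribute__((alloc_size(1)))
    static void *my_malloc(size_t size) { return malloc(size); }

    int main(void) {
        char *p = my_malloc(16);
        // with alloc_size(1), this folds to 16 at -O1 and above
        printf("%zu\n", __builtin_object_size(p, 0));
        free(p);
        return 0;
    }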


@ -1,7 +1,10 @@

#include <errno.h>
#include <sys/mman.h>

+#ifdef LABEL_MEMORY
#include <sys/prctl.h>
+#endif

#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41

@ -14,8 +17,8 @@

#include "memory.h"
#include "util.h"

-void *memory_map(size_t size) {
-    void *p = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+static void *memory_map_prot(size_t size, int prot) {
+    void *p = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    if (unlikely(p == MAP_FAILED)) {
        if (errno != ENOMEM) {
            fatal_error("non-ENOMEM mmap failure");

@ -25,8 +28,19 @@ void *memory_map(size_t size) {

    return p;
}

-bool memory_map_fixed(void *ptr, size_t size) {
-    void *p = mmap(ptr, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0);
+void *memory_map(size_t size) {
+    return memory_map_prot(size, PROT_NONE);
+}
+
+#ifdef HAS_ARM_MTE
+// Note that PROT_MTE can't be cleared via mprotect
+void *memory_map_mte(size_t size) {
+    return memory_map_prot(size, PROT_MTE);
+}
+#endif
+
+static bool memory_map_fixed_prot(void *ptr, size_t size, int prot) {
+    void *p = mmap(ptr, size, prot, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0);
    bool ret = p == MAP_FAILED;
    if (unlikely(ret) && errno != ENOMEM) {
        fatal_error("non-ENOMEM MAP_FIXED mmap failure");

@ -34,6 +48,17 @@ bool memory_map_fixed(void *ptr, size_t size) {

    return ret;
}

+bool memory_map_fixed(void *ptr, size_t size) {
+    return memory_map_fixed_prot(ptr, size, PROT_NONE);
+}
+
+#ifdef HAS_ARM_MTE
+// Note that PROT_MTE can't be cleared via mprotect
+bool memory_map_fixed_mte(void *ptr, size_t size) {
+    return memory_map_fixed_prot(ptr, size, PROT_MTE);
+}
+#endif
+
bool memory_unmap(void *ptr, size_t size) {
    bool ret = munmap(ptr, size);
    if (unlikely(ret) && errno != ENOMEM) {


@ -11,7 +11,13 @@

int get_metadata_key(void);

void *memory_map(size_t size);
+#ifdef HAS_ARM_MTE
+void *memory_map_mte(size_t size);
+#endif
bool memory_map_fixed(void *ptr, size_t size);
+#ifdef HAS_ARM_MTE
+bool memory_map_fixed_mte(void *ptr, size_t size);
+#endif
bool memory_unmap(void *ptr, size_t size);
bool memory_protect_ro(void *ptr, size_t size);
bool memory_protect_rw(void *ptr, size_t size);

memtag.h Normal file
@ -0,0 +1,50 @@
#ifndef MEMTAG_H
#define MEMTAG_H
#include "util.h"
#ifdef HAS_ARM_MTE
#include "arm_mte.h"
#define MEMTAG 1
// Note that bionic libc always reserves tag 0 via PR_MTE_TAG_MASK prctl
#define RESERVED_TAG 0
#define TAG_WIDTH 4
#endif
static inline void *untag_pointer(void *ptr) {
#ifdef HAS_ARM_MTE
const uintptr_t mask = UINTPTR_MAX >> 8;
return (void *) ((uintptr_t) ptr & mask);
#else
return ptr;
#endif
}
static inline const void *untag_const_pointer(const void *ptr) {
#ifdef HAS_ARM_MTE
const uintptr_t mask = UINTPTR_MAX >> 8;
return (const void *) ((uintptr_t) ptr & mask);
#else
return ptr;
#endif
}
static inline void *set_pointer_tag(void *ptr, u8 tag) {
#ifdef HAS_ARM_MTE
return (void *) (((uintptr_t) tag << 56) | (uintptr_t) untag_pointer(ptr));
#else
(void) tag;
return ptr;
#endif
}
static inline u8 get_pointer_tag(void *ptr) {
#ifdef HAS_ARM_MTE
return (((uintptr_t) ptr) >> 56) & 0xf;
#else
(void) ptr;
return 0;
#endif
}
#endif
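As a usage sketch (a hypothetical helper, not from the repo), retagging a
freed slot with the reserved tag would combine these helpers with
`arm_mte_tag_and_clear_mem` from arm_mte.h:

    #include "memtag.h"

    // hypothetical: mark a freed slot with the reserved 0 tag so any stale
    // tagged pointer faults on its next access; also zeroes the slot
    static void *tag_freed_slot(void *p, size_t size) {
    #ifdef HAS_ARM_MTE
        void *ptr = set_pointer_tag(untag_pointer(p), RESERVED_TAG);
        arm_mte_tag_and_clear_mem(ptr, size);
        return ptr;
    #else
        (void)size;
        return p;
    #endif
    }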

new.cc
@ -1,7 +1,9 @@

+// needed with libstdc++ but not libc++
+#if __has_include(<bits/functexcept.h>)
#include <bits/functexcept.h>
+#endif
#include <new>
-#define noreturn

#include "h_malloc.h"
#include "util.h"


@ -9,10 +9,6 @@ static bool add_guards(size_t size, size_t guard_size, size_t *total_size) {

    __builtin_add_overflow(*total_size, guard_size, total_size);
}

-static uintptr_t alignment_ceiling(uintptr_t s, uintptr_t alignment) {
-    return ((s) + (alignment - 1)) & ((~alignment) + 1);
-}
-
void *allocate_pages(size_t usable_size, size_t guard_size, bool unprotect, const char *name) {
    size_t real_size;
    if (unlikely(add_guards(usable_size, guard_size, &real_size))) {

@ -33,7 +29,7 @@

}

void *allocate_pages_aligned(size_t usable_size, size_t alignment, size_t guard_size, const char *name) {
-    usable_size = PAGE_CEILING(usable_size);
+    usable_size = page_align(usable_size);
    if (unlikely(!usable_size)) {
        errno = ENOMEM;
        return NULL;

@ -59,7 +55,7 @@

    void *usable = (char *)real + guard_size;

-    size_t lead_size = alignment_ceiling((uintptr_t)usable, alignment) - (uintptr_t)usable;
+    size_t lead_size = align((uintptr_t)usable, alignment) - (uintptr_t)usable;
    size_t trail_size = alloc_size - lead_size - usable_size;
    void *base = (char *)usable + lead_size;


@ -5,16 +5,21 @@

#include <stddef.h>
#include <stdint.h>

+#include "util.h"
+
#define PAGE_SHIFT 12
#ifndef PAGE_SIZE
#define PAGE_SIZE ((size_t)1 << PAGE_SHIFT)
#endif
-#define PAGE_CEILING(s) (((s) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

void *allocate_pages(size_t usable_size, size_t guard_size, bool unprotect, const char *name);
void *allocate_pages_aligned(size_t usable_size, size_t alignment, size_t guard_size, const char *name);
void deallocate_pages(void *usable, size_t usable_size, size_t guard_size);

+static inline size_t page_align(size_t size) {
+    return align(size, PAGE_SIZE);
+}
+
static inline size_t hash_page(const void *p) {
    uintptr_t u = (uintptr_t)p >> PAGE_SHIFT;
    size_t sum = u;
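The `align` helper comes from util.h, which isn't part of this diff; given the
removed `PAGE_CEILING` and `alignment_ceiling` definitions it replaces, it's
presumably the usual power-of-two round-up (an assumption):

    // assumed definition, equivalent to the removed macros for
    // power-of-two alignments
    static inline size_t align(size_t size, size_t alignment) {
        return (size + alignment - 1) & ~(alignment - 1);
    }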

test/.gitignore vendored
@ -3,3 +3,42 @@ mallinfo

mallinfo2
malloc_info
offset
delete_type_size_mismatch
double_free_large
double_free_large_delayed
double_free_small
double_free_small_delayed
invalid_free_protected
invalid_free_small_region
invalid_free_small_region_far
invalid_free_unprotected
read_after_free_large
read_after_free_small
read_zero_size
string_overflow
unaligned_free_large
unaligned_free_small
uninitialized_free
uninitialized_malloc_usable_size
uninitialized_realloc
write_after_free_large
write_after_free_large_reuse
write_after_free_small
write_after_free_small_reuse
write_zero_size
unaligned_malloc_usable_size_small
invalid_malloc_usable_size_small
invalid_malloc_usable_size_small_quarantine
malloc_object_size
malloc_object_size_offset
invalid_malloc_object_size_small
invalid_malloc_object_size_small_quarantine
impossibly_large_malloc
overflow_large_1_byte
overflow_large_8_byte
overflow_small_1_byte
overflow_small_8_byte
uninitialized_read_large
uninitialized_read_small
realloc_init
__pycache__/


@ -1,25 +1,76 @@
CONFIG_SLAB_CANARY := true
CONFIG_EXTENDED_SIZE_CLASSES := true
ifneq ($(VARIANT),)
$(error testing non-default variants not yet supported)
endif
ifeq (,$(filter $(CONFIG_SLAB_CANARY),true false))
$(error CONFIG_SLAB_CANARY must be true or false)
endif

-LDLIBS := -lpthread
+dir=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))

-CPPFLAGS += \
+CPPFLAGS := \
-D_GNU_SOURCE \
-DSLAB_CANARY=$(CONFIG_SLAB_CANARY) \
-DCONFIG_EXTENDED_SIZE_CLASSES=$(CONFIG_EXTENDED_SIZE_CLASSES)
SHARED_FLAGS := -O3
CFLAGS := -std=c17 $(SHARED_FLAGS) -Wmissing-prototypes
CXXFLAGS := -std=c++17 -fsized-deallocation $(SHARED_FLAGS)
LDFLAGS := -Wl,-L$(dir)../out,-R,$(dir)../out
LDLIBS := -lpthread -lhardened_malloc
EXECUTABLES := \
offset \
mallinfo \
mallinfo2 \
malloc_info \
-large_array_growth
+large_array_growth \
double_free_large \
double_free_large_delayed \
double_free_small \
double_free_small_delayed \
unaligned_free_large \
unaligned_free_small \
read_after_free_large \
read_after_free_small \
write_after_free_large \
write_after_free_large_reuse \
write_after_free_small \
write_after_free_small_reuse \
read_zero_size \
write_zero_size \
invalid_free_protected \
invalid_free_unprotected \
invalid_free_small_region \
invalid_free_small_region_far \
uninitialized_read_small \
uninitialized_read_large \
uninitialized_free \
uninitialized_realloc \
uninitialized_malloc_usable_size \
overflow_large_1_byte \
overflow_large_8_byte \
overflow_small_1_byte \
overflow_small_8_byte \
string_overflow \
delete_type_size_mismatch \
unaligned_malloc_usable_size_small \
invalid_malloc_usable_size_small \
invalid_malloc_usable_size_small_quarantine \
malloc_object_size \
malloc_object_size_offset \
invalid_malloc_object_size_small \
invalid_malloc_object_size_small_quarantine \
impossibly_large_malloc \
realloc_init
all: $(EXECUTABLES)
-make -C simple-memory-corruption

clean:
rm -f $(EXECUTABLES)
rm -fr ./__pycache__


@ -1,6 +1,6 @@

#include <stdint.h>

-#include "../test_util.h"
+#include "test_util.h"

struct foo {
    uint64_t a, b, c, d;


@ -1,9 +1,9 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
-    void *p = malloc(128 * 1024);
+    void *p = malloc(256 * 1024);
    if (!p) {
        return 1;
    }


@ -1,13 +1,13 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
-    void *p = malloc(128 * 1024);
+    void *p = malloc(256 * 1024);
    if (!p) {
        return 1;
    }
-    void *q = malloc(128 * 1024);
+    void *q = malloc(256 * 1024);
    if (!q) {
        return 1;
    }


@ -1,6 +1,6 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    void *p = malloc(16);


@ -1,6 +1,6 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    void *p = malloc(16);


@ -0,0 +1,8 @@
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
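// malloc(-8) requests (size_t)-8 bytes, i.e. SIZE_MAX - 7, which must fail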
char *p = malloc(-8);
return !(p == NULL);
}


@ -2,7 +2,7 @@

#include <sys/mman.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    free(malloc(16));


@ -1,6 +1,6 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(16);


@ -1,6 +1,6 @@

#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(16);


@ -2,7 +2,7 @@

#include <sys/mman.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    free(malloc(16));


@ -1,9 +1,10 @@

-#include <malloc.h>
+#include <stdlib.h>
+
+#include "test_util.h"

size_t malloc_object_size(void *ptr);

-__attribute__((optimize(0)))
-int main() {
+OPTNONE int main(void) {
    char *p = malloc(16);
    if (!p) {
        return 1;
    }


@ -1,9 +1,10 @@

-#include <malloc.h>
+#include <stdlib.h>
+
+#include "test_util.h"

size_t malloc_object_size(void *ptr);

-__attribute__((optimize(0)))
-int main() {
+OPTNONE int main(void) {
    void *p = malloc(16);
    if (!p) {
        return 1;
    }


@ -1,6 +1,6 @@

#include <malloc.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(16);


@ -1,6 +1,6 @@

#include <malloc.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    void *p = malloc(16);


@ -1,10 +1,14 @@

+#include <stdlib.h>
#include <stdio.h>
+#if defined(__GLIBC__) || defined(__ANDROID__)
#include <malloc.h>
+#endif

#include "test_util.h"

static void print_mallinfo(void) {
+#if defined(__GLIBC__) || defined(__ANDROID__)
    struct mallinfo info = mallinfo();
    printf("mallinfo:\n");
    printf("arena: %zu\n", (size_t)info.arena);

@ -17,6 +21,7 @@ static void print_mallinfo(void) {

    printf("uordblks: %zu\n", (size_t)info.uordblks);
    printf("fordblks: %zu\n", (size_t)info.fordblks);
    printf("keepcost: %zu\n", (size_t)info.keepcost);
+#endif
}

OPTNONE int main(void) {
OPTNONE int main(void) { OPTNONE int main(void) {


@ -1,12 +1,14 @@

#include <stdio.h>
+#include <stdlib.h>
+#if defined(__GLIBC__)
#include <malloc.h>
+#endif

#include "test_util.h"

static void print_mallinfo2(void) {
#if defined(__GLIBC__)
+#if __GLIBC_PREREQ(2, 33)
    struct mallinfo2 info = mallinfo2();
    printf("mallinfo2:\n");
    printf("arena: %zu\n", (size_t)info.arena);

@ -20,7 +22,6 @@ static void print_mallinfo2(void) {

    printf("fordblks: %zu\n", (size_t)info.fordblks);
    printf("keepcost: %zu\n", (size_t)info.keepcost);
#endif
+#endif
}

OPTNONE int main(void) {


@ -1,18 +1,22 @@

#include <pthread.h>
#include <stdio.h>
+#include <stdlib.h>
+#if defined(__GLIBC__) || defined(__ANDROID__)
#include <malloc.h>
+#endif

#include "test_util.h"
+#include "../util.h"

OPTNONE static void leak_memory(void) {
-    (void)malloc(1024 * 1024 * 1024);
-    (void)malloc(16);
-    (void)malloc(32);
-    (void)malloc(4096);
+    (void)!malloc(1024 * 1024 * 1024);
+    (void)!malloc(16);
+    (void)!malloc(32);
+    (void)!malloc(4096);
}

-static void *do_work(void *p) {
+static void *do_work(UNUSED void *p) {
    leak_memory();
    return NULL;
}

@ -26,5 +30,7 @@ int main(void) {

        pthread_join(thread[i], NULL);
    }

+#if defined(__GLIBC__) || defined(__ANDROID__)
    malloc_info(0, stdout);
+#endif
}


@ -1,7 +1,7 @@

#include <stdbool.h>
-#include <malloc.h>
+#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

size_t malloc_object_size(void *ptr);


@ -1,7 +1,7 @@

#include <stdbool.h>
-#include <malloc.h>
+#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

size_t malloc_object_size(void *ptr);


@ -0,0 +1,15 @@
#include <malloc.h>
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
char *p = malloc(256 * 1024);
if (!p) {
return 1;
}
size_t size = malloc_usable_size(p);
*(p + size) = 0;
free(p);
return 0;
}


@ -0,0 +1,15 @@
#include <malloc.h>
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
char *p = malloc(256 * 1024);
if (!p) {
return 1;
}
size_t size = malloc_usable_size(p);
*(p + size + 7) = 0;
free(p);
return 0;
}


@ -1,13 +1,15 @@

+#include <malloc.h>
#include <stdlib.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(8);
    if (!p) {
        return 1;
    }
-    *(p + 8 + 7) = 0;
+    size_t size = malloc_usable_size(p);
+    *(p + size) = 1;
    free(p);
    return 0;
}


@ -0,0 +1,16 @@
#include <malloc.h>
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
char *p = malloc(8);
if (!p) {
return 1;
}
size_t size = malloc_usable_size(p);
// XOR is used to avoid the test having a 1/256 chance to fail
*(p + size + 7) ^= 1;
free(p);
return 0;
}

View file

@ -2,17 +2,20 @@

#include <stdlib.h>
#include <string.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
-    char *p = malloc(128 * 1024);
+    char *p = malloc(256 * 1024);
    if (!p) {
        return 1;
    }
    memset(p, 'a', 16);
    free(p);
-    for (size_t i = 0; i < 128 * 1024; i++) {
+    for (size_t i = 0; i < 256 * 1024; i++) {
        printf("%x\n", p[i]);
+        if (p[i] != '\0') {
+            return 1;
+        }
    }
    return 0;
}


@ -2,7 +2,7 @@

#include <stdlib.h>
#include <string.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(16);

@ -13,6 +13,9 @@ OPTNONE int main(void) {

    free(p);
    for (size_t i = 0; i < 16; i++) {
        printf("%x\n", p[i]);
+        if (p[i] != '\0') {
+            return 1;
+        }
    }
    return 0;
}


@ -1,7 +1,7 @@

#include <stdlib.h>
#include <stdio.h>

-#include "../test_util.h"
+#include "test_util.h"

OPTNONE int main(void) {
    char *p = malloc(0);

test/realloc_init.c Normal file
@ -0,0 +1,33 @@
#include <pthread.h>
#include <stdlib.h>
static void *thread_func(void *arg) {
arg = realloc(arg, 1024);
if (!arg) {
exit(EXIT_FAILURE);
}
free(arg);
return NULL;
}
int main(void) {
void *mem = realloc(NULL, 12);
if (!mem) {
return EXIT_FAILURE;
}
pthread_t thread;
int r = pthread_create(&thread, NULL, thread_func, mem);
if (r != 0) {
return EXIT_FAILURE;
}
r = pthread_join(thread, NULL);
if (r != 0) {
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}


@ -1,33 +0,0 @@
delete_type_size_mismatch
double_free_large
double_free_large_delayed
double_free_small
double_free_small_delayed
eight_byte_overflow_large
eight_byte_overflow_small
invalid_free_protected
invalid_free_small_region
invalid_free_small_region_far
invalid_free_unprotected
read_after_free_large
read_after_free_small
read_zero_size
string_overflow
unaligned_free_large
unaligned_free_small
uninitialized_free
uninitialized_malloc_usable_size
uninitialized_realloc
write_after_free_large
write_after_free_large_reuse
write_after_free_small
write_after_free_small_reuse
write_zero_size
unaligned_malloc_usable_size_small
invalid_malloc_usable_size_small
invalid_malloc_usable_size_small_quarantine
malloc_object_size
malloc_object_size_offset
invalid_malloc_object_size_small
invalid_malloc_object_size_small_quarantine
__pycache__/


@ -1,52 +0,0 @@
dir=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
CONFIG_SLAB_CANARY := true
ifeq (,$(filter $(CONFIG_SLAB_CANARY),true false))
$(error CONFIG_SLAB_CANARY must be true or false)
endif
CFLAGS += -DSLAB_CANARY=$(CONFIG_SLAB_CANARY)
LDLIBS := -lhardened_malloc
LDFLAGS := -Wl,-L$(dir)../../,-R,$(dir)../../
EXECUTABLES := \
double_free_large \
double_free_large_delayed \
double_free_small \
double_free_small_delayed \
unaligned_free_large \
unaligned_free_small \
read_after_free_large \
read_after_free_small \
write_after_free_large \
write_after_free_large_reuse \
write_after_free_small \
write_after_free_small_reuse \
read_zero_size \
write_zero_size \
invalid_free_protected \
invalid_free_unprotected \
invalid_free_small_region \
invalid_free_small_region_far \
uninitialized_free \
uninitialized_realloc \
uninitialized_malloc_usable_size \
eight_byte_overflow_small \
eight_byte_overflow_large \
string_overflow \
delete_type_size_mismatch \
unaligned_malloc_usable_size_small \
invalid_malloc_usable_size_small \
invalid_malloc_usable_size_small_quarantine \
malloc_object_size \
malloc_object_size_offset \
invalid_malloc_object_size_small \
invalid_malloc_object_size_small_quarantine
all: $(EXECUTABLES)
clean:
rm -f $(EXECUTABLES)


@ -1,14 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "../test_util.h"
OPTNONE int main(void) {
char *p = malloc(128 * 1024);
if (!p) {
return 1;
}
free(p);
p[64 * 1024 + 1] = 'a';
return 0;
}


@ -4,7 +4,7 @@
#include <malloc.h> #include <malloc.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(16); char *p = malloc(16);


@ -48,14 +48,26 @@ class TestSimpleMemoryCorruption(unittest.TestCase):
self.assertEqual(stderr.decode("utf-8"), self.assertEqual(stderr.decode("utf-8"),
"fatal allocator error: double free (quarantine)\n") "fatal allocator error: double free (quarantine)\n")
def test_eight_byte_overflow_large(self): def test_overflow_large_1_byte(self):
_stdout, _stderr, returncode = self.run_test( _stdout, _stderr, returncode = self.run_test(
"eight_byte_overflow_large") "overflow_large_1_byte")
self.assertEqual(returncode, -11) self.assertEqual(returncode, -11)
def test_eight_byte_overflow_small(self): def test_overflow_large_8_byte(self):
_stdout, _stderr, returncode = self.run_test(
"overflow_large_8_byte")
self.assertEqual(returncode, -11)
def test_overflow_small_1_byte(self):
_stdout, stderr, returncode = self.run_test( _stdout, stderr, returncode = self.run_test(
"eight_byte_overflow_small") "overflow_small_1_byte")
self.assertEqual(returncode, -6)
self.assertEqual(stderr.decode("utf-8"),
"fatal allocator error: canary corrupted\n")
def test_overflow_small_8_byte(self):
_stdout, stderr, returncode = self.run_test(
"overflow_small_8_byte")
self.assertEqual(returncode, -6) self.assertEqual(returncode, -6)
self.assertEqual(stderr.decode("utf-8"), self.assertEqual(stderr.decode("utf-8"),
"fatal allocator error: canary corrupted\n") "fatal allocator error: canary corrupted\n")
@ -206,6 +218,25 @@ class TestSimpleMemoryCorruption(unittest.TestCase):
self.assertEqual(stderr.decode( self.assertEqual(stderr.decode(
"utf-8"), "fatal allocator error: invalid malloc_object_size (quarantine)\n") "utf-8"), "fatal allocator error: invalid malloc_object_size (quarantine)\n")
def test_impossibly_large_malloc(self):
_stdout, stderr, returncode = self.run_test(
"impossibly_large_malloc")
self.assertEqual(returncode, 0)
def test_uninitialized_read_small(self):
_stdout, stderr, returncode = self.run_test(
"uninitialized_read_small")
self.assertEqual(returncode, 0)
def test_uninitialized_read_large(self):
_stdout, stderr, returncode = self.run_test(
"uninitialized_read_large")
self.assertEqual(returncode, 0)
def test_realloc_init(self):
_stdout, _stderr, returncode = self.run_test(
"realloc_init")
self.assertEqual(returncode, 0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()


@ -1,9 +1,9 @@
#include <stdlib.h> #include <stdlib.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(128 * 1024); char *p = malloc(256 * 1024);
if (!p) { if (!p) {
return 1; return 1;
} }


@ -1,6 +1,6 @@
#include <stdlib.h> #include <stdlib.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(16); char *p = malloc(16);


@ -1,6 +1,6 @@
#include <malloc.h> #include <malloc.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(16); char *p = malloc(16);


@ -1,6 +1,6 @@
#include <stdlib.h> #include <stdlib.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
free((void *)1); free((void *)1);


@ -1,6 +1,6 @@
#include <malloc.h> #include <malloc.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
malloc_usable_size((void *)1); malloc_usable_size((void *)1);


@ -0,0 +1,14 @@
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
char *p = malloc(256 * 1024);
for (unsigned i = 0; i < 256 * 1024; i++) {
if (p[i] != 0) {
return 1;
}
}
free(p);
return 0;
}


@ -0,0 +1,14 @@
#include <stdlib.h>
#include "test_util.h"
OPTNONE int main(void) {
char *p = malloc(8);
for (unsigned i = 0; i < 8; i++) {
if (p[i] != 0) {
return 1;
}
}
free(p);
return 0;
}


@ -1,6 +1,6 @@
#include <stdlib.h> #include <stdlib.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
void *p = realloc((void *)1, 16); void *p = realloc((void *)1, 16);


@ -1,13 +1,13 @@
#include <stdlib.h> #include <stdlib.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(256 * 1024); char *p = malloc(256 * 1024);
if (!p) { if (!p) {
return 1; return 1;
} }
*(p + 256 * 1024 + 7) = 0;
free(p); free(p);
p[64 * 1024 + 1] = 'a';
return 0; return 0;
} }


@ -1,15 +1,16 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "../test_util.h" #include "test_util.h"
#include "../util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(128 * 1024); char *p = malloc(256 * 1024);
if (!p) { if (!p) {
return 1; return 1;
} }
free(p); free(p);
char *q = malloc(128 * 1024); UNUSED char *q = malloc(256 * 1024);
p[64 * 1024 + 1] = 'a'; p[64 * 1024 + 1] = 'a';
return 0; return 0;
} }


@ -1,7 +1,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(128); char *p = malloc(128);


@ -1,7 +1,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "../test_util.h" #include "test_util.h"
#include "../util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(128); char *p = malloc(128);
@ -9,7 +9,7 @@ OPTNONE int main(void) {
return 1; return 1;
} }
free(p); free(p);
char *q = malloc(128); UNUSED char *q = malloc(128);
p[65] = 'a'; p[65] = 'a';


@ -1,7 +1,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h>
#include "../test_util.h" #include "test_util.h"
OPTNONE int main(void) { OPTNONE int main(void) {
char *p = malloc(0); char *p = malloc(0);


@ -1,8 +1,8 @@
// libdivide.h - Optimized integer division // libdivide.h - Optimized integer division
// https://libdivide.com // https://libdivide.com
// //
// Copyright (C) 2010 - 2021 ridiculous_fish, <libdivide@ridiculousfish.com> // Copyright (C) 2010 - 2022 ridiculous_fish, <libdivide@ridiculousfish.com>
// Copyright (C) 2016 - 2021 Kim Walisch, <kim.walisch@gmail.com> // Copyright (C) 2016 - 2022 Kim Walisch, <kim.walisch@gmail.com>
// //
// libdivide is dual-licensed under the Boost or zlib licenses. // libdivide is dual-licensed under the Boost or zlib licenses.
// You may use libdivide under the terms of either of these. // You may use libdivide under the terms of either of these.
@ -11,11 +11,14 @@
#ifndef LIBDIVIDE_H #ifndef LIBDIVIDE_H
#define LIBDIVIDE_H #define LIBDIVIDE_H
#define LIBDIVIDE_VERSION "5.0" // *** Version numbers are auto generated - do not edit ***
#define LIBDIVIDE_VERSION "5.2.0"
#define LIBDIVIDE_VERSION_MAJOR 5 #define LIBDIVIDE_VERSION_MAJOR 5
#define LIBDIVIDE_VERSION_MINOR 0 #define LIBDIVIDE_VERSION_MINOR 2
#define LIBDIVIDE_VERSION_PATCH 0
#include <stdint.h> #include <stdint.h>
#if !defined(__AVR__) #if !defined(__AVR__)
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -24,20 +27,29 @@
#if defined(LIBDIVIDE_SSE2) #if defined(LIBDIVIDE_SSE2)
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
#if defined(LIBDIVIDE_AVX2) || defined(LIBDIVIDE_AVX512) #if defined(LIBDIVIDE_AVX2) || defined(LIBDIVIDE_AVX512)
#include <immintrin.h> #include <immintrin.h>
#endif #endif
#if defined(LIBDIVIDE_NEON) #if defined(LIBDIVIDE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
// Clang-cl prior to Visual Studio 2022 doesn't include __umulh/__mulh intrinsics
#if defined(_MSC_VER) && defined(LIBDIVIDE_X86_64) && (!defined(__clang__) || _MSC_VER>1930)
#define LIBDIVIDE_X64_INTRINSICS
#endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
#if defined(LIBDIVIDE_X64_INTRINSICS)
#include <intrin.h> #include <intrin.h>
#endif
#pragma warning(push) #pragma warning(push)
// disable warning C4146: unary minus operator applied // disable warning C4146: unary minus operator applied
// to unsigned type, result still unsigned // to unsigned type, result still unsigned
#pragma warning(disable : 4146) #pragma warning(disable : 4146)
// disable warning C4204: nonstandard extension used : non-constant aggregate // disable warning C4204: nonstandard extension used : non-constant aggregate
// initializer // initializer
// //
// It's valid C99 // It's valid C99
@ -238,24 +250,32 @@ static LIBDIVIDE_INLINE struct libdivide_u64_branchfree_t libdivide_u64_branchfr
static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw( static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(
int16_t numer, int16_t magic, uint8_t more); int16_t numer, int16_t magic, uint8_t more);
static LIBDIVIDE_INLINE int16_t libdivide_s16_do( static LIBDIVIDE_INLINE int16_t libdivide_s16_do(
int16_t numer, const struct libdivide_s16_t* denom); int16_t numer, const struct libdivide_s16_t *denom);
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw( static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(
uint16_t numer, uint16_t magic, uint8_t more); uint16_t numer, uint16_t magic, uint8_t more);
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do( static LIBDIVIDE_INLINE uint16_t libdivide_u16_do(
uint16_t numer, const struct libdivide_u16_t* denom); uint16_t numer, const struct libdivide_u16_t *denom);
static LIBDIVIDE_INLINE int32_t libdivide_s32_do_raw(
int32_t numer, int32_t magic, uint8_t more);
static LIBDIVIDE_INLINE int32_t libdivide_s32_do( static LIBDIVIDE_INLINE int32_t libdivide_s32_do(
int32_t numer, const struct libdivide_s32_t *denom); int32_t numer, const struct libdivide_s32_t *denom);
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do_raw(
uint32_t numer, uint32_t magic, uint8_t more);
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do( static LIBDIVIDE_INLINE uint32_t libdivide_u32_do(
uint32_t numer, const struct libdivide_u32_t *denom); uint32_t numer, const struct libdivide_u32_t *denom);
static LIBDIVIDE_INLINE int64_t libdivide_s64_do_raw(
int64_t numer, int64_t magic, uint8_t more);
static LIBDIVIDE_INLINE int64_t libdivide_s64_do( static LIBDIVIDE_INLINE int64_t libdivide_s64_do(
int64_t numer, const struct libdivide_s64_t *denom); int64_t numer, const struct libdivide_s64_t *denom);
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do_raw(
uint64_t numer, uint64_t magic, uint8_t more);
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do( static LIBDIVIDE_INLINE uint64_t libdivide_u64_do(
uint64_t numer, const struct libdivide_u64_t *denom); uint64_t numer, const struct libdivide_u64_t *denom);
static LIBDIVIDE_INLINE int16_t libdivide_s16_branchfree_do( static LIBDIVIDE_INLINE int16_t libdivide_s16_branchfree_do(
int16_t numer, const struct libdivide_s16_branchfree_t* denom); int16_t numer, const struct libdivide_s16_branchfree_t *denom);
static LIBDIVIDE_INLINE uint16_t libdivide_u16_branchfree_do( static LIBDIVIDE_INLINE uint16_t libdivide_u16_branchfree_do(
uint16_t numer, const struct libdivide_u16_branchfree_t* denom); uint16_t numer, const struct libdivide_u16_branchfree_t *denom);
static LIBDIVIDE_INLINE int32_t libdivide_s32_branchfree_do( static LIBDIVIDE_INLINE int32_t libdivide_s32_branchfree_do(
int32_t numer, const struct libdivide_s32_branchfree_t *denom); int32_t numer, const struct libdivide_s32_branchfree_t *denom);
static LIBDIVIDE_INLINE uint32_t libdivide_u32_branchfree_do( static LIBDIVIDE_INLINE uint32_t libdivide_u32_branchfree_do(
@ -265,17 +285,17 @@ static LIBDIVIDE_INLINE int64_t libdivide_s64_branchfree_do(
static LIBDIVIDE_INLINE uint64_t libdivide_u64_branchfree_do( static LIBDIVIDE_INLINE uint64_t libdivide_u64_branchfree_do(
uint64_t numer, const struct libdivide_u64_branchfree_t *denom); uint64_t numer, const struct libdivide_u64_branchfree_t *denom);
static LIBDIVIDE_INLINE int16_t libdivide_s16_recover(const struct libdivide_s16_t* denom); static LIBDIVIDE_INLINE int16_t libdivide_s16_recover(const struct libdivide_s16_t *denom);
static LIBDIVIDE_INLINE uint16_t libdivide_u16_recover(const struct libdivide_u16_t* denom); static LIBDIVIDE_INLINE uint16_t libdivide_u16_recover(const struct libdivide_u16_t *denom);
static LIBDIVIDE_INLINE int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom); static LIBDIVIDE_INLINE int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom);
static LIBDIVIDE_INLINE uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom); static LIBDIVIDE_INLINE uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom);
static LIBDIVIDE_INLINE int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom); static LIBDIVIDE_INLINE int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom);
static LIBDIVIDE_INLINE uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom); static LIBDIVIDE_INLINE uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom);
static LIBDIVIDE_INLINE int16_t libdivide_s16_branchfree_recover( static LIBDIVIDE_INLINE int16_t libdivide_s16_branchfree_recover(
const struct libdivide_s16_branchfree_t* denom); const struct libdivide_s16_branchfree_t *denom);
static LIBDIVIDE_INLINE uint16_t libdivide_u16_branchfree_recover( static LIBDIVIDE_INLINE uint16_t libdivide_u16_branchfree_recover(
const struct libdivide_u16_branchfree_t* denom); const struct libdivide_u16_branchfree_t *denom);
static LIBDIVIDE_INLINE int32_t libdivide_s32_branchfree_recover( static LIBDIVIDE_INLINE int32_t libdivide_s32_branchfree_recover(
const struct libdivide_s32_branchfree_t *denom); const struct libdivide_s32_branchfree_t *denom);
static LIBDIVIDE_INLINE uint32_t libdivide_u32_branchfree_recover( static LIBDIVIDE_INLINE uint32_t libdivide_u32_branchfree_recover(
@ -314,7 +334,7 @@ static LIBDIVIDE_INLINE int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
} }
static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) { static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) #if defined(LIBDIVIDE_X64_INTRINSICS)
return __umulh(x, y); return __umulh(x, y);
#elif defined(HAS_INT128_T) #elif defined(HAS_INT128_T)
__uint128_t xl = x, yl = y; __uint128_t xl = x, yl = y;
@ -340,7 +360,7 @@ static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
} }
static LIBDIVIDE_INLINE int64_t libdivide_mullhi_s64(int64_t x, int64_t y) { static LIBDIVIDE_INLINE int64_t libdivide_mullhi_s64(int64_t x, int64_t y) {
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) #if defined(LIBDIVIDE_X64_INTRINSICS)
return __mulh(x, y); return __mulh(x, y);
#elif defined(HAS_INT128_T) #elif defined(HAS_INT128_T)
__int128_t xl = x, yl = y; __int128_t xl = x, yl = y;
@ -393,7 +413,7 @@ static LIBDIVIDE_INLINE int16_t libdivide_count_leading_zeros16(uint16_t val) {
static LIBDIVIDE_INLINE int32_t libdivide_count_leading_zeros32(uint32_t val) { static LIBDIVIDE_INLINE int32_t libdivide_count_leading_zeros32(uint32_t val) {
#if defined(__AVR__) #if defined(__AVR__)
// Fast way to count leading zeros // Fast way to count leading zeros
return __builtin_clzl(val); return __builtin_clzl(val);
#elif defined(__GNUC__) || __has_builtin(__builtin_clz) #elif defined(__GNUC__) || __has_builtin(__builtin_clz)
// Fast way to count leading zeros // Fast way to count leading zeros
@ -442,7 +462,7 @@ static LIBDIVIDE_INLINE int32_t libdivide_count_leading_zeros64(uint64_t val) {
// uint {v}. The result must fit in 16 bits. // uint {v}. The result must fit in 16 bits.
// Returns the quotient directly and the remainder in *r // Returns the quotient directly and the remainder in *r
static LIBDIVIDE_INLINE uint16_t libdivide_32_div_16_to_16( static LIBDIVIDE_INLINE uint16_t libdivide_32_div_16_to_16(
uint16_t u1, uint16_t u0, uint16_t v, uint16_t* r) { uint16_t u1, uint16_t u0, uint16_t v, uint16_t *r) {
uint32_t n = ((uint32_t)u1 << 16) | u0; uint32_t n = ((uint32_t)u1 << 16) | u0;
uint16_t result = (uint16_t)(n / v); uint16_t result = (uint16_t)(n / v);
*r = (uint16_t)(n - result * (uint32_t)v); *r = (uint16_t)(n - result * (uint32_t)v);
@ -512,7 +532,7 @@ static LIBDIVIDE_INLINE uint64_t libdivide_128_div_64_to_64(
// Check for overflow and divide by 0. // Check for overflow and divide by 0.
if (numhi >= den) { if (numhi >= den) {
if (r != NULL) *r = ~0ull; if (r) *r = ~0ull;
return ~0ull; return ~0ull;
} }
@ -558,11 +578,14 @@ static LIBDIVIDE_INLINE uint64_t libdivide_128_div_64_to_64(
q0 = (uint32_t)qhat; q0 = (uint32_t)qhat;
// Return remainder if requested. // Return remainder if requested.
if (r != NULL) *r = (rem * b + num0 - q0 * den) >> shift; if (r) *r = (rem * b + num0 - q0 * den) >> shift;
return ((uint64_t)q1 << 32) | q0; return ((uint64_t)q1 << 32) | q0;
#endif #endif
} }
#if !(defined(HAS_INT128_T) && \
defined(HAS_INT128_DIV))
// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) // Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0)
static LIBDIVIDE_INLINE void libdivide_u128_shift( static LIBDIVIDE_INLINE void libdivide_u128_shift(
uint64_t *u1, uint64_t *u0, int32_t signed_shift) { uint64_t *u1, uint64_t *u0, int32_t signed_shift) {
@ -579,6 +602,8 @@ static LIBDIVIDE_INLINE void libdivide_u128_shift(
} }
} }
#endif
// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder. // Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder.
static LIBDIVIDE_INLINE uint64_t libdivide_128_div_128_to_64( static LIBDIVIDE_INLINE uint64_t libdivide_128_div_128_to_64(
uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) {
@ -696,8 +721,7 @@ static LIBDIVIDE_INLINE struct libdivide_u16_t libdivide_internal_u16_gen(
// 1 in its recovery algorithm. // 1 in its recovery algorithm.
result.magic = 0; result.magic = 0;
result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); result.more = (uint8_t)(floor_log_2_d - (branchfree != 0));
} } else {
else {
uint8_t more; uint8_t more;
uint16_t rem, proposed_m; uint16_t rem, proposed_m;
proposed_m = libdivide_32_div_16_to_16((uint16_t)1 << floor_log_2_d, 0, d, &rem); proposed_m = libdivide_32_div_16_to_16((uint16_t)1 << floor_log_2_d, 0, d, &rem);
@ -709,8 +733,7 @@ static LIBDIVIDE_INLINE struct libdivide_u16_t libdivide_internal_u16_gen(
if (!branchfree && (e < ((uint16_t)1 << floor_log_2_d))) { if (!branchfree && (e < ((uint16_t)1 << floor_log_2_d))) {
// This power works // This power works
more = floor_log_2_d; more = floor_log_2_d;
} } else {
else {
// We have to use the general 17-bit algorithm. We need to compute // We have to use the general 17-bit algorithm. We need to compute
// (2**power) / d. However, we already have (2**(power-1))/d and // (2**power) / d. However, we already have (2**(power-1))/d and
// its remainder. By doubling both, and then correcting the // its remainder. By doubling both, and then correcting the
@ -742,7 +765,7 @@ struct libdivide_u16_branchfree_t libdivide_u16_branchfree_gen(uint16_t d) {
} }
struct libdivide_u16_t tmp = libdivide_internal_u16_gen(d, 1); struct libdivide_u16_t tmp = libdivide_internal_u16_gen(d, 1);
struct libdivide_u16_branchfree_t ret = { struct libdivide_u16_branchfree_t ret = {
tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_16_SHIFT_MASK) }; tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_16_SHIFT_MASK)};
return ret; return ret;
} }
@ -752,27 +775,25 @@ struct libdivide_u16_branchfree_t libdivide_u16_branchfree_gen(uint16_t d) {
uint16_t libdivide_u16_do_raw(uint16_t numer, uint16_t magic, uint8_t more) { uint16_t libdivide_u16_do_raw(uint16_t numer, uint16_t magic, uint8_t more) {
if (!magic) { if (!magic) {
return numer >> more; return numer >> more;
} } else {
else {
uint16_t q = libdivide_mullhi_u16(magic, numer); uint16_t q = libdivide_mullhi_u16(magic, numer);
if (more & LIBDIVIDE_ADD_MARKER) { if (more & LIBDIVIDE_ADD_MARKER) {
uint16_t t = ((numer - q) >> 1) + q; uint16_t t = ((numer - q) >> 1) + q;
return t >> (more & LIBDIVIDE_16_SHIFT_MASK); return t >> (more & LIBDIVIDE_16_SHIFT_MASK);
} } else {
else {
// All upper bits are 0, // All upper bits are 0,
// don't need to mask them off. // don't need to mask them off.
return q >> more; return q >> more;
} }
} }
} }
uint16_t libdivide_u16_do(uint16_t numer, const struct libdivide_u16_t* denom) { uint16_t libdivide_u16_do(uint16_t numer, const struct libdivide_u16_t *denom) {
return libdivide_u16_do_raw(numer, denom->magic, denom->more); return libdivide_u16_do_raw(numer, denom->magic, denom->more);
} }
uint16_t libdivide_u16_branchfree_do( uint16_t libdivide_u16_branchfree_do(
uint16_t numer, const struct libdivide_u16_branchfree_t* denom) { uint16_t numer, const struct libdivide_u16_branchfree_t *denom) {
uint16_t q = libdivide_mullhi_u16(denom->magic, numer); uint16_t q = libdivide_mullhi_u16(denom->magic, numer);
uint16_t t = ((numer - q) >> 1) + q; uint16_t t = ((numer - q) >> 1) + q;
return t >> denom->more; return t >> denom->more;
@ -800,7 +821,7 @@ uint16_t libdivide_u16_recover(const struct libdivide_u16_t *denom) {
// overflow. So we have to compute it as 2^(16+shift)/(m+2^16), and // overflow. So we have to compute it as 2^(16+shift)/(m+2^16), and
// then double the quotient and remainder. // then double the quotient and remainder.
uint32_t half_n = (uint32_t)1 << (16 + shift); uint32_t half_n = (uint32_t)1 << (16 + shift);
uint32_t d = ( (uint32_t)1 << 16) | denom->magic; uint32_t d = ((uint32_t)1 << 16) | denom->magic;
// Note that the quotient is guaranteed <= 16 bits, but the remainder // Note that the quotient is guaranteed <= 16 bits, but the remainder
// may need 17! // may need 17!
uint16_t half_q = (uint16_t)(half_n / d); uint16_t half_q = (uint16_t)(half_n / d);
@ -912,12 +933,11 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
return ret; return ret;
} }
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { uint32_t libdivide_u32_do_raw(uint32_t numer, uint32_t magic, uint8_t more) {
uint8_t more = denom->more; if (!magic) {
if (!denom->magic) {
return numer >> more; return numer >> more;
} else { } else {
uint32_t q = libdivide_mullhi_u32(denom->magic, numer); uint32_t q = libdivide_mullhi_u32(magic, numer);
if (more & LIBDIVIDE_ADD_MARKER) { if (more & LIBDIVIDE_ADD_MARKER) {
uint32_t t = ((numer - q) >> 1) + q; uint32_t t = ((numer - q) >> 1) + q;
return t >> (more & LIBDIVIDE_32_SHIFT_MASK); return t >> (more & LIBDIVIDE_32_SHIFT_MASK);
@ -929,6 +949,10 @@ uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
} }
} }
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
return libdivide_u32_do_raw(numer, denom->magic, denom->more);
}
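
The refactor above splits each branching division routine into a thin wrapper over a new *_do_raw variant that takes the precomputed magic and more fields directly, so a caller can store just those two values instead of a whole libdivide_u32_t. A minimal sketch of that usage, assuming this updated libdivide.h is on the include path:

#include <stdint.h>
#include <stdio.h>
#include "libdivide.h"

int main(void) {
    struct libdivide_u32_t d = libdivide_u32_gen(7);
    // The two fields can be persisted separately (e.g. in allocator
    // metadata) and fed back to the raw entry point later.
    uint32_t magic = d.magic;
    uint8_t more = d.more;
    printf("%u\n", libdivide_u32_do_raw(100, magic, more)); // prints 14
    return 0;
}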
uint32_t libdivide_u32_branchfree_do( uint32_t libdivide_u32_branchfree_do(
uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
uint32_t q = libdivide_mullhi_u32(denom->magic, numer); uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
@ -1072,12 +1096,11 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
return ret; return ret;
} }
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { uint64_t libdivide_u64_do_raw(uint64_t numer, uint64_t magic, uint8_t more) {
uint8_t more = denom->more; if (!magic) {
if (!denom->magic) {
return numer >> more; return numer >> more;
} else { } else {
uint64_t q = libdivide_mullhi_u64(denom->magic, numer); uint64_t q = libdivide_mullhi_u64(magic, numer);
if (more & LIBDIVIDE_ADD_MARKER) { if (more & LIBDIVIDE_ADD_MARKER) {
uint64_t t = ((numer - q) >> 1) + q; uint64_t t = ((numer - q) >> 1) + q;
return t >> (more & LIBDIVIDE_64_SHIFT_MASK); return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
@ -1089,6 +1112,10 @@ uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
} }
} }
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
return libdivide_u64_do_raw(numer, denom->magic, denom->more);
}
uint64_t libdivide_u64_branchfree_do( uint64_t libdivide_u64_branchfree_do(
uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { uint64_t numer, const struct libdivide_u64_branchfree_t *denom) {
uint64_t q = libdivide_mullhi_u64(denom->magic, numer); uint64_t q = libdivide_mullhi_u64(denom->magic, numer);
@ -1428,11 +1455,10 @@ struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
return result; return result;
} }
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { int32_t libdivide_s32_do_raw(int32_t numer, int32_t magic, uint8_t more) {
uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
if (!denom->magic) { if (!magic) {
uint32_t sign = (int8_t)more >> 7; uint32_t sign = (int8_t)more >> 7;
uint32_t mask = ((uint32_t)1 << shift) - 1; uint32_t mask = ((uint32_t)1 << shift) - 1;
uint32_t uq = numer + ((numer >> 31) & mask); uint32_t uq = numer + ((numer >> 31) & mask);
@ -1441,7 +1467,7 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
q = (q ^ sign) - sign; q = (q ^ sign) - sign;
return q; return q;
} else { } else {
uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); uint32_t uq = (uint32_t)libdivide_mullhi_s32(magic, numer);
if (more & LIBDIVIDE_ADD_MARKER) { if (more & LIBDIVIDE_ADD_MARKER) {
// must be arithmetic shift and then sign extend // must be arithmetic shift and then sign extend
int32_t sign = (int8_t)more >> 7; int32_t sign = (int8_t)more >> 7;
@ -1456,6 +1482,10 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
} }
} }
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
return libdivide_s32_do_raw(numer, denom->magic, denom->more);
}
int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) {
uint8_t more = denom->more; uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
@ -1597,11 +1627,10 @@ struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
return ret; return ret;
} }
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { int64_t libdivide_s64_do_raw(int64_t numer, int64_t magic, uint8_t more) {
uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
if (!denom->magic) { // shift path if (!magic) { // shift path
uint64_t mask = ((uint64_t)1 << shift) - 1; uint64_t mask = ((uint64_t)1 << shift) - 1;
uint64_t uq = numer + ((numer >> 63) & mask); uint64_t uq = numer + ((numer >> 63) & mask);
int64_t q = (int64_t)uq; int64_t q = (int64_t)uq;
@ -1611,7 +1640,7 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
q = (q ^ sign) - sign; q = (q ^ sign) - sign;
return q; return q;
} else { } else {
uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); uint64_t uq = (uint64_t)libdivide_mullhi_s64(magic, numer);
if (more & LIBDIVIDE_ADD_MARKER) { if (more & LIBDIVIDE_ADD_MARKER) {
// must be arithmetic shift and then sign extend // must be arithmetic shift and then sign extend
int64_t sign = (int8_t)more >> 7; int64_t sign = (int8_t)more >> 7;
@ -1626,6 +1655,10 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
} }
} }
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
return libdivide_s64_do_raw(numer, denom->magic, denom->more);
}
int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
uint8_t more = denom->more; uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
@ -1682,15 +1715,22 @@ int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t
// Simplest possible vector type division: treat the vector type as an array // Simplest possible vector type division: treat the vector type as an array
// of underlying native type. // of underlying native type.
#define SIMPLE_VECTOR_DIVISION(IntT, VecT, Algo) \ //
const size_t count = sizeof(VecT) / sizeof(IntT); \ // Use a union to read a vector via pointer-to-integer, without violating strict
VecT result; \ // aliasing.
IntT *pSource = (IntT *)&numers; \ #define SIMPLE_VECTOR_DIVISION(IntT, VecT, Algo) \
IntT *pTarget = (IntT *)&result; \ const size_t count = sizeof(VecT) / sizeof(IntT); \
for (size_t loop=0; loop<count; ++loop) { \ union type_pun_vec { \
pTarget[loop] = libdivide_##Algo##_do(pSource[loop], denom); \ VecT vec; \
} \ IntT arr[sizeof(VecT) / sizeof(IntT)]; \
return result; \ }; \
union type_pun_vec result; \
union type_pun_vec input; \
input.vec = numers; \
for (size_t loop = 0; loop < count; ++loop) { \
result.arr[loop] = libdivide_##Algo##_do(input.arr[loop], denom); \
} \
return result.vec;
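
The rewritten macro reads vector lanes through a union rather than casting the vector's address to an integer pointer; in C, reading a union member other than the one last stored simply reinterprets the bytes, so the lane loop no longer violates strict aliasing. The same idiom in miniature, with a uint64_t standing in for a two-lane vector (lane order here assumes little-endian):

#include <stdint.h>
#include <stdio.h>

union type_pun_vec {
    uint64_t vec;     // stand-in for a SIMD vector type
    uint32_t arr[2];  // the per-lane view of the same bytes
};

int main(void) {
    union type_pun_vec input, result;
    input.vec = 0x0000000700000015ULL; // lanes 0x15 and 0x07 on little-endian
    for (int i = 0; i < 2; i++) {
        result.arr[i] = input.arr[i] / 3; // divide each lane separately
    }
    printf("%llx\n", (unsigned long long)result.vec); // prints 200000007
    return 0;
}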
#if defined(LIBDIVIDE_NEON) #if defined(LIBDIVIDE_NEON)
@ -1804,13 +1844,12 @@ static LIBDIVIDE_INLINE int64x2_t libdivide_mullhi_s64_vec128(int64x2_t x, int64
////////// UINT16 ////////// UINT16
uint16x8_t libdivide_u16_do_vec128(uint16x8_t numers, const struct libdivide_u16_t *denom) { uint16x8_t libdivide_u16_do_vec128(uint16x8_t numers, const struct libdivide_u16_t *denom){
SIMPLE_VECTOR_DIVISION(uint16_t, uint16x8_t, u16) SIMPLE_VECTOR_DIVISION(uint16_t, uint16x8_t, u16)}
}
uint16x8_t libdivide_u16_branchfree_do_vec128(uint16x8_t numers, const struct libdivide_u16_branchfree_t *denom) { uint16x8_t libdivide_u16_branchfree_do_vec128(
SIMPLE_VECTOR_DIVISION(uint16_t, uint16x8_t, u16_branchfree) uint16x8_t numers, const struct libdivide_u16_branchfree_t *denom){
} SIMPLE_VECTOR_DIVISION(uint16_t, uint16x8_t, u16_branchfree)}
////////// UINT32 ////////// UINT32
@ -1870,13 +1909,12 @@ uint64x2_t libdivide_u64_branchfree_do_vec128(
////////// SINT16 ////////// SINT16
int16x8_t libdivide_s16_do_vec128(int16x8_t numers, const struct libdivide_s16_t *denom) { int16x8_t libdivide_s16_do_vec128(int16x8_t numers, const struct libdivide_s16_t *denom){
SIMPLE_VECTOR_DIVISION(int16_t, int16x8_t, s16) SIMPLE_VECTOR_DIVISION(int16_t, int16x8_t, s16)}
}
int16x8_t libdivide_s16_branchfree_do_vec128(int16x8_t numers, const struct libdivide_s16_branchfree_t *denom) { int16x8_t libdivide_s16_branchfree_do_vec128(
SIMPLE_VECTOR_DIVISION(int16_t, int16x8_t, s16_branchfree) int16x8_t numers, const struct libdivide_s16_branchfree_t *denom){
} SIMPLE_VECTOR_DIVISION(int16_t, int16x8_t, s16_branchfree)}
////////// SINT32 ////////// SINT32
@ -2082,13 +2120,12 @@ static LIBDIVIDE_INLINE __m512i libdivide_mullhi_s64_vec512(__m512i x, __m512i y
////////// UINT16 ////////// UINT16
__m512i libdivide_u16_do_vec512(__m512i numers, const struct libdivide_u16_t *denom) { __m512i libdivide_u16_do_vec512(__m512i numers, const struct libdivide_u16_t *denom){
SIMPLE_VECTOR_DIVISION(uint16_t, __m512i, u16) SIMPLE_VECTOR_DIVISION(uint16_t, __m512i, u16)}
}
__m512i libdivide_u16_branchfree_do_vec512(__m512i numers, const struct libdivide_u16_branchfree_t *denom) { __m512i libdivide_u16_branchfree_do_vec512(
SIMPLE_VECTOR_DIVISION(uint16_t, __m512i, u16_branchfree) __m512i numers, const struct libdivide_u16_branchfree_t *denom){
} SIMPLE_VECTOR_DIVISION(uint16_t, __m512i, u16_branchfree)}
////////// UINT32 ////////// UINT32
@ -2146,13 +2183,12 @@ __m512i libdivide_u64_branchfree_do_vec512(
////////// SINT16 ////////// SINT16
__m512i libdivide_s16_do_vec512(__m512i numers, const struct libdivide_s16_t *denom) { __m512i libdivide_s16_do_vec512(__m512i numers, const struct libdivide_s16_t *denom){
SIMPLE_VECTOR_DIVISION(int16_t, __m512i, s16) SIMPLE_VECTOR_DIVISION(int16_t, __m512i, s16)}
}
__m512i libdivide_s16_branchfree_do_vec512(__m512i numers, const struct libdivide_s16_branchfree_t *denom) { __m512i libdivide_s16_branchfree_do_vec512(
SIMPLE_VECTOR_DIVISION(int16_t, __m512i, s16_branchfree) __m512i numers, const struct libdivide_s16_branchfree_t *denom){
} SIMPLE_VECTOR_DIVISION(int16_t, __m512i, s16_branchfree)}
////////// SINT32 ////////// SINT32
@ -2365,11 +2401,25 @@ static LIBDIVIDE_INLINE __m256i libdivide_mullhi_s64_vec256(__m256i x, __m256i y
////////// UINT16 ////////// UINT16
__m256i libdivide_u16_do_vec256(__m256i numers, const struct libdivide_u16_t *denom) { __m256i libdivide_u16_do_vec256(__m256i numers, const struct libdivide_u16_t *denom) {
SIMPLE_VECTOR_DIVISION(uint16_t, __m256i, u16) uint8_t more = denom->more;
if (!denom->magic) {
return _mm256_srli_epi16(numers, more);
} else {
__m256i q = _mm256_mulhi_epu16(numers, _mm256_set1_epi16(denom->magic));
if (more & LIBDIVIDE_ADD_MARKER) {
__m256i t = _mm256_adds_epu16(_mm256_srli_epi16(_mm256_subs_epu16(numers, q), 1), q);
return _mm256_srli_epi16(t, (more & LIBDIVIDE_16_SHIFT_MASK));
} else {
return _mm256_srli_epi16(q, more);
}
}
} }
__m256i libdivide_u16_branchfree_do_vec256(__m256i numers, const struct libdivide_u16_branchfree_t *denom) { __m256i libdivide_u16_branchfree_do_vec256(
SIMPLE_VECTOR_DIVISION(uint16_t, __m256i, u16_branchfree) __m256i numers, const struct libdivide_u16_branchfree_t *denom) {
__m256i q = _mm256_mulhi_epu16(numers, _mm256_set1_epi16(denom->magic));
__m256i t = _mm256_adds_epu16(_mm256_srli_epi16(_mm256_subs_epu16(numers, q), 1), q);
return _mm256_srli_epi16(t, denom->more);
} }
////////// UINT32 ////////// UINT32
@ -2429,11 +2479,54 @@ __m256i libdivide_u64_branchfree_do_vec256(
////////// SINT16 ////////// SINT16
__m256i libdivide_s16_do_vec256(__m256i numers, const struct libdivide_s16_t *denom) { __m256i libdivide_s16_do_vec256(__m256i numers, const struct libdivide_s16_t *denom) {
SIMPLE_VECTOR_DIVISION(int16_t, __m256i, s16) uint8_t more = denom->more;
if (!denom->magic) {
uint16_t shift = more & LIBDIVIDE_16_SHIFT_MASK;
uint16_t mask = ((uint16_t)1 << shift) - 1;
__m256i roundToZeroTweak = _mm256_set1_epi16(mask);
// q = numer + ((numer >> 15) & roundToZeroTweak);
__m256i q = _mm256_add_epi16(
numers, _mm256_and_si256(_mm256_srai_epi16(numers, 15), roundToZeroTweak));
q = _mm256_srai_epi16(q, shift);
__m256i sign = _mm256_set1_epi16((int8_t)more >> 7);
// q = (q ^ sign) - sign;
q = _mm256_sub_epi16(_mm256_xor_si256(q, sign), sign);
return q;
} else {
__m256i q = _mm256_mulhi_epi16(numers, _mm256_set1_epi16(denom->magic));
if (more & LIBDIVIDE_ADD_MARKER) {
// must be arithmetic shift
__m256i sign = _mm256_set1_epi16((int8_t)more >> 7);
// q += ((numer ^ sign) - sign);
q = _mm256_add_epi16(q, _mm256_sub_epi16(_mm256_xor_si256(numers, sign), sign));
}
// q >>= shift
q = _mm256_srai_epi16(q, more & LIBDIVIDE_16_SHIFT_MASK);
q = _mm256_add_epi16(q, _mm256_srli_epi16(q, 15)); // q += (q < 0)
return q;
}
} }
__m256i libdivide_s16_branchfree_do_vec256(__m256i numers, const struct libdivide_s16_branchfree_t *denom) { __m256i libdivide_s16_branchfree_do_vec256(
SIMPLE_VECTOR_DIVISION(int16_t, __m256i, s16_branchfree) __m256i numers, const struct libdivide_s16_branchfree_t *denom) {
int16_t magic = denom->magic;
uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_16_SHIFT_MASK;
// must be arithmetic shift
__m256i sign = _mm256_set1_epi16((int8_t)more >> 7);
__m256i q = _mm256_mulhi_epi16(numers, _mm256_set1_epi16(magic));
q = _mm256_add_epi16(q, numers); // q += numers
// If q is non-negative, we have nothing to do
// If q is negative, we want to add either (2**shift)-1 if d is
// a power of 2, or (2**shift) if it is not a power of 2
uint16_t is_power_of_2 = (magic == 0);
__m256i q_sign = _mm256_srai_epi16(q, 15); // q_sign = q >> 15
__m256i mask = _mm256_set1_epi16(((uint16_t)1 << shift) - is_power_of_2);
q = _mm256_add_epi16(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask)
q = _mm256_srai_epi16(q, shift); // q >>= shift
q = _mm256_sub_epi16(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign
return q;
} }
////////// SINT32 ////////// SINT32
@ -2661,11 +2754,25 @@ static LIBDIVIDE_INLINE __m128i libdivide_mullhi_s64_vec128(__m128i x, __m128i y
////////// UINT16 ////////// UINT16
__m128i libdivide_u16_do_vec128(__m128i numers, const struct libdivide_u16_t *denom) { __m128i libdivide_u16_do_vec128(__m128i numers, const struct libdivide_u16_t *denom) {
SIMPLE_VECTOR_DIVISION(uint16_t, __m128i, u16) uint8_t more = denom->more;
if (!denom->magic) {
return _mm_srli_epi16(numers, more);
} else {
__m128i q = _mm_mulhi_epu16(numers, _mm_set1_epi16(denom->magic));
if (more & LIBDIVIDE_ADD_MARKER) {
__m128i t = _mm_adds_epu16(_mm_srli_epi16(_mm_subs_epu16(numers, q), 1), q);
return _mm_srli_epi16(t, (more & LIBDIVIDE_16_SHIFT_MASK));
} else {
return _mm_srli_epi16(q, more);
}
}
} }
__m128i libdivide_u16_branchfree_do_vec128(__m128i numers, const struct libdivide_u16_branchfree_t *denom) { __m128i libdivide_u16_branchfree_do_vec128(
SIMPLE_VECTOR_DIVISION(uint16_t, __m128i, u16_branchfree) __m128i numers, const struct libdivide_u16_branchfree_t *denom) {
__m128i q = _mm_mulhi_epu16(numers, _mm_set1_epi16(denom->magic));
__m128i t = _mm_adds_epu16(_mm_srli_epi16(_mm_subs_epu16(numers, q), 1), q);
return _mm_srli_epi16(t, denom->more);
} }
////////// UINT32 ////////// UINT32
@ -2725,11 +2832,54 @@ __m128i libdivide_u64_branchfree_do_vec128(
////////// SINT16 ////////// SINT16
__m128i libdivide_s16_do_vec128(__m128i numers, const struct libdivide_s16_t *denom) { __m128i libdivide_s16_do_vec128(__m128i numers, const struct libdivide_s16_t *denom) {
SIMPLE_VECTOR_DIVISION(int16_t, __m128i, s16) uint8_t more = denom->more;
if (!denom->magic) {
uint16_t shift = more & LIBDIVIDE_16_SHIFT_MASK;
uint16_t mask = ((uint16_t)1 << shift) - 1;
__m128i roundToZeroTweak = _mm_set1_epi16(mask);
// q = numer + ((numer >> 15) & roundToZeroTweak);
__m128i q =
_mm_add_epi16(numers, _mm_and_si128(_mm_srai_epi16(numers, 15), roundToZeroTweak));
q = _mm_srai_epi16(q, shift);
__m128i sign = _mm_set1_epi16((int8_t)more >> 7);
// q = (q ^ sign) - sign;
q = _mm_sub_epi16(_mm_xor_si128(q, sign), sign);
return q;
} else {
__m128i q = _mm_mulhi_epi16(numers, _mm_set1_epi16(denom->magic));
if (more & LIBDIVIDE_ADD_MARKER) {
// must be arithmetic shift
__m128i sign = _mm_set1_epi16((int8_t)more >> 7);
// q += ((numer ^ sign) - sign);
q = _mm_add_epi16(q, _mm_sub_epi16(_mm_xor_si128(numers, sign), sign));
}
// q >>= shift
q = _mm_srai_epi16(q, more & LIBDIVIDE_16_SHIFT_MASK);
q = _mm_add_epi16(q, _mm_srli_epi16(q, 15)); // q += (q < 0)
return q;
}
} }
__m128i libdivide_s16_branchfree_do_vec128(__m128i numers, const struct libdivide_s16_branchfree_t *denom) { __m128i libdivide_s16_branchfree_do_vec128(
SIMPLE_VECTOR_DIVISION(int16_t, __m128i, s16_branchfree) __m128i numers, const struct libdivide_s16_branchfree_t *denom) {
int16_t magic = denom->magic;
uint8_t more = denom->more;
uint8_t shift = more & LIBDIVIDE_16_SHIFT_MASK;
// must be arithmetic shift
__m128i sign = _mm_set1_epi16((int8_t)more >> 7);
__m128i q = _mm_mulhi_epi16(numers, _mm_set1_epi16(magic));
q = _mm_add_epi16(q, numers); // q += numers
// If q is non-negative, we have nothing to do
// If q is negative, we want to add either (2**shift)-1 if d is
// a power of 2, or (2**shift) if it is not a power of 2
uint16_t is_power_of_2 = (magic == 0);
__m128i q_sign = _mm_srai_epi16(q, 15); // q_sign = q >> 15
__m128i mask = _mm_set1_epi16(((uint16_t)1 << shift) - is_power_of_2);
q = _mm_add_epi16(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask)
q = _mm_srai_epi16(q, shift); // q >>= shift
q = _mm_sub_epi16(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign
return q;
} }
////////// SINT32 ////////// SINT32
@ -2795,8 +2945,8 @@ __m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *de
uint64_t mask = ((uint64_t)1 << shift) - 1; uint64_t mask = ((uint64_t)1 << shift) - 1;
__m128i roundToZeroTweak = _mm_set1_epi64x(mask); __m128i roundToZeroTweak = _mm_set1_epi64x(mask);
// q = numer + ((numer >> 63) & roundToZeroTweak); // q = numer + ((numer >> 63) & roundToZeroTweak);
__m128i q = __m128i q = _mm_add_epi64(
_mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits_vec128(numers), roundToZeroTweak)); numers, _mm_and_si128(libdivide_s64_signbits_vec128(numers), roundToZeroTweak));
q = libdivide_s64_shift_right_vec128(q, shift); q = libdivide_s64_shift_right_vec128(q, shift);
__m128i sign = _mm_set1_epi32((int8_t)more >> 7); __m128i sign = _mm_set1_epi32((int8_t)more >> 7);
// q = (q ^ sign) - sign; // q = (q ^ sign) - sign;
@ -2847,49 +2997,80 @@ __m128i libdivide_s64_branchfree_do_vec128(
#ifdef __cplusplus #ifdef __cplusplus
//for constexpr zero initialization,
//c++11 might handle things ok,
//but just limit to at least c++14 to ensure
//we don't break anyone's code:
// for gcc and clang, use https://en.cppreference.com/w/cpp/feature_test#cpp_constexpr
#if (defined(__GNUC__) || defined(__clang__)) && (__cpp_constexpr >= 201304L)
#define LIBDIVIDE_CONSTEXPR constexpr
// supposedly, MSVC might not implement feature test macros right (https://stackoverflow.com/questions/49316752/feature-test-macros-not-working-properly-in-visual-c)
// so check that _MSVC_LANG corresponds to at least c++14, and _MSC_VER corresponds to at least VS 2017 15.0 (for extended constexpr support https://learn.microsoft.com/en-us/cpp/overview/visual-cpp-language-conformance?view=msvc-170)
#elif defined(_MSC_VER) && _MSC_VER >= 1910 && defined(_MSVC_LANG) && _MSVC_LANG >=201402L
#define LIBDIVIDE_CONSTEXPR constexpr
// in case some other obscure compiler has the right __cpp_constexpr :
#elif defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
#define LIBDIVIDE_CONSTEXPR constexpr
#else
#define LIBDIVIDE_CONSTEXPR LIBDIVIDE_INLINE
#endif
enum Branching { enum Branching {
BRANCHFULL, // use branching algorithms BRANCHFULL, // use branching algorithms
BRANCHFREE // use branchfree algorithms BRANCHFREE // use branchfree algorithms
}; };
namespace detail {
enum Signedness {
SIGNED,
UNSIGNED,
};
#if defined(LIBDIVIDE_NEON) #if defined(LIBDIVIDE_NEON)
// Helper to deduce NEON vector type for integral type. // Helper to deduce NEON vector type for integral type.
template <typename T> template <int _WIDTH, Signedness _SIGN>
struct NeonVecFor {}; struct NeonVec {};
template <> template <>
struct NeonVecFor<uint16_t> { struct NeonVec<16, UNSIGNED> {
typedef uint16x8_t type; typedef uint16x8_t type;
}; };
template <> template <>
struct NeonVecFor<int16_t> { struct NeonVec<16, SIGNED> {
typedef int16x8_t type; typedef int16x8_t type;
}; };
template <> template <>
struct NeonVecFor<uint32_t> { struct NeonVec<32, UNSIGNED> {
typedef uint32x4_t type; typedef uint32x4_t type;
}; };
template <> template <>
struct NeonVecFor<int32_t> { struct NeonVec<32, SIGNED> {
typedef int32x4_t type; typedef int32x4_t type;
}; };
template <> template <>
struct NeonVecFor<uint64_t> { struct NeonVec<64, UNSIGNED> {
typedef uint64x2_t type; typedef uint64x2_t type;
}; };
template <> template <>
struct NeonVecFor<int64_t> { struct NeonVec<64, SIGNED> {
typedef int64x2_t type; typedef int64x2_t type;
}; };
#endif
// Versions of our algorithms for SIMD. template <typename T>
#if defined(LIBDIVIDE_NEON) struct NeonVecFor {
// See 'class divider' for an explanation of these template parameters.
typedef typename NeonVec<sizeof(T) * 8, (((T)0 >> 0) > (T)(-1) ? SIGNED : UNSIGNED)>::type type;
};
#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) \ #define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) \
LIBDIVIDE_INLINE typename NeonVecFor<INT_TYPE>::type divide( \ LIBDIVIDE_INLINE typename NeonVecFor<INT_TYPE>::type divide( \
typename NeonVecFor<INT_TYPE>::type n) const { \ typename NeonVecFor<INT_TYPE>::type n) const { \
@ -2898,6 +3079,7 @@ struct NeonVecFor<int64_t> {
#else #else
#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) #define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE)
#endif #endif
#if defined(LIBDIVIDE_SSE2) #if defined(LIBDIVIDE_SSE2)
#define LIBDIVIDE_DIVIDE_SSE2(ALGO) \ #define LIBDIVIDE_DIVIDE_SSE2(ALGO) \
LIBDIVIDE_INLINE __m128i divide(__m128i n) const { \ LIBDIVIDE_INLINE __m128i divide(__m128i n) const { \
@ -2930,6 +3112,7 @@ struct NeonVecFor<int64_t> {
#define DISPATCHER_GEN(T, ALGO) \ #define DISPATCHER_GEN(T, ALGO) \
libdivide_##ALGO##_t denom; \ libdivide_##ALGO##_t denom; \
LIBDIVIDE_INLINE dispatcher() {} \ LIBDIVIDE_INLINE dispatcher() {} \
explicit LIBDIVIDE_CONSTEXPR dispatcher(decltype(nullptr)) : denom{} {} \
LIBDIVIDE_INLINE dispatcher(T d) : denom(libdivide_##ALGO##_gen(d)) {} \ LIBDIVIDE_INLINE dispatcher(T d) : denom(libdivide_##ALGO##_gen(d)) {} \
LIBDIVIDE_INLINE T divide(T n) const { return libdivide_##ALGO##_do(n, &denom); } \ LIBDIVIDE_INLINE T divide(T n) const { return libdivide_##ALGO##_do(n, &denom); } \
LIBDIVIDE_INLINE T recover() const { return libdivide_##ALGO##_recover(&denom); } \ LIBDIVIDE_INLINE T recover() const { return libdivide_##ALGO##_recover(&denom); } \
@ -2939,66 +3122,81 @@ struct NeonVecFor<int64_t> {
LIBDIVIDE_DIVIDE_AVX512(ALGO) LIBDIVIDE_DIVIDE_AVX512(ALGO)
// The dispatcher selects a specific division algorithm for a given // The dispatcher selects a specific division algorithm for a given
// type and ALGO using partial template specialization. // width, signedness, and ALGO using partial template specialization.
template <typename _IntT, Branching ALGO> template <int _WIDTH, Signedness _SIGN, Branching _ALGO>
struct dispatcher {}; struct dispatcher {};
template <> template <>
struct dispatcher<int16_t, BRANCHFULL> { struct dispatcher<16, SIGNED, BRANCHFULL> {
DISPATCHER_GEN(int16_t, s16) DISPATCHER_GEN(int16_t, s16)
}; };
template <> template <>
struct dispatcher<int16_t, BRANCHFREE> { struct dispatcher<16, SIGNED, BRANCHFREE> {
DISPATCHER_GEN(int16_t, s16_branchfree) DISPATCHER_GEN(int16_t, s16_branchfree)
}; };
template <> template <>
struct dispatcher<uint16_t, BRANCHFULL> { struct dispatcher<16, UNSIGNED, BRANCHFULL> {
DISPATCHER_GEN(uint16_t, u16) DISPATCHER_GEN(uint16_t, u16)
}; };
template <> template <>
struct dispatcher<uint16_t, BRANCHFREE> { struct dispatcher<16, UNSIGNED, BRANCHFREE> {
DISPATCHER_GEN(uint16_t, u16_branchfree) DISPATCHER_GEN(uint16_t, u16_branchfree)
}; };
template <> template <>
struct dispatcher<int32_t, BRANCHFULL> { struct dispatcher<32, SIGNED, BRANCHFULL> {
DISPATCHER_GEN(int32_t, s32) DISPATCHER_GEN(int32_t, s32)
}; };
template <> template <>
struct dispatcher<int32_t, BRANCHFREE> { struct dispatcher<32, SIGNED, BRANCHFREE> {
DISPATCHER_GEN(int32_t, s32_branchfree) DISPATCHER_GEN(int32_t, s32_branchfree)
}; };
template <> template <>
struct dispatcher<uint32_t, BRANCHFULL> { struct dispatcher<32, UNSIGNED, BRANCHFULL> {
DISPATCHER_GEN(uint32_t, u32) DISPATCHER_GEN(uint32_t, u32)
}; };
template <> template <>
struct dispatcher<uint32_t, BRANCHFREE> { struct dispatcher<32, UNSIGNED, BRANCHFREE> {
DISPATCHER_GEN(uint32_t, u32_branchfree) DISPATCHER_GEN(uint32_t, u32_branchfree)
}; };
template <> template <>
struct dispatcher<int64_t, BRANCHFULL> { struct dispatcher<64, SIGNED, BRANCHFULL> {
DISPATCHER_GEN(int64_t, s64) DISPATCHER_GEN(int64_t, s64)
}; };
template <> template <>
struct dispatcher<int64_t, BRANCHFREE> { struct dispatcher<64, SIGNED, BRANCHFREE> {
DISPATCHER_GEN(int64_t, s64_branchfree) DISPATCHER_GEN(int64_t, s64_branchfree)
}; };
template <> template <>
struct dispatcher<uint64_t, BRANCHFULL> { struct dispatcher<64, UNSIGNED, BRANCHFULL> {
DISPATCHER_GEN(uint64_t, u64) DISPATCHER_GEN(uint64_t, u64)
}; };
template <> template <>
struct dispatcher<uint64_t, BRANCHFREE> { struct dispatcher<64, UNSIGNED, BRANCHFREE> {
DISPATCHER_GEN(uint64_t, u64_branchfree) DISPATCHER_GEN(uint64_t, u64_branchfree)
}; };
} // namespace detail
#if defined(LIBDIVIDE_NEON)
// Allow NeonVecFor outside of detail namespace.
template <typename T>
struct NeonVecFor {
typedef typename detail::NeonVecFor<T>::type type;
};
#endif
// This is the main divider class for use by the user (C++ API). // This is the main divider class for use by the user (C++ API).
// The actual division algorithm is selected using the dispatcher struct // The actual division algorithm is selected using the dispatcher struct
// based on the integer and algorithm template parameters. // based on the integer width and algorithm template parameters.
template <typename T, Branching ALGO = BRANCHFULL> template <typename T, Branching ALGO = BRANCHFULL>
class divider { class divider {
private: private:
typedef dispatcher<T, ALGO> dispatcher_t; // Dispatch based on the size and signedness.
// We avoid using type_traits as it's not available in AVR.
// Detect signedness by checking if T(-1) is less than T(0).
// Also throw in a shift by 0, which prevents floating point types from being passed.
typedef detail::dispatcher<sizeof(T) * 8,
(((T)0 >> 0) > (T)(-1) ? detail::SIGNED : detail::UNSIGNED), ALGO>
dispatcher_t;
public: public:
// We leave the default constructor empty so that creating // We leave the default constructor empty so that creating
@ -3006,6 +3204,9 @@ class divider {
// later doesn't slow us down. // later doesn't slow us down.
divider() {} divider() {}
// constexpr zero-initialization to allow for use w/ static constinit
explicit LIBDIVIDE_CONSTEXPR divider(decltype(nullptr)) : div(nullptr) {}
// Constructor that takes the divisor as a parameter // Constructor that takes the divisor as a parameter
LIBDIVIDE_INLINE divider(T d) : div(d) {} LIBDIVIDE_INLINE divider(T d) : div(d) {}
@ -3017,7 +3218,7 @@ class divider {
T recover() const { return div.recover(); } T recover() const { return div.recover(); }
bool operator==(const divider<T, ALGO> &other) const { bool operator==(const divider<T, ALGO> &other) const {
return div.denom.magic == other.denom.magic && div.denom.more == other.denom.more; return div.denom.magic == other.div.denom.magic && div.denom.more == other.div.denom.more;
} }
bool operator!=(const divider<T, ALGO> &other) const { return !(*this == other); } bool operator!=(const divider<T, ALGO> &other) const { return !(*this == other); }
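
The dispatcher now keys on integer width and signedness rather than on the exact type, and the divider class computes signedness with (((T)0 >> 0) > (T)(-1)) to stay usable on AVR, where type_traits is unavailable; the >> 0 also rejects floating-point types at compile time. A standalone check of that expression (a sketch, not part of libdivide):

#include <stdint.h>
#include <stdio.h>

// For signed T, (T)(-1) is negative, so (T)0 > (T)(-1) holds; for
// unsigned T, (T)(-1) is the maximum value, so the comparison is false.
#define IS_SIGNED(T) (((T)0 >> 0) > (T)(-1))

int main(void) {
    printf("int16_t:  %d\n", IS_SIGNED(int16_t));  // prints 1
    printf("uint64_t: %d\n", IS_SIGNED(uint64_t)); // prints 0
    return 0;
}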
@ -3098,12 +3299,14 @@ LIBDIVIDE_INLINE __m512i operator/=(__m512i &n, const divider<T, ALGO> &div) {
#if defined(LIBDIVIDE_NEON) #if defined(LIBDIVIDE_NEON)
template <typename T, Branching ALGO> template <typename T, Branching ALGO>
LIBDIVIDE_INLINE typename NeonVecFor<T>::type operator/(typename NeonVecFor<T>::type n, const divider<T, ALGO> &div) { LIBDIVIDE_INLINE typename NeonVecFor<T>::type operator/(
typename NeonVecFor<T>::type n, const divider<T, ALGO> &div) {
return div.divide(n); return div.divide(n);
} }
template <typename T, Branching ALGO> template <typename T, Branching ALGO>
LIBDIVIDE_INLINE typename NeonVecFor<T>::type operator/=(typename NeonVecFor<T>::type &n, const divider<T, ALGO> &div) { LIBDIVIDE_INLINE typename NeonVecFor<T>::type operator/=(
typename NeonVecFor<T>::type &n, const divider<T, ALGO> &div) {
n = div.divide(n); n = div.divide(n);
return n; return n;
} }

util.c

@ -6,10 +6,13 @@
#ifdef __ANDROID__ #ifdef __ANDROID__
#include <async_safe/log.h> #include <async_safe/log.h>
int mallopt(int param, int value);
#define M_BIONIC_RESTORE_DEFAULT_SIGABRT_HANDLER (-1003)
#endif #endif
#include "util.h" #include "util.h"
#ifndef __ANDROID__
static int write_full(int fd, const char *buf, size_t length) { static int write_full(int fd, const char *buf, size_t length) {
do { do {
ssize_t bytes_written = write(fd, buf, length); ssize_t bytes_written = write(fd, buf, length);
@ -25,14 +28,17 @@ static int write_full(int fd, const char *buf, size_t length) {
return 0; return 0;
} }
#endif
COLD noreturn void fatal_error(const char *s) { COLD noreturn void fatal_error(const char *s) {
#ifdef __ANDROID__
mallopt(M_BIONIC_RESTORE_DEFAULT_SIGABRT_HANDLER, 0);
async_safe_fatal("hardened_malloc: fatal allocator error: %s", s);
#else
const char *prefix = "fatal allocator error: "; const char *prefix = "fatal allocator error: ";
(void)(write_full(STDERR_FILENO, prefix, strlen(prefix)) != -1 && (void)(write_full(STDERR_FILENO, prefix, strlen(prefix)) != -1 &&
write_full(STDERR_FILENO, s, strlen(s)) != -1 && write_full(STDERR_FILENO, s, strlen(s)) != -1 &&
write_full(STDERR_FILENO, "\n", 1)); write_full(STDERR_FILENO, "\n", 1));
#ifdef __ANDROID__
async_safe_format_log(ANDROID_LOG_FATAL, "hardened_malloc", "fatal allocator error: %s", s);
#endif
abort(); abort();
#endif
} }

util.h

@ -1,11 +1,17 @@
#ifndef UTIL_H #ifndef UTIL_H
#define UTIL_H #define UTIL_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdnoreturn.h>
// C11 noreturn doesn't work in C++
#define noreturn __attribute__((noreturn))
#define likely(x) __builtin_expect(!!(x), 1) #define likely(x) __builtin_expect(!!(x), 1)
#define likely51(x) __builtin_expect_with_probability(!!(x), 1, 0.51)
#define unlikely(x) __builtin_expect(!!(x), 0) #define unlikely(x) __builtin_expect(!!(x), 0)
#define unlikely51(x) __builtin_expect_with_probability(!!(x), 0, 0.51)
#define min(x, y) ({ \ #define min(x, y) ({ \
__typeof__(x) _x = (x); \ __typeof__(x) _x = (x); \
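
The new likely51/unlikely51 variants pass a probability of only 0.51 to __builtin_expect_with_probability (GCC 9+ and recent Clang), so the hint biases layout toward one arm while leaving the other close enough to even odds that it isn't treated as cold the way a plain likely()/unlikely() arm would be. A standalone sketch of the pattern (describe() is a hypothetical example, not from this codebase):

#include <stdio.h>

#define likely51(x) __builtin_expect_with_probability(!!(x), 1, 0.51)

static int describe(int enabled) {
    if (likely51(enabled)) {
        return 1; // slightly favored arm
    }
    return 0; // still laid out as a warm path, not outlined as cold
}

int main(void) {
    printf("%d %d\n", describe(1), describe(0)); // prints 1 0
    return 0;
}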
@ -26,11 +32,12 @@
#define STRINGIFY(s) #s #define STRINGIFY(s) #s
#define ALIAS(f) __attribute__((alias(STRINGIFY(f)))) #define ALIAS(f) __attribute__((alias(STRINGIFY(f))))
static inline int ffzl(long x) { // supported since GCC 15
return __builtin_ffsl(~x); #if __has_attribute (nonstring)
} # define NONSTRING __attribute__ ((nonstring))
#else
COLD noreturn void fatal_error(const char *s); # define NONSTRING
#endif
typedef uint8_t u8; typedef uint8_t u8;
typedef uint16_t u16; typedef uint16_t u16;
@ -38,6 +45,45 @@ typedef uint32_t u32;
typedef uint64_t u64; typedef uint64_t u64;
typedef unsigned __int128 u128; typedef unsigned __int128 u128;
#define U64_WIDTH 64
static inline int ffz64(u64 x) {
return __builtin_ffsll(~x);
}
// parameter must not be 0
static inline int clz64(u64 x) {
return __builtin_clzll(x);
}
// parameter must not be 0
static inline u64 log2u64(u64 x) {
return U64_WIDTH - clz64(x) - 1;
}
static inline size_t align(size_t size, size_t align) {
size_t mask = align - 1;
return (size + mask) & ~mask;
}
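
A quick standalone check of the arithmetic in these helpers, with the functions copied locally; note the stated preconditions (non-zero input for clz64/log2u64, power-of-two alignment for align):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

typedef uint64_t u64;
#define U64_WIDTH 64

static inline int ffz64(u64 x) { return __builtin_ffsll(~x); }
static inline int clz64(u64 x) { return __builtin_clzll(x); }         // x must not be 0
static inline u64 log2u64(u64 x) { return U64_WIDTH - clz64(x) - 1; } // x must not be 0
static inline size_t align(size_t size, size_t align) {
    size_t mask = align - 1; // requires a power-of-two alignment
    return (size + mask) & ~mask;
}

int main(void) {
    printf("%d\n", ffz64(0xff));                         // 9: first zero bit is bit 8, 1-based
    printf("%llu\n", (unsigned long long)log2u64(4096)); // 12
    printf("%zu\n", align(13, 16));                      // 16
    printf("%zu\n", align(32, 16));                      // 32 (already aligned)
    return 0;
}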
// u4_arr_{set,get} are helper functions for using u8 array as an array of unsigned 4-bit values.
// val is treated as a 4-bit value
static inline void u4_arr_set(u8 *arr, size_t idx, u8 val) {
size_t off = idx >> 1;
size_t shift = (idx & 1) << 2;
u8 mask = (u8) (0xf0 >> shift);
arr[off] = (arr[off] & mask) | (val << shift);
}
static inline u8 u4_arr_get(const u8 *arr, size_t idx) {
size_t off = idx >> 1;
size_t shift = (idx & 1) << 2;
return (u8) ((arr[off] >> shift) & 0xf);
}
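
And a standalone demonstration of the nibble-array helpers, again with the functions copied locally; four 4-bit slots pack into two bytes:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

typedef uint8_t u8;

static inline void u4_arr_set(u8 *arr, size_t idx, u8 val) {
    size_t off = idx >> 1;
    size_t shift = (idx & 1) << 2;
    u8 mask = (u8) (0xf0 >> shift);
    arr[off] = (arr[off] & mask) | (val << shift);
}

static inline u8 u4_arr_get(const u8 *arr, size_t idx) {
    size_t off = idx >> 1;
    size_t shift = (idx & 1) << 2;
    return (u8) ((arr[off] >> shift) & 0xf);
}

int main(void) {
    u8 arr[2] = {0}; // four 4-bit slots
    u4_arr_set(arr, 0, 0x9);
    u4_arr_set(arr, 3, 0x5);
    printf("%x %x %x %x\n", u4_arr_get(arr, 0), u4_arr_get(arr, 1),
           u4_arr_get(arr, 2), u4_arr_get(arr, 3)); // prints 9 0 0 5
    return 0;
}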
COLD noreturn void fatal_error(const char *s);
#if CONFIG_SEAL_METADATA #if CONFIG_SEAL_METADATA
#ifdef __GLIBC__ #ifdef __GLIBC__