Blame jemalloc/include/jemalloc/internal/tsd.h

Packit 345191
#ifndef JEMALLOC_INTERNAL_TSD_H
Packit 345191
#define JEMALLOC_INTERNAL_TSD_H
Packit 345191
Packit 345191
#include "jemalloc/internal/arena_types.h"
Packit 345191
#include "jemalloc/internal/assert.h"
Packit 345191
#include "jemalloc/internal/bin_types.h"
Packit 345191
#include "jemalloc/internal/jemalloc_internal_externs.h"
Packit 345191
#include "jemalloc/internal/prof_types.h"
Packit 345191
#include "jemalloc/internal/ql.h"
Packit 345191
#include "jemalloc/internal/rtree_tsd.h"
Packit 345191
#include "jemalloc/internal/tcache_types.h"
Packit 345191
#include "jemalloc/internal/tcache_structs.h"
Packit 345191
#include "jemalloc/internal/util.h"
Packit 345191
#include "jemalloc/internal/witness.h"
Packit 345191
Packit 345191
/*
Packit 345191
 * Thread-Specific-Data layout
Packit 345191
 * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
Packit 345191
 * s: state
Packit 345191
 * e: tcache_enabled
Packit 345191
 * m: thread_allocated (config_stats)
Packit 345191
 * f: thread_deallocated (config_stats)
Packit 345191
 * p: prof_tdata (config_prof)
Packit 345191
 * c: rtree_ctx (rtree cache accessed on deallocation)
Packit 345191
 * t: tcache
Packit 345191
 * --- data not accessed on tcache fast path: arena-related fields ---
Packit 345191
 * d: arenas_tdata_bypass
Packit 345191
 * r: reentrancy_level
Packit 345191
 * x: narenas_tdata
Packit 345191
 * i: iarena
Packit 345191
 * a: arena
Packit 345191
 * o: arenas_tdata
Packit 345191
 * Loading TSD data is on the critical path of basically all malloc operations.
Packit 345191
 * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
Packit 345191
 * Use a compact layout to reduce cache footprint.
Packit 345191
 * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
Packit 345191
 * |----------------------------  1st cacheline  ----------------------------|
Packit 345191
 * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32  ........ ........ .......] |
Packit 345191
 * |----------------------------  2nd cacheline  ----------------------------|
Packit 345191
 * | [c * 64  ........ ........ ........ ........ ........ ........ .......] |
Packit 345191
 * |----------------------------  3rd cacheline  ----------------------------|
Packit 345191
 * | [c * 32  ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
Packit 345191
 * +-------------------------------------------------------------------------+
Packit 345191
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
Packit 345191
 *
Packit 345191
 * The last 3 members (i, a and o) before tcache aren't really needed on tcache
Packit 345191
 * fast path.  However we have a number of unused tcache bins and witnesses
Packit 345191
 * (never touched unless config_debug) at the end of tcache, so we place them
Packit 345191
 * there to avoid breaking the cachelines and possibly paging in an extra page.
Packit 345191
 */
Packit 345191
/*
 * Test-only TSD entries.  When JEMALLOC_JET is defined, MALLOC_TEST_TSD adds
 * two O() entries (test_data and test_callback) and
 * MALLOC_TEST_TSD_INITIALIZER supplies their initial values, starting with a
 * leading comma so it can be appended to an initializer list.  Otherwise both
 * macros expand to nothing.
 */
#ifdef JEMALLOC_JET
Packit 345191
typedef void (*test_callback_t)(int *);
Packit 345191
#  define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
Packit 345191
#  define MALLOC_TEST_TSD \
Packit 345191
    O(test_data,		int,			int)		\
Packit 345191
    O(test_callback,		test_callback_t,	int)
Packit 345191
#  define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
Packit 345191
#else
Packit 345191
#  define MALLOC_TEST_TSD
Packit 345191
#  define MALLOC_TEST_TSD_INITIALIZER
Packit 345191
#endif
Packit 345191
Packit 345191
/*
 * X-macro list of every TSD field that follows the leading state member of
 * struct tsd_s.  Each user defines O(name, type, nullable type), expands
 * MALLOC_TSD, and then undefines O again.  MALLOC_TEST_TSD appends extra
 * entries only when JEMALLOC_JET is defined.
 */
/*  O(name,			type,			nullable type) */
Packit 345191
#define MALLOC_TSD							\
Packit 345191
    O(tcache_enabled,		bool,			bool)		\
Packit 345191
    O(arenas_tdata_bypass,	bool,			bool)		\
Packit 345191
    O(reentrancy_level,		int8_t,			int8_t)		\
Packit 345191
    O(narenas_tdata,		uint32_t,		uint32_t)	\
Packit 345191
    O(offset_state,		uint64_t,		uint64_t)	\
Packit 345191
    O(thread_allocated,		uint64_t,		uint64_t)	\
Packit 345191
    O(thread_deallocated,	uint64_t,		uint64_t)	\
Packit 345191
    O(bytes_until_sample,	int64_t,		int64_t)	\
Packit 345191
    O(prof_tdata,		prof_tdata_t *,		prof_tdata_t *)	\
Packit 345191
    O(rtree_ctx,		rtree_ctx_t,		rtree_ctx_t)	\
Packit 345191
    O(iarena,			arena_t *,		arena_t *)	\
Packit 345191
    O(arena,			arena_t *,		arena_t *)	\
Packit 345191
    O(arenas_tdata,		arena_tdata_t *,	arena_tdata_t *)\
Packit 345191
    O(binshards,		tsd_binshards_t,	tsd_binshards_t)\
Packit 345191
    O(tcache,			tcache_t,		tcache_t)	\
Packit 345191
    O(witness_tsd,              witness_tsd_t,		witness_tsdn_t)	\
Packit 345191
    MALLOC_TEST_TSD
Packit 345191
Packit 345191
/*
 * Positional initializer for struct tsd_s.  The first entry initializes the
 * state member; the remaining entries line up one-to-one with the MALLOC_TSD
 * list, so the two must be kept in the same order.
 * MALLOC_TEST_TSD_INITIALIZER carries its own leading comma, which is why no
 * comma follows WITNESS_TSD_INITIALIZER.
 */
#define TSD_INITIALIZER {						\
Packit 345191
    ATOMIC_INIT(tsd_state_uninitialized),				\
Packit 345191
    TCACHE_ENABLED_ZERO_INITIALIZER,					\
Packit 345191
    false,								\
Packit 345191
    0,									\
Packit 345191
    0,									\
Packit 345191
    0,									\
Packit 345191
    0,									\
Packit 345191
    0,									\
Packit 345191
    0,									\
Packit 345191
    NULL,								\
Packit 345191
    RTREE_CTX_ZERO_INITIALIZER,						\
Packit 345191
    NULL,								\
Packit 345191
    NULL,								\
Packit 345191
    NULL,								\
Packit 345191
    TSD_BINSHARDS_ZERO_INITIALIZER,					\
Packit 345191
    TCACHE_ZERO_INITIALIZER,						\
Packit 345191
    WITNESS_TSD_INITIALIZER						\
Packit 345191
    MALLOC_TEST_TSD_INITIALIZER						\
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Non-inline tsd routines.  They are only declared in this header; the inline
 * helpers further down call tsd_fetch_slow() and tsd_state_set().
 */
void *malloc_tsd_malloc(size_t size);
Packit 345191
void malloc_tsd_dalloc(void *wrapper);
Packit 345191
void malloc_tsd_cleanup_register(bool (*f)(void));
Packit 345191
tsd_t *malloc_tsd_boot0(void);
Packit 345191
void malloc_tsd_boot1(void);
Packit 345191
void tsd_cleanup(void *arg);
Packit 345191
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
Packit 345191
void tsd_state_set(tsd_t *tsd, uint8_t new_state);
Packit 345191
void tsd_slow_update(tsd_t *tsd);
Packit 345191
void tsd_prefork(tsd_t *tsd);
Packit 345191
void tsd_postfork_parent(tsd_t *tsd);
Packit 345191
void tsd_postfork_child(tsd_t *tsd);
Packit 345191
Packit 345191
/*
Packit 345191
 * Call ..._inc when your module wants to take all threads down the slow paths,
Packit 345191
 * and ..._dec when it no longer needs to.
Packit 345191
 */
Packit 345191
/* Declarations only; both take a tsdn_t * (the nullable tsd handle type). */
void tsd_global_slow_inc(tsdn_t *tsdn);
Packit 345191
void tsd_global_slow_dec(tsdn_t *tsdn);
Packit 345191
/*
 * Declared with (void) so this is a true prototype: in C an empty parameter
 * list leaves the parameters unspecified and disables argument checking.
 */
bool tsd_global_slow(void);
Packit 345191
Packit 345191
/*
 * Values held in the state member of struct tsd_s (TSD_INITIALIZER starts it
 * at tsd_state_uninitialized).  The numeric order matters: tsd_nominal()
 * tests state <= tsd_state_nominal_max.
 */
enum {
Packit 345191
	/* Common case --> jnz. */
Packit 345191
	tsd_state_nominal = 0,
Packit 345191
	/* Initialized but on slow path. */
Packit 345191
	tsd_state_nominal_slow = 1,
Packit 345191
	/*
Packit 345191
	 * Some thread has changed global state in such a way that all nominal
Packit 345191
	 * threads need to recompute their fast / slow status the next time they
Packit 345191
	 * get a chance.
Packit 345191
	 *
Packit 345191
	 * Any thread can change another thread's status *to* recompute, but
Packit 345191
	 * threads are the only ones who can change their status *from*
Packit 345191
	 * recompute.
Packit 345191
	 */
Packit 345191
	tsd_state_nominal_recompute = 2,
Packit 345191
	/*
Packit 345191
	 * The above nominal states should be lower values.  We use
Packit 345191
	 * tsd_state_nominal_max to separate nominal states from threads in the
Packit 345191
	 * process of being born / dying.
Packit 345191
	 */
Packit 345191
	tsd_state_nominal_max = 2,
Packit 345191
Packit 345191
	/*
Packit 345191
	 * A thread might free() during its death as its only allocator action;
Packit 345191
	 * in such scenarios, we need tsd, but set up in such a way that no
Packit 345191
	 * cleanup is necessary.
Packit 345191
	 */
Packit 345191
	tsd_state_minimal_initialized = 3,
Packit 345191
	/* States during which we know we're in thread death. */
Packit 345191
	tsd_state_purgatory = 4,
Packit 345191
	tsd_state_reincarnated = 5,
Packit 345191
	/*
Packit 345191
	 * What it says on the tin; tsd that hasn't been initialized.  Note
Packit 345191
	 * that even when the tsd struct lives in TLS, we need to keep track
Packit 345191
	 * of stuff like whether or not our pthread destructors have been
Packit 345191
	 * scheduled, so this really truly is different than the nominal state.
Packit 345191
	 */
Packit 345191
	tsd_state_uninitialized = 6
Packit 345191
};
Packit 345191
Packit 345191
/*
Packit 345191
 * Some TSD accesses can only be done in a nominal state.  To enforce this, we
Packit 345191
 * wrap TSD member access in a function that asserts on TSD state, and mangle
Packit 345191
 * field names to prevent touching them accidentally.
Packit 345191
 */
Packit 345191
/*
 * Example: TSD_MANGLE(arena) expands to
 * cant_access_tsd_items_directly_use_a_getter_or_setter_arena.
 */
#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
Packit 345191
Packit 345191
/*
 * Pick the atomic type and the load/store/exchange operations used for the
 * tsd state: the 8-bit variants when JEMALLOC_U8_ATOMICS is defined, the
 * 32-bit variants otherwise.
 */
#ifdef JEMALLOC_U8_ATOMICS
Packit 345191
#  define tsd_state_t atomic_u8_t
Packit 345191
#  define tsd_atomic_load atomic_load_u8
Packit 345191
#  define tsd_atomic_store atomic_store_u8
Packit 345191
#  define tsd_atomic_exchange atomic_exchange_u8
Packit 345191
#else
Packit 345191
#  define tsd_state_t atomic_u32_t
Packit 345191
#  define tsd_atomic_load atomic_load_u32
Packit 345191
#  define tsd_atomic_store atomic_store_u32
Packit 345191
#  define tsd_atomic_exchange atomic_exchange_u32
Packit 345191
#endif
Packit 345191
Packit 345191
/* The actual tsd. */
Packit 345191
struct tsd_s {
Packit 345191
	/*
Packit 345191
	 * The contents should be treated as totally opaque outside the tsd
Packit 345191
	 * module.  Access any thread-local state through the getters and
Packit 345191
	 * setters below.
Packit 345191
	 */
Packit 345191
Packit 345191
	/*
Packit 345191
	 * We manually limit the state to just a single byte, unless the 8-bit
Packit 345191
	 * atomics are unavailable (which is rare).
Packit 345191
	 */
Packit 345191
	tsd_state_t state;
	/* One name-mangled member per MALLOC_TSD entry, in list order. */
Packit 345191
#define O(n, t, nt)							\
Packit 345191
	t TSD_MANGLE(n);
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
};
Packit 345191
Packit 345191
/*
 * Returns the current tsd state by reading the first byte of tsd->state with
 * a plain (non-atomic) load.
 * NOTE(review): when tsd_state_t is atomic_u32_t this still reads only the
 * first byte in memory, which presumably assumes a little-endian layout --
 * confirm.
 */
JEMALLOC_ALWAYS_INLINE uint8_t
Packit 345191
tsd_state_get(tsd_t *tsd) {
Packit 345191
	/*
Packit 345191
	 * This should be atomic.  Unfortunately, compilers right now can't tell
Packit 345191
	 * that this can be done as a memory comparison, and forces a load into
Packit 345191
	 * a register that hurts fast-path performance.
Packit 345191
	 */
Packit 345191
	/* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */
Packit 345191
	return *(uint8_t *)&tsd->state;
Packit 345191
}
Packit 345191
Packit 345191
/*
Packit 345191
 * Wrapper around tsd_t that makes it possible to avoid implicit conversion
Packit 345191
 * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
Packit 345191
 * explicitly converted to tsd_t, which is non-nullable.
Packit 345191
 */
Packit 345191
/* Holds a tsd_t as its only member, so tsd_tsdn() can convert with a cast. */
struct tsdn_s {
Packit 345191
	tsd_t tsd;
Packit 345191
};
Packit 345191
/* The null tsdn_t pointer. */
#define TSDN_NULL ((tsdn_t *)0)
Packit 345191
/* Converts a tsd_t pointer to the nullable tsdn_t pointer type by casting. */
JEMALLOC_ALWAYS_INLINE tsdn_t *
Packit 345191
tsd_tsdn(tsd_t *tsd) {
Packit 345191
	return (tsdn_t *)tsd;
Packit 345191
}
Packit 345191
Packit 345191
/* Returns true when tsdn is NULL. */
JEMALLOC_ALWAYS_INLINE bool
Packit 345191
tsdn_null(const tsdn_t *tsdn) {
Packit 345191
	return tsdn == NULL;
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Converts a tsdn_t pointer back to a tsd_t pointer.  The caller must pass a
 * non-NULL tsdn; this is only checked by an assert.
 */
JEMALLOC_ALWAYS_INLINE tsd_t *
Packit 345191
tsdn_tsd(tsdn_t *tsdn) {
Packit 345191
	assert(!tsdn_null(tsdn));
Packit 345191
Packit 345191
	return &tsdn->tsd;
Packit 345191
}
Packit 345191
Packit 345191
/*
Packit 345191
 * We put the platform-specific data declarations and inlines into their own
Packit 345191
 * header files to avoid cluttering this file.  They define tsd_boot0,
Packit 345191
 * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set.
Packit 345191
 */
Packit 345191
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
Packit 345191
#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
Packit 345191
#elif (defined(JEMALLOC_TLS))
Packit 345191
#include "jemalloc/internal/tsd_tls.h"
Packit 345191
#elif (defined(_WIN32))
Packit 345191
#include "jemalloc/internal/tsd_win.h"
Packit 345191
#else
Packit 345191
#include "jemalloc/internal/tsd_generic.h"
Packit 345191
#endif
Packit 345191
Packit 345191
/*
Packit 345191
 * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
Packit 345191
 * foo.  This omits some safety checks, and so can be used during tsd
Packit 345191
 * initialization and cleanup.
Packit 345191
 */
Packit 345191
/*
 * Generates one tsd_<name>p_get_unsafe() per MALLOC_TSD entry; each returns
 * the address of the mangled member without looking at the tsd state.
 */
#define O(n, t, nt)							\
Packit 345191
JEMALLOC_ALWAYS_INLINE t *						\
Packit 345191
tsd_##n##p_get_unsafe(tsd_t *tsd) {					\
Packit 345191
	return &tsd->TSD_MANGLE(n);					\
Packit 345191
}
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
Packit 345191
/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
Packit 345191
/*
 * Generates one checked pointer getter per MALLOC_TSD entry.  The assert
 * accepts the three nominal states plus reincarnated and
 * minimal_initialized; the lookup itself is delegated to the _unsafe getter.
 */
#define O(n, t, nt)							\
Packit 345191
JEMALLOC_ALWAYS_INLINE t *						\
Packit 345191
tsd_##n##p_get(tsd_t *tsd) {						\
Packit 345191
	/*								\
Packit 345191
	 * Because the state might change asynchronously if it's	\
Packit 345191
	 * nominal, we need to make sure that we only read it once.	\
Packit 345191
	 */								\
Packit 345191
	uint8_t state = tsd_state_get(tsd);				\
Packit 345191
	assert(state == tsd_state_nominal ||				\
Packit 345191
	    state == tsd_state_nominal_slow ||				\
Packit 345191
	    state == tsd_state_nominal_recompute ||			\
Packit 345191
	    state == tsd_state_reincarnated ||				\
Packit 345191
	    state == tsd_state_minimal_initialized);			\
Packit 345191
	return tsd_##n##p_get_unsafe(tsd);				\
Packit 345191
}
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
Packit 345191
/*
Packit 345191
 * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
Packit 345191
 * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
Packit 345191
 */
Packit 345191
/*
 * Generates one nullable getter per MALLOC_TSD entry.  The return type uses
 * the entry's third ("nullable type") column, and the non-NULL path goes
 * through the checked tsd_<name>p_get().
 */
#define O(n, t, nt)							\
Packit 345191
JEMALLOC_ALWAYS_INLINE nt *						\
Packit 345191
tsdn_##n##p_get(tsdn_t *tsdn) {						\
Packit 345191
	if (tsdn_null(tsdn)) {						\
Packit 345191
		return NULL;						\
Packit 345191
	}								\
Packit 345191
	tsd_t *tsd = tsdn_tsd(tsdn);					\
Packit 345191
	return (nt *)tsd_##n##p_get(tsd);				\
Packit 345191
}
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
Packit 345191
/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
Packit 345191
/*
 * Generates one by-value getter per MALLOC_TSD entry; each dereferences the
 * checked pointer getter and returns a copy of the field.
 */
#define O(n, t, nt)							\
Packit 345191
JEMALLOC_ALWAYS_INLINE t						\
Packit 345191
tsd_##n##_get(tsd_t *tsd) {						\
Packit 345191
	return *tsd_##n##p_get(tsd);					\
Packit 345191
}
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
Packit 345191
/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
Packit 345191
/*
 * Generates one setter per MALLOC_TSD entry.  Each asserts that the state is
 * neither reincarnated nor minimal_initialized, then stores val through the
 * checked pointer getter.
 */
#define O(n, t, nt)							\
Packit 345191
JEMALLOC_ALWAYS_INLINE void						\
Packit 345191
tsd_##n##_set(tsd_t *tsd, t val) {					\
Packit 345191
	assert(tsd_state_get(tsd) != tsd_state_reincarnated &&		\
Packit 345191
	    tsd_state_get(tsd) != tsd_state_minimal_initialized);	\
Packit 345191
	*tsd_##n##p_get(tsd) = val;					\
Packit 345191
}
Packit 345191
MALLOC_TSD
Packit 345191
#undef O
Packit 345191
Packit 345191
/*
 * Asserts the per-thread fast-path conditions: malloc_slow is false, the
 * tcache is enabled, and reentrancy_level is zero.  Does nothing else.
 */
JEMALLOC_ALWAYS_INLINE void
Packit 345191
tsd_assert_fast(tsd_t *tsd) {
Packit 345191
	/*
Packit 345191
	 * Note that our fastness assertion does *not* include global slowness
Packit 345191
	 * counters; it's not in general possible to ensure that they won't
Packit 345191
	 * change asynchronously from underneath us.
Packit 345191
	 */
Packit 345191
	assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
Packit 345191
	    tsd_reentrancy_level_get(tsd) == 0);
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Returns true when the state is exactly tsd_state_nominal.  In that case the
 * fast-path invariants are also checked via tsd_assert_fast().
 */
JEMALLOC_ALWAYS_INLINE bool
Packit 345191
tsd_fast(tsd_t *tsd) {
Packit 345191
	bool fast = (tsd_state_get(tsd) == tsd_state_nominal);
Packit 345191
	if (fast) {
Packit 345191
		tsd_assert_fast(tsd);
Packit 345191
	}
Packit 345191
Packit 345191
	return fast;
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Common implementation behind the tsd fetch helpers.
 *
 * init is forwarded to tsd_get(); minimal is forwarded to tsd_fetch_slow().
 * Returns NULL only when init is false, tsd_get_allocates() is true and
 * tsd_get() produced NULL.  Otherwise returns the tsd directly if its state
 * is tsd_state_nominal, or whatever tsd_fetch_slow() returns for any other
 * state.
 */
JEMALLOC_ALWAYS_INLINE tsd_t *
Packit 345191
tsd_fetch_impl(bool init, bool minimal) {
Packit 345191
	tsd_t *tsd = tsd_get(init);
Packit 345191
Packit 345191
	if (!init && tsd_get_allocates() && tsd == NULL) {
Packit 345191
		return NULL;
Packit 345191
	}
Packit 345191
	assert(tsd != NULL);
Packit 345191
Packit 345191
	/* Any state other than nominal takes the out-of-line slow path. */
	if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) {
Packit 345191
		return tsd_fetch_slow(tsd, minimal);
Packit 345191
	}
Packit 345191
	assert(tsd_fast(tsd));
Packit 345191
	tsd_assert_fast(tsd);
Packit 345191
Packit 345191
	return tsd;
Packit 345191
}
Packit 345191
Packit 345191
/* Get a minimal TSD that requires no cleanup.  See comments in free(). */
Packit 345191
/* Calls tsd_fetch_impl() with init = true and minimal = true. */
JEMALLOC_ALWAYS_INLINE tsd_t *
Packit 345191
tsd_fetch_min(void) {
Packit 345191
	return tsd_fetch_impl(true, true);
Packit 345191
}
Packit 345191
Packit 345191
/* For internal background threads use only. */
Packit 345191
/*
 * Fetches the tsd through tsd_fetch_min() and then unconditionally sets its
 * state to tsd_state_reincarnated before returning it.
 */
JEMALLOC_ALWAYS_INLINE tsd_t *
Packit 345191
tsd_internal_fetch(void) {
Packit 345191
	tsd_t *tsd = tsd_fetch_min();
Packit 345191
	/* Use reincarnated state to prevent full initialization. */
Packit 345191
	tsd_state_set(tsd, tsd_state_reincarnated);
Packit 345191
Packit 345191
	return tsd;
Packit 345191
}
Packit 345191
Packit 345191
/* Calls tsd_fetch_impl() with init = true and minimal = false. */
JEMALLOC_ALWAYS_INLINE tsd_t *
Packit 345191
tsd_fetch(void) {
Packit 345191
	return tsd_fetch_impl(true, false);
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Returns true when the state is one of the nominal states, i.e. its value
 * is at most tsd_state_nominal_max.
 */
static inline bool
Packit 345191
tsd_nominal(tsd_t *tsd) {
Packit 345191
	return (tsd_state_get(tsd) <= tsd_state_nominal_max);
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Nullable fetch.  Returns NULL when tsd_booted_get() is false; otherwise
 * returns the result of tsd_fetch_impl(false, false) converted to tsdn_t *,
 * which may itself be NULL.
 */
JEMALLOC_ALWAYS_INLINE tsdn_t *
Packit 345191
tsdn_fetch(void) {
Packit 345191
	if (!tsd_booted_get()) {
Packit 345191
		return NULL;
Packit 345191
	}
Packit 345191
Packit 345191
	return tsd_tsdn(tsd_fetch_impl(false, false));
Packit 345191
}
Packit 345191
Packit 345191
/* Returns a pointer to this tsd's rtree_ctx field via the checked getter. */
JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
Packit 345191
tsd_rtree_ctx(tsd_t *tsd) {
Packit 345191
	return tsd_rtree_ctxp_get(tsd);
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Returns the rtree_ctx to use for tsdn.  When tsdn is NULL, fallback is
 * initialized with rtree_ctx_data_init() and returned; otherwise the tsd's
 * own rtree_ctx is returned and fallback is left untouched.
 */
JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
Packit 345191
tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
Packit 345191
	/*
Packit 345191
	 * If tsd cannot be accessed, initialize the fallback rtree_ctx and
Packit 345191
	 * return a pointer to it.
Packit 345191
	 */
Packit 345191
	if (unlikely(tsdn_null(tsdn))) {
Packit 345191
		rtree_ctx_data_init(fallback);
Packit 345191
		return fallback;
Packit 345191
	}
Packit 345191
	return tsd_rtree_ctx(tsdn_tsd(tsdn));
Packit 345191
}
Packit 345191
Packit 345191
#endif /* JEMALLOC_INTERNAL_TSD_H */