Blob Blame History Raw
/*
 * Copyright 2014-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *
 *     * Neither the name of the copyright holder nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * libvmmalloc.c -- entry points for libvmmalloc
 *
 * NOTES:
 * 1) Since some standard library functions (fopen, sprintf) use malloc
 *    internally, then at initialization phase, malloc(3) calls are redirected
 *    to the standard jemalloc interfaces that operate on a system heap.
 *    There is no need to track these allocations.  For small allocations,
 *    jemalloc is able to detect the corresponding pool the memory was
 *    allocated from, and Vmp argument is actually ignored.  So, it is safe
 *    to reclaim this memory using je_vmem_pool_free().
 *    The problem may occur for huge allocations only (>2MB), but it seems
 *    like such allocations do not happen at initialization phase.
 *
 * 2) Debug traces in malloc(3) functions are not available until library
 *    initialization (vmem pool creation) is completed.  This is to avoid
 *    recursive calls to malloc, leading to stack overflow.
 *
 * 3) Malloc hooks in glibc are overridden to prevent any references to glibc's
 *    malloc(3) functions in case the application uses dlopen with
 *    RTLD_DEEPBIND flag. (Not relevant for FreeBSD since FreeBSD supports
 *    neither malloc hooks nor RTLD_DEEPBIND.)
 *
 * 4) If the process forks, there is no separate log file open for a new
 *    process, even if the configured log file name is terminated with "-".
 *
 * 5) Fork options 2 and 3 are currently not supported on FreeBSD because
 *    locks are dynamically allocated on FreeBSD and hence they would be cloned
 *    as part of the pool. This may be solvable.
 */

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <errno.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#ifndef __FreeBSD__
#include <malloc.h>
#endif

#include "libvmem.h"
#include "libvmmalloc.h"

#include "jemalloc.h"
#include "pmemcommon.h"
#include "file.h"
#include "os.h"
#include "os_thread.h"
#include "vmem.h"
#include "vmmalloc.h"
#include "valgrind_internal.h"

#define HUGE (2 * 1024 * 1024)

/*
 * private to this file...
 */
static size_t Header_size;
static VMEM *Vmp;
static char *Dir;
static int Fd;
static int Fd_clone;
static int Private;
static int Forkopt = 1; /* default behavior - remap as private */
static bool Destructed; /* when set - ignore all calls (do not call jemalloc) */

/*
 * malloc -- allocate a block of size bytes
 */
__ATTR_MALLOC__
__ATTR_ALLOC_SIZE__(1)
void *
malloc(size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_malloc(size);
	}
	LOG(4, "size %zu", size);
	return je_vmem_pool_malloc(
			(pool_t *)((uintptr_t)Vmp + Header_size), size);
}

/*
 * calloc -- allocate a block of nmemb * size bytes and set its contents to zero
 */
__ATTR_MALLOC__
__ATTR_ALLOC_SIZE__(1, 2)
void *
calloc(size_t nmemb, size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	if (Vmp == NULL) {
		ASSERT((nmemb * size) <= HUGE);
		return je_vmem_calloc(nmemb, size);
	}
	LOG(4, "nmemb %zu, size %zu", nmemb, size);
	return je_vmem_pool_calloc((pool_t *)((uintptr_t)Vmp + Header_size),
			nmemb, size);
}

/*
 * realloc -- resize a block previously allocated by malloc
 */
__ATTR_ALLOC_SIZE__(2)
void *
realloc(void *ptr, size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_realloc(ptr, size);
	}
	LOG(4, "ptr %p, size %zu", ptr, size);
	return je_vmem_pool_ralloc((pool_t *)((uintptr_t)Vmp + Header_size),
			ptr, size);
}

/*
 * free -- free a block previously allocated by malloc
 */
void
free(void *ptr)
{
	if (unlikely(Destructed))
		return;

	if (Vmp == NULL) {
		je_vmem_free(ptr);
		return;
	}
	LOG(4, "ptr %p", ptr);
	je_vmem_pool_free((pool_t *)((uintptr_t)Vmp + Header_size), ptr);
}

/*
 * cfree -- free a block previously allocated by calloc
 *
 * the implementation is identical to free()
 *
 * XXX Not supported on FreeBSD, but we define it anyway
 */
void
cfree(void *ptr)
{
	if (unlikely(Destructed))
		return;

	if (Vmp == NULL) {
		je_vmem_free(ptr);
		return;
	}
	LOG(4, "ptr %p", ptr);
	je_vmem_pool_free((pool_t *)((uintptr_t)Vmp + Header_size), ptr);
}

/*
 * memalign -- allocate a block of size bytes, starting on an address
 * that is a multiple of boundary
 *
 * XXX Not supported on FreeBSD, but we define it anyway
 */
__ATTR_MALLOC__
__ATTR_ALLOC_ALIGN__(1)
__ATTR_ALLOC_SIZE__(2)
void *
memalign(size_t boundary, size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_aligned_alloc(boundary, size);
	}
	LOG(4, "boundary %zu  size %zu", boundary, size);
	return je_vmem_pool_aligned_alloc(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			boundary, size);
}

/*
 * aligned_alloc -- allocate a block of size bytes, starting on an address
 * that is a multiple of alignment
 *
 * size must be a multiple of alignment
 */
__ATTR_MALLOC__
__ATTR_ALLOC_ALIGN__(1)
__ATTR_ALLOC_SIZE__(2)
void *
aligned_alloc(size_t alignment, size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	/* XXX - check if size is a multiple of alignment */

	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_aligned_alloc(alignment, size);
	}
	LOG(4, "alignment %zu  size %zu", alignment, size);
	return je_vmem_pool_aligned_alloc(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			alignment, size);
}

/*
 * posix_memalign -- allocate a block of size bytes, starting on an address
 * that is a multiple of alignment
 */
__ATTR_NONNULL__(1)
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	if (unlikely(Destructed))
		return ENOMEM;

	int ret = 0;
	int oerrno = errno;
	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_posix_memalign(memptr, alignment, size);
	}
	LOG(4, "alignment %zu  size %zu", alignment, size);
	*memptr = je_vmem_pool_aligned_alloc(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			alignment, size);
	if (*memptr == NULL)
		ret = errno;
	errno = oerrno;
	return ret;
}

/*
 * valloc -- allocate a block of size bytes, starting on a page boundary
 */
__ATTR_MALLOC__
__ATTR_ALLOC_SIZE__(1)
void *
valloc(size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	ASSERTne(Pagesize, 0);
	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_aligned_alloc(Pagesize, size);
	}
	LOG(4, "size %zu", size);
	return je_vmem_pool_aligned_alloc(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			Pagesize, size);
}

/*
 * pvalloc -- allocate a block of size bytes, starting on a page boundary
 *
 * Requested size is also aligned to page boundary.
 *
 * XXX Not supported on FreeBSD, but we define it anyway.
 */
__ATTR_MALLOC__
__ATTR_ALLOC_SIZE__(1)
void *
pvalloc(size_t size)
{
	if (unlikely(Destructed))
		return NULL;

	ASSERTne(Pagesize, 0);
	if (Vmp == NULL) {
		ASSERT(size <= HUGE);
		return je_vmem_aligned_alloc(Pagesize, roundup(size, Pagesize));
	}
	LOG(4, "size %zu", size);
	return je_vmem_pool_aligned_alloc(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			Pagesize, roundup(size, Pagesize));
}

/*
 * malloc_usable_size -- get usable size of allocation
 */
size_t
malloc_usable_size(void *ptr)
{
	if (unlikely(Destructed))
		return 0;

	if (Vmp == NULL) {
		return je_vmem_malloc_usable_size(ptr);
	}
	LOG(4, "ptr %p", ptr);
	return je_vmem_pool_malloc_usable_size(
			(pool_t *)((uintptr_t)Vmp + Header_size), ptr);
}

#if (defined(__GLIBC__) && !defined(__UCLIBC__))

#ifndef __MALLOC_HOOK_VOLATILE
#define __MALLOC_HOOK_VOLATILE
#endif

/*
 * Interpose malloc hooks in glibc.  Even if the application uses dlopen
 * with RTLD_DEEPBIND flag, all the references to libc's malloc(3) functions
 * will be redirected to libvmmalloc.
 */
void *(*__MALLOC_HOOK_VOLATILE __malloc_hook) (size_t size,
	const void *caller) = (void *)malloc;
void *(*__MALLOC_HOOK_VOLATILE __realloc_hook) (void *ptr, size_t size,
	const void *caller) = (void *)realloc;
void (*__MALLOC_HOOK_VOLATILE __free_hook) (void *ptr, const void *caller) =
	(void *)free;
void *(*__MALLOC_HOOK_VOLATILE __memalign_hook) (size_t size, size_t alignment,
	const void *caller) = (void *)memalign;
#endif

/*
 * print_jemalloc_messages -- (internal) custom print function, for jemalloc
 *
 * Prints traces from jemalloc. All traces from jemalloc
 * are considered as error messages.
 */
static void
print_jemalloc_messages(void *ignore, const char *s)
{
	LOG_NONL(1, "%s", s);
}

/*
 * print_jemalloc_stats -- (internal) print function for jemalloc statistics
 */
static void
print_jemalloc_stats(void *ignore, const char *s)
{
	LOG_NONL(0, "%s", s);
}

/*
 * libvmmalloc_create -- (internal) create a memory pool in a temp file
 */
static VMEM *
libvmmalloc_create(const char *dir, size_t size)
{
	LOG(3, "dir \"%s\" size %zu", dir, size);

	if (size < VMMALLOC_MIN_POOL) {
		LOG(1, "size %zu smaller than %zu", size, VMMALLOC_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	/* silently enforce multiple of page size */
	size = roundup(size, Pagesize);

	Fd = util_tmpfile(dir, "/vmem.XXXXXX", O_EXCL);
	if (Fd == -1)
		return NULL;

	if ((errno = os_posix_fallocate(Fd, 0, (os_off_t)size)) != 0) {
		ERR("!posix_fallocate");
		(void) os_close(Fd);
		return NULL;
	}

	void *addr;
	if ((addr = util_map(Fd, size, MAP_SHARED, 0, 4 << 20, NULL)) == NULL) {
		(void) os_close(Fd);
		return NULL;
	}

	/* store opaque info at beginning of mapped area */
	struct vmem *vmp = addr;
	memset(&vmp->hdr, '\0', sizeof(vmp->hdr));
	memcpy(vmp->hdr.signature, VMEM_HDR_SIG, POOL_HDR_SIG_LEN);
	vmp->addr = addr;
	vmp->size = size;
	vmp->caller_mapped = 0;

	/* Prepare pool for jemalloc */
	if (je_vmem_pool_create((void *)((uintptr_t)addr + Header_size),
			size - Header_size, 1 /* zeroed */,
			1 /* empty */) == NULL) {
		LOG(1, "vmem pool creation failed");
		util_unmap(vmp->addr, vmp->size);
		return NULL;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof(struct pool_hdr));

	LOG(3, "vmp %p", vmp);
	return vmp;
}

/*
 * libvmmalloc_clone - (internal) clone the entire pool
 */
static int
libvmmalloc_clone(void)
{
	LOG(3, NULL);
	int err;
	Fd_clone = util_tmpfile(Dir, "/vmem.XXXXXX", O_EXCL);
	if (Fd_clone == -1)
		return -1;

	err = os_posix_fallocate(Fd_clone, 0, (os_off_t)Vmp->size);
	if (err != 0) {
		errno = err;
		ERR("!posix_fallocate");
		goto err_close;
	}

	void *addr = mmap(NULL, Vmp->size, PROT_READ|PROT_WRITE,
			MAP_SHARED, Fd_clone, 0);
	if (addr == MAP_FAILED) {
		LOG(1, "!mmap");
		goto err_close;
	}

	LOG(3, "copy the entire pool file: dst %p src %p size %zu",
			addr, Vmp->addr, Vmp->size);

	util_range_rw(Vmp->addr, sizeof(struct pool_hdr));

	/*
	 * Part of vmem pool was probably freed at some point, so Valgrind
	 * marked it as undefined/inaccessible. We need to duplicate the whole
	 * pool, so as a workaround temporarily disable error reporting.
	 */
	VALGRIND_DO_DISABLE_ERROR_REPORTING;
	memcpy(addr, Vmp->addr, Vmp->size);
	VALGRIND_DO_ENABLE_ERROR_REPORTING;

	if (munmap(addr, Vmp->size)) {
		ERR("!munmap");
		goto err_close;
	}
	util_range_none(Vmp->addr, sizeof(struct pool_hdr));
	return 0;

err_close:
	(void) os_close(Fd_clone);
	return -1;
}

/*
 * remap_as_private -- (internal) remap the pool as private
 */
static void
remap_as_private(void)
{
	LOG(3, "remap the pool file as private");

	void *r = mmap(Vmp->addr, Vmp->size, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_FIXED, Fd, 0);

	if (r == MAP_FAILED) {
		out_log(NULL, 0, NULL, 0,
			"Error (libvmmalloc): remapping failed\n");
		abort();
	}

	if (r != Vmp->addr) {
		out_log(NULL, 0, NULL, 0,
			"Error (libvmmalloc): wrong address\n");
		abort();
	}

	Private = 1;
}

/*
 * libvmmalloc_prefork -- (internal) prepare for fork()
 *
 * Clones the entire pool or remaps it with MAP_PRIVATE flag.
 */
static void
libvmmalloc_prefork(void)
{
	LOG(3, NULL);

	/*
	 * There's no need to grab any locks here, as jemalloc pre-fork handler
	 * is executed first, and it does all the synchronization.
	 */

	ASSERTne(Vmp, NULL);
	ASSERTne(Dir, NULL);

	if (Private) {
		LOG(3, "already mapped as private - do nothing");
		return;
	}

	switch (Forkopt) {
	case 3:
		/* clone the entire pool; if it fails - remap it as private */
		LOG(3, "clone or remap");

	case 2:
		LOG(3, "clone the entire pool file");

		if (libvmmalloc_clone() == 0)
			break;

		if (Forkopt == 2) {
			out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
					"pool cloning failed\n");
			abort();
		}
		/* cloning failed; fall-thru to remapping */

	case 1:
		remap_as_private();
		break;

	case 0:
		LOG(3, "do nothing");
		break;

	default:
		FATAL("invalid fork action %d", Forkopt);
	}
}

/*
 * libvmmalloc_postfork_parent -- (internal) parent post-fork handler
 */
static void
libvmmalloc_postfork_parent(void)
{
	LOG(3, NULL);

	if (Forkopt == 0) {
		/* do nothing */
		return;
	}

	if (Private) {
		LOG(3, "pool mapped as private - do nothing");
	} else {
		LOG(3, "close the cloned pool file");
		(void) os_close(Fd_clone);
	}
}

/*
 * libvmmalloc_postfork_child -- (internal) child post-fork handler
 */
static void
libvmmalloc_postfork_child(void)
{
	LOG(3, NULL);

	if (Forkopt == 0) {
		/* do nothing */
		return;
	}

	if (Private) {
		LOG(3, "pool mapped as private - do nothing");
	} else {
		LOG(3, "close the original pool file");
		(void) os_close(Fd);
		Fd = Fd_clone;

		void *addr = Vmp->addr;
		size_t size = Vmp->size;

		LOG(3, "mapping cloned pool file at %p", addr);
		Vmp = mmap(addr, size, PROT_READ|PROT_WRITE,
				MAP_SHARED|MAP_FIXED, Fd, 0);
		if (Vmp == MAP_FAILED) {
			out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
					"mapping failed\n");
			abort();
		}

		if (Vmp != addr) {
			out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
					"wrong address\n");
			abort();
		}
	}

	/* XXX - open a new log file, with the new PID in the name */
}

/*
 * libvmmalloc_init -- load-time initialization for libvmmalloc
 *
 * Called automatically by the run-time loader.
 * The constructor priority guarantees this is executed before
 * libjemalloc constructor.
 */
__attribute__((constructor(101)))
static void
libvmmalloc_init(void)
{
	char *env_str;
	size_t size;

	/*
	 * Register fork handlers before jemalloc initialization.
	 * This provides the correct order of fork handlers execution.
	 * Note that the first malloc() will trigger jemalloc init, so we
	 * have to register fork handlers before the call to out_init(),
	 * as it may indirectly call malloc() when opening the log file.
	 */
	if (os_thread_atfork(libvmmalloc_prefork,
			libvmmalloc_postfork_parent,
			libvmmalloc_postfork_child) != 0) {
		perror("Error (libvmmalloc): os_thread_atfork");
		abort();
	}

	common_init(VMMALLOC_LOG_PREFIX, VMMALLOC_LOG_LEVEL_VAR,
			VMMALLOC_LOG_FILE_VAR, VMMALLOC_MAJOR_VERSION,
			VMMALLOC_MINOR_VERSION);
	out_set_vsnprintf_func(je_vmem_navsnprintf);
	LOG(3, NULL);

	/* set up jemalloc messages to a custom print function */
	je_vmem_malloc_message = print_jemalloc_messages;

	Header_size = roundup(sizeof(VMEM), Pagesize);

	if ((Dir = os_getenv(VMMALLOC_POOL_DIR_VAR)) == NULL) {
		out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
				"environment variable %s not specified",
				VMMALLOC_POOL_DIR_VAR);
		abort();
	}

	if ((env_str = os_getenv(VMMALLOC_POOL_SIZE_VAR)) == NULL) {
		out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
				"environment variable %s not specified",
				VMMALLOC_POOL_SIZE_VAR);
		abort();
	} else {
		long long v = atoll(env_str);
		if (v < 0) {
			out_log(NULL, 0, NULL, 0,
				"Error (libvmmalloc): negative %s",
				VMMALLOC_POOL_SIZE_VAR);
			abort();
		}

		size = (size_t)v;
	}

	if (size < VMMALLOC_MIN_POOL) {
		out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
				"%s value is less than minimum (%zu < %zu)",
				VMMALLOC_POOL_SIZE_VAR, size,
				VMMALLOC_MIN_POOL);
		abort();
	}

	if ((env_str = os_getenv(VMMALLOC_FORK_VAR)) != NULL) {
		Forkopt = atoi(env_str);
		if (Forkopt < 0 || Forkopt > 3) {
			out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
					"incorrect %s value (%d)",
					VMMALLOC_FORK_VAR, Forkopt);
				abort();
		}
#ifdef __FreeBSD__
		if (Forkopt > 1) {
			out_log(NULL, 0, NULL, 0, "Error (libvmmalloc): "
					"%s value %d not supported on FreeBSD",
					VMMALLOC_FORK_VAR, Forkopt);
				abort();
		}
#endif
		LOG(4, "Fork action %d", Forkopt);
	}

	/*
	 * XXX - vmem_create() could be used here, but then we need to
	 * link vmem.o, including all the vmem API.
	 */
	Vmp = libvmmalloc_create(Dir, size);
	if (Vmp == NULL) {
		out_log(NULL, 0, NULL, 0, "!Error (libvmmalloc): "
				"vmem pool creation failed");
		abort();
	}

	LOG(2, "initialization completed");
}

/*
 * libvmmalloc_fini -- libvmmalloc cleanup routine
 *
 * Called automatically when the process terminates and prints
 * some basic allocator statistics.
 */
__attribute__((destructor(102)))
static void
libvmmalloc_fini(void)
{
	LOG(3, NULL);

	char *env_str = os_getenv(VMMALLOC_LOG_STATS_VAR);
	if ((env_str != NULL) && strcmp(env_str, "1") == 0) {
		LOG_NONL(0, "\n=========   system heap  ========\n");
		je_vmem_malloc_stats_print(
			print_jemalloc_stats, NULL, "gba");

		LOG_NONL(0, "\n=========    vmem pool   ========\n");
		je_vmem_pool_malloc_stats_print(
			(pool_t *)((uintptr_t)Vmp + Header_size),
			print_jemalloc_stats, NULL, "gba");
	}

	common_fini();
	Destructed = true;
}