/*
 * libhugetlbfs - Easy use of Linux hugepages
 * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <dlfcn.h>
#include <string.h>
#include <fcntl.h>

#include "hugetlbfs.h"

#include "libhugetlbfs_internal.h"

/*
 * File descriptor handed to mmap()/ftruncate() for the heap.  Set to -1 by
 * hugetlbfs_setup_morecore() when no hugetlbfs file is needed.
 */
static int heap_fd;

/* Address at which the heap managed by the morecore hooks starts. */
static void *heapbase;
/* Current top of the heap as reported back to malloc. */
static void *heaptop;
/* Number of bytes currently mapped (or obtained via sbrk) from heapbase. */
static long mapsize;
/* Huge page size in bytes; heap growth and shrinkage are aligned to it. */
static long hpage_size;

static long hugetlbfs_next_addr(long addr)
Packit 2d622a
{
Packit 2d622a
#if defined(__powerpc64__)
Packit 2d622a
	return ALIGN(addr, 1L << SLICE_HIGH_SHIFT);
Packit 2d622a
#elif defined(__powerpc__) && !defined(PPC_NO_SEGMENTS)
Packit 2d622a
	return ALIGN(addr, 1L << SLICE_LOW_SHIFT);
Packit 2d622a
#elif defined(__ia64__)
Packit 2d622a
	if (addr < (1UL << SLICE_HIGH_SHIFT))
Packit 2d622a
		return ALIGN(addr, 1UL << SLICE_HIGH_SHIFT);
Packit 2d622a
	else
Packit 2d622a
		return ALIGN(addr, hpage_size);
Packit 2d622a
#else
Packit 2d622a
	return ALIGN(addr, hpage_size);
Packit 2d622a
#endif
Packit 2d622a
}
Packit 2d622a
Packit 2d622a
/*
Packit 2d622a
 * Our plan is to ask for pages 'roughly' at the BASE.  We expect and
Packit 2d622a
 * require the kernel to offer us sequential pages from wherever it
Packit 2d622a
 * first gave us a page.  If it does not do so, we return the page and
Packit 2d622a
 * pretend there are none this covers us for the case where another
Packit 2d622a
 * map is in the way.  This is required because 'morecore' must have
Packit 2d622a
 * 'sbrk' semantics, ie. return sequential, contigious memory blocks.
Packit 2d622a
 * Luckily, if it does not do so and we error out malloc will happily
Packit 2d622a
 * go back to small pages and use mmap to get them.  Hurrah.
Packit 2d622a
 */
Packit 2d622a
static void *hugetlbfs_morecore(ptrdiff_t increment)
Packit 2d622a
{
Packit 2d622a
	int ret;
Packit 2d622a
	void *p;
Packit 2d622a
	long delta;
Packit 2d622a
	int mmap_reserve = __hugetlb_opts.no_reserve ? MAP_NORESERVE : 0;
Packit 2d622a
	int mmap_hugetlb = 0;
Packit 2d622a
	int using_default_pagesize =
Packit 2d622a
		(hpage_size == kernel_default_hugepage_size());
Packit 2d622a
Packit 2d622a
	INFO("hugetlbfs_morecore(%ld) = ...\n", (long)increment);
Packit 2d622a
Packit 2d622a
	/*
Packit 2d622a
	 * how much to grow the heap by =
Packit 2d622a
	 * 	(size of heap) + malloc request - mmap'd space
Packit 2d622a
	 */
Packit 2d622a
	delta = (heaptop-heapbase) + increment - mapsize;
Packit 2d622a
Packit 2d622a
	INFO("heapbase = %p, heaptop = %p, mapsize = %lx, delta=%ld\n",
Packit 2d622a
	      heapbase, heaptop, mapsize, delta);
Packit 2d622a
Packit 2d622a
	/* align to multiple of hugepagesize. */
Packit 2d622a
	delta = ALIGN(delta, hpage_size);
Packit 2d622a
Packit 2d622a
#ifdef MAP_HUGETLB
Packit 2d622a
	mmap_hugetlb = MAP_HUGETLB;
Packit 2d622a
#endif
Packit 2d622a
Packit 2d622a
	if (delta > 0) {
Packit 2d622a
		/* growing the heap */
Packit 2d622a
Packit 2d622a
		INFO("Attempting to map %ld bytes\n", delta);
Packit 2d622a
Packit 2d622a
		/* map in (extend) more of the file at the end of our last map */
Packit 2d622a
		if (__hugetlb_opts.map_hugetlb && using_default_pagesize)
Packit 2d622a
			p = mmap(heapbase + mapsize, delta, PROT_READ|PROT_WRITE,
Packit 2d622a
				 mmap_hugetlb|MAP_ANONYMOUS|MAP_PRIVATE|mmap_reserve,
Packit 2d622a
				 heap_fd, mapsize);
Packit 2d622a
		else
Packit 2d622a
			p = mmap(heapbase + mapsize, delta, PROT_READ|PROT_WRITE,
Packit 2d622a
				 MAP_PRIVATE|mmap_reserve, heap_fd, mapsize);
Packit 2d622a
Packit 2d622a
		if (p == MAP_FAILED) {
Packit 2d622a
			WARNING("New heap segment map at %p failed: %s\n",
Packit 2d622a
				heapbase+mapsize, strerror(errno));
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		/* if this is the first map */
Packit 2d622a
		if (! mapsize) {
Packit 2d622a
			if (heapbase && (heapbase != p)) {
Packit 2d622a
				WARNING("Heap originates at %p instead of %p\n",
Packit 2d622a
					p, heapbase);
Packit 2d622a
				if (__hugetlbfs_debug)
Packit 2d622a
					dump_proc_pid_maps();
Packit 2d622a
			}
Packit 2d622a
			/* then setup the heap variables */
Packit 2d622a
			heapbase = heaptop = p;
Packit 2d622a
		} else if (p != (heapbase + mapsize)) {
Packit 2d622a
			/* Couldn't get the mapping where we wanted */
Packit 2d622a
			munmap(p, delta);
Packit 2d622a
			WARNING("New heap segment mapped at %p instead of %p\n",
Packit 2d622a
			      p, heapbase + mapsize);
Packit 2d622a
			if (__hugetlbfs_debug)
Packit 2d622a
				dump_proc_pid_maps();
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		/* Fault the region to ensure accesses succeed */
Packit 2d622a
		if (hugetlbfs_prefault(p, delta) != 0) {
Packit 2d622a
			munmap(p, delta);
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		/* we now have mmap'd further */
Packit 2d622a
		mapsize += delta;
Packit 2d622a
	} else if (delta < 0) {
Packit 2d622a
		/* shrinking the heap */
Packit 2d622a
Packit 2d622a
		if (!__hugetlb_opts.shrink_ok) {
Packit 2d622a
			/* shouldn't ever get here */
Packit 2d622a
			WARNING("Heap shrinking is turned off\n");
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		if (!mapsize) {
Packit 2d622a
			WARNING("Can't shrink empty heap!\n");
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		/*
Packit 2d622a
		 * If we are forced to change the heapaddr from the
Packit 2d622a
		 * original brk() value we have violated brk semantics
Packit 2d622a
		 * (which we are not supposed to do).  This shouldn't
Packit 2d622a
		 * pose a problem until glibc tries to trim the heap to an
Packit 2d622a
		 * address lower than what we aligned heapaddr to.  At that
Packit 2d622a
		 * point the alignment "gap" causes heap corruption.
Packit 2d622a
		 * So we don't allow the heap to shrink below heapbase.
Packit 2d622a
		 */
Packit 2d622a
		if (mapsize + delta < 0) {  /* remember: delta is negative */
Packit 2d622a
			WARNING("Unable to shrink heap below %p\n", heapbase);
Packit 2d622a
			/* unmap just what is currently mapped */
Packit 2d622a
			delta = -mapsize;
Packit 2d622a
			/* we need heaptop + increment == heapbase, so: */
Packit 2d622a
			increment = heapbase - heaptop;
Packit 2d622a
		}
Packit 2d622a
		INFO("Attempting to unmap %ld bytes @ %p\n", -delta,
Packit 2d622a
			heapbase + mapsize + delta);
Packit 2d622a
		ret = munmap(heapbase + mapsize + delta, -delta);
Packit 2d622a
		if (ret) {
Packit 2d622a
			WARNING("Unmapping failed while shrinking heap: "
Packit 2d622a
				"%s\n", strerror(errno));
Packit 2d622a
		} else {
Packit 2d622a
			mapsize += delta;
Packit 2d622a
			/*
Packit 2d622a
			* the glibc assumes by default that newly allocated
Packit 2d622a
			* memory by morecore() will be zeroed.  It would be
Packit 2d622a
			* wasteful to do it for allocation so we only shrink
Packit 2d622a
			* the top by the size of a page.
Packit 2d622a
			*/
Packit 2d622a
			increment = heapbase - heaptop + mapsize;
Packit 2d622a
Packit 2d622a
			if (!__hugetlb_opts.map_hugetlb && !using_default_pagesize){
Packit 2d622a
Packit 2d622a
				/*
Packit 2d622a
				* Now shrink the hugetlbfs file.
Packit 2d622a
				*/
Packit 2d622a
				ret = ftruncate(heap_fd, mapsize);
Packit 2d622a
				if (ret) {
Packit 2d622a
					WARNING("Could not truncate hugetlbfs file to "
Packit 2d622a
						"shrink heap: %s\n", strerror(errno));
Packit 2d622a
				}
Packit 2d622a
			}
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
	}
Packit 2d622a
	else if (increment < 0) {
Packit 2d622a
		/* Don't shrink by less than a page to avoid having to zero
Packit 2d622a
		 * the memory.  There is no point in lying to glibc since
Packit 2d622a
		 * we're not freeing any memory.
Packit 2d622a
		 */
Packit 2d622a
		increment = 0;
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	/* heap is continuous */
Packit 2d622a
	p = heaptop;
Packit 2d622a
	/* and we now have added this much more space to the heap */
Packit 2d622a
	heaptop = heaptop + increment;
Packit 2d622a
Packit 2d622a
	INFO("... = %p\n", p);
Packit 2d622a
	return p;
Packit 2d622a
}
Packit 2d622a
Packit 2d622a
static void *thp_morecore(ptrdiff_t increment)
Packit 2d622a
{
Packit 2d622a
	void *p;
Packit 2d622a
	long delta;
Packit 2d622a
Packit 2d622a
	INFO("thp_morecore(%ld) = ...\n", (long)increment);
Packit 2d622a
Packit 2d622a
	delta = (heaptop - heapbase) + increment - mapsize;
Packit 2d622a
	delta = ALIGN(delta, hpage_size);
Packit 2d622a
Packit 2d622a
	if (delta > 0) {
Packit 2d622a
		/*
Packit 2d622a
		 * This first time we expand the mapping we need to account for
Packit 2d622a
		 * the initial heap mapping not necessarily being huge page
Packit 2d622a
		 * aligned
Packit 2d622a
		 */
Packit 2d622a
		if (!mapsize)
Packit 2d622a
			delta = hugetlbfs_next_addr((long)heapbase + delta) -
Packit 2d622a
					(unsigned long)heapbase;
Packit 2d622a
Packit 2d622a
		INFO("Adding %ld bytes to heap\n", delta);
Packit 2d622a
Packit 2d622a
		p = sbrk(delta);
Packit 2d622a
		if (p == (void *)-1) {
Packit 2d622a
			WARNING("sbrk returned ENOMEM\n");
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		if (!mapsize) {
Packit 2d622a
			if (heapbase && (heapbase != p)) {
Packit 2d622a
				WARNING("Heap was expected at %p instead of %p, "
Packit 2d622a
					"heap has been modified by someone else!\n",
Packit 2d622a
					heapbase, p);
Packit 2d622a
				if (__hugetlbfs_debug)
Packit 2d622a
					dump_proc_pid_maps();
Packit 2d622a
			}
Packit 2d622a
			heapbase = heaptop = p;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		mapsize += delta;
Packit 2d622a
#ifdef MADV_HUGEPAGE
Packit 2d622a
		madvise(p, delta, MADV_HUGEPAGE);
Packit 2d622a
#endif
Packit 2d622a
	} else if (delta < 0) {
Packit 2d622a
		/* shrinking the heap */
Packit 2d622a
		if (!mapsize) {
Packit 2d622a
			WARNING("Can't shrink an empty heap\n");
Packit 2d622a
			return NULL;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		INFO("Attempting to shrink heap by %ld bytes with sbrk\n",
Packit 2d622a
			-delta);
Packit 2d622a
		p = sbrk(delta);
Packit 2d622a
		if (p == (void *)-1) {
Packit 2d622a
			WARNING("Unable to shrink heap\n");
Packit 2d622a
			return heaptop;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		mapsize += delta;
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	p = heaptop;
Packit 2d622a
	heaptop += increment;
Packit 2d622a
	INFO("... = %p\n", p);
Packit 2d622a
	return p;
Packit 2d622a
}
Packit 2d622a
Packit 2d622a
void hugetlbfs_setup_morecore(void)
Packit 2d622a
{
Packit 2d622a
	char *ep;
Packit 2d622a
	unsigned long heapaddr;
Packit 2d622a
Packit 2d622a
	if (! __hugetlb_opts.morecore)
Packit 2d622a
		return;
Packit 2d622a
	if (strcasecmp(__hugetlb_opts.morecore, "no") == 0) {
Packit 2d622a
		INFO("HUGETLB_MORECORE=%s, not setting up morecore\n",
Packit 2d622a
						__hugetlb_opts.morecore);
Packit 2d622a
		return;
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	/*
Packit 2d622a
	 * Determine the page size that will be used for the heap.
Packit 2d622a
	 * This can be set explicitly by setting HUGETLB_MORECORE to a valid
Packit 2d622a
	 * page size string or by setting HUGETLB_DEFAULT_PAGE_SIZE.
Packit 2d622a
	 */
Packit 2d622a
	if (strncasecmp(__hugetlb_opts.morecore, "y", 1) == 0)
Packit 2d622a
		hpage_size = gethugepagesize();
Packit 2d622a
	else if (__hugetlb_opts.thp_morecore)
Packit 2d622a
		hpage_size = kernel_default_hugepage_size();
Packit 2d622a
	else
Packit 2d622a
		hpage_size = parse_page_size(__hugetlb_opts.morecore);
Packit 2d622a
Packit 2d622a
	if (hpage_size <= 0) {
Packit 2d622a
		if (errno == ENOSYS)
Packit 2d622a
			WARNING("Hugepages unavailable\n");
Packit 2d622a
		else if (errno == EOVERFLOW || errno == ERANGE)
Packit 2d622a
			WARNING("Hugepage size too large\n");
Packit 2d622a
		else if (errno == EINVAL)
Packit 2d622a
			WARNING("Invalid huge page size\n");
Packit 2d622a
		else
Packit 2d622a
			WARNING("Hugepage size (%s)\n", strerror(errno));
Packit 2d622a
		return;
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	/*
Packit 2d622a
	 * We won't need an fd for the heap mmaps if we are using MAP_HUGETLB
Packit 2d622a
	 * or we are depending on transparent huge pages
Packit 2d622a
	 */
Packit 2d622a
	if(__hugetlb_opts.thp_morecore || (__hugetlb_opts.map_hugetlb &&
Packit 2d622a
			hpage_size == kernel_default_hugepage_size())) {
Packit 2d622a
		heap_fd = -1;
Packit 2d622a
	} else {
Packit 2d622a
		if (!hugetlbfs_find_path_for_size(hpage_size)) {
Packit 2d622a
			WARNING("Hugepage size %li unavailable", hpage_size);
Packit 2d622a
			return;
Packit 2d622a
		}
Packit 2d622a
Packit 2d622a
		heap_fd = hugetlbfs_unlinked_fd_for_size(hpage_size);
Packit 2d622a
		if (heap_fd < 0) {
Packit 2d622a
			WARNING("Couldn't open hugetlbfs file for morecore\n");
Packit 2d622a
			return;
Packit 2d622a
		}
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	/*
Packit 2d622a
	 * THP morecore uses sbrk to allocate more heap space, counting on the
Packit 2d622a
	 * kernel to back the area with THP.  So setting heapbase is
Packit 2d622a
	 * meaningless if thp_morecore is used.
Packit 2d622a
	 */
Packit 2d622a
	if (!__hugetlb_opts.thp_morecore && __hugetlb_opts.heapbase) {
Packit 2d622a
		heapaddr = strtoul(__hugetlb_opts.heapbase, &ep, 16);
Packit 2d622a
		if (*ep != '\0') {
Packit 2d622a
			WARNING("Can't parse HUGETLB_MORECORE_HEAPBASE: %s\n",
Packit 2d622a
			      __hugetlb_opts.heapbase);
Packit 2d622a
			return;
Packit 2d622a
		}
Packit 2d622a
	} else {
Packit 2d622a
		heapaddr = (unsigned long)sbrk(0);
Packit 2d622a
		if (!__hugetlb_opts.thp_morecore)
Packit 2d622a
			heapaddr = hugetlbfs_next_addr(heapaddr);
Packit 2d622a
	}
Packit 2d622a
Packit 2d622a
	INFO("setup_morecore(): heapaddr = 0x%lx\n", heapaddr);
Packit 2d622a
Packit 2d622a
	heaptop = heapbase = (void *)heapaddr;
Packit 2d622a
	if (__hugetlb_opts.thp_morecore)
Packit 2d622a
		__morecore = &thp_morecore;
Packit 2d622a
	else
Packit 2d622a
		__morecore = &hugetlbfs_morecore;
Packit 2d622a
Packit 2d622a
	/* Set some allocator options more appropriate for hugepages */
Packit 2d622a
Packit 2d622a
	if (__hugetlb_opts.shrink_ok)
Packit 2d622a
		mallopt(M_TRIM_THRESHOLD, hpage_size + hpage_size / 2);
Packit 2d622a
	else
Packit 2d622a
		mallopt(M_TRIM_THRESHOLD, -1);
Packit 2d622a
	mallopt(M_TOP_PAD, hpage_size / 2);
Packit 2d622a
	/* we always want to use our morecore, not ordinary mmap().
Packit 2d622a
	 * This doesn't appear to prohibit malloc() from falling back
Packit 2d622a
	 * to mmap() if we run out of hugepages. */
Packit 2d622a
	mallopt(M_MMAP_MAX, 0);
Packit 2d622a
}