Blob Blame History Raw
/*
 * libhugetlbfs - Easy use of Linux hugepages
 * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sched.h>

#include <hugetlbfs.h>
#include "hugetests.h"

/*
 * Test rationale:
 *
 * On PowerPC, the address space is divided into segments.  These segments can
 * contain either huge pages or normal pages, but not both.  All segments are
 * initially set up to map normal pages.  When a huge page mapping is created
 * within a set of empty segments, they are "enabled" for huge pages at that
 * time.  Once enabled for huge pages, they can not be used again for normal
 * pages for the remaining lifetime of the process.
 *
 * If the segment immediately preceding the segment containing the stack is
 * converted to huge pages and the stack is made to grow into this
 * preceding segment, some kernels may attempt to map normal pages into the
 * huge page-only segment -- resulting in bugs.
 *
 * The kernel bug in question was fixed by commit
 * 0d59a01bc461bbab4017ff449b8401151ef44cf6.
 */

/*
 * Number of bytes do_child() takes from its stack with alloca() on every
 * loop iteration: 256 MiB when __LP64__ is defined, 16 MiB otherwise.
 */
#ifdef __LP64__
#define STACK_ALLOCATION_SIZE	(256*1024*1024)
#else
#define STACK_ALLOCATION_SIZE	(16*1024*1024)
#endif

/* Size (2 MiB) of the MAP_GROWSDOWN region initially mapped for the child. */
#define MIN_CHILD_STACK (2*1024*1024)
/*
 * Increment used by try_setup_stack_and_huge() when searching for free
 * address ranges; it is the same value as one child stack allocation.
 */
#define STEP (STACK_ALLOCATION_SIZE)

/*
 * Entry point of the cloned child: keep consuming stack in
 * STACK_ALLOCATION_SIZE chunks, touching each new chunk, until the most
 * recent chunk lies below stop_address.
 *
 * stop_address: address below which the stack no longer has to grow
 *
 * Returns 0 if the loop ever terminates normally.
 */
int do_child(void *stop_address)
{
	volatile int *probe;
	struct rlimit core_limit;

	/*
	 * A core file from this process is of no interest, and keeping it
	 * tiny saves a lot of time.  The value '1' is special: it also
	 * aborts dumping through a pipe, which by default sets the limit
	 * to RLIM_INFINITY.
	 */
	core_limit.rlim_max = 1;
	core_limit.rlim_cur = 1;
	setrlimit(RLIMIT_CORE, &core_limit);

	for (;;) {
		/* Carve out the next chunk and write to its lowest address. */
		probe = alloca(STACK_ALLOCATION_SIZE);
		*probe = 1;

		if ((void *)probe < stop_address)
			break;
	}

	return 0;
}

/*
 * Map one huge page from "fd" at some address above "hint", then map a
 * small MAP_GROWSDOWN region above that huge page with nothing mapped in
 * between, so that a stack growing down from the returned address runs
 * into the huge page.
 *
 * fd:   descriptor handed to mmap() for the huge page mapping
 * hint: address from which the upward search starts
 *
 * Returns the top (highest address) of the new stack region, or NULL if no
 * suitable layout was found below this function's own stack frame.  On
 * failures that occur after the huge page was mapped, the huge page is
 * unmapped again before returning.
 */
void *try_setup_stack_and_huge(int fd, void *hint)
{
	void *mmap_address, *stack_start;
	/*
	 * Must start out as MAP_FAILED: "continue" inside a do-while jumps
	 * straight to the loop condition, which would otherwise read an
	 * uninitialized value when the very first candidate is already
	 * mapped and could end the search without any huge page mapping.
	 */
	void *tmp = MAP_FAILED;
	long hpage_size = gethugepagesize();
	/* An address inside the current stack frame; upper search bound. */
	void *stop = alloca(1);

	/*
	 * Find a spot for huge page. We start at "hint" and
	 * keep going up in "STEP" increments until we find
	 * a place where we can mmap huge page.
	 */
	mmap_address = PALIGN(hint, hpage_size);
	do {
		mmap_address += STEP;
		if (mmap_address >= stop)
			return NULL;
		/* MAP_FIXED would replace an existing mapping, so skip it. */
		if (range_is_mapped((unsigned long)mmap_address,
			(unsigned long)mmap_address + hpage_size))
			continue;
		tmp = mmap(mmap_address, hpage_size,
			PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
	} while (tmp == MAP_FAILED);
	verbose_printf("huge page is at: %p-%p\n",
		mmap_address, mmap_address + hpage_size);

	/*
	 * Find a spot for stack above huge page. We start at end of
	 * huge page we found above and keep trying to mmap stack
	 * further up. Because stack needs to grow into hugepage, we
	 * also have to make sure nothing is mapped in gap between
	 * stack and huge page.
	 */
	stack_start = mmap_address + hpage_size;
	do {
		if (range_is_mapped((unsigned long)stack_start,
			(unsigned long)stack_start + STEP + MIN_CHILD_STACK)) {
			verbose_printf("range is mapped: %p-%p\n", stack_start,
				stack_start + STEP + MIN_CHILD_STACK);
			munmap(mmap_address, hpage_size);
			return NULL;
		}
		stack_start += STEP;
		if (stack_start >= stop) {
			/* Do not leave the huge page mapped on failure. */
			munmap(mmap_address, hpage_size);
			return NULL;
		}
		tmp = mmap(stack_start, MIN_CHILD_STACK, PROT_READ|PROT_WRITE,
			MAP_GROWSDOWN|MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
	} while (tmp == MAP_FAILED);

	verbose_printf("Child stack is at %p-%p\n",
		stack_start, stack_start + MIN_CHILD_STACK);
	return stack_start + MIN_CHILD_STACK;
}

int main(int argc, char *argv[])
{
	int fd, pid, s, ret;
	struct rlimit r;
	void *stack_end;

	test_init(argc, argv);

	ret = getrlimit(RLIMIT_STACK, &r);
	if (ret)
		CONFIG("getrlimit failed: %s", strerror(errno));

	if (r.rlim_cur != RLIM_INFINITY)
		CONFIG("Stack rlimit must be 'unlimited'");

	fd = hugetlbfs_unlinked_fd();
	if (fd < 0)
		CONFIG("Couldn't get hugepage fd");

	stack_end = try_setup_stack_and_huge(fd, sbrk(0));
	if (!stack_end)
		PASS_INCONCLUSIVE();

	pid = clone(do_child, stack_end, SIGCHLD, 0);
	if (pid < 0)
		FAIL("clone: %s", strerror(errno));

	ret = waitpid(pid, &s, 0);
	if (ret == -1)
		FAIL("waitpid: %s", strerror(errno));

	/*
	 * The child grows its stack until a failure occurs.  We expect
	 * this to result in a SIGSEGV.  If any other signal is
	 * delivered (ie. SIGTRAP) or no signal is sent at all, we
	 * determine the kernel has not behaved correctly and trigger a
	 * test failure.
	 */
	if (WIFSIGNALED(s)) {
		int sig = WTERMSIG(s);

		if (sig == SIGSEGV) {
			PASS();
		} else {
			FAIL("Got unexpected signal: %s", strsignal(sig));
		}
	}
	FAIL("Child not signalled");
}