Blob Blame History Raw
/*
 * Test rationale:
 *
 * This test is designed to detect a kernel allocation race introduced
 * with hugepage demand-faulting.  The problem is that no lock is held
 * between allocating a hugepage and instantiating it in the
 * pagetables or page cache index.  In between the two, the (huge)
 * page is cleared, so there's substantial time.  Thus two processes
 * can race instantiating the (same) last available hugepage - one
 * will fail on the allocation, and thus cause an OOM fault even
 * though the page it actually wants is being instantiated by the
 * other racing process.
 *
 *
 * libhugetlbfs - Easy use of Linux hugepages
 * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sched.h>
#include <signal.h>
#include <sys/wait.h>
#include <pthread.h>
#include <linux/unistd.h>

#include <hugetlbfs.h>

#include "hugetests.h"

pid_t gettid(void)
{
	return syscall(__NR_gettid);
}

static long hpage_size;
static pid_t child1, child2;
static pthread_t thread1, thread2;

void cleanup(void)
{
	if (child1)
		kill(child1, SIGKILL);
	if (child2)
		kill(child2, SIGKILL);
}

static int one_racer(void *p, int cpu,
		     volatile int *mytrigger, volatile int *othertrigger)
{
	volatile int *pi = p;
	cpu_set_t cpuset;
	int err;

	/* Split onto different cpus to encourage the race */
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	err = sched_setaffinity(gettid(), CPU_SETSIZE/8, &cpuset);
	if (err != 0)
		CONFIG("sched_setaffinity(cpu%d): %s", cpu, strerror(errno));

	/* Ready.. */
	*mytrigger = 1;
	/* Set.. */
	while (! *othertrigger)
		;

	/* Instantiate! */
	*pi = 1;

	return 0;
}

static void proc_racer(void *p, int cpu,
		       volatile int *mytrigger, volatile int *othertrigger)
{
	exit(one_racer(p, cpu, mytrigger, othertrigger));
}

struct racer_info {
	void *p; /* instantiation address */
	int cpu;
	int race_type;
	volatile int *mytrigger;
	volatile int *othertrigger;
	int status;
};

static void *thread_racer(void *info)
{
	struct racer_info *ri = info;

	one_racer(ri->p, ri->cpu, ri->mytrigger, ri->othertrigger);
	return ri;
}
static void run_race(void *syncarea, int race_type)
{
	volatile int *trigger1, *trigger2;
	int fd;
	void *p;
	int status1, status2;
	int online_cpus[2], ret;

	check_online_cpus(online_cpus, 2);

	memset(syncarea, 0, sizeof(*trigger1) + sizeof(*trigger2));
	trigger1 = syncarea;
	trigger2 = trigger1 + 1;

	/* Get a new file for the final page */
	fd = hugetlbfs_unlinked_fd();
	if (fd < 0)
		FAIL("hugetlbfs_unlinked_fd()");

	verbose_printf("Mapping final page.. ");
	p = mmap(NULL, hpage_size, PROT_READ|PROT_WRITE, race_type, fd, 0);
	if (p == MAP_FAILED)
		FAIL("mmap(): %s", strerror(errno));
	verbose_printf("%p\n", p);

	if (race_type == MAP_SHARED) {
		child1 = fork();
		if (child1 < 0)
			FAIL("fork(): %s", strerror(errno));
		if (child1 == 0)
			proc_racer(p, online_cpus[0], trigger1, trigger2);

		child2 = fork();
		if (child2 < 0)
			FAIL("fork(): %s", strerror(errno));
		if (child2 == 0)
			proc_racer(p, online_cpus[1], trigger2, trigger1);

		/* wait() calls */
		ret = waitpid(child1, &status1, 0);
		if (ret < 0)
			FAIL("waitpid() child 1: %s", strerror(errno));
		verbose_printf("Child 1 status: %x\n", status1);


		ret = waitpid(child2, &status2, 0);
		if (ret < 0)
			FAIL("waitpid() child 2: %s", strerror(errno));
		verbose_printf("Child 2 status: %x\n", status2);

		if (WIFSIGNALED(status1))
			FAIL("Child 1 killed by signal %s",
			     strsignal(WTERMSIG(status1)));
		if (WIFSIGNALED(status2))
		FAIL("Child 2 killed by signal %s",
		     strsignal(WTERMSIG(status2)));

		status1 = WEXITSTATUS(status1);
		status2 = WEXITSTATUS(status2);
	} else {
		struct racer_info ri1 = {
			.p = p,
			.cpu = online_cpus[0],
			.mytrigger = trigger1,
			.othertrigger = trigger2,
		};
		struct racer_info ri2 = {
			.p = p,
			.cpu = online_cpus[1],
			.mytrigger = trigger2,
			.othertrigger = trigger1,
		};
		void *tret1, *tret2;

		ret = pthread_create(&thread1, NULL, thread_racer, &ri1);
		if (ret != 0)
			FAIL("pthread_create() 1: %s\n", strerror(errno));

		ret = pthread_create(&thread2, NULL, thread_racer, &ri2);
		if (ret != 0)
			FAIL("pthread_create() 2: %s\n", strerror(errno));

		ret = pthread_join(thread1, &tret1);
		if (ret != 0)
			FAIL("pthread_join() 1: %s\n", strerror(errno));
		if (tret1 != &ri1)
			FAIL("Thread 1 returned %p not %p, killed?\n",
			     tret1, &ri1);
		ret = pthread_join(thread2, &tret2);
		if (ret != 0)
			FAIL("pthread_join() 2: %s\n", strerror(errno));
		if (tret2 != &ri2)
			FAIL("Thread 2 returned %p not %p, killed?\n",
			     tret2, &ri2);

		status1 = ri1.status;
		status2 = ri2.status;
	}

	if (status1 != 0)
		FAIL("Racer 1 terminated with code %d", status1);

	if (status2 != 0)
		FAIL("Racer 2 terminated with code %d", status2);
}

int main(int argc, char *argv[])
{
	unsigned long totpages;
	int fd;
	void *p, *q;
	unsigned long i;
	int race_type;

	test_init(argc, argv);

	if (argc != 2)
		CONFIG("Usage: alloc-instantiate-race <private|shared>");

	hpage_size = check_hugepagesize();
	totpages = get_huge_page_counter(hpage_size, HUGEPAGES_FREE);

	if (strcmp(argv[1], "shared") == 0) {
		race_type = MAP_SHARED;
	} else if (strcmp(argv[1], "private") == 0) {
		race_type = MAP_PRIVATE;
	} else {
		CONFIG("Usage: alloc-instantiate-race <private|shared>");
	}

	fd = hugetlbfs_unlinked_fd();
	if (fd < 0)
		FAIL("hugetlbfs_unlinked_fd()");

	/* Get a shared normal page for synchronization */
	verbose_printf("Mapping synchronization area..");
	q = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
		 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
	if (q == MAP_FAILED)
		FAIL("mmap() sync area: %s", strerror(errno));
	verbose_printf("done\n");

	verbose_printf("Mapping %ld/%ld pages.. ", totpages-1, totpages);
	p = mmap(NULL, (totpages-1)*hpage_size, PROT_READ|PROT_WRITE,
		 MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		FAIL("mmap() 1: %s", strerror(errno));

	/* Allocate all save one of the pages up front */
	verbose_printf("instantiating.. ");
	for (i = 0; i < (totpages - 1); i++)
		memset(p + (i * hpage_size), 0, sizeof(int));
	verbose_printf("done\n");

	run_race(q, race_type);

	PASS();
}