Blob Blame History Raw
/*
 * Copyright 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *
 *     * Neither the name of the copyright holder nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * blk.cpp -- pmemblk benchmarks definitions
 */

#include "benchmark.hpp"
#include "file.h"
#include "libpmem.h"
#include "libpmemblk.h"
#include "libpmempool.h"
#include "os.h"
#include "poolset_util.hpp"
#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <unistd.h>

struct blk_bench;
struct blk_worker;

/*
 * op_type -- backend through which every benchmark operation is issued
 */
enum op_type {
	/* value returned by parse_op_type() for an unrecognized string */
	OP_TYPE_UNKNOWN = 0,
	/* operations go through pmemblk_read() / pmemblk_write() */
	OP_TYPE_BLK = 1,
	/* operations go through pread() / pwrite() on a file descriptor */
	OP_TYPE_FILE = 2,
	/* operations go through memcpy() / pmem_memcpy_persist() */
	OP_TYPE_MEMCPY = 3,
};

/*
 * op_mode -- pattern used to pick the block touched by each operation
 */
enum op_mode {
	/* value returned by parse_op_mode() for an unrecognized string */
	OP_MODE_UNKNOWN = 0,
	/* static: every operation reads/writes the same chunk */
	OP_MODE_STAT = 1,
	/* sequential: operations walk the chunks one after another */
	OP_MODE_SEQ = 2,
	/* random: every operation targets a randomly selected chunk */
	OP_MODE_RAND = 3
};

/*
 * worker_fn -- signature shared by all per-operation read/write routines
 *
 * bb is the benchmark context, args the generic benchmark arguments,
 * bworker the calling worker's context and off the block number to access.
 * The routines in this file return 0 on success and -1 on failure.
 */
typedef int (*worker_fn)(struct blk_bench *bb, struct benchmark_args *args,
			 struct blk_worker *bworker, os_off_t off);

/*
 * blk_args -- command line options specific to the blk benchmarks
 *
 * The fields are filled in by the option parser through the offsets
 * registered in blk_constructor().
 */
struct blk_args {
	/* requested file size; 0 selects PMEMBLK_MIN_POOL in blk_init() */
	size_t fsize;
	/* when set, blk_init() skips the warm-up pass */
	bool no_warmup;
	/* seed from which the per-worker random seeds are derived */
	unsigned seed;
	/* operation type as text: blk, file, memcpy */
	char *type_str;
	/* access mode as text: stat, seq, rand */
	char *mode_str;
};

/*
 * blk_bench -- state shared by all workers of a single blk benchmark run
 */
struct blk_bench {
	/* pmemblk pool handle */
	PMEMblkpool *pbp;
	/* address of the user data, used by the memcpy operation type */
	char *addr;
	/* file descriptor used by the file io operation type */
	int fd;
	/* actual number of blocks used by the benchmark */
	size_t nblocks;
	/* number of blocks assigned to each thread */
	size_t blocks_per_thread;
	/* routine executed for every single operation */
	worker_fn worker;
	/* parsed "--operation" option */
	enum op_type type;
	/* parsed "--mode" option */
	enum op_mode mode;
};

/*
 * blk_worker -- private state of a single benchmark worker
 */
struct blk_worker {
	/* block number for each operation, indexed by operation index */
	os_off_t *blocks;
	/* data buffer that operations read into or write from */
	char *buff;
	/* seed used by this worker's random number generation */
	unsigned seed;
};

/*
 * parse_op_type -- translate the "--operation" argument into an op_type
 *
 * Returns the matching operation type, or OP_TYPE_UNKNOWN when the string
 * is none of "blk", "file" or "memcpy".
 */
static enum op_type
parse_op_type(const char *arg)
{
	static const struct {
		const char *name;
		enum op_type type;
	} known[] = {
		{"blk", OP_TYPE_BLK},
		{"file", OP_TYPE_FILE},
		{"memcpy", OP_TYPE_MEMCPY},
	};

	for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(arg, known[i].name) == 0)
			return known[i].type;
	}

	return OP_TYPE_UNKNOWN;
}

/*
 * parse_op_mode -- translate the "--mode" argument into an op_mode
 *
 * Returns the matching mode, or OP_MODE_UNKNOWN when the string is none of
 * "stat", "seq" or "rand".
 */
static enum op_mode
parse_op_mode(const char *arg)
{
	if (!strcmp(arg, "stat"))
		return OP_MODE_STAT;

	if (!strcmp(arg, "seq"))
		return OP_MODE_SEQ;

	if (!strcmp(arg, "rand"))
		return OP_MODE_RAND;

	return OP_MODE_UNKNOWN;
}

/*
 * blk_do_warmup -- touch every block once before the measured run
 *
 * Writes a zero-filled buffer of args->dsize bytes to each of the
 * bb->nblocks blocks, using the backend selected by bb->type.
 * Returns 0 on success, -1 as soon as any write fails.
 */
static int
blk_do_warmup(struct blk_bench *bb, struct benchmark_args *args)
{
	char *zeros = (char *)calloc(1, args->dsize);
	if (!zeros) {
		perror("calloc");
		return -1;
	}

	int status = 0;
	for (size_t blockno = 0; blockno < bb->nblocks; ++blockno) {
		/* byte offset of the block; unused by the pmemblk backend */
		size_t byte_off = blockno * args->dsize;

		if (bb->type == OP_TYPE_FILE) {
			ssize_t written =
				pwrite(bb->fd, zeros, args->dsize, byte_off);
			if (written != (ssize_t)args->dsize) {
				perror("pwrite");
				status = -1;
			}
		} else if (bb->type == OP_TYPE_BLK) {
			if (pmemblk_write(bb->pbp, zeros, blockno) < 0) {
				perror("pmemblk_write");
				status = -1;
			}
		} else if (bb->type == OP_TYPE_MEMCPY) {
			pmem_memcpy_persist((char *)bb->addr + byte_off,
					    zeros, args->dsize);
		} else {
			perror("unknown type");
			status = -1;
		}

		if (status != 0)
			break;
	}

	free(zeros);
	return status;
}

/*
 * blk_read -- worker routine reading one block through pmemblk
 *
 * Reads block number off from the pool into the worker's buffer.
 * Returns 0 on success, -1 if pmemblk_read() fails.
 */
static int
blk_read(struct blk_bench *bb, struct benchmark_args *ba,
	 struct blk_worker *bworker, os_off_t off)
{
	int rc = pmemblk_read(bb->pbp, bworker->buff, off);
	if (rc >= 0)
		return 0;

	perror("pmemblk_read");
	return -1;
}

/*
 * fileio_read -- read function for file io
 */
static int
fileio_read(struct blk_bench *bb, struct benchmark_args *ba,
	    struct blk_worker *bworker, os_off_t off)
{
	os_off_t file_off = off * ba->dsize;
	if (pread(bb->fd, bworker->buff, ba->dsize, file_off) !=
	    (ssize_t)ba->dsize) {
		perror("pread");
		return -1;
	}
	return 0;
}

/*
 * memcpy_read -- read function for memcpy
 */
static int
memcpy_read(struct blk_bench *bb, struct benchmark_args *ba,
	    struct blk_worker *bworker, os_off_t off)
{
	os_off_t file_off = off * ba->dsize;
	memcpy(bworker->buff, (char *)bb->addr + file_off, ba->dsize);
	return 0;
}

/*
 * blk_write -- worker routine writing one block through pmemblk
 *
 * Writes the worker's buffer to block number off of the pool.
 * Returns 0 on success, -1 if pmemblk_write() fails.
 */
static int
blk_write(struct blk_bench *bb, struct benchmark_args *ba,
	  struct blk_worker *bworker, os_off_t off)
{
	int rc = pmemblk_write(bb->pbp, bworker->buff, off);
	if (rc >= 0)
		return 0;

	perror("pmemblk_write");
	return -1;
}

/*
 * memcpy_write -- write function for memcpy
 */
static int
memcpy_write(struct blk_bench *bb, struct benchmark_args *ba,
	     struct blk_worker *bworker, os_off_t off)
{
	os_off_t file_off = off * ba->dsize;
	pmem_memcpy_persist((char *)bb->addr + file_off, bworker->buff,
			    ba->dsize);
	return 0;
}

/*
 * fileio_write -- write function for file io
 */
static int
fileio_write(struct blk_bench *bb, struct benchmark_args *ba,
	     struct blk_worker *bworker, os_off_t off)
{
	os_off_t file_off = off * ba->dsize;
	if (pwrite(bb->fd, bworker->buff, ba->dsize, file_off) !=
	    (ssize_t)ba->dsize) {
		perror("pwrite");
		return -1;
	}
	return 0;
}

/*
 * blk_operation -- main operations for blk_read and blk_write benchmark
 */
static int
blk_operation(struct benchmark *bench, struct operation_info *info)
{
	auto *bb = (struct blk_bench *)pmembench_get_priv(bench);
	auto *bworker = (struct blk_worker *)info->worker->priv;

	os_off_t off = bworker->blocks[info->index];
	return bb->worker(bb, info->args, bworker, off);
}

/*
 * blk_init_worker -- allocate and fill the private context of one worker
 *
 * Allocates the worker structure, its data buffer of args->dsize bytes and
 * an array holding the block number for each of the args->n_ops_per_thread
 * operations, filled according to bb->mode. On success the context is
 * stored in worker->priv and 0 is returned. On failure everything allocated
 * here is released and -1 is returned.
 */
static int
blk_init_worker(struct benchmark *bench, struct benchmark_args *args,
		struct worker_info *worker)
{
	struct blk_worker *bworker =
		(struct blk_worker *)malloc(sizeof(*bworker));

	if (!bworker) {
		perror("malloc");
		return -1;
	}

	auto *bb = (struct blk_bench *)pmembench_get_priv(bench);
	auto *bargs = (struct blk_args *)args->opts;

	/* derive this worker's seed from the benchmark-wide seed */
	bworker->seed = os_rand_r(&bargs->seed);

	bworker->buff = (char *)malloc(args->dsize);
	if (!bworker->buff) {
		perror("malloc");
		goto err_buff;
	}

	/* fill the buffer with a seed-dependent byte value */
	memset(bworker->buff, bworker->seed, args->dsize);

	assert(args->n_ops_per_thread != 0);
	bworker->blocks = (os_off_t *)malloc(sizeof(*bworker->blocks) *
					     args->n_ops_per_thread);
	if (!bworker->blocks) {
		perror("malloc");
		goto err_blocks;
	}

	/*
	 * NOTE(review): only OP_MODE_RAND offsets the block number by
	 * worker->index * bb->blocks_per_thread; in the other two modes every
	 * worker uses the same block numbers -- confirm this is intended.
	 */
	switch (bb->mode) {
		case OP_MODE_RAND:
			for (size_t i = 0; i < args->n_ops_per_thread; i++) {
				bworker->blocks[i] =
					worker->index * bb->blocks_per_thread +
					os_rand_r(&bworker->seed) %
						bb->blocks_per_thread;
			}
			break;
		case OP_MODE_SEQ:
			for (size_t i = 0; i < args->n_ops_per_thread; i++)
				bworker->blocks[i] = i % bb->blocks_per_thread;
			break;
		case OP_MODE_STAT:
			for (size_t i = 0; i < args->n_ops_per_thread; i++)
				bworker->blocks[i] = 0;
			break;
		default:
			/* not an errno condition, so perror() would mislead */
			fprintf(stderr, "unknown mode\n");
			goto err_mode;
	}

	worker->priv = bworker;
	return 0;

err_mode:
	/* the blocks array is already allocated on this path */
	free(bworker->blocks);
err_blocks:
	free(bworker->buff);
err_buff:
	free(bworker);

	return -1;
}

/*
 * blk_free_worker -- cleanup worker
 */
static void
blk_free_worker(struct benchmark *bench, struct benchmark_args *args,
		struct worker_info *worker)
{
	auto *bworker = (struct blk_worker *)worker->priv;
	free(bworker->blocks);
	free(bworker->buff);
	free(bworker);
}

/*
 * blk_init -- common initialization of the blk_read and blk_write benchmarks
 *
 * Parses the operation type and mode, validates the requested sizes,
 * creates the pmemblk pool to learn the number of blocks and then prepares
 * the backend selected by the operation type:
 *  - blk:    the pool stays open in bb->pbp
 *  - file:   the pool is closed and the file is opened into bb->fd
 *  - memcpy: the pool stays open and bb->addr points past the pool header
 * Unless disabled, a warm-up pass writes every block once.
 *
 * Returns 0 on success. On failure returns -1 and leaves neither the pool
 * nor the file descriptor open.
 */
static int
blk_init(struct blk_bench *bb, struct benchmark_args *args)
{
	auto *ba = (struct blk_args *)args->opts;
	assert(ba != nullptr);

	/* known state, so the error path can tell what is actually open */
	bb->pbp = nullptr;
	bb->addr = nullptr;
	bb->fd = -1;

	char path[PATH_MAX];
	if (util_safe_strcpy(path, args->fname, sizeof(path)) != 0)
		return -1;

	bb->type = parse_op_type(ba->type_str);
	if (bb->type == OP_TYPE_UNKNOWN) {
		fprintf(stderr, "Invalid operation argument '%s'\n",
			ba->type_str);
		return -1;
	}

	enum file_type type = util_file_get_type(args->fname);
	if (type == OTHER_ERROR) {
		fprintf(stderr, "could not check type of file %s\n",
			args->fname);
		return -1;
	}

	if (bb->type == OP_TYPE_FILE && type == TYPE_DEVDAX) {
		fprintf(stderr, "fileio not supported on device dax\n");
		return -1;
	}

	bb->mode = parse_op_mode(ba->mode_str);
	if (bb->mode == OP_MODE_UNKNOWN) {
		fprintf(stderr, "Invalid mode argument '%s'\n", ba->mode_str);
		return -1;
	}

	/* the block size is used as a divisor below */
	if (args->dsize == 0) {
		fprintf(stderr, "block size must be non-zero\n");
		return -1;
	}

	if (ba->fsize == 0)
		ba->fsize = PMEMBLK_MIN_POOL;

	/* remember the request; ba->fsize may be zeroed below */
	size_t req_fsize = ba->fsize;

	if (ba->fsize / args->dsize < args->n_threads ||
	    ba->fsize < PMEMBLK_MIN_POOL) {
		fprintf(stderr, "too small file size\n");
		return -1;
	}

	if (args->dsize >= ba->fsize) {
		fprintf(stderr, "block size bigger than file size\n");
		return -1;
	}

	if (args->is_poolset || type == TYPE_DEVDAX) {
		if (args->fsize < ba->fsize) {
			fprintf(stderr, "file size too large\n");
			return -1;
		}

		ba->fsize = 0;
	} else if (args->is_dynamic_poolset) {
		int ret = dynamic_poolset_create(args->fname, ba->fsize);
		if (ret == -1)
			return -1;

		if (util_safe_strcpy(path, POOLSET_PATH, sizeof(path)) != 0)
			return -1;

		ba->fsize = 0;
	}

	/*
	 * Create pmemblk in order to get the number of blocks
	 * even for file-io mode.
	 */
	bb->pbp = pmemblk_create(path, args->dsize, ba->fsize, args->fmode);

	if (bb->pbp == nullptr) {
		perror("pmemblk_create");
		return -1;
	}

	bb->nblocks = pmemblk_nblock(bb->pbp);

	/* limit the number of used blocks */
	if (bb->nblocks > req_fsize / args->dsize)
		bb->nblocks = req_fsize / args->dsize;

	if (bb->nblocks < args->n_threads) {
		fprintf(stderr, "too small file size\n");
		goto out_close;
	}

	if (bb->type == OP_TYPE_FILE) {
		/* the pool was only needed to compute the number of blocks */
		pmemblk_close(bb->pbp);
		bb->pbp = nullptr;

		int flags = O_RDWR | O_CREAT | O_SYNC;
#ifdef _WIN32
		flags |= O_BINARY;
#endif
		bb->fd = os_open(args->fname, flags, args->fmode);
		if (bb->fd < 0) {
			perror("open");
			return -1;
		}
	} else if (bb->type == OP_TYPE_MEMCPY) {
		/* skip pool header, so addr points to the first block */
		bb->addr = (char *)bb->pbp + 8192;
	}

	bb->blocks_per_thread = bb->nblocks / args->n_threads;

	if (!ba->no_warmup) {
		if (blk_do_warmup(bb, args) != 0)
			goto out_close;
	}

	return 0;

out_close:
	/*
	 * Close whatever is open at this point rather than deciding by type:
	 * a file-type run can get here with the pool still open and no file
	 * descriptor yet.
	 */
	if (bb->fd >= 0) {
		os_close(bb->fd);
		bb->fd = -1;
	}
	if (bb->pbp != nullptr) {
		pmemblk_close(bb->pbp);
		bb->pbp = nullptr;
	}
	return -1;
}

/*
 * blk_read_init - function for initializing blk_read benchmark
 */
static int
blk_read_init(struct benchmark *bench, struct benchmark_args *args)
{
	assert(bench != nullptr);
	assert(args != nullptr);

	int ret;
	auto *bb = (struct blk_bench *)malloc(sizeof(struct blk_bench));
	if (bb == nullptr) {
		perror("malloc");
		return -1;
	}

	pmembench_set_priv(bench, bb);

	ret = blk_init(bb, args);
	if (ret != 0) {
		free(bb);
		return ret;
	}

	switch (bb->type) {
		case OP_TYPE_FILE:
			bb->worker = fileio_read;
			break;
		case OP_TYPE_BLK:
			bb->worker = blk_read;
			break;
		case OP_TYPE_MEMCPY:
			bb->worker = memcpy_read;
			break;
		default:
			perror("unknown operation type");
			return -1;
	}

	return ret;
}

/*
 * blk_write_init - function for initializing blk_write benchmark
 */
static int
blk_write_init(struct benchmark *bench, struct benchmark_args *args)
{
	assert(bench != nullptr);
	assert(args != nullptr);

	int ret;
	auto *bb = (struct blk_bench *)malloc(sizeof(struct blk_bench));
	if (bb == nullptr) {
		perror("malloc");
		return -1;
	}

	pmembench_set_priv(bench, bb);

	ret = blk_init(bb, args);
	if (ret != 0) {
		free(bb);
		return ret;
	}

	switch (bb->type) {
		case OP_TYPE_FILE:
			bb->worker = fileio_write;
			break;
		case OP_TYPE_BLK:
			bb->worker = blk_write;
			break;
		case OP_TYPE_MEMCPY:
			bb->worker = memcpy_write;
			break;
		default:
			perror("unknown operation type");
			return -1;
	}

	return ret;
}

/*
 * blk_exit -- de-initialize the blk_read and blk_write benchmarks
 *
 * Closes the file descriptor or the pmemblk pool depending on the operation
 * type. For the blk type the pool is additionally verified with
 * pmemblk_check(). The benchmark context is freed on every path, including
 * the failing ones.
 *
 * Returns 0 on success, -1 if the pool path could not be built, the
 * consistency check failed or the operation type is unknown.
 */
static int
blk_exit(struct benchmark *bench, struct benchmark_args *args)
{
	auto *bb = (struct blk_bench *)pmembench_get_priv(bench);
	int ret = 0;

	/* the path is only consumed by pmemblk_check() below */
	char path[PATH_MAX];
	bool path_ok =
		util_safe_strcpy(path, args->fname, sizeof(path)) == 0;
	if (path_ok && args->is_dynamic_poolset)
		path_ok = util_safe_strcpy(path, POOLSET_PATH,
					   sizeof(path)) == 0;
	if (!path_ok)
		ret = -1;

	int result;
	switch (bb->type) {
		case OP_TYPE_FILE:
			os_close(bb->fd);
			break;
		case OP_TYPE_BLK:
			pmemblk_close(bb->pbp);
			/* without a valid path there is nothing to check */
			if (!path_ok)
				break;
			result = pmemblk_check(path, args->dsize);
			if (result < 0) {
				perror("pmemblk_check error");
				ret = -1;
			} else if (result == 0) {
				/* not an errno condition */
				fprintf(stderr,
					"pmemblk_check: not consistent\n");
				ret = -1;
			}
			break;
		case OP_TYPE_MEMCPY:
			pmemblk_close(bb->pbp);
			break;
		default:
			fprintf(stderr, "unknown operation type\n");
			ret = -1;
			break;
	}

	free(bb);
	return ret;
}

static struct benchmark_clo blk_clo[5];
static struct benchmark_info blk_read_info;
static struct benchmark_info blk_write_info;

CONSTRUCTOR(blk_constructor)
void
blk_constructor(void)
{
	blk_clo[0].opt_short = 'o';
	blk_clo[0].opt_long = "operation";
	blk_clo[0].descr = "Operation type - blk, file, memcpy";
	blk_clo[0].type = CLO_TYPE_STR;
	blk_clo[0].off = clo_field_offset(struct blk_args, type_str);
	blk_clo[0].def = "blk";

	blk_clo[1].opt_short = 'w';
	blk_clo[1].opt_long = "no-warmup";
	blk_clo[1].descr = "Don't do warmup";
	blk_clo[1].type = CLO_TYPE_FLAG;
	blk_clo[1].off = clo_field_offset(struct blk_args, no_warmup);

	blk_clo[2].opt_short = 'm';
	blk_clo[2].opt_long = "mode";
	blk_clo[2].descr = "Reading/writing mode - stat, seq, rand";
	blk_clo[2].type = CLO_TYPE_STR;
	blk_clo[2].off = clo_field_offset(struct blk_args, mode_str);
	blk_clo[2].def = "seq";

	blk_clo[3].opt_short = 'S';
	blk_clo[3].opt_long = "seed";
	blk_clo[3].descr = "Random seed";
	blk_clo[3].off = clo_field_offset(struct blk_args, seed);
	blk_clo[3].def = "1";
	blk_clo[3].type = CLO_TYPE_UINT;
	blk_clo[3].type_uint.size = clo_field_size(struct blk_args, seed);
	blk_clo[3].type_uint.base = CLO_INT_BASE_DEC;
	blk_clo[3].type_uint.min = 1;
	blk_clo[3].type_uint.max = UINT_MAX;

	blk_clo[4].opt_short = 's';
	blk_clo[4].opt_long = "file-size";
	blk_clo[4].descr = "Requested file size in bytes - 0 means minimum";
	blk_clo[4].type = CLO_TYPE_UINT;
	blk_clo[4].off = clo_field_offset(struct blk_args, fsize);
	blk_clo[4].def = "0";
	blk_clo[4].type_uint.size = clo_field_size(struct blk_args, fsize);
	blk_clo[4].type_uint.base = CLO_INT_BASE_DEC;
	blk_clo[4].type_uint.min = 0;
	blk_clo[4].type_uint.max = ~0;

	blk_read_info.name = "blk_read";
	blk_read_info.brief = "Benchmark for blk_read() operation";
	blk_read_info.init = blk_read_init;
	blk_read_info.exit = blk_exit;
	blk_read_info.multithread = true;
	blk_read_info.multiops = true;
	blk_read_info.init_worker = blk_init_worker;
	blk_read_info.free_worker = blk_free_worker;
	blk_read_info.operation = blk_operation;
	blk_read_info.clos = blk_clo;
	blk_read_info.nclos = ARRAY_SIZE(blk_clo);
	blk_read_info.opts_size = sizeof(struct blk_args);
	blk_read_info.rm_file = true;
	blk_read_info.allow_poolset = true;

	REGISTER_BENCHMARK(blk_read_info);

	blk_write_info.name = "blk_write";
	blk_write_info.brief = "Benchmark for blk_write() operation";
	blk_write_info.init = blk_write_init;
	blk_write_info.exit = blk_exit;
	blk_write_info.multithread = true;
	blk_write_info.multiops = true;
	blk_write_info.init_worker = blk_init_worker;
	blk_write_info.free_worker = blk_free_worker;
	blk_write_info.operation = blk_operation;
	blk_write_info.clos = blk_clo;
	blk_write_info.nclos = ARRAY_SIZE(blk_clo);
	blk_write_info.opts_size = sizeof(struct blk_args);
	blk_write_info.rm_file = true;
	blk_write_info.allow_poolset = true;

	REGISTER_BENCHMARK(blk_write_info);
}