/*
* Copyright 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* pool.c -- pool processing functions
*/
#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <endian.h>
#ifndef _WIN32
#include <sys/ioctl.h>
#ifdef __FreeBSD__
#include <sys/disk.h>
#define BLKGETSIZE64 DIOCGMEDIASIZE
#else
#include <linux/fs.h>
#endif
#endif
#include "libpmem.h"
#include "libpmemlog.h"
#include "libpmemblk.h"
#include "libpmempool.h"
#include "out.h"
#include "pmempool.h"
#include "pool.h"
#include "lane.h"
#include "obj.h"
#include "btt.h"
#include "file.h"
#include "os.h"
#include "set.h"
#include "check_util.h"
#include "util_pmem.h"
#include "mmap.h"
/* arbitrary size of a maximum file part being read / write at once */
#define RW_BUFFERING_SIZE (128 * 1024 * 1024)
/*
* pool_btt_lseek -- (internal) perform lseek in BTT file mode
*/
static inline os_off_t
pool_btt_lseek(struct pool_data *pool, os_off_t offset, int whence)
{
os_off_t result;
if ((result = os_lseek(pool->set_file->fd, offset, whence)) == -1)
ERR("!lseek");
return result;
}
/*
* pool_btt_read -- (internal) perform read in BTT file mode
*/
static inline ssize_t
pool_btt_read(struct pool_data *pool, void *dst, size_t count)
{
size_t total = 0;
ssize_t nread;
while (count > total &&
(nread = util_read(pool->set_file->fd, dst, count - total))) {
if (nread == -1) {
ERR("!read");
return total ? (ssize_t)total : -1;
}
dst = (void *)((ssize_t)dst + nread);
total += (size_t)nread;
}
return (ssize_t)total;
}
/*
* pool_btt_write -- (internal) perform write in BTT file mode
*/
static inline ssize_t
pool_btt_write(struct pool_data *pool, const void *src, size_t count)
{
ssize_t nwrite = 0;
size_t total = 0;
while (count > total &&
(nwrite = util_write(pool->set_file->fd, src,
count - total))) {
if (nwrite == -1) {
ERR("!write");
return total ? (ssize_t)total : -1;
}
src = (void *)((ssize_t)src + nwrite);
total += (size_t)nwrite;
}
return (ssize_t)total;
}
/*
* pool_set_read_header -- (internal) read a header of a pool set
*/
static int
pool_set_read_header(const char *fname, struct pool_hdr *hdr)
{
struct pool_set *set;
int ret = 0;
if (util_poolset_read(&set, fname)) {
return -1;
}
/* open the first part set file to read the pool header values */
const struct pool_set_part *part = PART(REP(set, 0), 0);
int fdp = util_file_open(part->path, NULL, 0, O_RDONLY);
if (fdp < 0) {
ERR("cannot open poolset part file");
ret = -1;
goto err_pool_set;
}
/* read the pool header from first pool set file */
if (pread(fdp, hdr, sizeof(*hdr), 0) != sizeof(*hdr)) {
ERR("cannot read pool header from poolset");
ret = -1;
goto err_close_part;
}
err_close_part:
os_close(fdp);
err_pool_set:
util_poolset_free(set);
return ret;
}
/*
* pool_set_map -- (internal) map poolset
*/
static int
pool_set_map(const char *fname, struct pool_set **poolset, unsigned flags)
{
ASSERTeq(util_is_poolset_file(fname), 1);
struct pool_hdr hdr;
if (pool_set_read_header(fname, &hdr))
return -1;
util_convert2h_hdr_nocheck(&hdr);
/* parse pool type from first pool set file */
enum pool_type type = pool_hdr_get_type(&hdr);
if (type == POOL_TYPE_UNKNOWN) {
ERR("cannot determine pool type from poolset");
return -1;
}
/*
* Open the poolset, the values passed to util_pool_open are read
* from the first poolset file, these values are then compared with
* the values from all headers of poolset files.
*/
struct pool_attr attr;
util_pool_hdr2attr(&attr, &hdr);
if (util_pool_open(poolset, fname, 0 /* minpartsize */, &attr,
NULL, NULL, flags | POOL_OPEN_IGNORE_SDS |
POOL_OPEN_IGNORE_BAD_BLOCKS)) {
ERR("opening poolset failed");
return -1;
}
return 0;
}
/*
* pool_params_from_header -- parse pool params from pool header
*/
void
pool_params_from_header(struct pool_params *params, const struct pool_hdr *hdr)
{
memcpy(params->signature, hdr->signature, sizeof(params->signature));
memcpy(¶ms->features, &hdr->features, sizeof(params->features));
/*
* Check if file is a part of pool set by comparing the UUID with the
* next part UUID. If it is the same it means the pool consist of a
* single file.
*/
int uuid_eq_next = uuidcmp(hdr->uuid, hdr->next_part_uuid);
int uuid_eq_prev = uuidcmp(hdr->uuid, hdr->prev_part_uuid);
params->is_part = !params->is_poolset && (uuid_eq_next || uuid_eq_prev);
params->type = pool_hdr_get_type(hdr);
}
/*
* pool_check_type_to_pool_type -- (internal) convert check pool type to
* internal pool type value
*/
static enum pool_type
pool_check_type_to_pool_type(enum pmempool_pool_type check_pool_type)
{
switch (check_pool_type) {
case PMEMPOOL_POOL_TYPE_LOG:
return POOL_TYPE_LOG;
case PMEMPOOL_POOL_TYPE_BLK:
return POOL_TYPE_BLK;
case PMEMPOOL_POOL_TYPE_OBJ:
return POOL_TYPE_OBJ;
default:
ERR("can not convert pmempool_pool_type %u to pool_type",
check_pool_type);
return POOL_TYPE_UNKNOWN;
}
}
/*
* pool_parse_params -- parse pool type, file size and block size
*/
static int
pool_params_parse(const PMEMpoolcheck *ppc, struct pool_params *params,
int check)
{
LOG(3, NULL);
int is_btt = ppc->args.pool_type == PMEMPOOL_POOL_TYPE_BTT;
params->type = POOL_TYPE_UNKNOWN;
params->is_poolset = util_is_poolset_file(ppc->path) == 1;
int fd = util_file_open(ppc->path, NULL, 0, O_RDONLY);
if (fd < 0)
return -1;
int ret = 0;
os_stat_t stat_buf;
ret = os_fstat(fd, &stat_buf);
if (ret)
goto out_close;
ASSERT(stat_buf.st_size >= 0);
params->mode = stat_buf.st_mode;
struct pool_set *set;
void *addr;
if (params->is_poolset) {
/*
* Need to close the poolset because it will be opened with
* flock in the following instructions.
*/
os_close(fd);
fd = -1;
if (check) {
if (pool_set_map(ppc->path, &set, 0))
return -1;
} else {
ret = util_poolset_create_set(&set, ppc->path,
0, 0, true);
if (ret < 0) {
LOG(2, "cannot open pool set -- '%s'",
ppc->path);
return -1;
}
if (set->remote) {
ERR("poolsets with remote replicas are not "
"supported");
return -1;
}
if (util_pool_open_nocheck(set,
POOL_OPEN_IGNORE_BAD_BLOCKS))
return -1;
}
params->size = set->poolsize;
addr = set->replica[0]->part[0].addr;
/*
* XXX mprotect for device dax with length not aligned to its
* page granularity causes SIGBUS on the next page fault.
* The length argument of this call should be changed to
* set->poolsize once the kernel issue is solved.
*/
if (mprotect(addr, set->replica[0]->repsize,
PROT_READ) < 0) {
ERR("!mprotect");
goto out_unmap;
}
params->is_dev_dax = set->replica[0]->part[0].is_dev_dax;
params->is_pmem = set->replica[0]->is_pmem;
} else if (is_btt) {
params->size = (size_t)stat_buf.st_size;
#ifndef _WIN32
if (params->mode & S_IFBLK)
if (ioctl(fd, BLKGETSIZE64, ¶ms->size)) {
ERR("!ioctl");
goto out_close;
}
#endif
addr = NULL;
} else {
enum file_type type = util_file_get_type(ppc->path);
if (type < 0) {
ret = -1;
goto out_close;
}
ssize_t s = util_file_get_size(ppc->path);
if (s < 0) {
ret = -1;
goto out_close;
}
params->size = (size_t)s;
int map_sync;
addr = util_map(fd, params->size, MAP_SHARED, 1, 0, &map_sync);
if (addr == NULL) {
ret = -1;
goto out_close;
}
params->is_dev_dax = type == TYPE_DEVDAX;
params->is_pmem = params->is_dev_dax || map_sync ||
pmem_is_pmem(addr, params->size);
}
/* stop processing for BTT device */
if (is_btt) {
params->type = POOL_TYPE_BTT;
params->is_part = false;
goto out_close;
}
struct pool_hdr hdr;
memcpy(&hdr, addr, sizeof(hdr));
util_convert2h_hdr_nocheck(&hdr);
pool_params_from_header(params, &hdr);
if (ppc->args.pool_type != PMEMPOOL_POOL_TYPE_DETECT) {
enum pool_type declared_type =
pool_check_type_to_pool_type(ppc->args.pool_type);
if ((params->type & ~declared_type) != 0) {
ERR("declared pool type does not match");
errno = EINVAL;
ret = 1;
goto out_unmap;
}
}
if (params->type == POOL_TYPE_BLK) {
struct pmemblk pbp;
memcpy(&pbp, addr, sizeof(pbp));
params->blk.bsize = le32toh(pbp.bsize);
} else if (params->type == POOL_TYPE_OBJ) {
struct pmemobjpool *pop = addr;
memcpy(params->obj.layout, pop->layout,
PMEMOBJ_MAX_LAYOUT);
}
out_unmap:
if (params->is_poolset) {
ASSERTeq(fd, -1);
ASSERTne(addr, NULL);
util_poolset_close(set, DO_NOT_DELETE_PARTS);
} else if (!is_btt) {
ASSERTne(fd, -1);
ASSERTne(addr, NULL);
munmap(addr, params->size);
}
out_close:
if (fd != -1)
os_close(fd);
return ret;
}
/*
* pool_set_file_open -- (internal) opens pool set file or regular file
*/
static struct pool_set_file *
pool_set_file_open(const char *fname, struct pool_params *params, int rdonly)
{
LOG(3, NULL);
struct pool_set_file *file = calloc(1, sizeof(*file));
if (!file)
return NULL;
file->fname = strdup(fname);
if (!file->fname)
goto err;
const char *path = file->fname;
if (params->type != POOL_TYPE_BTT) {
int ret = util_poolset_create_set(&file->poolset, path,
0, 0, true);
if (ret < 0) {
LOG(2, "cannot open pool set -- '%s'", path);
goto err_free_fname;
}
unsigned flags = (rdonly ? POOL_OPEN_COW : 0) |
POOL_OPEN_IGNORE_BAD_BLOCKS;
if (util_pool_open_nocheck(file->poolset, flags))
goto err_free_fname;
file->size = file->poolset->poolsize;
/* get modification time from the first part of first replica */
path = file->poolset->replica[0]->part[0].path;
file->addr = file->poolset->replica[0]->part[0].addr;
} else {
int oflag = rdonly ? O_RDONLY : O_RDWR;
file->fd = util_file_open(fname, NULL, 0, oflag);
file->size = params->size;
}
os_stat_t buf;
if (os_stat(path, &buf)) {
ERR("%s", path);
goto err_close_poolset;
}
file->mtime = buf.st_mtime;
file->mode = buf.st_mode;
return file;
err_close_poolset:
if (params->type != POOL_TYPE_BTT)
util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS);
else if (file->fd != -1)
os_close(file->fd);
err_free_fname:
free(file->fname);
err:
free(file);
return NULL;
}
/*
* pool_set_parse -- parse poolset file
*/
int
pool_set_parse(struct pool_set **setp, const char *path)
{
LOG(3, "setp %p path %s", setp, path);
int fd = os_open(path, O_RDONLY);
int ret = 0;
if (fd < 0)
return 1;
if (util_poolset_parse(setp, path, fd)) {
ret = 1;
goto err_close;
}
err_close:
os_close(fd);
return ret;
}
/*
* pool_data_alloc -- allocate pool data and open set_file
*/
struct pool_data *
pool_data_alloc(PMEMpoolcheck *ppc)
{
LOG(3, NULL);
struct pool_data *pool = calloc(1, sizeof(*pool));
if (!pool) {
ERR("!calloc");
return NULL;
}
TAILQ_INIT(&pool->arenas);
pool->uuid_op = UUID_NOP;
if (pool_params_parse(ppc, &pool->params, 0))
goto error;
int rdonly = CHECK_IS_NOT(ppc, REPAIR);
int prv = CHECK_IS(ppc, DRY_RUN);
if (prv && pool->params.is_dev_dax) {
errno = ENOTSUP;
ERR("!cannot perform a dry run on dax device");
goto error;
}
pool->set_file = pool_set_file_open(ppc->path, &pool->params, prv);
if (pool->set_file == NULL)
goto error;
/*
* XXX mprotect for device dax with length not aligned to its
* page granularity causes SIGBUS on the next page fault.
* The length argument of this call should be changed to
* pool->set_file->poolsize once the kernel issue is solved.
*/
if (rdonly && mprotect(pool->set_file->addr,
pool->set_file->poolset->replica[0]->repsize,
PROT_READ) < 0)
goto error;
if (pool->params.type != POOL_TYPE_BTT) {
if (pool_set_file_map_headers(pool->set_file, rdonly, prv))
goto error;
}
return pool;
error:
pool_data_free(pool);
return NULL;
}
/*
* pool_set_file_close -- (internal) closes pool set file or regular file
*/
static void
pool_set_file_close(struct pool_set_file *file)
{
LOG(3, NULL);
if (file->poolset)
util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS);
else if (file->addr) {
munmap(file->addr, file->size);
os_close(file->fd);
} else if (file->fd)
os_close(file->fd);
free(file->fname);
free(file);
}
/*
* pool_data_free -- close set_file and release pool data
*/
void
pool_data_free(struct pool_data *pool)
{
LOG(3, NULL);
if (pool->set_file) {
if (pool->params.type != POOL_TYPE_BTT)
pool_set_file_unmap_headers(pool->set_file);
pool_set_file_close(pool->set_file);
}
while (!TAILQ_EMPTY(&pool->arenas)) {
struct arena *arenap = TAILQ_FIRST(&pool->arenas);
if (arenap->map)
free(arenap->map);
if (arenap->flog)
free(arenap->flog);
TAILQ_REMOVE(&pool->arenas, arenap, next);
free(arenap);
}
free(pool);
}
/*
* pool_set_file_map -- return mapped address at given offset
*/
void *
pool_set_file_map(struct pool_set_file *file, uint64_t offset)
{
if (file->addr == MAP_FAILED)
return NULL;
return (char *)file->addr + offset;
}
/*
* pool_read -- read from pool set file or regular file
*
* 'buff' has to be a buffer at least 'nbytes' long
* 'off' is an offset from the beginning of the pool
*/
int
pool_read(struct pool_data *pool, void *buff, size_t nbytes, uint64_t off)
{
if (off + nbytes > pool->set_file->size)
return -1;
if (pool->params.type != POOL_TYPE_BTT)
memcpy(buff, (char *)pool->set_file->addr + off, nbytes);
else {
if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1)
return -1;
if ((size_t)pool_btt_read(pool, buff, nbytes) != nbytes)
return -1;
}
return 0;
}
/*
* pool_write -- write to pool set file or regular file
*
* 'buff' has to be a buffer at least 'nbytes' long
* 'off' is an offset from the beginning of the pool
*/
int
pool_write(struct pool_data *pool, const void *buff, size_t nbytes,
uint64_t off)
{
if (off + nbytes > pool->set_file->size)
return -1;
if (pool->params.type != POOL_TYPE_BTT) {
memcpy((char *)pool->set_file->addr + off, buff, nbytes);
util_persist_auto(pool->params.is_pmem,
(char *)pool->set_file->addr + off, nbytes);
} else {
if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1)
return -1;
if ((size_t)pool_btt_write(pool, buff, nbytes) != nbytes)
return -1;
}
return 0;
}
/*
* pool_copy -- make a copy of the pool
*/
int
pool_copy(struct pool_data *pool, const char *dst_path, int overwrite)
{
struct pool_set_file *file = pool->set_file;
int dfd;
int exists = util_file_exists(dst_path);
if (exists < 0)
return -1;
if (exists) {
if (!overwrite) {
errno = EEXIST;
return -1;
}
dfd = util_file_open(dst_path, NULL, 0, O_RDWR);
} else {
errno = 0;
dfd = util_file_create(dst_path, file->size, 0);
}
if (dfd < 0)
return -1;
int result = 0;
os_stat_t stat_buf;
if (os_stat(file->fname, &stat_buf)) {
result = -1;
goto out_close;
}
if (fchmod(dfd, stat_buf.st_mode)) {
result = -1;
goto out_close;
}
void *daddr = mmap(NULL, file->size, PROT_READ | PROT_WRITE,
MAP_SHARED, dfd, 0);
if (daddr == MAP_FAILED) {
result = -1;
goto out_close;
}
if (pool->params.type != POOL_TYPE_BTT) {
void *saddr = pool_set_file_map(file, 0);
memcpy(daddr, saddr, file->size);
goto out_unmap;
}
void *buf = malloc(RW_BUFFERING_SIZE);
if (buf == NULL) {
ERR("!malloc");
result = -1;
goto out_unmap;
}
if (pool_btt_lseek(pool, 0, SEEK_SET) == -1) {
result = -1;
goto out_free;
}
ssize_t buf_read = 0;
void *dst = daddr;
while ((buf_read = pool_btt_read(pool, buf, RW_BUFFERING_SIZE))) {
if (buf_read == -1)
break;
memcpy(dst, buf, (size_t)buf_read);
dst = (void *)((ssize_t)dst + buf_read);
}
out_free:
free(buf);
out_unmap:
munmap(daddr, file->size);
out_close:
(void) os_close(dfd);
return result;
}
/*
* pool_set_part_copy -- make a copy of the poolset part
*/
int
pool_set_part_copy(struct pool_set_part *dpart, struct pool_set_part *spart,
int overwrite)
{
LOG(3, "dpart %p spart %p", dpart, spart);
int result = 0;
os_stat_t stat_buf;
if (os_fstat(spart->fd, &stat_buf)) {
ERR("!util_stat");
return -1;
}
size_t smapped = 0;
void *saddr = pmem_map_file(spart->path, 0, 0, S_IREAD, &smapped, NULL);
if (!saddr)
return -1;
size_t dmapped = 0;
int is_pmem;
void *daddr;
int exists = util_file_exists(dpart->path);
if (exists < 0) {
result = -1;
goto out_sunmap;
}
if (exists) {
if (!overwrite) {
errno = EEXIST;
result = -1;
goto out_sunmap;
}
daddr = pmem_map_file(dpart->path, 0, 0, S_IWRITE, &dmapped,
&is_pmem);
} else {
errno = 0;
daddr = pmem_map_file(dpart->path, dpart->filesize,
PMEM_FILE_CREATE | PMEM_FILE_EXCL,
stat_buf.st_mode, &dmapped, &is_pmem);
}
if (!daddr) {
result = -1;
goto out_sunmap;
}
#ifdef DEBUG
/* provide extra logging in case of wrong dmapped/smapped value */
if (dmapped < smapped) {
LOG(1, "dmapped < smapped: dmapped = %lu, smapped = %lu",
dmapped, smapped);
ASSERT(0);
}
#endif
if (is_pmem) {
pmem_memcpy_persist(daddr, saddr, smapped);
} else {
memcpy(daddr, saddr, smapped);
pmem_msync(daddr, smapped);
}
pmem_unmap(daddr, dmapped);
out_sunmap:
pmem_unmap(saddr, smapped);
return result;
}
/*
* pool_memset -- memset pool part described by off and count
*/
int
pool_memset(struct pool_data *pool, uint64_t off, int c, size_t count)
{
int result = 0;
if (pool->params.type != POOL_TYPE_BTT)
memset((char *)off, 0, count);
else {
if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1)
return -1;
size_t zero_size = min(count, RW_BUFFERING_SIZE);
void *buf = malloc(zero_size);
if (!buf) {
ERR("!malloc");
return -1;
}
memset(buf, c, zero_size);
ssize_t nwrite = 0;
do {
zero_size = min(zero_size, count);
nwrite = pool_btt_write(pool, buf, zero_size);
if (nwrite < 0) {
result = -1;
break;
}
count -= (size_t)nwrite;
} while (count > 0);
free(buf);
}
return result;
}
/*
* pool_set_files_count -- get total number of parts of all replicas
*/
unsigned
pool_set_files_count(struct pool_set_file *file)
{
unsigned ret = 0;
unsigned nreplicas = file->poolset->nreplicas;
for (unsigned r = 0; r < nreplicas; r++) {
struct pool_replica *rep = file->poolset->replica[r];
ret += rep->nparts;
}
return ret;
}
/*
* pool_set_file_map_headers -- map headers of each pool set part file
*/
int
pool_set_file_map_headers(struct pool_set_file *file, int rdonly, int prv)
{
if (!file->poolset)
return -1;
for (unsigned r = 0; r < file->poolset->nreplicas; r++) {
struct pool_replica *rep = file->poolset->replica[r];
for (unsigned p = 0; p < rep->nparts; p++) {
struct pool_set_part *part = &rep->part[p];
if (util_map_hdr(part,
prv ? MAP_PRIVATE : MAP_SHARED, rdonly)) {
part->hdr = NULL;
goto err;
}
}
}
return 0;
err:
pool_set_file_unmap_headers(file);
return -1;
}
/*
* pool_set_file_unmap_headers -- unmap headers of each pool set part file
*/
void
pool_set_file_unmap_headers(struct pool_set_file *file)
{
if (!file->poolset)
return;
for (unsigned r = 0; r < file->poolset->nreplicas; r++) {
struct pool_replica *rep = file->poolset->replica[r];
for (unsigned p = 0; p < rep->nparts; p++) {
struct pool_set_part *part = &rep->part[p];
util_unmap_hdr(part);
}
}
}
/*
* pool_get_signature -- (internal) return signature of specified pool type
*/
static const char *
pool_get_signature(enum pool_type type)
{
switch (type) {
case POOL_TYPE_LOG:
return LOG_HDR_SIG;
case POOL_TYPE_BLK:
return BLK_HDR_SIG;
case POOL_TYPE_OBJ:
return OBJ_HDR_SIG;
default:
return NULL;
}
}
/*
* pool_hdr_default -- return default pool header values
*/
void
pool_hdr_default(enum pool_type type, struct pool_hdr *hdrp)
{
memset(hdrp, 0, sizeof(*hdrp));
const char *sig = pool_get_signature(type);
ASSERTne(sig, NULL);
memcpy(hdrp->signature, sig, POOL_HDR_SIG_LEN);
switch (type) {
case POOL_TYPE_LOG:
hdrp->major = LOG_FORMAT_MAJOR;
hdrp->features = log_format_feat_default;
break;
case POOL_TYPE_BLK:
hdrp->major = BLK_FORMAT_MAJOR;
hdrp->features = blk_format_feat_default;
break;
case POOL_TYPE_OBJ:
hdrp->major = OBJ_FORMAT_MAJOR;
hdrp->features = obj_format_feat_default;
break;
default:
break;
}
}
/*
* pool_hdr_get_type -- return pool type based on pool header data
*/
enum pool_type
pool_hdr_get_type(const struct pool_hdr *hdrp)
{
if (memcmp(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN) == 0)
return POOL_TYPE_LOG;
else if (memcmp(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN) == 0)
return POOL_TYPE_BLK;
else if (memcmp(hdrp->signature, OBJ_HDR_SIG, POOL_HDR_SIG_LEN) == 0)
return POOL_TYPE_OBJ;
else
return POOL_TYPE_UNKNOWN;
}
/*
* pool_get_pool_type_str -- return human-readable pool type string
*/
const char *
pool_get_pool_type_str(enum pool_type type)
{
switch (type) {
case POOL_TYPE_BTT:
return "btt";
case POOL_TYPE_LOG:
return "pmemlog";
case POOL_TYPE_BLK:
return "pmemblk";
case POOL_TYPE_OBJ:
return "pmemobj";
default:
return "unknown";
}
}
/*
* pool_set_type -- get pool type of a poolset
*/
enum pool_type
pool_set_type(struct pool_set *set)
{
struct pool_hdr hdr;
/* open the first part file to read the pool header values */
const struct pool_set_part *part = PART(REP(set, 0), 0);
if (util_file_pread(part->path, &hdr, sizeof(hdr), 0) !=
sizeof(hdr)) {
ERR("cannot read pool header from poolset");
return POOL_TYPE_UNKNOWN;
}
util_convert2h_hdr_nocheck(&hdr);
enum pool_type type = pool_hdr_get_type(&hdr);
return type;
}
/*
* pool_btt_info_valid -- check consistency of BTT Info header
*/
int
pool_btt_info_valid(struct btt_info *infop)
{
if (memcmp(infop->sig, BTTINFO_SIG, BTTINFO_SIG_LEN) != 0)
return 0;
return util_checksum(infop, sizeof(*infop), &infop->checksum, 0, 0);
}
/*
* pool_blk_get_first_valid_arena -- get first valid BTT Info in arena
*/
int
pool_blk_get_first_valid_arena(struct pool_data *pool, struct arena *arenap)
{
arenap->zeroed = true;
uint64_t offset = pool_get_first_valid_btt(pool, &arenap->btt_info,
2 * BTT_ALIGNMENT, &arenap->zeroed);
if (offset != 0) {
arenap->offset = offset;
arenap->valid = true;
return 1;
}
return 0;
}
/*
* pool_next_arena_offset -- get offset of next arena
*
* Calculated offset is theoretical. Function does not check if such arena can
* exist.
*/
uint64_t
pool_next_arena_offset(struct pool_data *pool, uint64_t offset)
{
uint64_t lastoff = (pool->set_file->size & ~(BTT_ALIGNMENT - 1));
uint64_t nextoff = min(offset + BTT_MAX_ARENA, lastoff);
return nextoff;
}
/*
* pool_get_first_valid_btt -- return offset to first valid BTT Info
*
* - Return offset to valid BTT Info header in pool file.
* - Start looking from given offset.
* - Convert BTT Info header to host endianness.
* - Return the BTT Info header by pointer.
* - If zeroed pointer provided would check if all checked BTT Info are zeroed
* which is useful for BLK pools
*/
uint64_t
pool_get_first_valid_btt(struct pool_data *pool, struct btt_info *infop,
uint64_t offset, bool *zeroed)
{
/* if we have valid arena get BTT Info header from it */
if (pool->narenas != 0) {
struct arena *arenap = TAILQ_FIRST(&pool->arenas);
memcpy(infop, &arenap->btt_info, sizeof(*infop));
return arenap->offset;
}
const size_t info_size = sizeof(*infop);
/* theoretical offsets to BTT Info header and backup */
uint64_t offsets[2] = {offset, 0};
while (offsets[0] < pool->set_file->size) {
/* calculate backup offset */
offsets[1] = pool_next_arena_offset(pool, offsets[0]) -
info_size;
/* check both offsets: header and backup */
for (int i = 0; i < 2; ++i) {
if (pool_read(pool, infop, info_size, offsets[i]))
continue;
/* check if all possible BTT Info are zeroed */
if (zeroed)
*zeroed &= util_is_zeroed((const void *)infop,
info_size);
/* check if read BTT Info is valid */
if (pool_btt_info_valid(infop)) {
btt_info_convert2h(infop);
return offsets[i];
}
}
/* jump to next arena */
offsets[0] += BTT_MAX_ARENA;
}
return 0;
}
/*
* pool_get_min_size -- return the minimum pool size of a pool of a given type
*/
size_t
pool_get_min_size(enum pool_type type)
{
switch (type) {
case POOL_TYPE_LOG:
return PMEMLOG_MIN_POOL;
case POOL_TYPE_BLK:
return PMEMBLK_MIN_POOL;
case POOL_TYPE_OBJ:
return PMEMOBJ_MIN_POOL;
default:
ERR("unknown type of a pool");
return SIZE_MAX;
}
}