/*
* Copyright 2014-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* mmap.c -- mmap utilities
*/
#include <errno.h>
#include <inttypes.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "file.h"
#include "queue.h"
#include "mmap.h"
#include "sys_util.h"
#include "os.h"
int Mmap_no_random;
void *Mmap_hint;
static os_rwlock_t Mmap_list_lock;
static SORTEDQ_HEAD(map_list_head, map_tracker) Mmap_list =
SORTEDQ_HEAD_INITIALIZER(Mmap_list);
/*
* util_mmap_init -- initialize the mmap utils
*
* This is called from the library initialization code.
*/
void
util_mmap_init(void)
{
LOG(3, NULL);
util_rwlock_init(&Mmap_list_lock);
/*
* For testing, allow overriding the default mmap() hint address.
* If hint address is defined, it also disables address randomization.
*/
char *e = os_getenv("PMEM_MMAP_HINT");
if (e) {
char *endp;
errno = 0;
unsigned long long val = strtoull(e, &endp, 16);
if (errno || endp == e) {
LOG(2, "Invalid PMEM_MMAP_HINT");
} else if (os_access(OS_MAPFILE, R_OK)) {
LOG(2, "No /proc, PMEM_MMAP_HINT ignored");
} else {
Mmap_hint = (void *)val;
Mmap_no_random = 1;
LOG(3, "PMEM_MMAP_HINT set to %p", Mmap_hint);
}
}
}
/*
* util_mmap_fini -- clean up the mmap utils
*
* This is called before process stop.
*/
void
util_mmap_fini(void)
{
LOG(3, NULL);
util_rwlock_destroy(&Mmap_list_lock);
}
/*
* util_map -- memory map a file
*
* This is just a convenience function that calls mmap() with the
* appropriate arguments and includes our trace points.
*/
void *
util_map(int fd, size_t len, int flags, int rdonly, size_t req_align,
int *map_sync)
{
LOG(3, "fd %d len %zu flags %d rdonly %d req_align %zu map_sync %p",
fd, len, flags, rdonly, req_align, map_sync);
void *base;
void *addr = util_map_hint(len, req_align);
if (addr == MAP_FAILED) {
ERR("cannot find a contiguous region of given size");
return NULL;
}
if (req_align)
ASSERTeq((uintptr_t)addr % req_align, 0);
int proto = rdonly ? PROT_READ : PROT_READ|PROT_WRITE;
base = util_map_sync(addr, len, proto, flags, fd, 0, map_sync);
if (base == MAP_FAILED) {
ERR("!mmap %zu bytes", len);
return NULL;
}
LOG(3, "mapped at %p", base);
return base;
}
/*
* util_unmap -- unmap a file
*
* This is just a convenience function that calls munmap() with the
* appropriate arguments and includes our trace points.
*/
int
util_unmap(void *addr, size_t len)
{
LOG(3, "addr %p len %zu", addr, len);
/*
* XXX Workaround for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=169608
*/
#ifdef __FreeBSD__
if (!IS_PAGE_ALIGNED((uintptr_t)addr)) {
errno = EINVAL;
ERR("!munmap");
return -1;
}
#endif
int retval = munmap(addr, len);
if (retval < 0)
ERR("!munmap");
return retval;
}
/*
* util_map_tmpfile -- reserve space in an unlinked file and memory-map it
*
* size must be multiple of page size.
*/
void *
util_map_tmpfile(const char *dir, size_t size, size_t req_align)
{
int oerrno;
if (((os_off_t)size) < 0) {
ERR("invalid size (%zu) for os_off_t", size);
errno = EFBIG;
return NULL;
}
int fd = util_tmpfile(dir, OS_DIR_SEP_STR "vmem.XXXXXX", O_EXCL);
if (fd == -1) {
LOG(2, "cannot create temporary file in dir %s", dir);
goto err;
}
if ((errno = os_posix_fallocate(fd, 0, (os_off_t)size)) != 0) {
ERR("!posix_fallocate");
goto err;
}
void *base;
if ((base = util_map(fd, size, MAP_SHARED,
0, req_align, NULL)) == NULL) {
LOG(2, "cannot mmap temporary file");
goto err;
}
(void) os_close(fd);
return base;
err:
oerrno = errno;
if (fd != -1)
(void) os_close(fd);
errno = oerrno;
return NULL;
}
/*
* util_range_ro -- set a memory range read-only
*/
int
util_range_ro(void *addr, size_t len)
{
LOG(3, "addr %p len %zu", addr, len);
uintptr_t uptr;
int retval;
/*
* mprotect requires addr to be a multiple of pagesize, so
* adjust addr and len to represent the full 4k chunks
* covering the given range.
*/
/* increase len by the amount we gain when we round addr down */
len += (uintptr_t)addr & (Pagesize - 1);
/* round addr down to page boundary */
uptr = (uintptr_t)addr & ~(Pagesize - 1);
if ((retval = mprotect((void *)uptr, len, PROT_READ)) < 0)
ERR("!mprotect: PROT_READ");
return retval;
}
/*
* util_range_rw -- set a memory range read-write
*/
int
util_range_rw(void *addr, size_t len)
{
LOG(3, "addr %p len %zu", addr, len);
uintptr_t uptr;
int retval;
/*
* mprotect requires addr to be a multiple of pagesize, so
* adjust addr and len to represent the full 4k chunks
* covering the given range.
*/
/* increase len by the amount we gain when we round addr down */
len += (uintptr_t)addr & (Pagesize - 1);
/* round addr down to page boundary */
uptr = (uintptr_t)addr & ~(Pagesize - 1);
if ((retval = mprotect((void *)uptr, len, PROT_READ|PROT_WRITE)) < 0)
ERR("!mprotect: PROT_READ|PROT_WRITE");
return retval;
}
/*
* util_range_none -- set a memory range for no access allowed
*/
int
util_range_none(void *addr, size_t len)
{
LOG(3, "addr %p len %zu", addr, len);
uintptr_t uptr;
int retval;
/*
* mprotect requires addr to be a multiple of pagesize, so
* adjust addr and len to represent the full 4k chunks
* covering the given range.
*/
/* increase len by the amount we gain when we round addr down */
len += (uintptr_t)addr & (Pagesize - 1);
/* round addr down to page boundary */
uptr = (uintptr_t)addr & ~(Pagesize - 1);
if ((retval = mprotect((void *)uptr, len, PROT_NONE)) < 0)
ERR("!mprotect: PROT_NONE");
return retval;
}
/*
* util_range_comparer -- (internal) compares the two mapping trackers
*/
static intptr_t
util_range_comparer(struct map_tracker *a, struct map_tracker *b)
{
return ((intptr_t)a->base_addr - (intptr_t)b->base_addr);
}
/*
* util_range_find_unlocked -- (internal) find the map tracker
* for given address range
*
* Returns the first entry at least partially overlapping given range.
* It's up to the caller to check whether the entry exactly matches the range,
* or if the range spans multiple entries.
*/
static struct map_tracker *
util_range_find_unlocked(uintptr_t addr, size_t len)
{
LOG(10, "addr 0x%016" PRIxPTR " len %zu", addr, len);
uintptr_t end = addr + len;
struct map_tracker *mt;
SORTEDQ_FOREACH(mt, &Mmap_list, entry) {
if (addr < mt->end_addr &&
(addr >= mt->base_addr || end > mt->base_addr))
goto out;
/* break if there is no chance to find matching entry */
if (addr < mt->base_addr)
break;
}
mt = NULL;
out:
return mt;
}
/*
* util_range_find -- find the map tracker for given address range
* the same as util_range_find_unlocked but locked
*/
struct map_tracker *
util_range_find(uintptr_t addr, size_t len)
{
LOG(10, "addr 0x%016" PRIxPTR " len %zu", addr, len);
util_rwlock_rdlock(&Mmap_list_lock);
struct map_tracker *mt = util_range_find_unlocked(addr, len);
util_rwlock_unlock(&Mmap_list_lock);
return mt;
}
/*
* util_range_register -- add a memory range into a map tracking list
*/
int
util_range_register(const void *addr, size_t len, const char *path,
enum pmem_map_type type)
{
LOG(3, "addr %p len %zu path %s type %d", addr, len, path, type);
/* check if not tracked already */
if (util_range_find((uintptr_t)addr, len) != NULL) {
ERR(
"duplicated persistent memory range; presumably unmapped with munmap() instead of pmem_unmap(): addr %p len %zu",
addr, len);
errno = ENOMEM;
return -1;
}
struct map_tracker *mt;
mt = Malloc(sizeof(struct map_tracker));
if (mt == NULL) {
ERR("!Malloc");
return -1;
}
mt->base_addr = (uintptr_t)addr;
mt->end_addr = mt->base_addr + len;
mt->type = type;
if (type == PMEM_DEV_DAX)
mt->region_id = util_ddax_region_find(path);
util_rwlock_wrlock(&Mmap_list_lock);
SORTEDQ_INSERT(&Mmap_list, mt, entry, struct map_tracker,
util_range_comparer);
util_rwlock_unlock(&Mmap_list_lock);
return 0;
}
/*
* util_range_split -- (internal) remove or split a map tracking entry
*/
static int
util_range_split(struct map_tracker *mt, const void *addrp, const void *endp)
{
LOG(3, "begin %p end %p", addrp, endp);
uintptr_t addr = (uintptr_t)addrp;
uintptr_t end = (uintptr_t)endp;
ASSERTne(mt, NULL);
if (addr == end || addr % Mmap_align != 0 || end % Mmap_align != 0) {
ERR(
"invalid munmap length, must be non-zero and page aligned");
return -1;
}
struct map_tracker *mtb = NULL;
struct map_tracker *mte = NULL;
/*
* 1) b e b e
* xxxxxxxxxxxxx => xxx.......xxxx - mtb+mte
* 2) b e b e
* xxxxxxxxxxxxx => xxxxxxx....... - mtb
* 3) b e b e
* xxxxxxxxxxxxx => ........xxxxxx - mte
* 4) b e b e
* xxxxxxxxxxxxx => .............. - <none>
*/
if (addr > mt->base_addr) {
/* case #1/2 */
/* new mapping at the beginning */
mtb = Malloc(sizeof(struct map_tracker));
if (mtb == NULL) {
ERR("!Malloc");
goto err;
}
mtb->base_addr = mt->base_addr;
mtb->end_addr = addr;
mtb->region_id = mt->region_id;
mtb->type = mt->type;
}
if (end < mt->end_addr) {
/* case #1/3 */
/* new mapping at the end */
mte = Malloc(sizeof(struct map_tracker));
if (mte == NULL) {
ERR("!Malloc");
goto err;
}
mte->base_addr = end;
mte->end_addr = mt->end_addr;
mte->region_id = mt->region_id;
mte->type = mt->type;
}
SORTEDQ_REMOVE(&Mmap_list, mt, entry);
if (mtb) {
SORTEDQ_INSERT(&Mmap_list, mtb, entry,
struct map_tracker, util_range_comparer);
}
if (mte) {
SORTEDQ_INSERT(&Mmap_list, mte, entry,
struct map_tracker, util_range_comparer);
}
/* free entry for the original mapping */
Free(mt);
return 0;
err:
Free(mtb);
Free(mte);
return -1;
}
/*
* util_range_unregister -- remove a memory range
* from map tracking list
*
* Remove the region between [begin,end]. If it's in a middle of the existing
* mapping, it results in two new map trackers.
*/
int
util_range_unregister(const void *addr, size_t len)
{
LOG(3, "addr %p len %zu", addr, len);
int ret = 0;
util_rwlock_wrlock(&Mmap_list_lock);
/*
* Changes in the map tracker list must match the underlying behavior.
*
* $ man 2 mmap:
* The address addr must be a multiple of the page size (but length
* need not be). All pages containing a part of the indicated range
* are unmapped.
*
* This means that we must align the length to the page size.
*/
len = PAGE_ALIGNED_UP_SIZE(len);
void *end = (char *)addr + len;
/* XXX optimize the loop */
struct map_tracker *mt;
while ((mt = util_range_find_unlocked((uintptr_t)addr, len)) != NULL) {
if (util_range_split(mt, addr, end) != 0) {
ret = -1;
break;
}
}
util_rwlock_unlock(&Mmap_list_lock);
return ret;
}
/*
* util_range_is_pmem -- return true if entire range
* is persistent memory
*/
int
util_range_is_pmem(const void *addrp, size_t len)
{
LOG(10, "addr %p len %zu", addrp, len);
uintptr_t addr = (uintptr_t)addrp;
int retval = 1;
util_rwlock_rdlock(&Mmap_list_lock);
do {
struct map_tracker *mt = util_range_find(addr, len);
if (mt == NULL) {
LOG(4, "address not found 0x%016" PRIxPTR, addr);
retval = 0;
break;
}
LOG(10, "range found - begin 0x%016" PRIxPTR
" end 0x%016" PRIxPTR,
mt->base_addr, mt->end_addr);
if (mt->base_addr > addr) {
LOG(10, "base address doesn't match: "
"0x%" PRIxPTR " > 0x%" PRIxPTR,
mt->base_addr, addr);
retval = 0;
break;
}
uintptr_t map_len = mt->end_addr - addr;
if (map_len > len)
map_len = len;
len -= map_len;
addr += map_len;
} while (len > 0);
util_rwlock_unlock(&Mmap_list_lock);
return retval;
}