/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "uct_iface.h"
#include "uct_md.h"
#include <ucs/arch/cpu.h>
#include <ucs/profile/profile.h>
#include <ucs/sys/math.h>
/**
 * Header written at the very beginning of every memory-pool chunk allocated
 * by uct_iface_mp_chunk_alloc(). The pool's usable area starts right after it.
 * uct_iface_mp_chunk_release() reads it back to rebuild the allocation
 * descriptor needed to free the chunk.
 */
typedef struct {
uct_alloc_method_t method; /* Allocation method that produced the chunk */
size_t length; /* Full allocated length, including this header */
uct_mem_h memh; /* Memory handle of the chunk, handed to the object init callback */
} uct_iface_mp_chunk_hdr_t;
/**
 * Per-pool private data, kept in the memory pool's private area (see
 * uct_iface_mp_priv()) and filled in by uct_iface_mpool_init().
 */
typedef struct {
uct_base_iface_t *iface; /* Interface the pool allocates memory for */
uct_iface_mpool_init_obj_cb_t init_obj_cb; /* Optional per-object init callback, may be NULL */
} uct_iface_mp_priv_t;
/*
 * Printable names of the allocation methods, indexed by uct_alloc_method_t.
 * The UCT_ALLOC_METHOD_LAST slot holds NULL, so it must not be passed to a
 * "%s" format.
 */
const char *uct_alloc_method_names[] = {
[UCT_ALLOC_METHOD_THP] = "thp",
[UCT_ALLOC_METHOD_MD] = "md",
[UCT_ALLOC_METHOD_HEAP] = "heap",
[UCT_ALLOC_METHOD_MMAP] = "mmap",
[UCT_ALLOC_METHOD_HUGE] = "huge",
[UCT_ALLOC_METHOD_LAST] = NULL
};
/*
 * Translate UCT memory flags into the matching mmap() flags.
 *
 * UCT_MD_MEM_FLAG_FIXED maps to MAP_FIXED. UCT_MD_MEM_FLAG_NONBLOCK maps to
 * MAP_NONBLOCK, but only on platforms that define MAP_NONBLOCK; elsewhere it
 * is silently ignored. All other UCT flags are ignored.
 */
static inline int uct_mem_get_mmap_flags(unsigned uct_mmap_flags)
{
    int result = (uct_mmap_flags & UCT_MD_MEM_FLAG_FIXED) ? MAP_FIXED : 0;

#ifdef MAP_NONBLOCK
    if (uct_mmap_flags & UCT_MD_MEM_FLAG_NONBLOCK) {
        result |= MAP_NONBLOCK;
    }
#endif

    return result;
}
/**
 * Allocate memory by trying the given allocation methods one after another.
 *
 * The methods are attempted in array order and the first one that succeeds
 * is used. A method that is unsupported on this platform, incompatible with
 * the requested flags, or that fails to allocate is skipped, with one
 * exception: a memory domain which advertises allocation support but fails
 * to allocate (or cannot be queried) aborts the whole call with that error.
 *
 * @param [in]  addr         Requested address. Required (non-NULL and page
 *                           aligned) when UCT_MD_MEM_FLAG_FIXED is set. It is
 *                           passed as the initial address to the MD and mmap
 *                           methods, and to the hugetlb method only when
 *                           UCT_MD_MEM_FLAG_FIXED is set.
 * @param [in]  min_length   Minimal number of bytes to allocate; must not be 0.
 *                           The actual length may be larger.
 * @param [in]  flags        UCT_MD_MEM_* flags for the allocation.
 * @param [in]  methods      Allocation methods to try, in order of priority.
 * @param [in]  num_methods  Number of entries in @a methods; must not be 0.
 * @param [in]  mds          Memory domains to try for UCT_ALLOC_METHOD_MD.
 * @param [in]  num_mds      Number of entries in @a mds.
 * @param [in]  alloc_name   Name of the allocation, forwarded to the MD and
 *                           SysV allocators and used in log messages.
 * @param [out] mem          On success, filled with the address, actual
 *                           length, method, memory type, and - for the MD
 *                           method only - the memory domain and handle. For
 *                           every other method md is NULL and memh is
 *                           UCT_MEM_HANDLE_NULL.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM on bad arguments or an
 *         unknown allocation method, UCS_ERR_NO_MEMORY if none of the methods
 *         produced memory, or the error returned by uct_md_query() /
 *         uct_md_mem_alloc().
 */
ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
                           uct_alloc_method_t *methods, unsigned num_methods,
                           uct_md_h *mds, unsigned num_mds,
                           const char *alloc_name, uct_allocated_memory_t *mem)
{
    uct_alloc_method_t *method;
    uct_md_attr_t md_attr;
    ucs_status_t status;
    size_t alloc_length;
    unsigned md_index;
    uct_mem_h memh;
    uct_md_h md;
    void *address;
    int ret;
#ifdef SHM_HUGETLB
    int shmid;
#endif
#ifdef MADV_HUGEPAGE
    ssize_t huge_page_size;
#endif

    if (min_length == 0) {
        ucs_error("Allocation length cannot be 0");
        return UCS_ERR_INVALID_PARAM;
    }

    if (num_methods == 0) {
        ucs_error("No allocation methods provided");
        return UCS_ERR_INVALID_PARAM;
    }

    if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
        (!addr || ((uintptr_t)addr % ucs_get_page_size()))) {
        ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address");
        return UCS_ERR_INVALID_PARAM;
    }

    for (method = methods; method < methods + num_methods; ++method) {
        /* Validate the method before it is used as an index into the names
         * table below; otherwise a bogus value would be read out of bounds
         * (or as NULL) by the trace message before being rejected. */
        if ((unsigned)*method >= (unsigned)UCT_ALLOC_METHOD_LAST) {
            ucs_error("Invalid allocation method %d", *method);
            return UCS_ERR_INVALID_PARAM;
        }

        ucs_trace("trying allocation method %s", uct_alloc_method_names[*method]);

        switch (*method) {
        case UCT_ALLOC_METHOD_MD:
            /* Allocate with one of the specified memory domains */
            for (md_index = 0; md_index < num_mds; ++md_index) {
                md = mds[md_index];
                status = uct_md_query(md, &md_attr);
                if (status != UCS_OK) {
                    ucs_error("Failed to query MD");
                    return status;
                }

                /* Check if MD supports allocation */
                if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) {
                    continue;
                }

                /* Check if MD supports allocation with fixed address
                 * if it's requested */
                if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
                    !(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) {
                    continue;
                }

                /* Allocate memory using the MD.
                 * If the allocation fails, it's considered an error and we don't
                 * fall-back, because this MD already exposed support for memory
                 * allocation.
                 */
                alloc_length = min_length;
                address = addr;
                status = uct_md_mem_alloc(md, &alloc_length, &address, flags,
                                          alloc_name, &memh);
                if (status != UCS_OK) {
                    ucs_error("failed to allocate %zu bytes using md %s for %s: %s",
                              alloc_length, md->component->name,
                              alloc_name, ucs_status_string(status));
                    return status;
                }

                ucs_assert(memh != UCT_MEM_HANDLE_NULL);
                mem->md = md;
                mem->mem_type = md_attr.cap.access_mem_type;
                mem->memh = memh;
                goto allocated;
            }
            break;

        case UCT_ALLOC_METHOD_THP:
#ifdef MADV_HUGEPAGE
            /* Fixed option is not supported for thp allocation */
            if (flags & UCT_MD_MEM_FLAG_FIXED) {
                break;
            }

            if (!ucs_is_thp_enabled()) {
                break;
            }

            huge_page_size = ucs_get_huge_page_size();
            if (huge_page_size <= 0) {
                break;
            }

            /* Skip THP if rounding up to a whole number of huge pages would
             * at least double the size of the allocation */
            alloc_length = ucs_align_up(min_length, huge_page_size);
            if (alloc_length >= 2 * min_length) {
                break;
            }

            ret = ucs_posix_memalign(&address, huge_page_size, alloc_length
                                     UCS_MEMTRACK_VAL);
            if (ret != 0) {
                ucs_trace("failed to allocate %zu bytes using THP: %m", alloc_length);
            } else {
                ret = madvise(address, alloc_length, MADV_HUGEPAGE);
                if (ret != 0) {
                    ucs_trace("madvise(address=%p, length=%zu, HUGEPAGE) "
                              "returned %d: %m", address, alloc_length, ret);
                    /* Release the buffer and fall back to the next method */
                    ucs_free(address);
                } else {
                    goto allocated_without_md;
                }
            }
#endif
            break;

        case UCT_ALLOC_METHOD_HEAP:
            /* Allocate aligned memory using libc allocator */

            /* Fixed option is not supported for heap allocation */
            if (flags & UCT_MD_MEM_FLAG_FIXED) {
                break;
            }

            alloc_length = min_length;
            ret = ucs_posix_memalign(&address, UCS_SYS_CACHE_LINE_SIZE,
                                     alloc_length UCS_MEMTRACK_VAL);
            if (ret == 0) {
                goto allocated_without_md;
            }

            ucs_trace("failed to allocate %zu bytes from the heap", alloc_length);
            break;

        case UCT_ALLOC_METHOD_MMAP:
            /* Request memory from operating system using mmap() */
            alloc_length = min_length;
            address = addr;
            status = ucs_mmap_alloc(&alloc_length, &address,
                                    uct_mem_get_mmap_flags(flags)
                                    UCS_MEMTRACK_VAL);
            if (status== UCS_OK) {
                goto allocated_without_md;
            }

            ucs_trace("failed to mmap %zu bytes: %s", min_length,
                      ucs_status_string(status));
            break;

        case UCT_ALLOC_METHOD_HUGE:
#ifdef SHM_HUGETLB
            /* Allocate huge pages */
            alloc_length = min_length;
            address = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL;
            status = ucs_sysv_alloc(&alloc_length, min_length * 2, &address,
                                    SHM_HUGETLB, alloc_name, &shmid);
            if (status == UCS_OK) {
                goto allocated_without_md;
            }
#else
            /* Huge pages are not available on this platform */
            status = UCS_ERR_NO_MEMORY;
#endif

            ucs_trace("failed to allocate %zu bytes from hugetlb: %s",
                      min_length, ucs_status_string(status));
            break;

        default:
            ucs_error("Invalid allocation method %d", *method);
            return UCS_ERR_INVALID_PARAM;
        }
    }

    ucs_debug("Could not allocate memory with any of the provided methods");
    return UCS_ERR_NO_MEMORY;

allocated_without_md:
    /* Memory obtained without a memory domain is plain, unregistered host
     * memory */
    mem->md = NULL;
    mem->mem_type = UCS_MEMORY_TYPE_HOST;
    mem->memh = UCT_MEM_HANDLE_NULL;
allocated:
    ucs_trace("allocated %zu bytes at %p using %s", alloc_length, address,
              (mem->md == NULL) ? uct_alloc_method_names[*method]
                                : mem->md->component->name);
    mem->address = address;
    mem->length = alloc_length;
    mem->method = *method;
    return UCS_OK;
}
/**
 * Release memory described by @a mem, using the release routine that matches
 * the method it was allocated with.
 *
 * @param [in] mem  Allocation descriptor; its method field selects how the
 *                  memory is released.
 *
 * @return Status of the underlying release routine. The THP and heap methods
 *         always report UCS_OK. UCS_ERR_INVALID_PARAM is returned (after a
 *         warning) if the method is not recognized.
 */
ucs_status_t uct_mem_free(const uct_allocated_memory_t *mem)
{
    ucs_status_t result;

    switch (mem->method) {
    case UCT_ALLOC_METHOD_MD:
        result = uct_md_mem_free(mem->md, mem->memh);
        break;

    case UCT_ALLOC_METHOD_THP:
    case UCT_ALLOC_METHOD_HEAP:
        /* Both methods are released with ucs_free() */
        ucs_free(mem->address);
        result = UCS_OK;
        break;

    case UCT_ALLOC_METHOD_MMAP:
        result = ucs_mmap_free(mem->address, mem->length);
        break;

    case UCT_ALLOC_METHOD_HUGE:
        result = ucs_sysv_free(mem->address);
        break;

    default:
        ucs_warn("Invalid memory allocation method: %d", mem->method);
        result = UCS_ERR_INVALID_PARAM;
        break;
    }

    return result;
}
/**
 * Allocate memory for an interface and associate it with the interface's
 * memory domain.
 *
 * The memory is allocated with the interface's configured allocation methods
 * and its single memory domain. If it was not allocated by the memory domain
 * itself, it is registered with that domain, provided the domain supports
 * registration of the resulting memory type. Otherwise the memory handle is
 * set to UCT_MEM_HANDLE_NULL and the call still succeeds.
 *
 * @param [in]  tl_iface  Interface to allocate the memory for.
 * @param [in]  length    Minimal number of bytes to allocate.
 * @param [in]  flags     Flags passed to uct_md_mem_reg(). The allocation
 *                        itself always uses UCT_MD_MEM_ACCESS_ALL.
 * @param [in]  name      Name of the allocation.
 * @param [out] mem       Filled with the allocation descriptor on success.
 *
 * @return UCS_OK on success, otherwise the error from allocation, memory
 *         domain query or registration. The memory is released before a
 *         query or registration error is returned.
 */
ucs_status_t uct_iface_mem_alloc(uct_iface_h tl_iface, size_t length, unsigned flags,
                                 const char *name, uct_allocated_memory_t *mem)
{
    uct_base_iface_t *base_iface = ucs_derived_of(tl_iface, uct_base_iface_t);
    uct_md_attr_t attr;
    ucs_status_t rc;
    int can_register;

    rc = uct_mem_alloc(NULL, length, UCT_MD_MEM_ACCESS_ALL,
                       base_iface->config.alloc_methods,
                       base_iface->config.num_alloc_methods, &base_iface->md, 1,
                       name, mem);
    if (rc != UCS_OK) {
        return rc;
    }

    /* Memory allocated by the MD needs no further registration */
    if (mem->method == UCT_ALLOC_METHOD_MD) {
        return UCS_OK;
    }

    rc = uct_md_query(base_iface->md, &attr);
    if (rc != UCS_OK) {
        uct_mem_free(mem);
        return rc;
    }

    can_register = (attr.cap.flags & UCT_MD_FLAG_REG) &&
                   (attr.cap.reg_mem_types & UCS_BIT(mem->mem_type));
    if (!can_register) {
        /* The MD cannot register this memory; hand it out without a handle */
        mem->memh = UCT_MEM_HANDLE_NULL;
    } else {
        rc = uct_md_mem_reg(base_iface->md, mem->address, mem->length, flags,
                            &mem->memh);
        if (rc != UCS_OK) {
            uct_mem_free(mem);
            return rc;
        }

        ucs_assert(mem->memh != UCT_MEM_HANDLE_NULL);
    }

    mem->md = base_iface->md;
    return UCS_OK;
}
/**
 * Release memory obtained from uct_iface_mem_alloc().
 *
 * Memory that was not allocated by the memory domain but carries a valid
 * memory handle is deregistered first; the result of the deregistration is
 * ignored. The memory itself is then freed with uct_mem_free().
 *
 * @param [in] mem  Allocation descriptor to release.
 */
void uct_iface_mem_free(const uct_allocated_memory_t *mem)
{
    int needs_dereg = (mem->method != UCT_ALLOC_METHOD_MD) &&
                      (mem->memh != UCT_MEM_HANDLE_NULL);

    if (needs_dereg) {
        (void)uct_md_mem_dereg(mem->md, mem->memh);
    }

    uct_mem_free(mem);
}
/* Return the memory pool's private area, viewed as the iface-specific
 * private data structure. */
static inline uct_iface_mp_priv_t* uct_iface_mp_priv(ucs_mpool_t *mp)
{
    uct_iface_mp_priv_t *priv = (uct_iface_mp_priv_t*)ucs_mpool_priv(mp);

    return priv;
}
/*
 * Memory pool callback: allocate one chunk for the pool.
 *
 * Allocates the requested size plus room for a chunk header through
 * uct_iface_mem_alloc(), records the allocation details in the header at the
 * start of the memory, and hands the area right after the header to the pool.
 * On success *size_p is updated to the usable size, which may be larger than
 * requested. On failure the allocation status is returned and the outputs
 * are left untouched.
 */
UCS_PROFILE_FUNC(ucs_status_t, uct_iface_mp_chunk_alloc, (mp, size_p, chunk_p),
                 ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
{
    uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface;
    uct_iface_mp_chunk_hdr_t *chunk_hdr;
    uct_allocated_memory_t chunk_mem;
    ucs_status_t rc;
    size_t total_length;

    /* Reserve space for the header in front of the pool's data */
    total_length = sizeof(*chunk_hdr) + *size_p;
    rc = uct_iface_mem_alloc(&iface->super, total_length,
                             UCT_MD_MEM_ACCESS_ALL | UCT_MD_MEM_FLAG_LOCK,
                             ucs_mpool_name(mp), &chunk_mem);
    if (rc != UCS_OK) {
        return rc;
    }

    ucs_assert(chunk_mem.memh != UCT_MEM_HANDLE_NULL);
    ucs_assert(chunk_mem.md == iface->md);

    /* Remember how the chunk was allocated, so it can be released later */
    chunk_hdr = chunk_mem.address;
    chunk_hdr->memh = chunk_mem.memh;
    chunk_hdr->length = chunk_mem.length;
    chunk_hdr->method = chunk_mem.method;

    *chunk_p = &chunk_hdr[1];
    *size_p = chunk_mem.length - sizeof(*chunk_hdr);
    return UCS_OK;
}
/*
 * Memory pool callback: release a chunk produced by uct_iface_mp_chunk_alloc().
 *
 * Rebuilds the allocation descriptor from the header stored right before the
 * chunk and from the pool's interface, then frees it with
 * uct_iface_mem_free().
 */
UCS_PROFILE_FUNC_VOID(uct_iface_mp_chunk_release, (mp, chunk),
                      ucs_mpool_t *mp, void *chunk)
{
    /* The header sits immediately in front of the area given to the pool */
    uct_iface_mp_chunk_hdr_t *chunk_hdr = (uct_iface_mp_chunk_hdr_t*)chunk - 1;
    uct_allocated_memory_t chunk_mem;

    chunk_mem.md = uct_iface_mp_priv(mp)->iface->md;
    chunk_mem.address = chunk_hdr;
    chunk_mem.length = chunk_hdr->length;
    chunk_mem.method = chunk_hdr->method;
    chunk_mem.memh = chunk_hdr->memh;

    uct_iface_mem_free(&chunk_mem);
}
/*
 * Memory pool callback: initialize a newly created pool object.
 *
 * If the pool was created with an object init callback, invoke it with the
 * pool's interface, the object, and the memory handle stored in the header
 * of the chunk the object belongs to. Without a callback this does nothing.
 */
static void uct_iface_mp_obj_init(ucs_mpool_t *mp, void *obj, void *chunk)
{
    uct_iface_mp_priv_t *priv = uct_iface_mp_priv(mp);
    uct_iface_mp_chunk_hdr_t *chunk_hdr;

    if (priv->init_obj_cb == NULL) {
        return;
    }

    /* The chunk header is located right before the chunk itself */
    chunk_hdr = (uct_iface_mp_chunk_hdr_t*)chunk - 1;
    priv->init_obj_cb(&priv->iface->super, obj, chunk_hdr->memh);
}
/*
 * Memory pool operations used by every pool created through
 * uct_iface_mpool_init(): chunks are allocated and released through the
 * interface, objects get the optional user init callback, and no per-object
 * cleanup is performed.
 */
static ucs_mpool_ops_t uct_iface_mpool_ops = {
.chunk_alloc = uct_iface_mp_chunk_alloc,
.chunk_release = uct_iface_mp_chunk_release,
.obj_init = uct_iface_mp_obj_init,
.obj_cleanup = NULL
};
/**
 * Create a memory pool whose chunks are allocated through an interface.
 *
 * @param [in]  iface         Interface used to allocate the pool's chunks.
 * @param [out] mp            Memory pool to initialize.
 * @param [in]  elem_size     Element size, forwarded to ucs_mpool_init().
 * @param [in]  align_offset  Alignment offset, forwarded to ucs_mpool_init().
 * @param [in]  alignment     Alignment, forwarded to ucs_mpool_init().
 * @param [in]  config        Pool configuration. A non-zero bufs_grow sets the
 *                            number of elements per chunk; max_bufs is
 *                            forwarded to ucs_mpool_init().
 * @param [in]  grow          Number of elements per chunk to use when
 *                            config->bufs_grow is 0.
 * @param [in]  init_obj_cb   Optional callback invoked for every new object;
 *                            may be NULL.
 * @param [in]  name          Name of the memory pool.
 *
 * @return UCS_OK on success, or the error returned by ucs_mpool_init().
 */
ucs_status_t uct_iface_mpool_init(uct_base_iface_t *iface, ucs_mpool_t *mp,
                                  size_t elem_size, size_t align_offset, size_t alignment,
                                  const uct_iface_mpool_config_t *config, unsigned grow,
                                  uct_iface_mpool_init_obj_cb_t init_obj_cb,
                                  const char *name)
{
    uct_iface_mp_priv_t *priv;
    unsigned chunk_elems;
    ucs_status_t rc;

    /* The configured growth value, when set, overrides the caller's default */
    chunk_elems = grow;
    if (config->bufs_grow != 0) {
        chunk_elems = config->bufs_grow;
    }

    rc = ucs_mpool_init(mp, sizeof(uct_iface_mp_priv_t),
                        elem_size, align_offset, alignment,
                        chunk_elems, config->max_bufs,
                        &uct_iface_mpool_ops, name);
    if (rc != UCS_OK) {
        return rc;
    }

    /* The private area only exists after the pool has been initialized */
    priv = uct_iface_mp_priv(mp);
    priv->iface = iface;
    priv->init_obj_cb = init_obj_cb;
    return UCS_OK;
}