/** * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. * * See file LICENSE for terms. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "uct_iface.h" #include "uct_md.h" #include #include #include typedef struct { uct_alloc_method_t method; size_t length; uct_mem_h memh; } uct_iface_mp_chunk_hdr_t; typedef struct { uct_base_iface_t *iface; uct_iface_mpool_init_obj_cb_t init_obj_cb; } uct_iface_mp_priv_t; const char *uct_alloc_method_names[] = { [UCT_ALLOC_METHOD_THP] = "thp", [UCT_ALLOC_METHOD_MD] = "md", [UCT_ALLOC_METHOD_HEAP] = "heap", [UCT_ALLOC_METHOD_MMAP] = "mmap", [UCT_ALLOC_METHOD_HUGE] = "huge", [UCT_ALLOC_METHOD_LAST] = NULL }; static inline int uct_mem_get_mmap_flags(unsigned uct_mmap_flags) { int mm_flags = 0; #ifdef MAP_NONBLOCK if (uct_mmap_flags & UCT_MD_MEM_FLAG_NONBLOCK) { mm_flags |= MAP_NONBLOCK; } #endif if (uct_mmap_flags & UCT_MD_MEM_FLAG_FIXED) { mm_flags |= MAP_FIXED; } return mm_flags; } ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags, uct_alloc_method_t *methods, unsigned num_methods, uct_md_h *mds, unsigned num_mds, const char *alloc_name, uct_allocated_memory_t *mem) { uct_alloc_method_t *method; uct_md_attr_t md_attr; ucs_status_t status; size_t alloc_length; unsigned md_index; uct_mem_h memh; uct_md_h md; void *address; int ret; #ifdef SHM_HUGETLB int shmid; #endif #ifdef MADV_HUGEPAGE ssize_t huge_page_size; #endif if (min_length == 0) { ucs_error("Allocation length cannot be 0"); return UCS_ERR_INVALID_PARAM; } if (num_methods == 0) { ucs_error("No allocation methods provided"); return UCS_ERR_INVALID_PARAM; } if ((flags & UCT_MD_MEM_FLAG_FIXED) && (!addr || ((uintptr_t)addr % ucs_get_page_size()))) { ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address"); return UCS_ERR_INVALID_PARAM; } for (method = methods; method < methods + num_methods; ++method) { ucs_trace("trying allocation method %s", uct_alloc_method_names[*method]); switch (*method) { case UCT_ALLOC_METHOD_MD: /* Allocate with one of the specified memory domains */ for (md_index = 0; md_index < num_mds; ++md_index) { md = mds[md_index]; status = uct_md_query(md, &md_attr); if (status != UCS_OK) { ucs_error("Failed to query MD"); return status; } /* Check if MD supports allocation */ if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) { continue; } /* Check if MD supports allocation with fixed address * if it's requested */ if ((flags & UCT_MD_MEM_FLAG_FIXED) && !(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) { continue; } /* Allocate memory using the MD. * If the allocation fails, it's considered an error and we don't * fall-back, because this MD already exposed support for memory * allocation. */ alloc_length = min_length; address = addr; status = uct_md_mem_alloc(md, &alloc_length, &address, flags, alloc_name, &memh); if (status != UCS_OK) { ucs_error("failed to allocate %zu bytes using md %s for %s: %s", alloc_length, md->component->name, alloc_name, ucs_status_string(status)); return status; } ucs_assert(memh != UCT_MEM_HANDLE_NULL); mem->md = md; mem->mem_type = md_attr.cap.access_mem_type; mem->memh = memh; goto allocated; } break; case UCT_ALLOC_METHOD_THP: #ifdef MADV_HUGEPAGE /* Fixed option is not supported for thp allocation*/ if (flags & UCT_MD_MEM_FLAG_FIXED) { break; } if (!ucs_is_thp_enabled()) { break; } huge_page_size = ucs_get_huge_page_size(); if (huge_page_size <= 0) { break; } alloc_length = ucs_align_up(min_length, huge_page_size); if (alloc_length >= 2 * min_length) { break; } ret = ucs_posix_memalign(&address, huge_page_size, alloc_length UCS_MEMTRACK_VAL); if (ret != 0) { ucs_trace("failed to allocate %zu bytes using THP: %m", alloc_length); } else { ret = madvise(address, alloc_length, MADV_HUGEPAGE); if (ret != 0) { ucs_trace("madvise(address=%p, length=%zu, HUGEPAGE) " "returned %d: %m", address, alloc_length, ret); ucs_free(address); } else { goto allocated_without_md; } } #endif break; case UCT_ALLOC_METHOD_HEAP: /* Allocate aligned memory using libc allocator */ /* Fixed option is not supported for heap allocation*/ if (flags & UCT_MD_MEM_FLAG_FIXED) { break; } alloc_length = min_length; ret = ucs_posix_memalign(&address, UCS_SYS_CACHE_LINE_SIZE, alloc_length UCS_MEMTRACK_VAL); if (ret == 0) { goto allocated_without_md; } ucs_trace("failed to allocate %zu bytes from the heap", alloc_length); break; case UCT_ALLOC_METHOD_MMAP: /* Request memory from operating system using mmap() */ alloc_length = min_length; address = addr; status = ucs_mmap_alloc(&alloc_length, &address, uct_mem_get_mmap_flags(flags) UCS_MEMTRACK_VAL); if (status== UCS_OK) { goto allocated_without_md; } ucs_trace("failed to mmap %zu bytes: %s", min_length, ucs_status_string(status)); break; case UCT_ALLOC_METHOD_HUGE: #ifdef SHM_HUGETLB /* Allocate huge pages */ alloc_length = min_length; address = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL; status = ucs_sysv_alloc(&alloc_length, min_length * 2, &address, SHM_HUGETLB, alloc_name, &shmid); if (status == UCS_OK) { goto allocated_without_md; } #else status = UCS_ERR_NO_MEMORY; #endif ucs_trace("failed to allocate %zu bytes from hugetlb: %s", min_length, ucs_status_string(status)); break; default: ucs_error("Invalid allocation method %d", *method); return UCS_ERR_INVALID_PARAM; } } ucs_debug("Could not allocate memory with any of the provided methods"); return UCS_ERR_NO_MEMORY; allocated_without_md: mem->md = NULL; mem->mem_type = UCS_MEMORY_TYPE_HOST; mem->memh = UCT_MEM_HANDLE_NULL; allocated: ucs_trace("allocated %zu bytes at %p using %s", alloc_length, address, (mem->md == NULL) ? uct_alloc_method_names[*method] : mem->md->component->name); mem->address = address; mem->length = alloc_length; mem->method = *method; return UCS_OK; } ucs_status_t uct_mem_free(const uct_allocated_memory_t *mem) { switch (mem->method) { case UCT_ALLOC_METHOD_MD: return uct_md_mem_free(mem->md, mem->memh); case UCT_ALLOC_METHOD_THP: case UCT_ALLOC_METHOD_HEAP: ucs_free(mem->address); return UCS_OK; case UCT_ALLOC_METHOD_MMAP: return ucs_mmap_free(mem->address, mem->length); case UCT_ALLOC_METHOD_HUGE: return ucs_sysv_free(mem->address); default: ucs_warn("Invalid memory allocation method: %d", mem->method); return UCS_ERR_INVALID_PARAM; } } ucs_status_t uct_iface_mem_alloc(uct_iface_h tl_iface, size_t length, unsigned flags, const char *name, uct_allocated_memory_t *mem) { uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); uct_md_attr_t md_attr; ucs_status_t status; status = uct_mem_alloc(NULL, length, UCT_MD_MEM_ACCESS_ALL, iface->config.alloc_methods, iface->config.num_alloc_methods, &iface->md, 1, name, mem); if (status != UCS_OK) { goto err; } /* If the memory was not allocated using MD, register it */ if (mem->method != UCT_ALLOC_METHOD_MD) { status = uct_md_query(iface->md, &md_attr); if (status != UCS_OK) { goto err_free; } /* If MD does not support registration, allow only the MD method */ if ((md_attr.cap.flags & UCT_MD_FLAG_REG) && (md_attr.cap.reg_mem_types & UCS_BIT(mem->mem_type))) { status = uct_md_mem_reg(iface->md, mem->address, mem->length, flags, &mem->memh); if (status != UCS_OK) { goto err_free; } ucs_assert(mem->memh != UCT_MEM_HANDLE_NULL); } else { mem->memh = UCT_MEM_HANDLE_NULL; } mem->md = iface->md; } return UCS_OK; err_free: uct_mem_free(mem); err: return status; } void uct_iface_mem_free(const uct_allocated_memory_t *mem) { if ((mem->method != UCT_ALLOC_METHOD_MD) && (mem->memh != UCT_MEM_HANDLE_NULL)) { (void)uct_md_mem_dereg(mem->md, mem->memh); } uct_mem_free(mem); } static inline uct_iface_mp_priv_t* uct_iface_mp_priv(ucs_mpool_t *mp) { return (uct_iface_mp_priv_t*)ucs_mpool_priv(mp); } UCS_PROFILE_FUNC(ucs_status_t, uct_iface_mp_chunk_alloc, (mp, size_p, chunk_p), ucs_mpool_t *mp, size_t *size_p, void **chunk_p) { uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; uct_iface_mp_chunk_hdr_t *hdr; uct_allocated_memory_t mem; ucs_status_t status; size_t length; length = sizeof(*hdr) + *size_p; status = uct_iface_mem_alloc(&iface->super, length, UCT_MD_MEM_ACCESS_ALL | UCT_MD_MEM_FLAG_LOCK, ucs_mpool_name(mp), &mem); if (status != UCS_OK) { return status; } ucs_assert(mem.memh != UCT_MEM_HANDLE_NULL); ucs_assert(mem.md == iface->md); hdr = mem.address; hdr->method = mem.method; hdr->length = mem.length; hdr->memh = mem.memh; *size_p = mem.length - sizeof(*hdr); *chunk_p = hdr + 1; return UCS_OK; } UCS_PROFILE_FUNC_VOID(uct_iface_mp_chunk_release, (mp, chunk), ucs_mpool_t *mp, void *chunk) { uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; uct_iface_mp_chunk_hdr_t *hdr; uct_allocated_memory_t mem; hdr = UCS_PTR_BYTE_OFFSET(chunk, -sizeof(*hdr)); mem.address = hdr; mem.method = hdr->method; mem.memh = hdr->memh; mem.length = hdr->length; mem.md = iface->md; uct_iface_mem_free(&mem); } static void uct_iface_mp_obj_init(ucs_mpool_t *mp, void *obj, void *chunk) { uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; uct_iface_mpool_init_obj_cb_t init_obj_cb; uct_iface_mp_chunk_hdr_t *hdr; init_obj_cb = uct_iface_mp_priv(mp)->init_obj_cb; hdr = UCS_PTR_BYTE_OFFSET(chunk, -sizeof(*hdr)); if (init_obj_cb != NULL) { init_obj_cb(&iface->super, obj, hdr->memh); } } static ucs_mpool_ops_t uct_iface_mpool_ops = { .chunk_alloc = uct_iface_mp_chunk_alloc, .chunk_release = uct_iface_mp_chunk_release, .obj_init = uct_iface_mp_obj_init, .obj_cleanup = NULL }; ucs_status_t uct_iface_mpool_init(uct_base_iface_t *iface, ucs_mpool_t *mp, size_t elem_size, size_t align_offset, size_t alignment, const uct_iface_mpool_config_t *config, unsigned grow, uct_iface_mpool_init_obj_cb_t init_obj_cb, const char *name) { unsigned elems_per_chunk; ucs_status_t status; elems_per_chunk = (config->bufs_grow != 0) ? config->bufs_grow : grow; status = ucs_mpool_init(mp, sizeof(uct_iface_mp_priv_t), elem_size, align_offset, alignment, elems_per_chunk, config->max_bufs, &uct_iface_mpool_ops, name); if (status != UCS_OK) { return status; } uct_iface_mp_priv(mp)->iface = iface; uct_iface_mp_priv(mp)->init_obj_cb = init_obj_cb; return UCS_OK; }