/** * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. * * See file LICENSE for terms. */ #include #include #include static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf) { hipError_t ret; unsigned group_index; int num_gpus; int gpu_index; group_index = rte_call(perf, group_index); ret = hipGetDeviceCount(&num_gpus); if (ret != hipSuccess) { return UCS_ERR_NO_DEVICE; } gpu_index = group_index % num_gpus; ret = hipSetDevice(gpu_index); if (ret != hipSuccess) { return UCS_ERR_NO_DEVICE; } return UCS_OK; } static inline ucs_status_t ucx_perf_rocm_alloc(size_t length, ucs_memory_type_t mem_type, void **address_p) { hipError_t ret; ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) || (mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED)); ret = ((mem_type == UCS_MEMORY_TYPE_ROCM) ? hipMalloc(address_p, length) : hipMallocManaged(address_p, length, hipMemAttachGlobal)); if (ret != hipSuccess) { ucs_error("failed to allocate memory"); return UCS_ERR_NO_MEMORY; } return UCS_OK; } static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length, void **address_p, ucp_mem_h *memh_p, int non_blk_flag) { return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p); } static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf, size_t length, void **address_p, ucp_mem_h *memh_p, int non_blk_flag) { return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p); } static void ucp_perf_rocm_free(const ucx_perf_context_t *perf, void *address, ucp_mem_h memh) { hipFree(address); } static inline ucs_status_t uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf, size_t length, ucs_memory_type_t mem_type, unsigned flags, uct_allocated_memory_t *alloc_mem) { ucs_status_t status; status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address); if (status != UCS_OK) { return status; } status = uct_md_mem_reg(perf->uct.md, alloc_mem->address, length, flags, &alloc_mem->memh); if (status != UCS_OK) { hipFree(alloc_mem->address); ucs_error("failed to register memory"); return status; } alloc_mem->mem_type = mem_type; alloc_mem->md = perf->uct.md; return UCS_OK; } static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length, unsigned flags, uct_allocated_memory_t *alloc_mem) { return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM, flags, alloc_mem); } static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf, size_t length, unsigned flags, uct_allocated_memory_t *alloc_mem) { return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED, flags, alloc_mem); } static void uct_perf_rocm_free(const ucx_perf_context_t *perf, uct_allocated_memory_t *alloc_mem) { ucs_status_t status; ucs_assert(alloc_mem->md == perf->uct.md); status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh); if (status != UCS_OK) { ucs_error("failed to deregister memory"); } hipFree(alloc_mem->address); } static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type, const void *src, ucs_memory_type_t src_mem_type, size_t count) { hipError_t ret; ret = hipMemcpy(dst, src, count, hipMemcpyDefault); if (ret != hipSuccess) { ucs_error("failed to copy memory: %s", hipGetErrorString(ret)); } } static void* ucx_perf_rocm_memset(void *dst, int value, size_t count) { hipError_t ret; ret = hipMemset(dst, value, count); if (ret != hipSuccess) { ucs_error("failed to set memory: %s", hipGetErrorString(ret)); } return dst; } UCS_STATIC_INIT { static ucx_perf_allocator_t rocm_allocator = { .mem_type = UCS_MEMORY_TYPE_ROCM, .init = ucx_perf_rocm_init, .ucp_alloc = ucp_perf_rocm_alloc, .ucp_free = ucp_perf_rocm_free, .uct_alloc = uct_perf_rocm_alloc, .uct_free = uct_perf_rocm_free, .memcpy = ucx_perf_rocm_memcpy, .memset = ucx_perf_rocm_memset }; static ucx_perf_allocator_t rocm_managed_allocator = { .mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED, .init = ucx_perf_rocm_init, .ucp_alloc = ucp_perf_rocm_alloc_managed, .ucp_free = ucp_perf_rocm_free, .uct_alloc = uct_perf_rocm_managed_alloc, .uct_free = uct_perf_rocm_free, .memcpy = ucx_perf_rocm_memcpy, .memset = ucx_perf_rocm_memset }; ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &rocm_allocator; ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &rocm_managed_allocator; } UCS_STATIC_CLEANUP { ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL; ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL; }