Blob Blame History Raw
/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2019.  ALL RIGHTS RESERVED.
 * Copyright (C) Advanced Micro Devices, Inc. 2019.  ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include "mem_buffer.h"

#include <ucp/core/ucp_mm.h>
#include <ucs/debug/assert.h>
#include <common/test_helpers.h>

#if HAVE_CUDA
#  include <cuda.h>
#  include <cuda_runtime.h>

#define CUDA_CALL(_code) \
    do { \
        cudaError_t cerr = _code; \
        if (cerr != cudaSuccess) { \
            UCS_TEST_ABORT(# _code << " failed"); \
        } \
    } while (0)

#endif

#if HAVE_ROCM
#  include <hip_runtime.h>

#define ROCM_CALL(_code) \
    do { \
        hipError_t cerr = _code; \
        if (cerr != hipSuccess) { \
            UCS_TEST_ABORT(# _code << " failed"); \
        } \
    } while (0)

#endif


std::vector<ucs_memory_type_t> mem_buffer::supported_mem_types()
{
    static std::vector<ucs_memory_type_t> vec;

    if (vec.empty()) {
        vec.push_back(UCS_MEMORY_TYPE_HOST);
#if HAVE_CUDA
        vec.push_back(UCS_MEMORY_TYPE_CUDA);
        vec.push_back(UCS_MEMORY_TYPE_CUDA_MANAGED);
#endif
#if HAVE_ROCM
        vec.push_back(UCS_MEMORY_TYPE_ROCM);
        vec.push_back(UCS_MEMORY_TYPE_ROCM_MANAGED);
#endif
    }

    return vec;
}

void *mem_buffer::allocate(size_t size, ucs_memory_type_t mem_type)
{
    void *ptr;

    switch (mem_type) {
    case UCS_MEMORY_TYPE_HOST:
        ptr = malloc(size);
        if (ptr == NULL) {
            UCS_TEST_ABORT("malloc() failed");
        }
        return ptr;
#if HAVE_CUDA
    case UCS_MEMORY_TYPE_CUDA:
        CUDA_CALL(cudaMalloc(&ptr, size));
        return ptr;
    case UCS_MEMORY_TYPE_CUDA_MANAGED:
        CUDA_CALL(cudaMallocManaged(&ptr, size));
        return ptr;
#endif
#if HAVE_ROCM
    case UCS_MEMORY_TYPE_ROCM:
        ROCM_CALL(hipMalloc(&ptr, size));
        return ptr;
    case UCS_MEMORY_TYPE_ROCM_MANAGED:
        ROCM_CALL(hipMallocManaged(&ptr, size));
        return ptr;
#endif
    default:
        UCS_TEST_SKIP_R(std::string(ucs_memory_type_names[mem_type]) +
                        " memory is not supported");
    }
}

void mem_buffer::release(void *ptr, ucs_memory_type_t mem_type)
{
    switch (mem_type) {
    case UCS_MEMORY_TYPE_HOST:
        free(ptr);
        break;
#if HAVE_CUDA
    case UCS_MEMORY_TYPE_CUDA:
    case UCS_MEMORY_TYPE_CUDA_MANAGED:
        CUDA_CALL(cudaFree(ptr));
        break;
#endif
#if HAVE_ROCM
    case UCS_MEMORY_TYPE_ROCM:
    case UCS_MEMORY_TYPE_ROCM_MANAGED:
        ROCM_CALL(hipFree(ptr));
        break;
#endif
    default:
        break;
    }
}

void mem_buffer::pattern_fill(void *buffer, size_t length, uint64_t seed)
{
    uint64_t *ptr = (uint64_t*)buffer;
    char *end = (char *)buffer + length;

    while ((char*)(ptr + 1) <= end) {
        *ptr = seed;
        seed = pat(seed);
        ++ptr;
    }
    memcpy(ptr, &seed, end - (char*)ptr);
}

void mem_buffer::pattern_check(const void *buffer, size_t length, uint64_t seed)
{
    const char* end = (const char*)buffer + length;
    const uint64_t *ptr = (const uint64_t*)buffer;

    while ((const char*)(ptr + 1) <= end) {
       if (*ptr != seed) {
            UCS_TEST_ABORT("At offset " << ((const char*)ptr - (const char*)buffer) << ": " <<
                           "Expected: 0x" << std::hex << seed << " " <<
                           "Got: 0x" << std::hex << (*ptr) << std::dec);
        }
        seed = pat(seed);
        ++ptr;
    }

    size_t remainder = (end - (const char*)ptr);
    if (remainder > 0) {
        ucs_assert(remainder < sizeof(*ptr));
        uint64_t mask = UCS_MASK_SAFE(remainder * 8 * sizeof(char));
        uint64_t value = 0;
        memcpy(&value, ptr, remainder);
        if (value != (seed & mask)) {
             UCS_TEST_ABORT("At offset " << ((const char*)ptr - (const char*)buffer) <<
                            " (remainder " << remainder << ") : " <<
                            "Expected: 0x" << std::hex << (seed & mask) << " " <<
                            "Mask: 0x" << std::hex << mask << " " <<
                            "Got: 0x" << std::hex << value << std::dec);
         }
    }
}

void mem_buffer::pattern_check(const void *buffer, size_t length)
{
    if (length > sizeof(uint64_t)) {
        pattern_check(buffer, length, *(const uint64_t*)buffer);
    }
}

void mem_buffer::pattern_fill(void *buffer, size_t length, uint64_t seed,
                              ucs_memory_type_t mem_type)
{
    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
        pattern_fill(buffer, length, seed);
    } else {
        ucs::auto_buffer temp(length);
        pattern_fill(*temp, length, seed);
        copy_to(buffer, *temp, length, mem_type);
    }
}

void mem_buffer::pattern_check(const void *buffer, size_t length, uint64_t seed,
                               ucs_memory_type_t mem_type)
{
    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
        pattern_check(buffer, length, seed);
    } else {
        ucs::auto_buffer temp(length);
        copy_from(*temp, buffer, length, mem_type);
        pattern_check(*temp, length, seed);
    }
}

void mem_buffer::copy_to(void *dst, const void *src, size_t length,
                         ucs_memory_type_t dst_mem_type)
{
    switch (dst_mem_type) {
    case UCS_MEMORY_TYPE_HOST:
    case UCS_MEMORY_TYPE_CUDA_MANAGED:
    case UCS_MEMORY_TYPE_ROCM_MANAGED:
        memcpy(dst, src, length);
        break;
#if HAVE_CUDA
    case UCS_MEMORY_TYPE_CUDA:
        CUDA_CALL(cudaMemcpy(dst, src, length, cudaMemcpyHostToDevice));
        CUDA_CALL(cudaDeviceSynchronize());
        break;
#endif
#if HAVE_ROCM
    case UCS_MEMORY_TYPE_ROCM:
        ROCM_CALL(hipMemcpy(dst, src, length, hipMemcpyHostToDevice));
        ROCM_CALL(hipDeviceSynchronize());
        break;
#endif
    default:
        abort_wrong_mem_type(dst_mem_type);
    }
}

void mem_buffer::copy_from(void *dst, const void *src, size_t length,
                           ucs_memory_type_t src_mem_type)
{
    switch (src_mem_type) {
    case UCS_MEMORY_TYPE_HOST:
    case UCS_MEMORY_TYPE_CUDA_MANAGED:
    case UCS_MEMORY_TYPE_ROCM_MANAGED:
        memcpy(dst, src, length);
        break;
#if HAVE_CUDA
    case UCS_MEMORY_TYPE_CUDA:
        CUDA_CALL(cudaMemcpy(dst, src, length, cudaMemcpyDeviceToHost));
        CUDA_CALL(cudaDeviceSynchronize());
        break;
#endif
#if HAVE_ROCM
    case UCS_MEMORY_TYPE_ROCM:
        ROCM_CALL(hipMemcpy(dst, src, length, hipMemcpyDeviceToHost));
        ROCM_CALL(hipDeviceSynchronize());
        break;
#endif
    default:
        abort_wrong_mem_type(src_mem_type);
    }
}

bool mem_buffer::compare(const void *expected, const void *buffer,
                         size_t length, ucs_memory_type_t mem_type)
{
    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
        return memcmp(expected, buffer, length) == 0;
    } else {
        ucs::auto_buffer temp(length);
        copy_from(*temp, buffer, length, mem_type);
        return memcmp(expected, *temp, length) == 0;
    }
}

std::string mem_buffer::mem_type_name(ucs_memory_type_t mem_type)
{
    return ucs_memory_type_names[mem_type];
}

void mem_buffer::abort_wrong_mem_type(ucs_memory_type_t mem_type) {
    UCS_TEST_ABORT("Wrong buffer memory type " + mem_type_name(mem_type));
}

uint64_t mem_buffer::pat(uint64_t prev) {
    /* LFSR pattern */
    static const uint64_t polynom = 1337;
    return (prev << 1) | (__builtin_parityl(prev & polynom) & 1);
}

mem_buffer::mem_buffer(size_t size, ucs_memory_type_t mem_type) :
    m_mem_type(mem_type), m_ptr(allocate(size, mem_type)), m_size(size) {
}

mem_buffer::~mem_buffer() {
    release(ptr(), mem_type());
}

ucs_memory_type_t mem_buffer::mem_type() const {
    return m_mem_type;
}

void *mem_buffer::ptr() const {
    return m_ptr;
}

size_t mem_buffer::size() const {
    return m_size;
}