/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
 *
 * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
 * See file LICENSE for terms.
 */

#include "ucp_test.h"

/* NOTE(review): the target of this include was missing in the reviewed copy of
 * the file; it is assumed to be the header that declares test_perf - confirm. */
#include <common/test_perf.h>

#include <limits>
#include <sstream>
#include <string>

/* Scale factor used by the "MB/sec" entries of the test table: 1 / 1024^2.
 * Written as a constant expression so the table needs no run-time pow() call. */
#define MB                            (1.0 / (1024.0 * 1024.0))

/* Factor by which the accepted [min, max] range of every test is widened when
 * the CPU model is reported as AArch64 (see the envelope test below). */
#define UCP_ARM_PERF_TEST_MULTIPLIER  2


/**
 * UCP performance test fixture.
 *
 * Initializes only test_base (not ucp_test) and, for the lifetime of each
 * test, installs a log handler that downgrades selected errors to test
 * messages.
 */
class test_ucp_perf : public ucp_test, public test_perf {
protected:
    /** Initialize the base test only and install the error-filtering log handler. */
    virtual void init() {
        test_base::init(); /* Skip entities creation in ucp_test */
        ucs_log_push_handler(log_handler);
    }

    /** Remove the log handler installed by init(), then clean up the base test. */
    virtual void cleanup() {
        ucs_log_pop_handler();
        test_base::cleanup();
    }

    /**
     * Log handler that swallows "unreachable" / "unsupported" errors.
     *
     * For error-level messages, the message is formatted and searched for the
     * status strings of UCS_ERR_UNREACHABLE and UCS_ERR_UNSUPPORTED. On a
     * match the text is reported through UCS_TEST_MESSAGE and
     * UCS_LOG_FUNC_RC_STOP is returned; in every other case
     * UCS_LOG_FUNC_RC_CONTINUE is returned.
     */
    static ucs_log_func_rc_t
    log_handler(const char *file, unsigned line, const char *function,
                ucs_log_level_t level, const char *message, va_list ap)
    {
        // Ignore errors that transport cannot reach peer
        if (level == UCS_LOG_LEVEL_ERROR) {
            std::string err_str = format_message(message, ap);
            if ((err_str.find(ucs_status_string(UCS_ERR_UNREACHABLE)) !=
                 std::string::npos) ||
                (err_str.find(ucs_status_string(UCS_ERR_UNSUPPORTED)) !=
                 std::string::npos)) {
                UCS_TEST_MESSAGE << err_str;
                return UCS_LOG_FUNC_RC_STOP;
            }
        }
        return UCS_LOG_FUNC_RC_CONTINUE;
    }

    /* Table of tests executed by the envelope test; terminated by an entry
     * whose title is NULL. */
    const static test_spec tests[];
};


/*
 * Every entry starts with the test title and the units of the reported value;
 * the remaining initializers follow the member order of test_perf::test_spec,
 * which is declared outside this file.
 */
const test_perf::test_spec test_ucp_perf::tests[] =
{
  { "tag latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
    0 },

  { "tag iov latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
    UCP_PERF_DATATYPE_IOV, 8192, 3, { 1024, 1024, 1024 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
    0 },

  { "tag mr", "Mpps",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
    0 },

  { "tag sync mr", "Mpps",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.05, 100.0,
    0 },

  { "tag wild mr", "Mpps",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
    UCX_PERF_TEST_FLAG_TAG_WILDCARD },

  /* NOTE(review): this entry and the next one pass UCT_PERF_DATA_LAYOUT_LAST
   * where all other entries pass a UCP_PERF_DATATYPE_* value - confirm whether
   * UCP_PERF_DATATYPE_CONTIG was intended. The value is kept unchanged here.
   * The trailing 0 makes the previously implicit zero flags explicit. */
  { "tag bw", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0,
    0 },

  { "tag bw_zcopy_multi", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 16, 100000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0,
    0 },

  { "put latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { "put rate", "Mpps",
    UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 100.0,
    0 },

  { "put bw", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
    0 },

  { "get latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { "get bw", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
    0 },

  { "stream latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { "stream bw", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
    0 },

  { "stream recv-data latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },

  { "stream recv-data bw", "MB/sec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
    UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },

  { "atomic add rate", "Mpps",
    UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 1000000lu,
    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 500.0,
    0 },

  { "atomic fadd latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { "atomic swap latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { "atomic cswap latency", "usec",
    UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
    UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
    0 },

  { NULL }
};


/*
 * Runs every entry of test_ucp_perf::tests with UCX_TLS set to the test
 * parameter. When the "tcp" transport is in use, the result check is disabled
 * and the iteration count is capped at 1000.
 */
UCS_TEST_P(test_ucp_perf, envelope) {
    bool check_perf = true;
    size_t max_iter = std::numeric_limits<size_t>::max();

    if (has_transport("tcp")) {
        check_perf = false;
        max_iter   = 1000lu;
    }

    std::stringstream ss;
    ss << GetParam();
    /* coverity[tainted_string_argument] */
    ucs::scoped_setenv tls("UCX_TLS", ss.str().c_str());
    ucs::scoped_setenv warn_invalid("UCX_WARN_INVALID_CONFIG", "no");

    /* Run all tests */
    for (const test_spec *test_iter = tests; test_iter->title != NULL;
         ++test_iter) {
        /* Work on a copy so the shared table is never modified */
        test_spec test = *test_iter;

        if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) {
            /* Widen the accepted range in both directions on AArch64 */
            test.max *= UCP_ARM_PERF_TEST_MULTIPLIER;
            test.min /= UCP_ARM_PERF_TEST_MULTIPLIER;
        }

        test.iters = ucs_min(test.iters, max_iter);
        run_test(test, 0, check_perf, "", "");
    }
}

UCP_INSTANTIATE_TEST_CASE(test_ucp_perf)