/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
*
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/
#include "ucp_test.h"
#include <gtest/common/test_perf.h>
/* Scale factor from bytes to megabytes (1 / 1024^2). It is used as the
 * normalization factor of the table entries that are reported in "MB/sec". */
#define MB (1.0 / (1024.0 * 1024.0))

/* Factor by which the accepted [min, max] range of each test is widened when
 * running on AArch64: max is multiplied by it and min is divided by it. */
#define UCP_ARM_PERF_TEST_MULTIPLIER 2
/**
 * Fixture for the UCP performance tests. It combines the UCP test base with
 * the generic performance-test runner, and installs a log handler for the
 * duration of each test.
 */
class test_ucp_perf : public ucp_test, public test_perf {
protected:
    /* Set up only the common test base (entity creation done by ucp_test is
     * intentionally skipped), then start intercepting log messages. */
    virtual void init() {
        test_base::init();
        ucs_log_push_handler(log_handler);
    }

    /* Undo init() in reverse order: remove the log handler first. */
    virtual void cleanup() {
        ucs_log_pop_handler();
        test_base::cleanup();
    }

    /**
     * Log callback installed by init().
     *
     * Error-level messages whose formatted text contains the status string of
     * UCS_ERR_UNREACHABLE or UCS_ERR_UNSUPPORTED are printed as a test message
     * and stopped from further processing; everything else is passed on.
     */
    static ucs_log_func_rc_t
    log_handler(const char *file, unsigned line, const char *function,
                ucs_log_level_t level, const char *message, va_list ap) {
        /* Only errors are candidates for suppression */
        if (level != UCS_LOG_LEVEL_ERROR) {
            return UCS_LOG_FUNC_RC_CONTINUE;
        }

        std::string formatted = format_message(message, ap);
        const char *text      = formatted.c_str();

        /* Transport cannot reach the peer */
        bool ignorable = (strstr(text, ucs_status_string(UCS_ERR_UNREACHABLE)) != NULL);
        if (!ignorable) {
            /* Operation is not supported */
            ignorable = (strstr(text, ucs_status_string(UCS_ERR_UNSUPPORTED)) != NULL);
        }

        if (!ignorable) {
            return UCS_LOG_FUNC_RC_CONTINUE;
        }

        UCS_TEST_MESSAGE << formatted;
        return UCS_LOG_FUNC_RC_STOP;
    }

    /* Table of performance scenarios; the last entry has a NULL title */
    static const test_spec tests[];
};
/*
 * Performance scenarios executed by the "envelope" test.
 *
 * Each entry initializes a test_perf::test_spec; the array ends with an entry
 * whose title is NULL. The struct is declared outside this file, so the column
 * meanings below are inferred from how the values are used here -- confirm
 * against the test_spec definition:
 *
 *   title, units,
 *   API, command, test type,
 *   datatype, message stride, number of message lengths, { message lengths },
 *   max outstanding operations, iterations,
 *   offset of the measured field in ucx_perf_result_t, normalization factor,
 *   minimal accepted value, maximal accepted value,
 *   test flags
 *
 * All entries use the UCP API, therefore the datatype column always holds a
 * UCP_PERF_DATATYPE_* value, and every entry spells out its flags explicitly.
 */
const test_perf::test_spec test_ucp_perf::tests[] =
{
    { "tag latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
      0 },

    { "tag iov latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG,
      UCP_PERF_DATATYPE_IOV, 8192, 3, { 1024, 1024, 1024 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0,
      0 },

    { "tag mr", "Mpps",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
      ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
      0 },

    { "tag sync mr", "Mpps",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
      ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.05, 100.0,
      0 },

    { "tag wild mr", "Mpps",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
      ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0,
      UCX_PERF_TEST_FLAG_TAG_WILDCARD },

    { "tag bw", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0,
      0 },

    /* Same as "tag bw", but with 16 outstanding operations */
    { "tag bw_zcopy_multi", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 16, 100000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0,
      0 },

    { "put latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    { "put rate", "Mpps",
      UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu,
      ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 100.0,
      0 },

    { "put bw", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
      0 },

    { "get latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    { "get bw", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
      0 },

    { "stream latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    { "stream bw", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
      0 },

    { "stream recv-data latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },

    { "stream recv-data bw", "MB/sec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu,
      ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0,
      UCX_PERF_TEST_FLAG_STREAM_RECV_DATA },

    { "atomic add rate", "Mpps",
      UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 1000000lu,
      ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 500.0,
      0 },

    { "atomic fadd latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    { "atomic swap latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    { "atomic cswap latency", "usec",
      UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
      UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu,
      ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0,
      0 },

    /* Terminator: iteration over the table stops at the NULL title */
    { NULL }
};
/*
 * Runs every scenario of test_ucp_perf::tests with UCX_TLS set to the textual
 * form of the current test parameter.
 *
 * When "tcp" is one of the transports, the run is only a smoke test:
 * check_perf is passed as false and the iteration count is capped at 1000.
 * On AArch64 the accepted [min, max] range of each scenario is widened by
 * UCP_ARM_PERF_TEST_MULTIPLIER.
 */
UCS_TEST_P(test_ucp_perf, envelope) {
    /* Results are checked, and iterations left uncapped, only without TCP */
    bool check_perf = !has_transport("tcp");
    size_t max_iter = check_perf ? std::numeric_limits<size_t>::max() : 1000lu;

    std::stringstream tls_value;
    tls_value << GetParam();

    /* Both environment overrides are scoped objects, so they stay in effect
     * until the end of this test body */
    /* coverity[tainted_string_argument] */
    ucs::scoped_setenv tls("UCX_TLS", tls_value.str().c_str());
    ucs::scoped_setenv warn_invalid("UCX_WARN_INVALID_CONFIG", "no");

    /* Run all tests */
    for (const test_spec *spec = tests; spec->title != NULL; ++spec) {
        /* Work on a copy, the table itself is const */
        test_spec adjusted = *spec;

        adjusted.iters = ucs_min(adjusted.iters, max_iter);

        if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) {
            adjusted.max *= UCP_ARM_PERF_TEST_MULTIPLIER;
            adjusted.min /= UCP_ARM_PERF_TEST_MULTIPLIER;
        }

        run_test(adjusted, 0, check_perf, "", "");
    }
}
/* Instantiate the parameterized test_ucp_perf tests through the UCP helper
 * macro. NOTE(review): the macro is defined elsewhere; presumably it creates
 * one instance per transport configuration, whose value GetParam() returns --
 * confirm against its definition. */
UCP_INSTANTIATE_TEST_CASE(test_ucp_perf)