/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED.
* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/
#include <common/test.h>
extern "C" {
#include <ucs/sys/sys.h>
#include <ucs/time/time.h>
#include <ucs/profile/profile.h>
}
#include <pthread.h>
#include <fstream>
#if HAVE_PROFILING
class scoped_profile {
public:
scoped_profile(ucs::test_base& test, const std::string &file_name,
const char *mode) : m_test(test), m_file_name(file_name)
{
ucs_profile_global_cleanup();
ucs_profile_reset_locations();
m_test.push_config();
m_test.modify_config("PROFILE_MODE", mode);
m_test.modify_config("PROFILE_FILE", m_file_name.c_str());
ucs_profile_global_init();
}
std::string read() {
ucs_profile_dump();
std::ifstream f(m_file_name.c_str());
return std::string(std::istreambuf_iterator<char>(f),
std::istreambuf_iterator<char>());
}
~scoped_profile() {
ucs_profile_global_cleanup();
unlink(m_file_name.c_str());
m_test.pop_config();
ucs_profile_global_init();
}
private:
ucs::test_base& m_test;
const std::string m_file_name;
};
class test_profile : public testing::TestWithParam<int>,
public ucs::test_base {
public:
test_profile();
~test_profile();
UCS_TEST_BASE_IMPL;
protected:
static const int MIN_LINE;
static const int MAX_LINE;
static const unsigned NUM_LOCAITONS;
static const char* PROFILE_FILENAME;
std::set<int> m_tids;
pthread_spinlock_t m_tids_lock;
struct thread_param {
test_profile *test;
int iters;
};
void add_tid(int tid);
static void *profile_thread_func(void *arg);
int num_threads() const;
void run_profiled_code(int num_iters);
void test_header(const ucs_profile_header_t *hdr, unsigned exp_mode,
const void **ptr);
void test_locations(const ucs_profile_location_t *locations,
unsigned num_locations, const void **ptr);
void test_thread_locations(const ucs_profile_thread_header_t *thread_hdr,
unsigned num_locations, uint64_t exp_count,
unsigned exp_num_records, const void **ptr);
void do_test(unsigned int_mode, const std::string& str_mode);
};
static int sum(int a, int b)
{
return a + b;
}
const int test_profile::MIN_LINE = __LINE__;
static void *test_request = &test_request;
UCS_PROFILE_FUNC_VOID(profile_test_func1, ())
{
UCS_PROFILE_REQUEST_NEW(test_request, "allocate", 10);
UCS_PROFILE_REQUEST_EVENT(test_request, "work", 0);
UCS_PROFILE_REQUEST_FREE(test_request);
UCS_PROFILE_CODE("code") {
UCS_PROFILE_SAMPLE("sample");
}
}
UCS_PROFILE_FUNC(int, profile_test_func2, (a, b), int a, int b)
{
return UCS_PROFILE_CALL(sum, a, b);
}
const int test_profile::MAX_LINE = __LINE__;
const unsigned test_profile::NUM_LOCAITONS = 12u;
const char* test_profile::PROFILE_FILENAME = "test.prof";
test_profile::test_profile()
{
pthread_spin_init(&m_tids_lock, 0);
}
test_profile::~test_profile()
{
pthread_spin_destroy(&m_tids_lock);
}
void test_profile::add_tid(int tid)
{
pthread_spin_lock(&m_tids_lock);
m_tids.insert(tid);
pthread_spin_unlock(&m_tids_lock);
}
void *test_profile::profile_thread_func(void *arg)
{
const thread_param *param = (const thread_param*)arg;
param->test->add_tid(ucs_get_tid());
for (int i = 0; i < param->iters; ++i) {
profile_test_func1();
profile_test_func2(1, 2);
}
return NULL;
}
int test_profile::num_threads() const
{
return GetParam();
}
void test_profile::run_profiled_code(int num_iters)
{
int ret;
thread_param param;
param.iters = num_iters;
param.test = this;
if (num_threads() == 1) {
profile_thread_func(¶m);
} else {
std::vector<pthread_t> threads;
for (int i = 0; i < num_threads(); ++i) {
pthread_t profile_thread;
ret = pthread_create(&profile_thread, NULL, profile_thread_func,
(void*)¶m);
if (ret < 0) {
ADD_FAILURE() << "pthread_create failed: " << strerror(errno);
break;
}
threads.push_back(profile_thread);
}
while (!threads.empty()) {
void *result;
ret = pthread_join(threads.back(), &result);
if (ret < 0) {
ADD_FAILURE() << "pthread_join failed: " << strerror(errno);
}
threads.pop_back();
}
}
}
void test_profile::test_header(const ucs_profile_header_t *hdr, unsigned exp_mode,
const void **ptr)
{
EXPECT_EQ(UCS_PROFILE_FILE_VERSION, hdr->version);
EXPECT_EQ(std::string(ucs_get_host_name()), std::string(hdr->hostname));
EXPECT_EQ(getpid(), (pid_t)hdr->pid);
EXPECT_EQ(exp_mode, hdr->mode);
EXPECT_EQ(NUM_LOCAITONS, hdr->num_locations);
EXPECT_EQ((uint32_t)num_threads(), hdr->num_threads);
EXPECT_NEAR(hdr->one_second / ucs_time_from_sec(1.0), 1.0, 0.01);
*ptr = hdr + 1;
}
void test_profile::test_locations(const ucs_profile_location_t *locations,
unsigned num_locations, const void **ptr)
{
std::set<std::string> loc_names;
for (unsigned i = 0; i < num_locations; ++i) {
const ucs_profile_location_t *loc = &locations[i];
EXPECT_EQ(std::string(basename(__FILE__)), std::string(loc->file));
EXPECT_GE(loc->line, MIN_LINE);
EXPECT_LE(loc->line, MAX_LINE);
loc_names.insert(loc->name);
}
EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func1"));
EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func2"));
EXPECT_NE(loc_names.end(), loc_names.find("code"));
EXPECT_NE(loc_names.end(), loc_names.find("sample"));
EXPECT_NE(loc_names.end(), loc_names.find("sum"));
EXPECT_NE(loc_names.end(), loc_names.find("allocate"));
EXPECT_NE(loc_names.end(), loc_names.find("work"));
*ptr = locations + num_locations;
}
void test_profile::test_thread_locations(
const ucs_profile_thread_header_t *thread_hdr,
unsigned num_locations, uint64_t exp_count,
unsigned exp_num_records, const void **ptr)
{
const ucs_profile_thread_location_t *loc;
EXPECT_NE(m_tids.end(), m_tids.find(thread_hdr->tid));
EXPECT_EQ(exp_num_records, thread_hdr->num_records);
EXPECT_LE(thread_hdr->end_time, ucs_get_time());
EXPECT_LE(thread_hdr->start_time, thread_hdr->end_time);
EXPECT_LE(thread_hdr->end_time - thread_hdr->start_time,
ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * (1 + exp_count));
for (unsigned i = 0; i < num_locations; ++i) {
loc = &reinterpret_cast<const ucs_profile_thread_location_t*>
(thread_hdr + 1)[i];
EXPECT_EQ(exp_count, loc->count);
EXPECT_LE(loc->total_time,
ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * exp_count);
}
*ptr = reinterpret_cast<const ucs_profile_thread_location_t*>(thread_hdr + 1) +
num_locations;
}
void test_profile::do_test(unsigned int_mode, const std::string& str_mode)
{
const int ITER = 5;
uint64_t exp_count = (int_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) ?
ITER : 0;
uint64_t exp_num_records = (int_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) ?
(NUM_LOCAITONS * ITER) : 0;
scoped_profile p(*this, PROFILE_FILENAME, str_mode.c_str());
run_profiled_code(ITER);
std::string data = p.read();
const void *ptr = &data[0];
/* Read and test file header */
const ucs_profile_header_t *hdr =
reinterpret_cast<const ucs_profile_header_t*>(ptr);
test_header(hdr, int_mode, &ptr);
/* Read and test global locations */
const ucs_profile_location_t *locations =
reinterpret_cast<const ucs_profile_location_t*>(ptr);
test_locations(locations, hdr->num_locations, &ptr);
/* Read and test threads */
for (int i = 0; i < num_threads(); ++i) {
const ucs_profile_thread_header_t *thread_hdr =
reinterpret_cast<const ucs_profile_thread_header_t*>(ptr);
test_thread_locations(thread_hdr, hdr->num_locations, exp_count,
exp_num_records, &ptr);
const ucs_profile_record_t *records =
reinterpret_cast<const ucs_profile_record_t*>(ptr);
uint64_t prev_ts = records[0].timestamp;
for (uint64_t i = 0; i < thread_hdr->num_records; ++i) {
const ucs_profile_record_t *rec = &records[i];
/* test location index */
EXPECT_GE(rec->location, 0u);
EXPECT_LT(rec->location, uint32_t(NUM_LOCAITONS));
/* test timestamp */
EXPECT_GE(rec->timestamp, prev_ts);
prev_ts = rec->timestamp;
/* test param64 */
const ucs_profile_location_t *loc = &locations[rec->location];
if ((loc->type == UCS_PROFILE_TYPE_REQUEST_NEW) ||
(loc->type == UCS_PROFILE_TYPE_REQUEST_EVENT) ||
(loc->type == UCS_PROFILE_TYPE_REQUEST_FREE))
{
EXPECT_EQ((uintptr_t)&test_request, rec->param64);
}
}
ptr = records + thread_hdr->num_records;
}
EXPECT_EQ(&data[data.size()], ptr) << data.size();
}
UCS_TEST_P(test_profile, accum) {
do_test(UCS_BIT(UCS_PROFILE_MODE_ACCUM), "accum");
}
UCS_TEST_P(test_profile, log) {
do_test(UCS_BIT(UCS_PROFILE_MODE_LOG), "log");
}
UCS_TEST_P(test_profile, log_accum) {
do_test(UCS_BIT(UCS_PROFILE_MODE_LOG) | UCS_BIT(UCS_PROFILE_MODE_ACCUM),
"log,accum");
}
INSTANTIATE_TEST_CASE_P(st, test_profile, ::testing::Values(1));
INSTANTIATE_TEST_CASE_P(mt, test_profile, ::testing::Values(2, 4, 8));
class test_profile_perf : public test_profile {
};
UCS_TEST_SKIP_COND_P(test_profile_perf, overhead, RUNNING_ON_VALGRIND) {
#if defined(__x86_64__) || defined(__powerpc64__)
const double EXP_OVERHEAD_NSEC = 100.0;
#else
const double EXP_OVERHEAD_NSEC = 150.0;
#endif
const int ITERS = 100;
const int WARMUP_ITERS = 5;
const int COUNT = 100000;
double overhead_nsec = 0.0;
scoped_profile p(*this, PROFILE_FILENAME, "accum");
for (int retry = 0; retry < (ucs::perf_retry_count + 1); ++retry) {
ucs_time_t time_profile_on = 0;
ucs_time_t time_profile_off = 0;
for (int i = 0; i < WARMUP_ITERS + ITERS; ++i) {
ucs_time_t t;
t = ucs_get_time();
for (volatile int j = 0; j < COUNT;) {
++j;
}
if (i > WARMUP_ITERS) {
time_profile_off += ucs_get_time() - t;
}
t = ucs_get_time();
for (volatile int j = 0; j < COUNT;) {
UCS_PROFILE_CODE("test") {
++j;
}
}
if (i > WARMUP_ITERS) {
time_profile_on += ucs_get_time() - t;
}
}
overhead_nsec = ucs_time_to_nsec(time_profile_on - time_profile_off) /
COUNT / ITERS;
UCS_TEST_MESSAGE << "overhead: " << overhead_nsec << " nsec";
if (!ucs::perf_retry_count) {
UCS_TEST_MESSAGE << "not validating performance";
return; /* Success */
} else if (overhead_nsec < EXP_OVERHEAD_NSEC) {
return; /* Success */
} else {
ucs::safe_sleep(ucs::perf_retry_interval);
}
}
EXPECT_LT(overhead_nsec, EXP_OVERHEAD_NSEC) << "Profiling overhead is too high";
}
INSTANTIATE_TEST_CASE_P(st, test_profile_perf, ::testing::Values(1));
#endif