/* * Copyright (C) 2014 - 2018 Intel Corporation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice(s), * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice(s), * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include // sort #include // log2 #include #include #include "framework.hpp" namespace performance_tests { namespace ch = std::chrono; using std::cout; using std::endl; using std::unique_lock; using std::mutex; #ifdef __DEBUG mutex g_coutMutex; int g_msgLevel = 1; #endif void Barrier::wait() { unique_lock lock(m_barrierMutex); // Decrement number of threads awaited at the barrier m_waiting--; if (m_waiting == 0) { // Called by the last expected thread - notify all waiting threads and exit m_cVar.notify_all(); // Store the time when barrier was released if (m_releasedAt.tv_sec == 0 && m_releasedAt.tv_nsec == 0) { clock_gettime(CLOCK_MONOTONIC, &m_releasedAt); } return; } // Wait unitl the last expected thread calls wait() on Barrier instance, or timeout occurs m_cVar.wait_until(lock, ch::system_clock::now() + ch::seconds(10), []() { return GetInstance().m_waiting == 0; }); } // Worker class Worker::Worker( uint32_t actionsCount, const vector &allocationSizes, Operation *freeOperation, memkind_t kind) : m_actionsCount(actionsCount) , m_allocationSizes(allocationSizes) , m_actions(vector(actionsCount, nullptr)) , m_kind(kind) { assert(freeOperation->getName() == OperationName::Free); } Worker::~Worker() { for (Action *action : m_actions) { //each action delete action; } } void Worker::init(const vector &testOperations, Operation *&freeOperation) { for(uint32_t i = 0 ; i < m_actionsCount ; i++) { int bucketSize = rand() % Operation::MaxBucketSize; for (Operation *operation : testOperations) { //each operation if (operation->checkCondition(bucketSize)) { size_t size = m_allocationSizes[m_allocationSizes.size() > 1 ? rand() % m_allocationSizes.size() : 0]; m_actions[i] = new Action( operation, freeOperation, m_kind, size, log2(rand() % size), sizeof(void *) * (1 << ((rand() % Operation::MemalignMaxMultiplier)))); break; } } } } void Worker::run() { m_thread = new thread(&Worker::work, this); } #ifdef __DEBUG uint16_t Worker::getId() { return m_threadId; } void Worker::setId(uint16_t threadId) { m_threadId = threadId; } #endif void Worker::finish() { if (m_thread != nullptr) { m_thread->join(); delete m_thread; } } void Worker::work() { EMIT(1, "Entering barrier " << m_threadId) Barrier::GetInstance().wait(); EMIT(1, "Starting thread " << m_threadId) for (Action *action : m_actions) { action->alloc(); } } void Worker::clean() { EMIT(2, "Cleaning thread " << m_threadId) for (Action *action : m_actions) { action->free(); } EMIT(1, "Thread " << m_threadId << " finished") } // PerformanceTest class PerformanceTest::PerformanceTest( size_t repeatsCount, size_t threadsCount, size_t operationsCount) : m_repeatsCount(repeatsCount) , m_discardCount(repeatsCount * (distardPercent / 100.0)) , m_threadsCount(threadsCount) , m_operationsCount(operationsCount) , m_executionMode(ExecutionMode::SingleInteration) { } void PerformanceTest::setAllocationSizes(const vector &allocationSizes) { m_allocationSizes = allocationSizes; } void PerformanceTest::setOperations(const vector> &testOperations, Operation *freeOperation) { m_testOperations = testOperations; m_freeOperation = freeOperation; } void PerformanceTest::setExecutionMode(ExecutionMode executionMode) { m_executionMode = executionMode; } void PerformanceTest::setKind(const vector &kinds) { m_kinds = kinds; } inline void PerformanceTest::runIteration() { timespec iterationStop, iterationStart; Barrier::GetInstance().reset(m_threadsCount); for (Worker *worker : m_workers) { worker->run(); } for (Worker *worker : m_workers) { worker->finish(); } EMIT(1, "Alloc completed"); clock_gettime(CLOCK_MONOTONIC, &iterationStop); iterationStart = Barrier::GetInstance().releasedAt(); m_durations.push_back( (iterationStop.tv_sec * NanoSecInSec + iterationStop.tv_nsec) - (iterationStart.tv_sec * NanoSecInSec + iterationStart.tv_nsec) ); for (Worker *worker : m_workers) { worker->clean(); } } void PerformanceTest::prepareWorkers() { for (size_t threadId = 0; threadId < m_threadsCount; threadId++) { m_workers.push_back( new Worker( m_operationsCount, m_allocationSizes, m_freeOperation, m_kinds.size() > 0 ? m_kinds[threadId % m_kinds.size()] : nullptr) ); #ifdef __DEBUG m_workers.back()->setId(threadId); #endif if (m_executionMode == ExecutionMode::SingleInteration) { // In ManyIterations mode, operations will be set for each thread at the beginning of each iteration m_workers.back()->init(m_testOperations[threadId % m_testOperations.size()], m_freeOperation); } } } Metrics PerformanceTest::getMetrics() { uint64_t totalDuration = 0; std::sort(m_durations.begin(), m_durations.end()); m_durations.erase(m_durations.end() - m_discardCount, m_durations.end()); for (uint64_t &duration : m_durations) { totalDuration += duration; } Metrics metrics; metrics.executedOperations = m_durations.size() * m_threadsCount * m_operationsCount; metrics.totalDuration = totalDuration; metrics.repeatDuration = (double) totalDuration / ((uint64_t)m_durations.size() * NanoSecInSec); metrics.iterationDuration = metrics.repeatDuration; if (m_executionMode == ExecutionMode::ManyIterations) { metrics.executedOperations *= m_testOperations.size(); metrics.iterationDuration /= m_testOperations.size(); } metrics.operationsPerSecond = (double) metrics.executedOperations * NanoSecInSec / totalDuration; metrics.avgOperationDuration = (double) totalDuration / metrics.executedOperations; assert(metrics.iterationDuration != 0.0); return metrics; } void PerformanceTest::writeMetrics(const string &suiteName, const string &caseName, const string &fileName) { Metrics metrics = getMetrics(); // For thousands separation setlocale(LC_ALL, ""); if (!fileName.empty()) { FILE *f; if((f = fopen(fileName.c_str(), "a+"))) { fprintf(f, "%s;%s;%zu;%zu;%lu;%f;%f;%f;%f\n", suiteName.c_str(), caseName.c_str(), m_repeatsCount, m_threadsCount, metrics.executedOperations, metrics.operationsPerSecond, metrics.avgOperationDuration, metrics.iterationDuration, metrics.repeatDuration); fclose(f); } } printf("Operations/sec:\t\t\t%'f\n" "Avg. operation duration:\t%f nsec\n" "Iteration duration:\t\t%f sec\n" "Repeat duration:\t\t%f sec\n", metrics.operationsPerSecond, metrics.avgOperationDuration, metrics.iterationDuration, metrics.repeatDuration); } int PerformanceTest::run() { if (m_testOperations.empty() || m_allocationSizes.empty() || m_freeOperation == nullptr) { cout << "ERROR: Test not initialized" << endl; return 1; } // Create threads prepareWorkers(); //warmup kinds void *alloc = nullptr; for (const memkind_t &kind : m_kinds) { m_testOperations[0][0]->perform(kind, alloc, 1e6); m_freeOperation->perform(kind, alloc); } for (size_t repeat = 0; repeat < m_repeatsCount; repeat++) { EMIT(1, "Test run #" << repeat) if (m_executionMode == ExecutionMode::SingleInteration) { runIteration(); } else { // Perform each operations list in separate iteration, for each thread for (vector &ops : m_testOperations) { for (Worker *worker : m_workers) { worker->init(ops, m_freeOperation); } runIteration(); } } } return 0; } void PerformanceTest::showInfo() { printf("Test parameters: %lu repeats, %lu threads, %d operations per thread\n", m_repeatsCount, m_threadsCount, m_operationsCount); printf("Thread memory allocation operations:\n"); for (unsigned long i = 0; i < m_testOperations.size(); i++) { if (m_executionMode == ExecutionMode::SingleInteration) { printf("\tThread %lu,%lu,...\n", i, i + (m_testOperations.size())); } else { printf("\tIteration %lu\n", i); } for (const Operation *op : m_testOperations[i]) { printf("\t\t %s (bucket size: %d)\n", op->getNameStr().c_str(), op->getBucketSize()); } } printf("Memory free operation:\n\t\t%s\n", m_freeOperation->getNameStr().c_str()); printf("Allocation sizes:\n"); for (size_t size : m_allocationSizes) { printf("\t\t%lu bytes\n", size); } } }