Blob Blame History Raw
/*
 * Copyright (C) 2017 - 2018 Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "common.h"
#include "random_sizes_allocator.h"
#include "proc_stat.h"
#include "allocator_perf_tool/GTestAdapter.hpp"
#include "allocator_perf_tool/Allocation_info.hpp"

#include <memkind.h>

#include <condition_variable>
#include <functional>
#include <mutex>
#include <random>
#include <thread>

/* Drives a RandomSizesAllocator with a random mix of malloc and free
 * operations and keeps a running sum of the sizes it reports:
 * values returned by malloc_random_memory() are added,
 * values returned by free_random_memory() are subtracted.
 */
class Worker
{
public:
    /* allocator          - allocator to operate on (moved into the worker)
     * malloc_probability - probability of choosing malloc over free in work()
     */
    Worker(RandomSizesAllocator &&allocator, double malloc_probability)
        : allocator(std::move(allocator)),
          malloc_probability(malloc_probability)
    {
    }

    /* Perform a single operation: malloc when allocator.empty() is true or
     * when the random draw succeeds, free otherwise.
     */
    void work()
    {
        // Short-circuit on purpose: the random engine is advanced only
        // when the allocator is not empty.
        const bool do_malloc = allocator.empty() ||
                               draw_with_probability(malloc_probability);

        if (!do_malloc) {
            requested_memory_sum -= allocator.free_random_memory();
            return;
        }

        requested_memory_sum += allocator.malloc_random_memory();
    }

    /* Return the current value of the running sum maintained by work(). */
    size_t get_requested_memory_sum_bytes() const
    {
        return requested_memory_sum;
    }

private:
    /* Return true with the given probability, using the worker's own engine. */
    bool draw_with_probability(double probability)
    {
        std::bernoulli_distribution coin(probability);
        const bool outcome = coin(generator);
        return outcome;
    }

    // Default-constructed engine, i.e. never explicitly seeded.
    std::default_random_engine generator;
    RandomSizesAllocator allocator;
    size_t requested_memory_sum = 0;
    double malloc_probability;
};


/* Accumulates memory overhead statistics based on the virtual and physical
 * memory sizes reported by ProcStat.
 * reset() stores the baseline readings, sample() adds one measurement and
 * log_data() reports the aggregated values via GTestAdapter::RecordProperty.
 * Every public method holds sample_guard for its whole duration.
 */
class MemoryFootprintStats
{
public:
    /* Take new baseline readings and clear all accumulated statistics. */
    void reset()
    {
        std::lock_guard<std::mutex> lk(sample_guard);
        initial_virtual_memory = proc_stat.get_virtual_memory_size_bytes();
        initial_physical_memory = proc_stat.get_physical_memory_size_bytes();
        vm_overhead_sum = 0;
        current_vm_overhead = 0;
        max_vm_overhead = 0;

        current_phys_overhead = 0;
        phys_overhead_sum = 0;
        max_phys_overhead = 0;

        requested_memory = 0;
        sample_count = 0;
    }

    /* Record one measurement.
     * requested_memory_bytes - amount of memory requested so far by the
     *                          measured code; it is subtracted from the
     *                          virtual memory growth to obtain the overhead.
     */
    void sample(long long requested_memory_bytes)
    {
        std::lock_guard<std::mutex> lk(sample_guard);
        current_vm = proc_stat.get_virtual_memory_size_bytes();
        current_phys = proc_stat.get_physical_memory_size_bytes();

        sample_count++;

        requested_memory = requested_memory_bytes;

        // Virtual overhead: growth since reset() minus what was requested.
        current_vm_overhead = current_vm - initial_virtual_memory - requested_memory;
        vm_overhead_sum += current_vm_overhead;
        max_vm_overhead = std::max(max_vm_overhead, current_vm_overhead);

        // Physical overhead: plain growth since reset(); the requested
        // memory is not subtracted here.
        current_phys_overhead = current_phys - initial_physical_memory;
        phys_overhead_sum += current_phys_overhead;
        max_phys_overhead = std::max(max_phys_overhead, current_phys_overhead);
    }

    /* Report the collected statistics as test properties. */
    void log_data() const
    {
        std::lock_guard<std::mutex> lk(sample_guard);

        // Nothing sampled yet (e.g. zero operations were run): all sums are 0,
        // so divide by 1 instead of by zero.
        const long long samples = sample_count > 0 ? sample_count : 1;

        GTestAdapter::RecordProperty("avg_vm_overhead_per_operation_mb",
                                     convert_bytes_to_mb(vm_overhead_sum) / samples);
        GTestAdapter::RecordProperty("avg_vm_overhead_growth_per_operation_mb",
                                     convert_bytes_to_mb(current_vm_overhead) / samples);
        GTestAdapter::RecordProperty("max_vm_overhead_mb",
                                     convert_bytes_to_mb(max_vm_overhead));
        GTestAdapter::RecordProperty("avg_phys_overhead_per_operation_mb",
                                     convert_bytes_to_mb(phys_overhead_sum) / samples);
        GTestAdapter::RecordProperty("max_phys_overhead_mb",
                                     convert_bytes_to_mb(max_phys_overhead));
        // NOTE(review): evaluated in float arithmetic; requested_memory == 0
        // is not special-cased here - confirm the reported value is acceptable.
        GTestAdapter::RecordProperty("overhead_to_requested_memory_ratio_percent",
                                     100.f * current_vm_overhead / requested_memory);
        GTestAdapter::RecordProperty("requested_memory_mb",
                                     convert_bytes_to_mb(requested_memory));
    }
private:
    // Every field has an initializer so that sample() or log_data() called
    // before reset() never reads an indeterminate value.
    long long initial_virtual_memory = 0;
    long long vm_overhead_sum = 0;
    long long current_vm_overhead = 0;
    long long max_vm_overhead = 0;

    long long initial_physical_memory = 0;
    long long current_phys_overhead = 0;
    long long phys_overhead_sum = 0;
    long long max_phys_overhead = 0;

    long long requested_memory = 0;
    long long sample_count = 0;

    // Readings taken by the most recent sample().
    long long current_vm = 0;
    long long current_phys = 0;

    ProcStat proc_stat;
    // mutable: log_data() is const but still has to lock.
    mutable std::mutex sample_guard;
};

/* Execute func n_calls times, spreading the calls over n_threads threads.
 * The execution is multithreaded but the func calls are sequential: each call
 * is made with the internal mutex held, by the thread whose turn it is.
 * func takes the calling thread id and the operation id (0-based call counter)
 * as arguments and must return the id of the thread that makes the next call,
 * where thread ids are in range <0, n_threads-1>.
 * init_thread_id specifies the thread that makes the first call.
 * An out-of-range thread id (initial or returned by func) is reported as
 * a fatal gtest failure; all workers are released so the function returns
 * instead of hanging.
 */
void run_multithreaded_seq_exec(unsigned n_threads, unsigned init_thread_id,
                                unsigned n_calls, std::function<unsigned(unsigned, unsigned)> func)
{
    // With a bad initial id no worker would ever get a turn and join() below
    // would block forever, so reject it up front.
    if(n_threads > 0 && n_calls > 0) {
        ASSERT_LT(init_thread_id, n_threads) << "Incorrect initial thread id!";
    }

    std::vector<std::thread> threads;
    std::mutex mutex;
    std::condition_variable turns_holder;
    // Both variables below are accessed only with mutex held.
    unsigned current_call = 0;
    unsigned current_tid = init_thread_id;

    threads.reserve(n_threads);

    {
        // Hold the workers back until all threads have been created.
        std::lock_guard<std::mutex> startup_guard(mutex);

        for(unsigned tid=0; tid<n_threads; ++tid) {
            threads.emplace_back([ &, tid]() {
                for(;;) {
                    std::unique_lock<std::mutex> lk(mutex);
                    // Sleep until it is this thread's turn or all calls are done.
                    turns_holder.wait(lk, [ &,tid] {return current_tid == tid || current_call == n_calls;});
                    if(current_call == n_calls) {
                        return;
                    }

                    const unsigned next_tid = func(tid, current_call);
                    if(next_tid < n_threads) {
                        current_tid = next_tid;
                        current_call++;
                    } else {
                        // Nobody could take the next turn: mark the run as
                        // finished so that every waiting thread exits.
                        current_call = n_calls;
                    }
                    lk.unlock();
                    turns_holder.notify_all();

                    // Fatal on failure: returns from this thread function.
                    ASSERT_LT(next_tid, n_threads) << "Incorrect thread id!";
                }
            });
        }
    }

    for(std::thread &thread : threads) {
        thread.join();
    }
}

/*
 * Run n_calls Worker operations spread over n_threads threads and report the
 * memory footprint statistics gathered along the way.
 * The operations are executed sequentially, the threads taking turns in
 * round-robin order (otherwise the results might be very nondeterministic).
 *
 * kind, min_size, max_size - forwarded to RandomSizesAllocator
 * n_threads                - number of threads taking turns
 * malloc_probability       - forwarded to Worker
 * n_calls                  - number of operations; also forwarded to
 *                            RandomSizesAllocator
 */
void run_test(memkind_t kind, size_t min_size, size_t max_size,
              unsigned n_threads, double malloc_probability=1.0, unsigned n_calls=1000)
{
    Worker worker(RandomSizesAllocator(kind, min_size, max_size, n_calls),
                  malloc_probability);

    MemoryFootprintStats mem_footprint_stats;

    auto single_operation = [&](unsigned tid, unsigned id) -> unsigned {
        const bool is_first_operation = (id == 0);
        if(is_first_operation)
        {
            // Baseline readings are taken right before the first operation.
            mem_footprint_stats.reset();
        }

        worker.work();

        const size_t requested_bytes = worker.get_requested_memory_sum_bytes();
        mem_footprint_stats.sample(requested_bytes);

        // Hand the next operation over to the following thread, wrapping around.
        const unsigned next_tid = (tid + 1) % n_threads;
        return next_tid;
    };

    const unsigned first_tid = 0;
    run_multithreaded_seq_exec(n_threads, first_tid, n_calls, single_operation);

    mem_footprint_stats.log_data();
}

/* Fixture used by all the test cases below; it adds no state or setup of its own. */
class MemoryFootprintTest : public ::testing::Test
{
};

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_small_allocations_1_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 1;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_small_allocations_10_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 10;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_medium_allocations_1_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 1;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_medium_allocations_10_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 10;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_large_allocations_1_thread)
{
    // Malloc-only run limited to 100 operations.
    const size_t min_size = 2 * MB;
    const size_t max_size = 10 * MB;
    const unsigned n_threads = 1;
    const double malloc_probability = 1.0;
    const unsigned n_calls = 100;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability,
             n_calls);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_large_allocations_10_thread)
{
    // Malloc-only run limited to 100 operations.
    const size_t min_size = 2 * MB;
    const size_t max_size = 10 * MB;
    const unsigned n_threads = 10;
    const double malloc_probability = 1.0;
    const unsigned n_calls = 100;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability,
             n_calls);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_small_allocations_1_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 1;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_small_allocations_10_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 10;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_medium_allocations_1_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 1;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_large_allocations_10_thread)
{
    // Run limited to 100 operations.
    const size_t min_size = 2 * MB;
    const size_t max_size = 10 * MB;
    const unsigned n_threads = 10;
    const double malloc_probability = 0.8;
    const unsigned n_calls = 100;

    run_test(MEMKIND_DEFAULT, min_size, max_size, n_threads, malloc_probability,
             n_calls);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_small_allocations_1_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 1;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_small_allocations_10_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 10;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_medium_allocations_1_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 1;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_medium_allocations_10_thread)
{
    // Default malloc_probability (1.0) and n_calls (1000) of run_test are used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 10;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_large_allocations_1_thread)
{
    // Malloc-only (malloc_probability 1.0) run limited to 100 operations.
    // The probability has to be spelled out: 100 is the n_calls argument and
    // must not land in the malloc_probability slot.
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 1, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_large_allocations_10_thread)
{
    // Malloc-only (malloc_probability 1.0) run limited to 100 operations.
    // The probability has to be spelled out: 100 is the n_calls argument and
    // must not land in the malloc_probability slot.
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 10, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_small_allocations_1_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 1;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_small_allocations_10_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 128;
    const size_t max_size = 15 * KB;
    const unsigned n_threads = 10;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_medium_allocations_1_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 1;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_medium_allocations_10_thread)
{
    // Default n_calls (1000) of run_test is used.
    const size_t min_size = 16 * KB;
    const size_t max_size = 1 * MB;
    const unsigned n_threads = 10;
    const double malloc_probability = 0.8;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_large_allocations_1_thread)
{
    // Run limited to 100 operations.
    const size_t min_size = 2 * MB;
    const size_t max_size = 10 * MB;
    const unsigned n_threads = 1;
    const double malloc_probability = 0.8;
    const unsigned n_calls = 100;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability,
             n_calls);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_large_allocations_10_thread)
{
    // Run limited to 100 operations.
    const size_t min_size = 2 * MB;
    const size_t max_size = 10 * MB;
    const unsigned n_threads = 10;
    const double malloc_probability = 0.8;
    const unsigned n_calls = 100;

    run_test(MEMKIND_HBW, min_size, max_size, n_threads, malloc_probability,
             n_calls);
}