/*
* BSD LICENSE
*
* Copyright(c) 2016-2020 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#ifndef __TSC_H__
#define __TSC_H__
#ifdef __cplusplus
extern "C" {
#endif
/**
 * TSC profile structure
 *
 * Holds the state of one cycle-cost measurement: the start time stamp of
 * the iteration in progress plus the statistics accumulated by
 * tsc_end()/tsc_end_ex() and the average computed by tsc_get_avg().
 * The structure is set up by tsc_init(); the initial values it assigns are
 * not visible in this header.
 */
struct tsc_prof {
uint64_t clk_start; /**< start TSC of an iteration, stored by tsc_start() */
uint64_t clk_avgc; /**< number of items counted so far; divisor of the average */
double clk_min; /**< min per-item cycle cost recorded for a single iteration */
double clk_max; /**< max per-item cycle cost recorded for a single iteration */
double clk_avg; /**< cumulative sum of measured cycles (each iteration
reduced by \a cost) used to calculate an average */
double clk_result; /**< avg cycle cost per item as last computed by tsc_get_avg() */
double cost; /**< cost of measurement, subtracted from each iteration's cycles */
char name[128]; /**< measurement name; presumably formatted by tsc_init()
from its \a name argument - confirm in the implementation */
};
/**
 * @brief Get TSC value for the start of measured block of code
 *
 * This function prevents out of order execution before reading TSC.
 * LFENCE instruction is used for it:
 * - no OOO
 * - load buffers are empty after lfence
 * - no deliberate restrictions on store buffers, some stores may drain though
 * Other options to prevent OOO are:
 * - cpuid; affects LB and SB (both get emptied)
 * - forced branch misprediction; no effect on LB or SB but
 *   loads/stores may drain
 * When measured code has very high cycle cost preventing OOO
 * may not be required and RDTSCP instruction may be enough.
 *
 * RDTSCP returns the counter in EDX:EAX and also loads IA32_TSC_AUX into
 * ECX, so all three registers are listed as clobbered. The __asm__
 * spelling is used so that the header also builds in strict ISO C modes
 * where the plain asm keyword is not available.
 *
 * @return TSC value
 */
static __attribute__((always_inline)) inline uint64_t __tsc_start(void)
{
        uint32_t cycles_high, cycles_low;

#ifdef __x86_64__
        __asm__ __volatile__("lfence\n\t"
                             "rdtscp\n\t"
                             "mov %%edx, %0\n\t"
                             "mov %%eax, %1\n\t"
                             : "=r" (cycles_high), "=r" (cycles_low)
                             : : "%rax", "%rcx", "%rdx");
#else
        __asm__ __volatile__("lfence\n\t"
                             "rdtscp\n\t"
                             "mov %%edx, %0\n\t"
                             "mov %%eax, %1\n\t"
                             : "=r" (cycles_high), "=r" (cycles_low)
                             : : "%eax", "%ecx", "%edx");
#endif
        /* combine the two 32-bit halves into the 64-bit counter value */
        return ((uint64_t)cycles_high << 32) | cycles_low;
}
/**
 * @brief Get TSC value for the end of measured block of code
 *
 * No OOO prevention required. RDTSCP is used here which makes sure
 * all previous instructions retire before reading TSC.
 *
 * RDTSCP returns the counter in EDX:EAX and also loads IA32_TSC_AUX into
 * ECX, so all three registers are listed as clobbered. The __asm__
 * spelling is used so that the header also builds in strict ISO C modes
 * where the plain asm keyword is not available.
 *
 * @return TSC value
 */
static __attribute__((always_inline)) inline uint64_t __tsc_end(void)
{
        uint32_t cycles_high, cycles_low;

#ifdef __x86_64__
        __asm__ __volatile__("rdtscp\n\t"
                             "mov %%edx, %0\n\t"
                             "mov %%eax, %1\n\t"
                             : "=r" (cycles_high), "=r" (cycles_low)
                             : : "%rax", "%rcx", "%rdx");
#else
        __asm__ __volatile__("rdtscp\n\t"
                             "mov %%edx, %0\n\t"
                             "mov %%eax, %1\n\t"
                             : "=r" (cycles_high), "=r" (cycles_low)
                             : : "%eax", "%ecx", "%edx");
#endif
        /* combine the two 32-bit halves into the 64-bit counter value */
        return ((uint64_t)cycles_high << 32) | cycles_low;
}
/**
 * @brief Begins cycle measurement of one code iteration
 *
 * Reads the TSC through __tsc_start() and records it in the profile as
 * the start of the iteration. A tsc_start() followed by tsc_end() or
 * tsc_end_ex() pair can be repeated any number of times on the same
 * profile.
 *
 * @param p pointer to TSC profile structure
 */
static __attribute__((always_inline)) inline void
tsc_start(struct tsc_prof *p)
{
        const uint64_t now = __tsc_start();

        p->clk_start = now;
}
/**
 * @brief Stops cycle measurement of code iteration
 *
 * The cycles elapsed since \a clk_start, reduced by the measurement cost
 * stored in the profile, are added to the running sum and \a inc is added
 * to the item count. The per-item cost of this iteration (elapsed cycles
 * divided by \a inc, without the measurement cost correction) updates the
 * recorded minimum and maximum.
 *
 * When \a inc is 0 the elapsed cycles are still accumulated but the
 * minimum and maximum are left untouched, as no per-item cost can be
 * derived from an empty iteration.
 *
 * @param p pointer to TSC profile structure
 * @param inc number of items processed within the iteration.
 *        This allows code to calculate average cycle cost per work item even
 *        though number of code iterations may be different.
 * @param clk_start start TSC value. This is useful when using one
 *        start TSC reading for multiple different TSC profiles.
 */
static __attribute__((always_inline)) inline void
tsc_end_ex(struct tsc_prof *p, const unsigned inc, const uint64_t clk_start)
{
        const double clk_diff = (double) (__tsc_end() - clk_start);

        p->clk_avgc += inc;
        p->clk_avg += (clk_diff - p->cost);

        /*
         * Dividing by a zero item count would produce an infinite (or NaN)
         * per-item cost that would stick in clk_max for good.
         */
        if (inc > 0) {
                const double clk_item = clk_diff / (double) inc;

                if (clk_item < p->clk_min)
                        p->clk_min = clk_item;
                if (clk_item > p->clk_max)
                        p->clk_max = clk_item;
        }
}
/**
 * @brief Ends cycle measurement of one code iteration
 *
 * Convenience form of tsc_end_ex() that uses the start time stamp stored
 * in the profile by tsc_start().
 *
 * @param p pointer to TSC profile structure
 * @param inc number of items processed within the iteration
 */
static __attribute__((always_inline)) inline void
tsc_end(struct tsc_prof *p, const unsigned inc)
{
        const uint64_t begin = p->clk_start;

        tsc_end_ex(p, inc, begin);
}
/**
 * @brief Computes the average cycle cost per item
 *
 * Divides the accumulated cycle sum by the accumulated item count. The
 * result is stored in the \a clk_result field of the profile and returned.
 *
 * @param p pointer to TSC profile structure
 *
 * @return Calculated average cycle cost per work item
 * @retval 0.0 if no items have been counted so far
 */
static __attribute__((always_inline)) inline
double tsc_get_avg(struct tsc_prof *p)
{
        const double per_item = (p->clk_avgc == 0) ?
                0.0 : (p->clk_avg / ((double) p->clk_avgc));

        p->clk_result = per_item;
        return per_item;
}
/**
 * @brief Initializes TSC profile structure
 *
 * The declaration carries the printf format attribute so the compiler can
 * check the variadic arguments against the \a name format string.
 *
 * @param p pointer to TSC profile structure
 * @param name string describing the measurement in printf() format
 * @param ... variadic arguments depending on \a name format
 */
void tsc_init(struct tsc_prof *p, const char *name, ...)
        __attribute__((format(printf, 2, 3)));
/**
 * @brief Prints measured TSC profile data
 *
 * Only declared here; the output destination and format are determined by
 * the implementation, which is not part of this header.
 *
 * @param p pointer to TSC profile structure
 */
void tsc_print(struct tsc_prof *p);
#ifdef __cplusplus
}
#endif
#endif /* __TSC_H__ */