/*
* smpl_amd64_ibs.c - AMD64 Family 10h IBS sampling
*
* Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
* Copyright (c) 2008 Advanced Micro Devices Inc.
* Contributed by Robert Richter <robert.richter@amd.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <sys/types.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <getopt.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_amd64.h>
#include <perfmon/perfmon.h>
#include <perfmon/perfmon_dfl_smpl.h>
/*
 * Command line settings. The fields are written by getopt_long() through
 * the flag pointers stored in the_options[].
 */
typedef struct {
int opt_no_show; /* non-zero: samples are only counted, not printed */
int opt_block; /* non-zero: session is created with PFM_FL_NOTIFY_BLOCK */
int opt_setup; /* one of the OPT_* values, selects the setup routine */
} options_t;
/*
 * Values stored in options.opt_setup; mainloop() dispatches on them to
 * pick the register setup routine.
 */
enum {
OPT_IBSOP, /* 0: default, handled by setup_pmu_ibsop() */
OPT_IBSFETCH, /* handled by setup_pmu_ibsfetch() */
OPT_IBSOP_NATIVE, /* handled by setup_pmu_ibsop_native() */
};
/* short local aliases for the pfm_dfl_smpl_* types used by this program */
typedef pfm_dfl_smpl_arg_t smpl_fmt_arg_t;
typedef pfm_dfl_smpl_hdr_t smpl_hdr_t;
typedef pfm_dfl_smpl_entry_t smpl_entry_t;
typedef pfm_dfl_smpl_arg_t smpl_arg_t;
/* sampling format name handed to pfm_create() */
#define FMT_NAME PFM_DFL_SMPL_NAME
#define NUM_PMCS PFMLIB_MAX_PMCS
#define NUM_PMDS PFMLIB_MAX_PMDS
/* number of consecutive PMDs put into the sampling bitmask for each IBS mode */
#define PMD_IBSOP_NUM 7
#define PMD_IBSFETCH_NUM 3
/* running totals maintained by process_smpl_buf() and printed by mainloop() */
static uint64_t collected_samples, collected_partial;
/* parsed command line settings */
static options_t options;
/*
 * Long options for getopt_long(). Entries with a flag pointer make
 * getopt_long() store the last field into that variable and return 0;
 * "help" has no flag pointer, so getopt_long() returns its value (1),
 * which main() treats like -h.
 */
static struct option the_options[]={
{ "help", 0, 0, 1},
{ "ovfl-block", 0, &options.opt_block, 1},
{ "no-show", 0, &options.opt_no_show, 1},
{ "ibsop", 0, &options.opt_setup, OPT_IBSOP},
{ "ibsfetch", 0, &options.opt_setup, OPT_IBSFETCH},
{ "ibsop-native", 0, &options.opt_setup, OPT_IBSOP_NATIVE},
{ 0, 0, 0, 0} /* terminator */
};
/* forward declaration carrying the noreturn attribute; the definition follows below */
static void fatal_error(char *fmt,...) __attribute__((noreturn));
/*
 * Bit-vector helpers. A vector is an array of uint64_t words; bit N lives
 * in word N / 64 at position N % 64.
 */
#define BPL (sizeof(uint64_t)<<3)	/* bits per word */
#define LBPL 6				/* log2(BPL) */

/*
 * pfm_bv_set - set bit rnum in the bit vector bv
 *
 * The mask is built from a 64-bit constant: 1UL is only 32 bits wide on
 * ILP32 targets, which would make shifts of 32..63 undefined.
 */
static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum)
{
	bv[rnum >> LBPL] |= (uint64_t)1 << (rnum & (BPL - 1));
}

/*
 * pfm_bv_isset - test bit rnum of the bit vector bv
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
static inline int pfm_bv_isset(const uint64_t *bv, uint16_t rnum)
{
	return (int)((bv[rnum >> LBPL] >> (rnum & (BPL - 1))) & 1);
}

/*
 * pfm_bv_copy - copy the first n bits of vector j into vector d
 *
 * Whole words are copied. The word count is rounded up so that a trailing,
 * partially used word is not dropped (n = 100 copies two words). For
 * n <= 64, including n == 0, exactly one word is copied.
 */
static inline void pfm_bv_copy(uint64_t *d, const uint64_t *j, uint16_t n)
{
	size_t nwords;

	if (n <= BPL) {
		*d = *j;
		return;
	}
	nwords = ((size_t)n + BPL - 1) >> LBPL;
	memcpy(d, j, nwords * sizeof(uint64_t));
}
/*
 * warning - print a printf-style diagnostic on stderr and return
 * @fmt: printf format string, followed by the arguments it consumes
 *
 * The format attribute lets the compiler check every call site against
 * its format string.
 */
static void warning(const char *fmt, ...) __attribute__((format(printf, 1, 2)));

static void
warning(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}
/*
 * fatal_error - report an unrecoverable condition and terminate
 * @fmt: printf format string, followed by the arguments it consumes
 *
 * The message goes to stderr; the process then exits with status 1.
 * This function never returns.
 */
static void fatal_error(char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(1);
}
/*
 * child - body of the forked process: become traced, then exec the command
 * @arg: NULL-terminated argument vector; arg[0] is the program to execute
 *
 * Does not return: either the command replaces the process image, or the
 * process terminates with status 1 after reporting the reason on stderr.
 */
int
child(char **arg)
{
	/*
	 * force the task to stop before executing the first
	 * user level instruction
	 */
	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
		fprintf(stderr, "cannot ptrace(PTRACE_TRACEME): %s\n", strerror(errno));
		_exit(1);
	}

	execvp(arg[0], arg);

	/* only reached when execvp() failed */
	fprintf(stderr, "cannot exec %s: %s\n", arg[0], strerror(errno));

	/*
	 * _exit() rather than exit(): this process is a fork of the monitoring
	 * program, and exit() would flush whatever stdio output the parent had
	 * buffered at fork time a second time.
	 */
	_exit(1);
}
static void
process_smpl_buf(smpl_hdr_t *hdr, uint64_t *smpl_pmds, unsigned int num_smpl_pmds, size_t entry_size)
{
static uint64_t last_overflow = ~0; /* initialize to biggest value possible */
static uint64_t last_count;
smpl_entry_t *ent;
size_t pos, count;
ibsopdata_t *opdata;
ibsopdata2_t *opdata2;
ibsopdata3_t *opdata3;
uint64_t entry, *reg;
unsigned int j, n;
if (hdr->hdr_overflows == last_overflow && hdr->hdr_count == last_count) {
warning("skipping identical set of samples %"PRIu64" = %"PRIu64"\n",
hdr->hdr_overflows, last_overflow);
return;
}
count = hdr->hdr_count;
if (options.opt_no_show) {
collected_samples += count;
return;
}
ent = (smpl_entry_t *)(hdr+1);
pos = (unsigned long)ent;
entry = collected_samples;
while(count--) {
printf("entry %"PRIu64" PID:%d TID:%d CPU:%d LAST_VAL:%"PRIu64" IIP:0x%llx\n",
entry,
ent->tgid,
ent->pid,
ent->cpu,
-ent->last_reset_val,
(unsigned long long)ent->ip);
/*
* print body: additional PMDs recorded
* PMD are recorded in increasing index order
*/
reg = (uint64_t *)(ent+1);
n = num_smpl_pmds;
for(j=0; n; j++) {
if (pfm_bv_isset(smpl_pmds, j)) {
switch(j) {
case 7:
printf("PMD%-3d:0x%016"PRIx64"\n", j, *reg);
/* check valid "record" bit */
if ((*reg & (1ull<<18)) == 0) {
printf("no data captured\n");
goto skip;
}
break;
case 9: /*IBSOPSDATA */
opdata = (ibsopdata_t *)reg;
printf("PMD%-3d:0x%016"PRIx64" : comptoret=%u tagtoretctr=%u opbrnresync=%u opmispret=%u opret=%u brntk=%u brnmips=%u bnrret=%u\n",
j,
*reg,
opdata->reg.ibscomptoretctr,
opdata->reg.ibstagtoretctr,
opdata->reg.ibsopbrnresync,
opdata->reg.ibsopmispreturn,
opdata->reg.ibsopreturn,
opdata->reg.ibsopbrntaken,
opdata->reg.ibsopbrnmisp,
opdata->reg.ibsopbrnret);
break;
case 10:
opdata2 = (ibsopdata2_t *)reg;
printf("PMD%-3d:0x%016"PRIx64" : reqsrc=%u reqdstproc=%s reqcachehitst=%u\n",
j,
*reg,
opdata2->reg.nbibsreqsrc,
opdata2->reg.nbibsreqdstproc ? "local" : "remote",
opdata2->reg.nbibsreqcachehitst);
break;
case 11:
opdata3 = (ibsopdata3_t *)reg;
printf("PMD%-3d:0x%016"PRIx64" : ld=%u st=%u L1TLBmiss=%u L2TLBmiss=%u L1TLBhit2M=%u L1TLBhit1G=%u L2TLBhit2M=%u miss=%u misalign=%u ld_bankconf=%u st_bankconf=%u st_to_ld_conf=%u st_to_ld_canc=%u UCaccess=%u WCaccess=%u lock=%u MAB=%u linevalid=%u physvalid=%u miss_lat=%u\n",
j,
*reg,
opdata3->reg.ibsldop,
opdata3->reg.ibsstop,
opdata3->reg.ibsdcl1tlbmiss,
opdata3->reg.ibsdcl2tlbmiss,
opdata3->reg.ibsdcl1tlbhit2m,
opdata3->reg.ibsdcl1tlbhit1g,
opdata3->reg.ibsdcl2tlbhit2m,
opdata3->reg.ibsdcmiss,
opdata3->reg.ibsdcmissacc,
opdata3->reg.ibsdcldbnkcon,
opdata3->reg.ibsdcstbnkcon,
opdata3->reg.ibsdcsttoldfwd,
opdata3->reg.ibsdcsttoldcan,
opdata3->reg.ibsdcucmemacc,
opdata3->reg.ibsdcwcmemacc,
opdata3->reg.ibsdclockedop,
opdata3->reg.ibsdcmabhit,
opdata3->reg.ibsdclinaddrvalid,
opdata3->reg.ibsdcphyaddrvalid,
opdata3->reg.ibsdcmisslat);
break;
default:
printf("PMD%-3d:0x%016"PRIx64"\n", j, *reg);
}
reg++;
n--;
}
}
skip:
pos += entry_size;
ent = (smpl_entry_t *)pos;
entry++;
}
collected_samples = entry;
last_overflow = hdr->hdr_overflows;
if (last_count != hdr->hdr_count && (last_count || last_overflow == 0))
collected_partial += hdr->hdr_count;
last_count = hdr->hdr_count;
}
/*
 * setup_pmu_ibsop_native - describe an IBS op sampling setup using fixed
 * register numbers and a hand-built control value
 * @pc: receives the control register (PMC5)
 * @pd: receives the associated data register (PMD7), flagged for overflow
 *      notification
 * @pa: receives the reset values and the set of PMDs to record (PMD7..PMD13)
 *
 * Control value layout:
 *   bit 17     enable
 *   bits 0-15  bits 19-4 of the 20-bit sampling period, whose bits 3:0
 *              must be zero
 *
 * PMC and PMD point to the same MSR; the PMD is the register the IBS
 * interrupt is associated with. Randomization on the IBS control registers
 * is ignored, and so are the value, short_reset and long_reset fields: the
 * sampling period is taken from the PMC. They are nevertheless set to the
 * PMC value here, which makes the last reset value available to the
 * program.
 *
 * Always returns PFMLIB_SUCCESS.
 */
static int
setup_pmu_ibsop_native(pfarg_pmr_t *pc, pfarg_pmr_t *pd, pfarg_pmd_attr_t *pa)
{
	const uint64_t period = 0xffff0;
	const uint64_t ctl = (1ULL <<17) | ((period >> 4) & 0xffffULL);
	uint16_t pmd;

	pc[0].reg_num = 5;
	pc[0].reg_value = ctl;

	pd[0].reg_num = 7;
	pd[0].reg_flags = PFM_REGFL_OVFL_NOTIFY;
	pd[0].reg_value = ctl;

	pa[0].reg_long_reset = ctl;
	pa[0].reg_short_reset = ctl;

	for (pmd = 7; pmd <= 13; pmd++)
		pfm_bv_set(pa[0].reg_smpl_pmds, pmd);

	return PFMLIB_SUCCESS;
}
/*
 * setup_pmu_ibsop - let the library compute the IBS op sampling setup
 * @pc: receives the control register chosen by pfm_dispatch_events()
 * @pd: receives the matching data register, flagged for overflow notification
 * @pa: receives, in reg_smpl_pmds[0], a mask of PMD_IBSOP_NUM consecutive
 *      PMDs starting at the data register
 *
 * Returns PFMLIB_SUCCESS, the error returned by pfm_dispatch_events(), or
 * PFMLIB_ERR_INVAL when the library's answer is not the single PMC/PMD pair
 * with IBSOP base 0 that this program expects.
 */
static int
setup_pmu_ibsop(pfarg_pmr_t *pc, pfarg_pmr_t *pd, pfarg_pmd_attr_t *pa)
{
	pfmlib_amd64_input_param_t inp_mod;
	pfmlib_output_param_t outp;
	pfmlib_amd64_output_param_t outp_mod;
	int ret;

	memset(&inp_mod,0, sizeof(inp_mod));
	memset(&outp,0, sizeof(outp));
	memset(&outp_mod,0, sizeof(outp_mod));

	/* setup ibsopctl register */
	inp_mod.ibsop.maxcnt = 0xFFFF0;
	inp_mod.flags |= PFMLIB_AMD64_USE_IBSOP;

	/* setup Perfmon2 registers */
	ret = pfm_dispatch_events(NULL, &inp_mod, &outp, &outp_mod);
	if (ret != PFMLIB_SUCCESS) {
		fprintf(stderr, "cannot dispatch events: %s\n", pfm_strerror(ret));
		return ret;
	}
	if (outp.pfp_pmc_count != 1) {
		fprintf(stderr, "Unexpected PMC register count: %d\n",
			outp.pfp_pmc_count);
		return PFMLIB_ERR_INVAL;
	}
	if (outp.pfp_pmd_count != 1) {
		fprintf(stderr, "Unexpected PMD register count: %d\n",
			outp.pfp_pmd_count);
		return PFMLIB_ERR_INVAL;
	}
	if (outp_mod.ibsop_base != 0) {
		fprintf(stderr, "Unexpected IBSOP base register: %d\n",
			outp_mod.ibsop_base);
		return PFMLIB_ERR_INVAL;
	}

	/* PMC_IBSOPCTL */
	pc[0].reg_num = outp.pfp_pmcs[0].reg_num;
	pc[0].reg_value = outp.pfp_pmcs[0].reg_value;

	/* PMD_IBSOPCTL */
	pd[0].reg_num = outp.pfp_pmds[0].reg_num;
	pd[0].reg_value = 0;

	/* setup all IBSOP registers for sampling */
	pd[0].reg_flags = PFM_REGFL_OVFL_NOTIFY;
	if (pd[0].reg_num > 64 - PMD_IBSOP_NUM) {
		fprintf(stderr, "Unexpected IBSOP base: %d\n",
			(int)pd[0].reg_num);
		return PFMLIB_ERR_INVAL;
	}

	/*
	 * The mask is built as a 64-bit value (1UL is 32 bits wide on ILP32
	 * targets) and shifted by the register number that was just range
	 * checked, so the shift always stays within 64 bits.
	 */
	pa[0].reg_smpl_pmds[0] =
		((1ULL << PMD_IBSOP_NUM) - 1) << pd[0].reg_num;

	return PFMLIB_SUCCESS;
}
/*
 * setup_pmu_ibsfetch - let the library compute the IBS fetch sampling setup
 * @pc: receives the control register chosen by pfm_dispatch_events()
 * @pd: receives the matching data register, flagged for overflow notification
 * @pa: receives, in reg_smpl_pmds[0], a mask of PMD_IBSFETCH_NUM consecutive
 *      PMDs starting at the data register
 *
 * Returns PFMLIB_SUCCESS, the error returned by pfm_dispatch_events(), or
 * PFMLIB_ERR_INVAL when the library's answer is not the single PMC/PMD pair
 * with IBSFETCH base 0 that this program expects.
 */
static int
setup_pmu_ibsfetch(pfarg_pmr_t *pc, pfarg_pmr_t *pd, pfarg_pmd_attr_t *pa)
{
	pfmlib_amd64_input_param_t inp_mod;
	pfmlib_output_param_t outp;
	pfmlib_amd64_output_param_t outp_mod;
	int ret;

	memset(&inp_mod,0, sizeof(inp_mod));
	memset(&outp,0, sizeof(outp));
	memset(&outp_mod,0, sizeof(outp_mod));

	/* setup ibsfetchctl register */
	inp_mod.ibsfetch.maxcnt = 0xFFFF0;
	inp_mod.flags |= PFMLIB_AMD64_USE_IBSFETCH;

	/* setup Perfmon2 registers */
	ret = pfm_dispatch_events(NULL, &inp_mod, &outp, &outp_mod);
	if (ret != PFMLIB_SUCCESS) {
		fprintf(stderr, "cannot dispatch events: %s\n", pfm_strerror(ret));
		return ret;
	}
	if (outp.pfp_pmc_count != 1) {
		fprintf(stderr, "Unexpected PMC register count: %d\n",
			outp.pfp_pmc_count);
		return PFMLIB_ERR_INVAL;
	}
	if (outp.pfp_pmd_count != 1) {
		fprintf(stderr, "Unexpected PMD register count: %d\n",
			outp.pfp_pmd_count);
		return PFMLIB_ERR_INVAL;
	}
	if (outp_mod.ibsfetch_base != 0) {
		fprintf(stderr, "Unexpected IBSFETCH base register: %d\n",
			outp_mod.ibsfetch_base);
		return PFMLIB_ERR_INVAL;
	}

	/* PMC_IBSFETCHCTL */
	pc[0].reg_num = outp.pfp_pmcs[0].reg_num;
	pc[0].reg_value = outp.pfp_pmcs[0].reg_value;

	/* PMD_IBSFETCHCTL */
	pd[0].reg_num = outp.pfp_pmds[0].reg_num;
	pd[0].reg_value = 0;

	/* setup all IBSFETCH registers for sampling */
	pd[0].reg_flags = PFM_REGFL_OVFL_NOTIFY;
	if (pd[0].reg_num > 64 - PMD_IBSFETCH_NUM) {
		fprintf(stderr, "Unexpected IBSFETCH base: %d\n",
			(int)pd[0].reg_num);
		return PFMLIB_ERR_INVAL;
	}

	/*
	 * The mask is built as a 64-bit value (1UL is 32 bits wide on ILP32
	 * targets) and shifted by the register number that was just range
	 * checked, so the shift always stays within 64 bits.
	 */
	pa[0].reg_smpl_pmds[0] =
		((1ULL << PMD_IBSFETCH_NUM) - 1) << pd[0].reg_num;

	return PFMLIB_SUCCESS;
}
int
mainloop(char **arg)
{
pfarg_pmr_t pc[1];
pfarg_pmr_t pd[1];
pfarg_pmd_attr_t pa[1];
smpl_hdr_t *hdr;
smpl_arg_t buf_arg;
struct timeval start_time, end_time;
pfarg_msg_t msg;
uint64_t ovfl_count = 0;
size_t entry_size;
void *buf_addr;
pid_t pid;
int status, ret, fd;
int pmc_count, pmd_count;
unsigned int num_smpl_pmds = 0;
uint32_t ctx_flags;
memset(pd, 0, sizeof(pd));
memset(pa, 0, sizeof(pa));
memset(pc, 0, sizeof(pc));
/* defaults */
num_smpl_pmds = 7;
pmc_count = pmd_count = 1;
switch (options.opt_setup) {
case OPT_IBSOP:
ret = setup_pmu_ibsop(pc, pd, pa);
break;
case OPT_IBSOP_NATIVE:
ret = setup_pmu_ibsop_native(pc, pd, pa);
break;
case OPT_IBSFETCH:
num_smpl_pmds = 3;
ret = setup_pmu_ibsfetch(pc, pd, pa);
break;
default:
ret = PFMLIB_ERR_NOTSUPP;
break;
}
if (ret != PFMLIB_SUCCESS)
fatal_error("cannot setup #%d\n", options.opt_setup);
/*
* in this example program, we use fixed-size entries, therefore we
* can compute the entry size in advance. Perfmon-2 supports variable
* size entries.
*/
entry_size = sizeof(smpl_entry_t)+(num_smpl_pmds<<3);
/*
* prepare session flags
*/
/*
* We initialize the format specific information.
* The format is identified by its UUID which must be copied
* into the ctx_buf_fmt_id field.
*/
ctx_flags = options.opt_block ? PFM_FL_NOTIFY_BLOCK : 0;
/*
* we use a samplig format, thus we are passing extra arguments
*/
ctx_flags |= PFM_FL_SMPL_FMT;
/*
* the size of the buffer is indicated in bytes (not entries).
*
* The kernel will record into the buffer up to a certain point.
* No partial samples are ever recorded.
*/
buf_arg.buf_size = 3*getpagesize();
/*
* now create our perfmon session.
*/
fd = pfm_create(ctx_flags, NULL, FMT_NAME, &buf_arg, sizeof(buf_arg));
if (fd == -1) {
if (errno == ENOSYS) {
fatal_error("Your kernel does not have performance monitoring support!\n");
}
fatal_error("cannot create session %s\n", strerror(errno));
}
/*
* retrieve the virtual address at which the sampling
* buffer has been mapped
*/
buf_addr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (buf_addr == MAP_FAILED)
fatal_error("cannot mmap sampling buffer: %s\n", strerror(errno));
printf("buffer mapped @%p\n", buf_addr);
hdr = (smpl_hdr_t *)buf_addr;
printf("hdr_cur_offs=%llu version=%u.%u\n",
(unsigned long long)hdr->hdr_cur_offs,
PFM_VERSION_MAJOR(hdr->hdr_version),
PFM_VERSION_MINOR(hdr->hdr_version));
if (PFM_VERSION_MAJOR(hdr->hdr_version) < 1)
fatal_error("invalid buffer format version\n");
/*
* Now program the registers
*/
if (pfm_write(fd, 0, PFM_RW_PMC, pc, pmc_count * sizeof(*pc)))
fatal_error("pfm_write error errno %d\n",errno);
/*
* initialize the PMDs
* To be read, each PMD must be either written or declared
* as being part of a sample (reg_smpl_pmds, reg_reset_pmds)
*/
if (pfm_write(fd, 0, PFM_RW_PMD_ATTR, pd, pmd_count * sizeof(*pd)))
fatal_error("pfm_write(PMD) error errno %d\n",errno);
/*
* Create the child task
*/
if ((pid=fork()) == -1)
fatal_error("Cannot fork process\n");
/*
* In order to get the PFM_END_MSG message, it is important
* to ensure that the child task does not inherit the file
* descriptor of the session. By default, file descriptor
* are inherited during exec(). We explicitely close it
* here. We could have set it up through fcntl(FD_CLOEXEC)
* to achieve the same thing.
*/
if (pid == 0) {
close(fd);
child(arg);
}
/*
* wait for the child to exec
*/
waitpid(pid, &status, WUNTRACED);
/*
* process is stopped at this point
*/
if (WIFEXITED(status)) {
warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
goto terminate_session;
}
/*
* attach session to stopped task
*/
if (pfm_attach(fd, 0, pid))
fatal_error("pfm_attach error errno %d\n",errno);
/*
* activate monitoring for stopped task.
* (nothing will be measured at this point
*/
if (pfm_set_state(fd, 0, PFM_ST_START))
fatal_error("pfm_start error errno %d\n",errno);
/*
* detach child. Side effect includes
* activation of monitoring.
*/
ptrace(PTRACE_DETACH, pid, NULL, 0);
gettimeofday(&start_time, NULL);
/*
* core loop
*/
for(;;) {
/*
* wait for overflow/end notification messages
*/
ret = read(fd, &msg, sizeof(msg));
if (ret == -1) {
if(ret == -1 && errno == EINTR) {
warning("read interrupted, retrying\n");
continue;
}
fatal_error("cannot read perfmon msg: %s\n", strerror(errno));
}
switch(msg.type) {
case PFM_MSG_OVFL: /* the sampling buffer is full */
process_smpl_buf(hdr, pa[0].reg_smpl_pmds, num_smpl_pmds, entry_size);
ovfl_count++;
/*
* reactivate monitoring once we are done with the samples
*
* Note that this call can fail with EBUSY in non-blocking mode
* as the task may have disappeared while we were processing
* the samples.
*/
if (pfm_set_state(fd, 0, PFM_ST_RESTART)) {
if (errno != EBUSY)
fatal_error("pfm_set_state(restart) error errno %d\n",errno);
else
warning("pfm_set_state(restart): task probably terminated \n");
}
break;
case PFM_MSG_END: /* monitored task terminated */
printf("task terminated\n");
goto terminate_session;
default: fatal_error("unknown message type %d\n", msg.type);
}
}
terminate_session:
/*
* cleanup child
*/
wait4(pid, &status, 0, NULL);
gettimeofday(&end_time, NULL);
/*
* check for any leftover samples
*/
process_smpl_buf(hdr, pa[0].reg_smpl_pmds, num_smpl_pmds, entry_size);
close(fd);
/*
* unmap buffer, actually free the buffer and session because placed after
* the close(), i.e. is the last reference. See comments about close() above.
*/
ret = munmap(hdr, (size_t)buf_arg.buf_size);
if (ret)
fatal_error("cannot unmap buffer: %s\n", strerror(errno));
printf("%"PRIu64" samples (%"PRIu64" in partial buffer) collected in %"PRIu64" buffer overflows\n",
collected_samples,
collected_partial,
ovfl_count);
return 0;
}
/*
 * usage - print the one-line command synopsis on stdout
 */
static void usage(void)
{
	static const char synopsis[] =
		"usage: smpl_amd64_ibs [-hdv] [--help] [--no-show] "
		"[--ovfl-block] [--ibsop] [--ibsfetch] [--ibsop-native] cmd\n";

	fputs(synopsis, stdout);
}
/*
 * main - parse the command line, initialize the pfm library, check the PMU
 * type and hand the command to mainloop()
 *
 * Options: -h/--help prints the synopsis and exits with status 0, -v and -d
 * switch on library verbose and debug output; the long options listed in
 * the_options[] are stored directly into the options structure. Everything
 * from the first non-option argument on is the command to run.
 *
 * Returns the value of mainloop(); every error terminates the process
 * through fatal_error().
 */
int
main(int argc, char **argv)
{
	pfmlib_options_t pfmlib_options;
	int opt, ret;
	/* same value the option parsing loop finishes with */
	int pmu_type = -1;

	/*
	 * pass options to library
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */

	for (;;) {
		opt = getopt_long(argc, argv,"+hvd", the_options, 0);
		if (opt == -1)
			break;

		if (opt == 0) {
			/* long option already stored through its flag pointer */
			continue;
		}
		if (opt == 1 || opt == 'h') {
			usage();
			exit(0);
		}
		if (opt == 'v') {
			pfmlib_options.pfm_verbose = 1;
			continue;
		}
		if (opt == 'd') {
			pfmlib_options.pfm_debug = 1;
			continue;
		}
		/* anything else: terminate without a message of our own */
		fatal_error("");
	}

	if (argv[optind] == NULL) {
		fatal_error("You must specify a command to execute\n");
	}

	pfm_set_options(&pfmlib_options);

	/*
	 * Initialize pfm library
	 */
	ret = pfm_initialize();
	if (ret != PFMLIB_SUCCESS)
		fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret));

	pfm_get_pmu_type(&pmu_type);
	if (pmu_type != PFMLIB_AMD64_PMU) {
		fatal_error("not running on an AMD64 processor\n");
	}

	/*
	 * XXX: would need to check for family 10h
	 */
	return mainloop(argv+optind);
}