/*
* multiplex2.c - example of kernel-level time-based or overflow-based event multiplexing
*
* Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* for getline */
#endif
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <syscall.h>
#include <getopt.h>
#include <signal.h>
#include <math.h>
#include <limits.h>
#include <setjmp.h>
#include <fcntl.h>
#include <time.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <perfmon/pfmlib.h>
#include <perfmon/perfmon.h>
#include "detect_pmcs.h"
/* upper bound for an event name length (NOTE(review): not referenced in this file) */
#define MAX_EVT_NAME_LEN 128
/* version string printed by -V/--version */
#define MULTIPLEX_VERSION "0.2"
/*
 * nominal set switching frequency in Hz
 * NOTE(review): not referenced in this file; when --freq is absent main()
 * uses the clock resolution as the switch period instead.
 */
#define SMPL_FREQ_IN_HZ 100
/* number of entries allocated for the all_pmcs table in mainloop() */
#define NUM_PMCS 256
/*
 * Program-wide configuration: the command line flags plus the values
 * main() and mainloop() derive from them.
 */
typedef struct {
struct {
int opt_plm; /* which privilege level to monitor (more than one possible) */
int opt_debug; /* print debug information */
int opt_verbose; /* verbose output */
int opt_us_format; /* print large numbers with comma for thousands */
int opt_ovfl_switch; /* overflow-based switching */
int opt_is_system; /* use system-wide */
int opt_excl_idle; /* exclude idle task */
int opt_excl_intr; /* exclude interrupts */
int opt_intr_only; /* interrupts only*/
int opt_no_cmd_out; /* redirect cmd output to /dev/null */
int opt_no_header; /* no header */
} program_opt_flags;
unsigned long max_counters; /* maximum number of counters for the platform */
uint64_t smpl_freq_hz; /* set switching frequency in Hz (--freq), recomputed by main() from the final period */
uint64_t smpl_freq_ns; /* set switching period in nsecs, a multiple of clock_res */
unsigned long session_timeout; /* session duration in seconds, 0 = no timeout */
uint64_t smpl_period; /* set switching period in CPU cycles (overflow-based switching) */
uint64_t clock_res; /* CLOCK_MONOTONIC resolution in nsecs */
unsigned long cpu_mhz; /* CPU speed as returned by get_cpu_speed() */
pid_t attach_pid; /* task to attach to, 0 = launch a command instead */
int pin_cmd_cpu; /* CPU the launched command is pinned to, -1 = not pinned */
int pin_cpu; /* CPU used for system-wide monitoring, -1 = not specified */
} program_options_t;
/*
 * shorthands: let the flags be accessed as options.opt_xxx instead of
 * options.program_opt_flags.opt_xxx
 */
#define opt_plm program_opt_flags.opt_plm
#define opt_debug program_opt_flags.opt_debug
#define opt_verbose program_opt_flags.opt_verbose
#define opt_us_format program_opt_flags.opt_us_format
#define opt_ovfl_switch program_opt_flags.opt_ovfl_switch
#define opt_is_system program_opt_flags.opt_is_system
#define opt_excl_idle program_opt_flags.opt_excl_idle
#define opt_excl_intr program_opt_flags.opt_excl_intr
#define opt_intr_only program_opt_flags.opt_intr_only
#define opt_no_cmd_out program_opt_flags.opt_no_cmd_out
#define opt_no_header program_opt_flags.opt_no_header
/*
 * One event set, kept in a singly-linked list in the order the sets
 * were specified.
 */
typedef struct _event_set_t {
struct _event_set_t *next; /* next set, NULL for the last one */
char *event_str; /* comma-separated event names; mainloop() replaces the commas with '\0' */
unsigned int n_events; /* events in the set, filled in by mainloop() (includes the cycle event in overflow mode) */
} event_set_t;
/* NOTE(review): not referenced in this file; contexts are held in plain int variables */
typedef int pfm_ctxid_t;
/* program configuration, filled in by main() */
static program_options_t options;
/* register and set tables built by mainloop() for all sets */
static pfarg_pmc_t *all_pmcs;
static pfarg_pmd_t *all_pmds;
static pfarg_setdesc_t *all_sets;
/* head of the list of event sets (one per --set, or the default sets) */
static event_set_t *all_events;
/* number of valid entries in all_pmds/all_pmcs/all_sets, and number of events over all sets */
static unsigned int num_pmds, num_pmcs, num_sets, total_events;
/* set by sigintr_handler(): 1 = SIGALRM (timeout), 2 = any other signal */
static volatile int time_to_quit;
/* jump target armed in measure_one_task(), used by sigintr_handler() */
static jmp_buf jbuf;
/* prints a message on stderr and exits, never returns */
static void fatal_error(char *fmt,...) __attribute__((noreturn));
/*
 * vbprintf - printf() on stdout which is silent unless verbose mode is on
 *
 * fmt: printf-style format string, followed by the matching arguments
 */
static void
vbprintf(char *fmt, ...)
{
	va_list args;

	if (options.opt_verbose != 0) {
		va_start(args, fmt);
		vprintf(fmt, args);
		va_end(args);
	}
}
/*
 * fatal_error - print a formatted message on stderr and terminate
 *
 * fmt: printf-style format string, followed by the matching arguments
 *
 * Does not return: the process exits with status 1.
 */
static void
fatal_error(char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);

	exit(1);
}
/*
 * get_cpu_speed - extract the CPU speed from /proc/cpuinfo
 *
 * Returns the rounded value of the first "cpu MHz" entry. When there is
 * no such entry, the last "BogoMIPS" value seen is returned instead.
 * Returns 0 when neither can be obtained, including when /proc/cpuinfo
 * cannot be opened.
 *
 * unreliable for CPU with variable clock speed
 */
static unsigned long
get_cpu_speed(void)
{
	FILE *fp;
	unsigned long mhz = 0, bogomips = 0;
	char line[128], *sep;
	float fl;

	fp = fopen("/proc/cpuinfo", "r");
	if (fp == NULL)
		return 0;

	while (fgets(line, sizeof(line), fp) != NULL) {
		/* skip blank lines */
		if (line[0] == '\n')
			continue;
		/*
		 * Entries have the form "key : value". A chunk without a
		 * colon is skipped rather than taken as the end of the file:
		 * lines longer than the buffer (e.g., "flags") are returned
		 * in several pieces by fgets() and only the first one holds
		 * the colon.
		 */
		sep = strchr(line, ':');
		if (sep == NULL)
			continue;
		*sep = '\0';

		/*
		 * sscanf() skips the blank after the colon and stops at the
		 * trailing newline. The value is used only when a number was
		 * actually converted.
		 */
		if (!strncasecmp("cpu MHz", line, 7)) {
			if (sscanf(sep + 1, "%f", &fl) == 1 && fl > 0) {
				mhz = lroundf(fl);
				break;
			}
			continue;
		}
		if (!strncasecmp("BogoMIPS", line, 8)) {
			if (sscanf(sep + 1, "%f", &fl) == 1 && fl > 0)
				bogomips = lroundf(fl);
		}
	}
	fclose(fp);

	return mhz == 0 ? bogomips : mhz;
}
/*
 * pin task to CPU
 */
#ifndef __NR_sched_setaffinity
#error "you need to define __NR_sched_setaffinity"
#endif
#define MAX_CPUS 2048
#define NR_CPU_BITS (MAX_CPUS>>3)
/*
 * pin_cpu - restrict a task to a single CPU
 *
 * pid: task to pin
 * cpu: CPU number, must be lower than MAX_CPUS (fatal error otherwise)
 *
 * Returns the value of the sched_setaffinity system call
 * (0 on success, -1 with errno set on error).
 */
int
pin_cpu(pid_t pid, unsigned int cpu)
{
	uint64_t my_mask[NR_CPU_BITS];

	if (cpu >= MAX_CPUS)
		fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS);

	/*
	 * every bit of the mask is meaningful to the kernel: it must be
	 * cleared first, otherwise whatever was on the stack would allow
	 * the task on arbitrary other CPUs.
	 */
	memset(my_mask, 0, sizeof(my_mask));
	my_mask[cpu>>6] = 1ULL << (cpu&63);

	return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), my_mask);
}
/*
 * child - body of the forked process: become traced, then exec the command
 *
 * arg: NULL-terminated argument vector, arg[0] is the command to run
 *
 * PTRACE_TRACEME is requested before the exec so that the parent can
 * wait for the command to be stopped and set up monitoring before the
 * command runs.
 *
 * Does not return: either the command replaces the process image or the
 * process exits with status 1.
 */
int
child(char **arg)
{
	int fd;

	ptrace(PTRACE_TRACEME, 0, NULL, NULL);

	if (options.pin_cmd_cpu != -1) {
		pin_cpu(getpid(), options.pin_cmd_cpu);
		vbprintf("command running on CPU core %d\n", options.pin_cmd_cpu);
	}

	if (options.opt_no_cmd_out) {
		/*
		 * redirect to /dev/null rather than just closing: with
		 * descriptors 1 and 2 closed, the next files opened by the
		 * command would silently become its stdout/stderr.
		 */
		fd = open("/dev/null", O_WRONLY);
		if (fd == -1) {
			close(1);
			close(2);
		} else {
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	execvp(arg[0], arg);

	/* only reached when execvp() failed */
	fprintf(stderr, "cannot execute %s: %s\n", arg[0], strerror(errno));
	/*
	 * _exit() and not exit(): the stdio buffers inherited from the
	 * parent must not be flushed a second time by the child.
	 */
	_exit(1);
}
/*
 * dec2sep - copy a string of digits, inserting a separator every 3 digits
 *
 * str2: input string (e.g., "1234567")
 * str : output buffer (e.g., "1,234,567"), must have room for the input,
 *       the separators and the terminating '\0'
 * sep : separator character
 *
 * Groups are formed from the right. Inputs of 3 characters or less are
 * copied unchanged.
 */
static void
dec2sep(char *str2, char *str, char sep)
{
	int len, src, dst, run;

	len = strlen(str2);
	if (len <= 3) {
		strcpy(str, str2);
		return;
	}

	/* one separator per complete group of 3, none in front of the first group */
	dst = len + len / 3 - (len % 3 == 0);

	/* fill the output backward, starting with the terminator */
	str[dst--] = str2[len];

	run = 0;
	for (src = len - 1; src >= 0; src--) {
		str[dst--] = str2[src];
		run++;
		if (run == 3 && src > 0) {
			str[dst--] = sep;
			run = 0;
		}
	}
}
/*
 * print_results - read back all counters and print one line per event
 *
 * ctxid      : perfmon context to read from
 * eff_timeout: effective set switch timeout in nsecs, printed in the
 *              header for time-based multiplexing
 *
 * For each event, the measured count is printed together with a count
 * scaled to the total active duration of all sets. Any failure to read
 * the counters or the set information is fatal, and so is a set which
 * never ran.
 */
static void
print_results(int ctxid, uint64_t *eff_timeout)
{
	unsigned int i, j, cnt, ovfl_event;
	uint64_t value, tot_runs = 0;
	uint64_t tot_dur = 0, c;
	pfarg_setinfo_t *all_setinfos;
	event_set_t *e;
	char *p;
	char tmp1[32], tmp2[32], *str;
	char mtotal_str[32];
	char stotal_str[32];
	int ret;

	all_setinfos = calloc(num_sets, sizeof(*all_setinfos));
	if (all_setinfos == NULL)
		fatal_error("cannot allocate all_setinfo\n");

	for (i=0; i < num_sets; i++)
		all_setinfos[i].set_id = i;

	/*
	 * read all counters in one call
	 *
	 * There is a limitation on the size of the argument vector and
	 * it may be necessary to split into multiple calls. That limit
	 * is usually at page size (16KB)
	 */
	ret = pfm_read_pmds(ctxid, all_pmds, num_pmds);
	if (ret == -1)
		fatal_error("cannot read pmds: %s\n", strerror(errno));

	/*
	 * extract all set information
	 *
	 * There is a limitation on the size of the argument vector and
	 * it may be necessary to split into multiple calls. That limit
	 * is usually at page size (16KB)
	 */
	ret = pfm_getinfo_evtsets(ctxid, all_setinfos, num_sets);
	if (ret == -1)
		fatal_error("cannot get set info: %s\n", strerror(errno));

	/*
	 * accumulate the number of runs and the active duration of all sets
	 *
	 * the number of runs per set can be at most off by 1 between all sets
	 */
	for (i=0; i < num_sets; i++) {
		if (all_setinfos[i].set_runs == 0)
			fatal_error("not enough runs to collect meaningful results: set%u did not run\n", i);
		tot_runs += all_setinfos[i].set_runs;
		tot_dur  += all_setinfos[i].set_act_duration;
	}

	/*
	 * print the results
	 *
	 * all_pmds holds the counters in the order mainloop() set them up:
	 * set after set and, within a set, in the order returned by the
	 * library. cnt walks that table while i/j walk the sets and their
	 * events.
	 */
	if (options.opt_no_header == 0) {
		printf("# %.2fHz period = %"PRIu64"nsecs\n# %"PRIu64" cycles @ %lu MHz\n",
			1000000000.0 / options.smpl_freq_ns,
			options.smpl_freq_ns,
			options.smpl_period,
			options.cpu_mhz);

		if (options.opt_ovfl_switch == 0)
			printf("# using time-based multiplexing\n"
			       "# %"PRIu64" nsecs effective switch timeout\n",
				*eff_timeout);
		else
			printf("# using overflow-based multiplexing\n");

		if (options.opt_is_system)
			printf("# system-wide mode on CPU core %d\n",options.pin_cpu);

		printf("# %u sets\n", num_sets);
		printf("# %.2f average run per set\n", (double)tot_runs/num_sets);
		printf("# %.2f average ns per set\n", (double)tot_dur/num_sets);
		printf("# set measured total #runs scaled total event name\n");
		printf("# ------------------------------------------------------------------\n");
	}

	/* in overflow mode the last event of each set is the cycle trigger, not printed */
	ovfl_event = options.opt_ovfl_switch ? 1 : 0;

	for (i=0, e = all_events, cnt = 0; i < num_sets; i++, e = e->next) {

		str = e->event_str;

		for (j=0; j < e->n_events-ovfl_event; j++, cnt++) {

			value = all_pmds[cnt].reg_value;

			snprintf(tmp1, sizeof(tmp1), "%"PRIu64, value);
			if (options.opt_us_format)
				dec2sep(tmp1, mtotal_str, ',');
			else
				strcpy(mtotal_str, tmp1);

			/*
			 * scaling
			 * We use duration rather than number of runs to compute a more precise
			 * scaled value. This avoids overcounting when the last set only partially
			 * ran.
			 *
			 * We use double to avoid overflowing of the 64-bit count in case of very
			 * large total duration
			 *
			 * A set reporting a null active duration cannot be scaled (it would
			 * be a division by zero): the measured value is reported as is.
			 */
			if (all_setinfos[i].set_act_duration == 0)
				c = value;
			else
				c = llround(((double)value*tot_dur)/(double)all_setinfos[i].set_act_duration);

			snprintf(tmp2, sizeof(tmp2), "%"PRIu64, c);
			if (options.opt_us_format)
				dec2sep(tmp2, stotal_str, ',');
			else
				strcpy(stotal_str, tmp2);

			printf(" %03u %20s %8"PRIu64" %20s %s\n",
				i,
				mtotal_str,
				all_setinfos[i].set_runs,
				stotal_str,
				str);

			/*
			 * move to the next event name of the set: mainloop() has
			 * replaced the commas of event_str by '\0'
			 */
			p = strchr(str, '\0');
			if (p)
				str = p+1;
		}
		/*
		 * skip the counter of the trailing cycle (trigger) event
		 */
		if (options.opt_ovfl_switch) cnt++;
	}
	free(all_setinfos);
}
/*
 * sigintr_handler - handler installed for SIGALRM and SIGINT
 *
 * Records why the session ends in time_to_quit (1 for SIGALRM, i.e., the
 * session timeout, 2 for any other signal) and then jumps back to the
 * setjmp() point recorded in jbuf.
 */
static void
sigintr_handler(int sig)
{
	time_to_quit = (sig == SIGALRM) ? 1 : 2;

	longjmp(jbuf, 1);
}
/*
 * measure_one_task - per-task measurement session
 *
 * argv: NULL-terminated command to launch (ignored when attaching to an
 *       existing task with options.attach_pid)
 *
 * Creates a context, programs the sets and registers prepared by
 * mainloop(), attaches the context to the task and waits until the task
 * terminates, the session timeout expires or the session is interrupted.
 * Results are printed unless the session was interrupted.
 *
 * Returns 0. Setup failures are fatal.
 */
static int
measure_one_task(char **argv)
{
	int ctxid;
	pfarg_ctx_t ctx[1];
	pfarg_setdesc_t *my_sets;
	pfarg_pmc_t *my_pmcs;
	pfarg_pmd_t *my_pmds;
	pfarg_load_t load_arg;
	uint64_t eff_timeout;
	pfarg_msg_t msg;
	ssize_t nread;
	pid_t pid;
	int status = 0, ret;

	my_pmcs = malloc(sizeof(pfarg_pmc_t)*num_pmcs);
	my_pmds = malloc(sizeof(pfarg_pmd_t)*num_pmds);
	my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets);

	if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL)
		fatal_error("cannot allocate event tables\n");
	/*
	 * make private copies
	 */
	memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs);
	memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds);
	memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets);

	memset(ctx, 0, sizeof(ctx));
	memset(&load_arg, 0, sizeof(load_arg));

	/*
	 * create the context
	 */
	ctxid = pfm_create_context(ctx, NULL, NULL, 0);
	if (ctxid == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC))
		fatal_error("cannot set CLOEXEC: %s\n", strerror(errno));

	/*
	 * create the event sets
	 *
	 * event set 0 always exists by default for backward compatibility
	 * reason. However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS
	 * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS
	 */
	vbprintf("requested timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout);

	if (pfm_create_evtsets(ctxid, my_sets, num_sets))
		fatal_error("cannot create sets\n");

	/* set_timeout is read back after the call to report the effective value */
	eff_timeout = my_sets[0].set_timeout;

	vbprintf("effective timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout);

	/*
	 * Now program all the registers in one call
	 *
	 * Note that there is a limitation on the size of the argument vector
	 * that can be passed. It is usually set to a page size (16KB).
	 */
	if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1)
		fatal_error("pfm_write_pmcs error errno %d\n",errno);

	/*
	 * initialize the PMD registers.
	 *
	 * To be read, each PMD must be either written or declared
	 * as being part of a sample (reg_smpl_pmds)
	 */
	if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1)
		fatal_error("pfm_write_pmds error errno %d\n",errno);

	/*
	 * now launch the child code
	 */
	if (options.attach_pid == 0) {
		if ((pid= fork()) == -1) fatal_error("Cannot fork process\n");
		if (pid == 0) exit(child(argv));
	} else {
		pid = options.attach_pid;
		ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
		if (ret) {
			fatal_error("cannot attach to task %d: %s\n",options.attach_pid, strerror(errno));
		}
	}

	/*
	 * wait for the task to be stopped (exec of the traced child or
	 * completion of the attach)
	 */
	ret = waitpid(pid, &status, WUNTRACED);
	if (ret < 0 || WIFEXITED(status))
		fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status));

	vbprintf("child created and stopped\n");

	/*
	 * now attach the context
	 */
	load_arg.load_pid = pid;
	if (pfm_load_context(ctxid, &load_arg) == -1)
		fatal_error("pfm_load_context error errno %d\n",errno);

	/*
	 * start monitoring
	 */
	if (pfm_start(ctxid, NULL) == -1)
		fatal_error("pfm_start error errno %d\n",errno);

	ptrace(PTRACE_DETACH, pid, NULL, 0);

	vbprintf("child restarted\n");

	/*
	 * sigintr_handler() jumps back here on SIGALRM/SIGINT. The locals
	 * used after the jump (pid, ctxid, eff_timeout, my_*) are not
	 * modified past this point.
	 */
	if (setjmp(jbuf) == 1) {
		if (time_to_quit == 1) {
			printf("timeout expired\n");
		}
		if (time_to_quit == 2)
			printf("session interrupted\n");
		goto finish_line;
	}
	signal(SIGALRM, sigintr_handler);
	signal(SIGINT, sigintr_handler);

	if (options.session_timeout) {
		printf("<monitoring for %lu seconds>\n", options.session_timeout);
		alarm(options.session_timeout);
	}

	/*
	 * mainloop: block until the kernel posts a message on the context
	 *
	 * The result is kept signed: comparing an int -1 directly against
	 * sizeof() converts it to a huge unsigned value and hides the error.
	 */
	nread = read(ctxid, &msg, sizeof(msg));
	if (nread == -1)
		fatal_error("cannot read perfmon message: %s\n", strerror(errno));
	if ((size_t)nread < sizeof(msg))
		fatal_error("interrupted read\n");

	switch(msg.type) {
		case PFM_MSG_OVFL:
			fatal_error("unexpected ovfl message\n");
			break;
		case PFM_MSG_END:
			break;
		default: printf("unknown message type %d\n", msg.type);
	}

finish_line:
	/*
	 * cleanup after an alarm timeout
	 */
	if (time_to_quit) {
		/* stop monitored task */
		ptrace(PTRACE_ATTACH, pid, NULL, 0);
		waitpid(pid, NULL, WUNTRACED);

		/* detach context */
		pfm_unload_context(ctxid);
	}

	if (options.attach_pid == 0) {
		kill(pid, SIGKILL);
		waitpid(pid, &status, 0);
	} else {
		ptrace(PTRACE_DETACH, pid, NULL, 0);
	}

	/* no results when the session was interrupted */
	if (time_to_quit < 2)
		print_results(ctxid, &eff_timeout);

	close(ctxid);

	free(my_sets);
	free(my_pmds);
	free(my_pmcs);

	return 0;
}
/*
 * measure_one_cpu - system-wide measurement session on one CPU
 *
 * argv: NULL-terminated command to launch while monitoring. When empty
 *       (argv[0] is NULL) the session lasts for the session timeout or
 *       until enter is pressed.
 *
 * Pins the current task onto the monitored CPU (options.pin_cpu, CPU 0
 * when none was specified), creates a system-wide context, programs the
 * sets and registers prepared by mainloop(), then prints the results
 * once the command or the session has completed.
 *
 * Returns 0. Setup failures are fatal.
 */
static int
measure_one_cpu(char **argv)
{
	int ctxid, status = 0;
	pfarg_ctx_t ctx[1];
	pfarg_pmc_t *my_pmcs;
	pfarg_pmd_t *my_pmds;
	pfarg_setdesc_t *my_sets;
	pfarg_load_t load_arg;
	pid_t pid = 0;
	int ret;

	/*
	 * The tables are sized with the number of entries actually copied
	 * below. The number of PMCs can be larger than the number of events
	 * (e.g., P4 uses 2 PMCs per event), so total_events is not a valid
	 * bound.
	 */
	my_pmcs = malloc(sizeof(pfarg_pmc_t)*num_pmcs);
	my_pmds = malloc(sizeof(pfarg_pmd_t)*num_pmds);
	my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets);

	if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL)
		fatal_error("cannot allocate event tables\n");
	/*
	 * make private copies
	 */
	memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs);
	memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds);
	memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets);

	memset(ctx, 0, sizeof(ctx));
	memset(&load_arg, 0, sizeof(load_arg));

	if (options.pin_cpu == -1) {
		options.pin_cpu = 0;
		printf("forcing monitoring onto CPU core 0\n");
	}
	/*
	 * pin ourselves onto the monitored CPU, whether it was specified
	 * with --cpu or defaulted to 0, so that the CPU reported in the
	 * results is the one we run on.
	 */
	if (pin_cpu(getpid(), options.pin_cpu) == -1)
		fatal_error("cannot pin to CPU core %d: %s\n", options.pin_cpu, strerror(errno));

	ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE;

	/*
	 * create the context
	 */
	ctxid = pfm_create_context(ctx, NULL, NULL, 0);
	if (ctxid == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC))
		fatal_error("cannot set CLOEXEC: %s\n", strerror(errno));

	/*
	 * create the event sets
	 *
	 * event set 0 is always created by default for backward compatibility
	 * reason. However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS
	 * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS
	 */
	if (pfm_create_evtsets(ctxid, my_sets, num_sets))
		fatal_error("cannot create sets\n");

	/*
	 * Now program all the registers in one call
	 *
	 * Note that there is a limitation on the size of the argument vector
	 * that can be passed. It is usually set to a page size (16KB).
	 */
	if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1)
		fatal_error("pfm_write_pmcs error errno %d\n",errno);

	/*
	 * initialize the PMD registers.
	 *
	 * To be read, each PMD must be either written or declared
	 * as being part of a sample (reg_smpl_pmds)
	 */
	if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1)
		fatal_error("pfm_write_pmds error errno %d\n",errno);

	/*
	 * now launch the child code
	 */
	if (*argv) {
		if ((pid = fork()) == -1) fatal_error("Cannot fork process\n");
		if (pid == 0) exit(child(argv));
	}

	/*
	 * wait for the child to exec or be stopped
	 * We do this even in system-wide mode to ensure
	 * that the task does not start until we are ready
	 * to monitor.
	 */
	if (pid) {
		ret = waitpid(pid, &status, WUNTRACED);
		if (ret < 0 || WIFEXITED(status))
			fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status));

		vbprintf("child created and stopped\n");
	}

	/*
	 * now attach the context
	 */
	load_arg.load_pid = options.opt_is_system ? getpid() : pid;
	if (pfm_load_context(ctxid, &load_arg) == -1)
		fatal_error("pfm_load_context error errno %d\n",errno);

	/*
	 * start monitoring
	 */
	if (pfm_start(ctxid, NULL) == -1)
		fatal_error("pfm_start error errno %d\n",errno);

	/* let the command run */
	if (pid) ptrace(PTRACE_DETACH, pid, NULL, 0);

	if (pid == 0) {
		if (options.session_timeout == 0) {
			printf("<press enter to stop>\n");
			getchar();
		} else {
			printf("<monitoring for %lu seconds>\n", options.session_timeout);
			sleep(options.session_timeout);
		}
	} else {
		waitpid(pid, &status, 0);
	}

	/* set_timeout was updated by set creation with the effective timeout */
	print_results(ctxid, &my_sets[0].set_timeout);

	/* ctxid is a valid descriptor here, whatever its value */
	close(ctxid);

	free(my_sets);
	free(my_pmds);
	free(my_pmcs);

	return 0;
}
/*
 * mainloop - build the register and set tables, then run the session
 *
 * argv: NULL-terminated command to launch (may be empty when attaching
 *       or in system-wide mode)
 *
 * For each event set of the all_events list, the event names are
 * resolved and assigned to PMU registers by the library, and the result
 * is appended to the all_pmcs/all_pmds/all_sets tables. In overflow
 * switching mode, the cycle event is added as the last event of each
 * set and used as the switch trigger.
 *
 * Returns the value of measure_one_cpu() or measure_one_task(). Any
 * configuration error is fatal.
 */
int
mainloop(char **argv)
{
	event_set_t *e;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfmlib_regmask_t impl_counters, used_pmcs;
	pfmlib_event_t cycle_event;
	unsigned int i, j;
	char *p, *str;
	int ret;
	unsigned int max_counters = 0, allowed_counters;

	ret = pfm_get_num_counters(&max_counters);
	if (ret != PFMLIB_SUCCESS)
		fatal_error("cannot get number of counters: %s\n", pfm_strerror(ret));

	if (max_counters < 2 && options.opt_ovfl_switch)
		fatal_error("not enough counter to get overflow switching to work\n");

	allowed_counters = max_counters;

	/*
	 * account for overflow counter (cpu cycles)
	 */
	if (options.opt_ovfl_switch) allowed_counters--;

	memset(&used_pmcs, 0, sizeof(used_pmcs));
	memset(&impl_counters, 0, sizeof(impl_counters));

	pfm_get_impl_counters(&impl_counters);

	/*
	 * the frequency recomputed by main() is an integer division and is
	 * null for a period above 1s: use 1Hz instead of dividing by zero
	 */
	if (options.smpl_freq_hz == 0)
		options.smpl_freq_hz = 1;

	options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq_hz;

	vbprintf("%"PRIu64"Hz period = %"PRIu64" cycles @ %luMhz\n", options.smpl_freq_hz, options.smpl_period, options.cpu_mhz);

	/*
	 * count the events of all sets: one per comma-separated name
	 */
	for (e = all_events; e; e = e->next) {
		for (p = str = e->event_str; p ; ) {
			p = strchr(str, ',');
			if (p) str = p +1;
			total_events++;
		}
	}
	/*
	 * account for extra event per set (cycle event)
	 */
	if (options.opt_ovfl_switch) {
		total_events += num_sets;
		/*
		 * look for our trigger event
		 */
		if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS)
			fatal_error("Cannot find cycle event\n");
	}

	vbprintf("total_events=%u\n", total_events);

	/*
	 * assumes number of pmds = number of events
	 * cannot assume number of pmcs = num of events (e.g., P4 2 PMCS per event)
	 */
	all_pmcs = calloc(NUM_PMCS, sizeof(pfarg_pmc_t));
	all_pmds = calloc(total_events, sizeof(pfarg_pmd_t));
	all_sets = calloc(num_sets, sizeof(pfarg_setdesc_t));

	if (all_pmcs == NULL || all_pmds == NULL || all_sets == NULL)
		fatal_error("cannot allocate event tables\n");

	/*
	 * use the library to figure out assignments for all events of all sets
	 */
	for (i=0, e = all_events; i < num_sets; i++, e = e->next) {

		memset(&inp,0, sizeof(inp));
		memset(&outp,0, sizeof(outp));

		/*
		 * build the pfp_unavail_pmcs bitmask by looking
		 * at what perfmon has available. It is not always
		 * the case that all PMU registers are actually available
		 * to applications. For instance, on IA-32 platforms, some
		 * registers may be reserved for the NMI watchdog timer.
		 *
		 * With this bitmap, the library knows which registers NOT to
		 * use. Of course, it is possible that no valid assignment may
		 * be possible if certain PMU registers are not available.
		 */
		detect_unavail_pmcs(-1, &inp.pfp_unavail_pmcs);

		/*
		 * resolve each event name of the set. The commas of event_str
		 * are replaced by '\0' on the way, print_results() relies on it.
		 */
		str = e->event_str;
		for (j=0, p = str; p && j < allowed_counters; j++) {

			p = strchr(str, ',');
			if (p)
				*p = '\0';

			ret = pfm_find_full_event(str, &inp.pfp_events[j]);
			if (ret != PFMLIB_SUCCESS)
				fatal_error("event %s for set %u event %u: %s\n", str, i, j, pfm_strerror(ret));

			if (p)
				str = p + 1;
		}
		/* p still set: names are left but no counter is available for them */
		if (p) {
			fatal_error("error in set %u: cannot have more than %u event(s) per set %s\n",
				i,
				allowed_counters,
				options.opt_ovfl_switch ? "(overflow switch mode)": "(hardware limit)");
		}
		/*
		 * add the cycle event as the last event when we switch on overflow
		 */
		if (options.opt_ovfl_switch) {
			inp.pfp_events[j] = cycle_event;
			inp.pfp_event_count = j+1;
			e->n_events = j+1;
		} else {
			e->n_events = j;
			inp.pfp_event_count = j;
		}
		inp.pfp_dfl_plm = options.opt_plm;

		if (options.opt_is_system)
			inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE;

		vbprintf("PMU programming for set %u\n", i);

		/*
		 * let the library do the hard work
		 */
		if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS)
			fatal_error("cannot configure events for set %u: %s\n", i, pfm_strerror(ret));

		/*
		 * the tables have a fixed size: make sure the registers of
		 * this set fit before writing them
		 */
		if (outp.pfp_pmc_count > NUM_PMCS - num_pmcs)
			fatal_error("too many PMC registers for set %u, limit is %d\n", i, NUM_PMCS);

		if (outp.pfp_pmd_count > total_events - num_pmds)
			fatal_error("too many PMD registers for set %u, limit is %u\n", i, total_events);

		/*
		 * propagate from libpfm to kernel data structures
		 */
		for (j=0; j < outp.pfp_pmc_count; j++, num_pmcs++) {
			all_pmcs[num_pmcs].reg_num = outp.pfp_pmcs[j].reg_num;
			all_pmcs[num_pmcs].reg_value = outp.pfp_pmcs[j].reg_value;
			all_pmcs[num_pmcs].reg_set = i;
		}
		for (j=0; j < outp.pfp_pmd_count; j++, num_pmds++) {
			all_pmds[num_pmds].reg_num = outp.pfp_pmds[j].reg_num;
			all_pmds[num_pmds].reg_set = i;
		}

		/*
		 * setup event set properties
		 */
		all_sets[i].set_id = i;

		if (options.opt_ovfl_switch) {
			all_sets[i].set_flags = PFM_SETFL_OVFL_SWITCH;
			/*
			 * last counter contains our sampling counter
			 *
			 * the first overflow of our trigger counter does
			 * trigger a switch.
			 */
			all_pmds[num_pmds-1].reg_ovfl_switch_cnt = 1;
			/*
			 * setup the sampling period: the counter starts at
			 * -period and overflows after period cycles
			 */
			all_pmds[num_pmds-1].reg_value = - options.smpl_period;
			all_pmds[num_pmds-1].reg_short_reset = - options.smpl_period;
			all_pmds[num_pmds-1].reg_long_reset = - options.smpl_period;
		} else {
			/*
			 * setup the switch timeout (in nanoseconds)
			 * Note that the actual timeout may be bigger than requested
			 * due to timer tick granularity. It is always advised to
			 * check the set_timeout value upon return from set creation.
			 * The structure will by then contain the actual timeout.
			 */
			all_sets[i].set_flags = PFM_SETFL_TIME_SWITCH;
			all_sets[i].set_timeout = options.smpl_freq_ns;
		}
#ifdef __ia64__
		if (options.opt_excl_intr && options.opt_is_system)
			all_sets[i].set_flags |= PFM_ITA_SETFL_EXCL_INTR;

		if (options.opt_intr_only && options.opt_is_system)
			all_sets[i].set_flags |= PFM_ITA_SETFL_INTR_ONLY;
#endif
	}

	if (options.opt_is_system)
		return measure_one_cpu(argv);

	return measure_one_task(argv);
}
/*
 * Long options table for getopt_long(): { name, has_arg, flag, val }.
 *
 * Entries with a null flag make getopt_long() return val, which is
 * handled by the switch statement of main(). Entries with a flag
 * pointer store val (1) directly into the corresponding option field
 * and make getopt_long() return 0.
 */
static struct option multiplex_options[]={
/* options handled by the switch statement of main() */
{ "help", 0, 0, 1},
{ "freq", 1, 0, 2 },
{ "kernel-level", 0, 0, 3 },
{ "user-level", 0, 0, 4 },
{ "version", 0, 0, 5 },
{ "set", 1, 0, 6 },
{ "session-timeout", 1, 0, 7 },
{ "attach-task", 1, 0, 8 },
{ "pin-cmd", 1, 0, 9 },
{ "cpu", 1, 0, 10 },
/* boolean options stored directly into the options structure */
{ "verbose", 0, &options.opt_verbose, 1 },
{ "debug", 0, &options.opt_debug, 1 },
{ "us-counter-format", 0, &options.opt_us_format, 1},
{ "ovfl-switch", 0, &options.opt_ovfl_switch, 1},
{ "system-wide", 0, &options.opt_is_system, 1},
#ifdef __ia64__
{ "excl-intr", 0, &options.opt_excl_intr, 1},
{ "intr-only", 0, &options.opt_intr_only, 1},
#endif
{ "no-cmd-output", 0, &options.opt_no_cmd_out, 1},
{ "no-header", 0, &options.opt_no_header, 1},
/* end of table marker */
{ 0, 0, 0, 0}
};
/*
 * generate_default_sets - build the sets used when no --set was given
 *
 * Two sets of one event each are appended to the all_events list: the
 * cycle event and the instruction retired event, as reported by the
 * library. num_sets is set to the number of sets created. Failure to
 * find an event or to allocate memory is fatal.
 */
static void
generate_default_sets(void)
{
	pfmlib_event_t events[2];
	event_set_t *last = NULL, *set;
	size_t max_len;
	unsigned int n = 0;
	char *buf;
	int ret;

	ret = pfm_get_cycle_event(&events[0]);
	if (ret != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	ret = pfm_get_inst_retired_event(&events[1]);
	if (ret != PFMLIB_SUCCESS)
		fatal_error("cannot find instruction retired event\n");

	pfm_get_max_event_name_len(&max_len);

	while (n < 2) {
		/* the name string is owned by the set */
		buf = malloc(max_len+1);
		if (buf == NULL)
			fatal_error("cannot allocate space for event name\n");

		pfm_get_full_event_name(&events[n], buf, max_len+1);

		/* calloc(): next is NULL and n_events is 0 */
		set = calloc(1, sizeof(*set));
		if (set == NULL)
			fatal_error("cannot allocate new event set\n");

		set->event_str = buf;

		/* append at the end of the list */
		if (all_events != NULL)
			last->next = set;
		else
			all_events = set;
		last = set;

		n++;
	}
	num_sets = n;
}
/*
 * print_usage - print the command line help on stdout
 *
 * argv: argument vector of the program, argv[0] is used as program name
 *
 * The text follows what main() and the multiplex_options table actually
 * accept: -c is the short form of --us-counter-format (--cpu has no
 * short form), and the interrupt filtering options exist on ia64 only.
 */
static void
print_usage(char **argv)
{
	printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]);

	printf( "-h, --help\t\t\t\tdisplay this help and exit\n"
		"-V, --version\t\t\t\toutput version information and exit\n"
		"-u, --user-level\t\t\tmonitor at the user level for all events\n"
		"-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n"
		"-c, --us-counter-format\t\t\tprint large counts with comma for thousands\n"
		"-p pid, --attach-task pid\t\tattach to a running task\n"
		"--set=ev1[,ev2,ev3,ev4,...]\t\tdescribe one set\n"
		"--freq=number\t\t\t\tset set switching frequency in Hz\n"
		"--cpu=cpu\t\t\t\tCPU to use for system-wide [default 0]\n"
		"--ovfl-switch\t\t\t\tuse overflow based multiplexing (default: time-based)\n"
		"-v, --verbose\t\t\t\tprint more information during execution\n"
		"--system-wide\t\t\t\tuse system-wide (only one CPU at a time)\n"
	);
#ifdef __ia64__
	printf( "--excl-intr\t\t\t\texclude interrupt triggered execution(system-wide only)\n"
		"--intr-only\t\t\t\tinclude only interrupt triggered execution(system-wide only)\n"
	);
#endif
	printf( "-t sec, --session-timeout=sec\t\tsession timeout in seconds\n"
		"--no-cmd-output\t\t\t\toutput of executed command redirected to /dev/null\n"
		"--no-header\t\t\t\tdo not print the header in front of the results\n"
		"--pin-cmd=cpu\t\t\t\tpin executed command onto a specific cpu\n"
	);
}
/*
 * parse_int_option - convert a decimal option argument to an int
 *
 * arg : argument string
 * what: description of the value, used in the error message
 * min : smallest accepted value
 *
 * Returns the value. Anything which is not a plain decimal number in
 * the range [min, INT_MAX] is a fatal error.
 */
static int
parse_int_option(const char *arg, const char *what, long min)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE || val < min || val > INT_MAX)
		fatal_error("invalid %s: %s\n", what, arg);

	return (int)val;
}

/*
 * main - parse the command line, compute the switch period and run
 *
 * The switch period is derived from --freq (default: the clock
 * resolution) and rounded up to a multiple of the CLOCK_MONOTONIC
 * resolution. When no set is specified, default sets are generated.
 *
 * Returns the value of mainloop(). Invalid options are fatal.
 */
int
main(int argc, char **argv)
{
	char *endptr = NULL;
	pfmlib_options_t pfmlib_options;
	event_set_t *tail = NULL, *es;
	unsigned long long_val;
	struct timespec ts;
	uint64_t f_ns, d, f_final;
	int c, ret;

	/* -1 means not specified */
	options.pin_cmd_cpu = options.pin_cpu = -1;

	while ((c=getopt_long(argc, argv,"+vhkuVct:p:", multiplex_options, 0)) != -1) {
		switch(c) {
			case 0: continue; /* fast path for options */

			case 'h':
			case 1:
				print_usage(argv);
				exit(0);

			case 'v': options.opt_verbose = 1;
				break;
			case 'c':
				options.opt_us_format = 1;
				break;
			case 2:
				if (options.smpl_freq_hz) fatal_error("sampling frequency set twice\n");
				errno = 0;
				options.smpl_freq_hz = strtoull(optarg, &endptr, 10);
				if (endptr == optarg || *endptr != '\0' || errno == ERANGE)
					fatal_error("invalid frequency: %s\n", optarg);
				/*
				 * above 1GHz the period in nsecs is null, which
				 * leads to a division by zero further down
				 */
				if (options.smpl_freq_hz > 1000000000)
					fatal_error("frequency is too high, must be <= 1000000000 Hz\n");
				break;
			case 3:
			case 'k':
				options.opt_plm |= PFM_PLM0;
				break;
			case 4:
			case 'u':
				options.opt_plm |= PFM_PLM3;
				break;
			case 'V':
			case 5:
				printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n"
					"Copyright (C) 2004 Hewlett-Packard Company\n");
				exit(0);
			case 6:
				/* event_str points into argv, no copy is made */
				es = malloc(sizeof(*es));
				if (es == NULL) fatal_error("cannot allocate new event set\n");

				es->event_str = optarg;
				es->next = NULL;
				es->n_events = 0;

				if (all_events == NULL)
					all_events = es;
				else
					tail->next = es;
				tail = es;
				num_sets++;
				break;
			case 't':
			case 7:
				if (options.session_timeout) fatal_error("too many timeouts\n");
				if (*optarg == '\0') fatal_error("--session-timeout needs an argument\n");
				long_val = strtoul(optarg,&endptr, 10);
				if (*endptr != '\0')
					fatal_error("invalid number of seconds for timeout: %s\n", optarg);
				if (long_val >= UINT_MAX)
					fatal_error("timeout is too big, must be < %u\n", UINT_MAX);
				options.session_timeout = (unsigned int)long_val;
				break;
			case 'p':
			case 8:
				if (options.attach_pid) fatal_error("process to attach specified twice\n");
				options.attach_pid = (pid_t)parse_int_option(optarg, "process id", 1);
				break;
			case 9:
				if (options.pin_cmd_cpu != -1) fatal_error("cannot pin command twice\n");
				options.pin_cmd_cpu = parse_int_option(optarg, "cpu for command", 0);
				break;
			case 10:
				if (options.pin_cpu != -1) fatal_error("cannot pin to more than one cpu\n");
				options.pin_cpu = parse_int_option(optarg, "cpu", 0);
				break;
			default:
				fatal_error(""); /* just quit silently now */
		}
	}

	if (optind == argc && options.opt_is_system == 0 && options.attach_pid == 0)
		fatal_error("you need to specify a command to measure\n");

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */
	pfm_set_options(&pfmlib_options);

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	ret = pfm_initialize();
	if (ret != PFMLIB_SUCCESS)
		fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret));

	if ((options.cpu_mhz = get_cpu_speed()) == 0)
		fatal_error("can't get CPU speed\n");

	/*
	 * extract kernel clock resolution
	 *
	 * it is used as a divisor below and must therefore be valid
	 */
	if (clock_getres(CLOCK_MONOTONIC, &ts) == -1)
		fatal_error("cannot get clock resolution: %s\n", strerror(errno));

	options.clock_res = (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
	if (options.clock_res == 0)
		fatal_error("invalid clock resolution\n");

	/*
	 * adjust frequency to be a multiple of clock resolution
	 * otherwise kernel will fail pfm_create_evtsets()
	 */

	/*
	 * f_ns = run period in ns (1s/hz)
	 * default switch period is clock resolution
	 */
	if (options.smpl_freq_hz == 0)
		f_ns = options.clock_res;
	else
		f_ns = 1000000000 / options.smpl_freq_hz;

	/* round up period in nanoseconds */
	d = (f_ns+options.clock_res-1) / options.clock_res;

	/* final period (multiple of clock_res) */
	f_final = d * options.clock_res;

	if (options.opt_ovfl_switch)
		printf("clock_res=%"PRIu64"ns(%.2fHz) ask period=%"PRIu64"ns(%.2fHz) get period=%"PRIu64"ns(%.2fHz)\n",
			options.clock_res,
			1000000000.0 / options.clock_res,
			f_ns,
			1000000000.0 / f_ns,
			f_final,
			1000000000.0 / f_final);

	if (f_ns != f_final)
		printf("Not getting the expected frequency due to kernel/hw limitation\n");

	/* adjust period */
	options.smpl_freq_ns = f_final;

	/*
	 * effective frequency, used by mainloop() to compute the period in
	 * cycles. The integer division gives 0 for a period above 1s: use
	 * 1Hz in that case so that it never becomes a null divisor.
	 */
	options.smpl_freq_hz = 1000000000 / f_final;
	if (options.smpl_freq_hz == 0)
		options.smpl_freq_hz = 1;

	/* monitor at the user level by default */
	if (options.opt_plm == 0) options.opt_plm = PFM_PLM3;

	if (num_sets == 0)
		generate_default_sets();

	return mainloop(argv+optind);
}