/* * multiplex2.c - example of kernel-level time-based or overflow-based event multiplexing * * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define MAX_EVT_NAME_LEN 128 #define MULTIPLEX_VERSION "0.2" #define SMPL_FREQ_IN_HZ 100 #define NUM_PMCS 256 typedef struct { struct { int opt_plm; /* which privilege level to monitor (more than one possible) */ int opt_debug; /* print debug information */ int opt_verbose; /* verbose output */ int opt_us_format; /* print large numbers with comma for thousands */ int opt_ovfl_switch; /* overflow-based switching */ int opt_is_system; /* use system-wide */ int opt_excl_idle; /* exclude idle task */ int opt_excl_intr; /* exclude interrupts */ int opt_intr_only; /* interrupts only*/ int opt_no_cmd_out; /* redirect cmd output to /dev/null */ int opt_no_header; /* no header */ } program_opt_flags; unsigned long max_counters; /* maximum number of counter for the platform */ uint64_t smpl_freq_hz; uint64_t smpl_freq_ns; unsigned long session_timeout; uint64_t smpl_period; uint64_t clock_res; unsigned long cpu_mhz; pid_t attach_pid; int pin_cmd_cpu; int pin_cpu; } program_options_t; #define opt_plm program_opt_flags.opt_plm #define opt_debug program_opt_flags.opt_debug #define opt_verbose program_opt_flags.opt_verbose #define opt_us_format program_opt_flags.opt_us_format #define opt_ovfl_switch program_opt_flags.opt_ovfl_switch #define opt_is_system program_opt_flags.opt_is_system #define opt_excl_idle program_opt_flags.opt_excl_idle #define opt_excl_intr program_opt_flags.opt_excl_intr #define opt_intr_only program_opt_flags.opt_intr_only #define opt_no_cmd_out program_opt_flags.opt_no_cmd_out #define opt_no_header program_opt_flags.opt_no_header typedef struct _event_set_t { struct _event_set_t *next; char *event_str; unsigned int n_events; } event_set_t; typedef int pfm_ctxid_t; static program_options_t options; static pfarg_pmc_t *all_pmcs; static pfarg_pmd_t *all_pmds; static pfarg_setdesc_t *all_sets; static event_set_t *all_events; static unsigned int num_pmds, num_pmcs, num_sets, total_events; static volatile int time_to_quit; static jmp_buf jbuf; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void vbprintf(char *fmt, ...) { va_list ap; if (options.opt_verbose == 0) return; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * unreliable for CPU with variable clock speed */ static unsigned long get_cpu_speed(void) { FILE *fp1; unsigned long f1 = 0, f2 = 0; char buffer[128], *p, *value; memset(buffer, 0, sizeof(buffer)); fp1 = fopen("/proc/cpuinfo", "r"); if (fp1 == NULL) return 0; for (;;) { buffer[0] = '\0'; p = fgets(buffer, 127, fp1); if (p == NULL) break; /* skip blank lines */ if (*p == '\n') continue; p = strchr(buffer, ':'); if (p == NULL) break; /* * p+2: +1 = space, +2= firt character * strlen()-1 gets rid of \n */ *p = '\0'; value = p+2; value[strlen(value)-1] = '\0'; if (!strncasecmp("cpu MHz", buffer, 7)) { float fl; sscanf(value, "%f", &fl); f1 = lroundf(fl); break; } if (!strncasecmp("BogoMIPS", buffer, 8)) { float fl; sscanf(value, "%f", &fl); f2 = lroundf(fl); } } fclose(fp1); return f1 == 0 ? f2 : f1; } /* * pin task to CPU */ #ifndef __NR_sched_setaffinity #error "you need to define __NR_sched_setaffinity" #endif #define MAX_CPUS 2048 #define NR_CPU_BITS (MAX_CPUS>>3) int pin_cpu(pid_t pid, unsigned int cpu) { uint64_t my_mask[NR_CPU_BITS]; if (cpu >= MAX_CPUS) fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); my_mask[cpu>>6] = 1ULL << (cpu&63); return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); } int child(char **arg) { ptrace(PTRACE_TRACEME, 0, NULL, NULL); if (options.pin_cmd_cpu != -1) { pin_cpu(getpid(), options.pin_cmd_cpu); vbprintf("command running on CPU core %d\n", options.pin_cmd_cpu); } if (options.opt_no_cmd_out) { close(1); close(2); } execvp(arg[0], arg); /* not reached */ exit(1); } static void dec2sep(char *str2, char *str, char sep) { int i, l, b, j, c=0; l = strlen(str2); if (l <= 3) { strcpy(str, str2); return; } b = l + l /3 - (l%3 == 0); /* l%3=correction to avoid extraneous comma at the end */ for(i=l, j=0; i >= 0; i--, j++) { if (j) c++; str[b-j] = str2[i]; if (c == 3 && i>0) { str[b-++j] = sep; c = 0; } } } static void print_results(int ctxid, uint64_t *eff_timeout) { unsigned int i, j, cnt, ovfl_event; uint64_t value, tot_runs = 0; uint64_t tot_dur = 0, c; pfarg_setinfo_t *all_setinfos; event_set_t *e; char *p; char tmp1[32], tmp2[32], *str; char mtotal_str[32], *mtotal; char stotal_str[32], *stotal; int ret; all_setinfos = malloc(sizeof(pfarg_setinfo_t)*num_sets); if (all_setinfos == NULL) fatal_error("cannot allocate all_setinfo\n"); memset(all_setinfos, 0, sizeof(pfarg_setinfo_t)*num_sets); for(i=0; i < num_sets; i++) all_setinfos[i].set_id = i; /* * read all counters in one call * * There is a limitation on the size of the argument vector and * it may be necesarry to split into multiple calls. That limit * is usally at page size (16KB) */ ret = pfm_read_pmds(ctxid, all_pmds, num_pmds); if (ret == -1) fatal_error("cannot read pmds: %s\n", strerror(errno)); /* * extract all set information * * There is a limitation on the size of the argument vector and * it may be necesarry to split into multiple calls. That limit * is usually at page size (16KB) */ ret = pfm_getinfo_evtsets(ctxid, all_setinfos, num_sets); if (ret == -1) fatal_error("cannot get set info: %s\n", strerror(errno)); /* * compute average number of runs * * the number of runs per set can be at most off by 1 between all sets */ for (i=0, cnt = 0; i < num_sets; i++) { if (all_setinfos[i].set_runs == 0) fatal_error("not enough runs to collect meaningful results: set%u did not run\n", i); tot_runs += all_setinfos[i].set_runs; tot_dur += all_setinfos[i].set_act_duration; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ if (options.opt_no_header == 0) { printf("# %.2fHz period = %"PRIu64"nsecs\n# %"PRIu64" cycles @ %lu MHz\n", 1000000000.0 / options.smpl_freq_ns, options.smpl_freq_ns, options.smpl_period, options.cpu_mhz); if (options.opt_ovfl_switch == 0) printf("# using time-based multiplexing\n" "# %"PRIu64" nsecs effective switch timeout\n", *eff_timeout); else printf("# using overflow-based multiplexing\n"); if (options.opt_is_system) printf("# system-wide mode on CPU core %d\n",options.pin_cpu); printf("# %d sets\n", num_sets); printf("# %.2f average run per set\n", (double)tot_runs/num_sets); printf("# %.2f average ns per set\n", (double)tot_dur/num_sets); printf("# set measured total #runs scaled total event name\n"); printf("# ------------------------------------------------------------------\n"); } ovfl_event = options.opt_ovfl_switch ? 1 : 0; for (i=0, e = all_events, cnt = 0; i < num_sets; i++, e = e->next) { str = e->event_str; for(j=0; j < e->n_events-ovfl_event; j++, cnt++) { value = all_pmds[cnt].reg_value; sprintf(tmp1, "%"PRIu64, value); if (options.opt_us_format) { dec2sep(tmp1, mtotal_str, ','); } else { strcpy(mtotal_str, tmp1); } mtotal = mtotal_str; /* * scaling * We use duration rather than number of runs to compute a more precise * scaled value. This avoids overcounting when the last set only partially * ran. * * We use double to avoid overflowing of the 64-bit count in case of very * large total duration */ c = llround(((double)value*tot_dur)/(double)all_setinfos[i].set_act_duration); sprintf(tmp2, "%"PRIu64, c); if (options.opt_us_format) { dec2sep(tmp2, stotal_str, ','); } else { strcpy(stotal_str, tmp2); } stotal = stotal_str; printf(" %03d %20s %8"PRIu64" %20s %s\n", i, mtotal, all_setinfos[i].set_runs, stotal, str); p = strchr(str, '\0'); if (p) str = p+1; } /* * skip first event */ if (options.opt_ovfl_switch) cnt++; } } static void sigintr_handler(int sig) { if (sig == SIGALRM) time_to_quit = 1; else time_to_quit = 2; longjmp(jbuf, 1); } static int measure_one_task(char **argv) { int ctxid; pfarg_ctx_t ctx[1]; pfarg_setdesc_t *my_sets; pfarg_pmc_t *my_pmcs; pfarg_pmd_t *my_pmds; pfarg_load_t load_arg; uint64_t eff_timeout; pfarg_msg_t msg; pid_t pid; int status, ret; my_pmcs = malloc(sizeof(pfarg_pmc_t)*num_pmcs); my_pmds = malloc(sizeof(pfarg_pmd_t)*num_pmds); my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets); if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL) fatal_error("cannot allocate event tables\n"); /* * make private copies */ memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs); memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds); memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets); memset(ctx, 0, sizeof(ctx)); memset(&load_arg, 0, sizeof(load_arg)); /* * create the context */ ctxid = pfm_create_context(ctx, NULL, NULL, 0); if (ctxid == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., * fd not visible to child. */ if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); /* * create the event sets * * event set 0 is always exist by default for backward compatibility * reason. However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS */ vbprintf("requested timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout); if (pfm_create_evtsets(ctxid, my_sets, num_sets)) fatal_error("cannot create sets\n"); eff_timeout = my_sets[0].set_timeout; vbprintf("effective timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout); /* * Now program the all the registers in one call * * Note that there is a limitation on the size of the argument vector * that can be passed. It is usually set to a page size (16KB). */ if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); /* * initialize the PMD registers. * * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * now launch the child code */ if (options.attach_pid == 0) { if ((pid= fork()) == -1) fatal_error("Cannot fork process\n"); if (pid == 0) exit(child(argv)); } else { pid = options.attach_pid; ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret) { fatal_error("cannot attach to task %d: %s\n",options.attach_pid, strerror(errno)); } } ret = waitpid(pid, &status, WUNTRACED); if (ret < 0 || WIFEXITED(status)) fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); vbprintf("child created and stopped\n"); /* * now attach the context */ load_arg.load_pid = pid; if (pfm_load_context(ctxid, &load_arg) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * start monitoring */ if (pfm_start(ctxid, NULL) == -1) fatal_error("pfm_start error errno %d\n",errno); ptrace(PTRACE_DETACH, pid, NULL, 0); vbprintf("child restarted\n"); if (setjmp(jbuf) == 1) { if (time_to_quit == 1) { printf("timeout expired\n"); } if (time_to_quit == 2) printf("session interrupted\n"); goto finish_line; } signal(SIGALRM, sigintr_handler); signal(SIGINT, sigintr_handler); if (options.session_timeout) { printf("\n", options.session_timeout); alarm(options.session_timeout); } /* * mainloop */ ret = read(ctxid, &msg, sizeof(msg)); if (ret < sizeof(msg)) fatal_error("interrupted read\n"); switch(msg.type) { case PFM_MSG_OVFL: fatal_error("unexpected ovfl message\n"); break; case PFM_MSG_END: break; default: printf("unknown message type %d\n", msg.type); } finish_line: /* * cleanup after an alarm timeout */ if (time_to_quit) { /* stop monitored task */ ptrace(PTRACE_ATTACH, pid, NULL, 0); waitpid(pid, NULL, WUNTRACED); /* detach context */ pfm_unload_context(ctxid); } if (options.attach_pid == 0) { kill(pid, SIGKILL); waitpid(pid, &status, 0); } else { ptrace(PTRACE_DETACH, pid, NULL, 0); } if (time_to_quit < 2) print_results(ctxid, &eff_timeout); close(ctxid); return 0; } static int measure_one_cpu(char **argv) { int ctxid, status; pfarg_ctx_t ctx[1]; pfarg_pmc_t *my_pmcs; pfarg_pmd_t *my_pmds; pfarg_setdesc_t *my_sets; pfarg_load_t load_arg; pid_t pid = 0; int ret; my_pmcs = malloc(sizeof(pfarg_pmc_t)*total_events); my_pmds = malloc(sizeof(pfarg_pmd_t)*total_events); my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets); if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL) fatal_error("cannot allocate event tables\n"); /* * make private copies */ memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs); memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds); memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets); memset(ctx, 0, sizeof(ctx)); memset(&load_arg, 0, sizeof(load_arg)); if (options.pin_cpu == -1) { options.pin_cpu = 0; printf("forcing monitoring onto CPU core 0\n"); pin_cpu(getpid(), 0); } ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE; /* * create the context */ ctxid = pfm_create_context(ctx, NULL, NULL, 0); if (ctxid == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., * fd not visible to child. */ if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); /* * create the event sets * * event set 0 is always created by default for backward compatibility * reason. However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS */ if (pfm_create_evtsets(ctxid, my_sets, num_sets)) fatal_error("cannot create sets\n"); /* * Now program the all the registers in one call * * Note that there is a limitation on the size of the argument vector * that can be passed. It is usually set to a page size (16KB). */ if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); /* * initialize the PMD registers. * * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * now launch the child code */ if (*argv) { if ((pid = fork()) == -1) fatal_error("Cannot fork process\n"); if (pid == 0) exit(child(argv)); } /* * wait for the child to exec or be stopped * We do this even in system-wide mode to ensure * that the task does not start until we are ready * to monitor. */ if (pid) { ret = waitpid(pid, &status, WUNTRACED); if (ret < 0 || WIFEXITED(status)) fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); vbprintf("child created and stopped\n"); } /* * now attach the context */ load_arg.load_pid = options.opt_is_system ? getpid() : pid; if (pfm_load_context(ctxid, &load_arg) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * start monitoring */ if (pfm_start(ctxid, NULL) == -1) fatal_error("pfm_start error errno %d\n",errno); if (pid) ptrace(PTRACE_DETACH, pid, NULL, 0); if (pid == 0) { if (options.session_timeout == 0) { printf("\n"); getchar(); } else { printf("\n", options.session_timeout); sleep(options.session_timeout); } } else { ret = waitpid(pid, &status, 0); } print_results(ctxid, &my_sets[0].set_timeout); if (ctxid) close(ctxid); return 0; } int mainloop(char **argv) { event_set_t *e; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_regmask_t impl_counters, used_pmcs; pfmlib_event_t cycle_event; unsigned int i, j; char *p, *str; int ret; unsigned int max_counters, allowed_counters; pfm_get_num_counters(&max_counters); if (max_counters < 2 && options.opt_ovfl_switch) fatal_error("not enough counter to get overflow switching to work\n"); allowed_counters = max_counters; /* * account for overflow counter (cpu cycles) */ if (options.opt_ovfl_switch) allowed_counters--; memset(&used_pmcs, 0, sizeof(used_pmcs)); memset(&impl_counters, 0, sizeof(impl_counters)); pfm_get_impl_counters(&impl_counters); options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq_hz; vbprintf("%"PRIu64"Hz period = %"PRIu64" cycles @ %luMhz\n", options.smpl_freq_hz, options.smpl_period, options.cpu_mhz); for (e = all_events; e; e = e->next) { for (p = str = e->event_str; p ; ) { p = strchr(str, ','); if (p) str = p +1; total_events++; } } /* * account for extra event per set (cycle event) */ if (options.opt_ovfl_switch) { total_events += num_sets; /* * look for our trigger event */ if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS) fatal_error("Cannot find cycle event\n"); } vbprintf("total_events=%u\n", total_events); /* * assumes number of pmds = number of events * cannot assume number of pmcs = num of events (e.g., P4 2 PMCS per event) */ all_pmcs = calloc(NUM_PMCS, sizeof(pfarg_pmc_t)); all_pmds = calloc(total_events, sizeof(pfarg_pmd_t)); all_sets = calloc(num_sets, sizeof(pfarg_setdesc_t)); if (all_pmcs == NULL || all_pmds == NULL || all_sets == NULL) fatal_error("cannot allocate event tables\n"); /* * use the library to figure out assignments for all events of all sets */ for (i=0, e = all_events; i < num_sets; i++, e = e->next) { memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmcs(-1, &inp.pfp_unavail_pmcs); str = e->event_str; for(j=0, p = str; p && j < allowed_counters; j++) { p = strchr(str, ','); if (p) *p = '\0'; ret = pfm_find_full_event(str, &inp.pfp_events[j]); if (ret != PFMLIB_SUCCESS) fatal_error("event %s for set %d event %d: %s\n", str, i, j, pfm_strerror(ret)); if (p) str = p + 1; } if (p) { fatal_error("error in set %d: cannot have more than %d event(s) per set %s\n", i, allowed_counters, options.opt_ovfl_switch ? "(overflow switch mode)": "(hardware limit)"); } /* * add the cycle event as the last event when we switch on overflow */ if (options.opt_ovfl_switch) { inp.pfp_events[j] = cycle_event; inp.pfp_event_count = j+1; inp.pfp_dfl_plm = options.opt_plm; e->n_events = j+1; } else { e->n_events = j; inp.pfp_event_count = j; } inp.pfp_dfl_plm = options.opt_plm; if (options.opt_is_system) inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; vbprintf("PMU programming for set %d\n", i); /* * let the library do the hard work */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret)); /* * propagate from libpfm to kernel data structures */ for (j=0; j < outp.pfp_pmc_count; j++, num_pmcs++) { all_pmcs[num_pmcs].reg_num = outp.pfp_pmcs[j].reg_num; all_pmcs[num_pmcs].reg_value = outp.pfp_pmcs[j].reg_value; all_pmcs[num_pmcs].reg_set = i; } for (j=0; j < outp.pfp_pmd_count; j++, num_pmds++) { all_pmds[num_pmds].reg_num = outp.pfp_pmds[j].reg_num; all_pmds[num_pmds].reg_set = i; } /* * setup event set properties */ all_sets[i].set_id = i; if (options.opt_ovfl_switch) { all_sets[i].set_flags = PFM_SETFL_OVFL_SWITCH; /* * last counter contains our sampling counter * * the first overflow of our trigger counter does * trigger a switch. */ all_pmds[num_pmds-1].reg_ovfl_switch_cnt = 1; /* * We do this even in system-wide mode to ensure * that the task does not start until we are ready * to monitor. * setup the sampling period */ all_pmds[num_pmds-1].reg_value = - options.smpl_period; all_pmds[num_pmds-1].reg_short_reset = - options.smpl_period; all_pmds[num_pmds-1].reg_long_reset = - options.smpl_period; } else { /* * setup the switch timeout (in nanoseconds) * Note that the actual timeout may be bigger than requested * due to timer tick granularity. It is always advised to * check the set_timeout value upon return from set creation. * The structure will by then contain the actual timeout. */ all_sets[i].set_flags = PFM_SETFL_TIME_SWITCH; all_sets[i].set_timeout = options.smpl_freq_ns; } #ifdef __ia64__ if (options.opt_excl_intr && options.opt_is_system) all_sets[i].set_flags |= PFM_ITA_SETFL_EXCL_INTR; if (options.opt_intr_only && options.opt_is_system) all_sets[i].set_flags |= PFM_ITA_SETFL_INTR_ONLY; #endif } if (options.opt_is_system) return measure_one_cpu(argv); return measure_one_task(argv); } static struct option multiplex_options[]={ { "help", 0, 0, 1}, { "freq", 1, 0, 2 }, { "kernel-level", 0, 0, 3 }, { "user-level", 0, 0, 4 }, { "version", 0, 0, 5 }, { "set", 1, 0, 6 }, { "session-timeout", 1, 0, 7 }, { "attach-task", 1, 0, 8 }, { "pin-cmd", 1, 0, 9 }, { "cpu", 1, 0, 10 }, { "verbose", 0, &options.opt_verbose, 1 }, { "debug", 0, &options.opt_debug, 1 }, { "us-counter-format", 0, &options.opt_us_format, 1}, { "ovfl-switch", 0, &options.opt_ovfl_switch, 1}, { "system-wide", 0, &options.opt_is_system, 1}, #ifdef __ia64__ { "excl-intr", 0, &options.opt_excl_intr, 1}, { "intr-only", 0, &options.opt_intr_only, 1}, #endif { "no-cmd-output", 0, &options.opt_no_cmd_out, 1}, { "no-header", 0, &options.opt_no_header, 1}, { 0, 0, 0, 0} }; static void generate_default_sets(void) { event_set_t *es, *tail = NULL; pfmlib_event_t events[2]; size_t len; char *name; unsigned int i; int ret; ret = pfm_get_cycle_event(&events[0]); if (ret != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); ret = pfm_get_inst_retired_event(&events[1]); if (ret != PFMLIB_SUCCESS) fatal_error("cannot find instruction retired event\n"); pfm_get_max_event_name_len(&len); for (i=0; i < 2; i++) { name = malloc(len+1); if (name == NULL) fatal_error("cannot allocate space for event name\n"); pfm_get_full_event_name(events+i, name, len+1); es = (event_set_t *)malloc(sizeof(event_set_t)); if (es == NULL) fatal_error("cannot allocate new event set\n"); memset(es, 0, sizeof(*es)); es->event_str = name; es->next = NULL; es->n_events = 0; if (all_events == NULL) all_events = es; else tail->next = es; tail = es; } num_sets = i; } static void print_usage(char **argv) { printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]); printf( "-h, --help\t\t\t\tdisplay this help and exit\n" "-V, --version\t\t\t\toutput version information and exit\n" "-u, --user-level\t\t\tmonitor at the user level for all events\n" "-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n" "-c, --us-counter-format\t\t\tprint large counts with comma for thousands\n" "-p pid, --attach-task pid\t\tattach to a running task\n" "--set=ev1[,ev2,ev3,ev4,...]\t\tdescribe one set\n" "--freq=number\t\t\t\tset set switching frequency in Hz\n" "-c cpu, --cpu=cpu\t\t\tCPU to use for system-wide [default current]\n" "--ovfl-switch\t\t\t\tuse overflow based multiplexing (default: time-based)\n" "--verbose\t\t\t\tprint more information during execution\n" "--system-wide\t\t\t\tuse system-wide (only one CPU at a time)\n" "--excl-idle\t\t\t\texclude idle task(system-wide only)\n" "--excl-intr\t\t\t\texclude interrupt triggered execution(system-wide only)\n" "--intr-only\t\t\t\tinclude only interrupt triggered execution(system-wide only)\n" "--session-timeout=sec\t\t\tsession timeout in seconds (system-wide only)\n" "--no-cmd-output\t\t\t\toutput of executed command redirected to /dev/null\n" "--pin-cmd=cpu\t\t\t\tpin executed command onto a specific cpu\n" ); } int main(int argc, char **argv) { char *endptr = NULL; pfmlib_options_t pfmlib_options; event_set_t *tail = NULL, *es; unsigned long long_val; struct timespec ts; uint64_t f_ns, d, f_final; int c, ret; options.pin_cmd_cpu = options.pin_cpu = -1; while ((c=getopt_long(argc, argv,"+vhkuVct:p:", multiplex_options, 0)) != -1) { switch(c) { case 0: continue; /* fast path for options */ case 'h': case 1: print_usage(argv); exit(0); case 'v': options.opt_verbose = 1; break; case 'c': options.opt_us_format = 1; break; case 2: if (options.smpl_freq_hz) fatal_error("sampling frequency set twice\n"); options.smpl_freq_hz = strtoull(optarg, &endptr, 10); if (*endptr != '\0') fatal_error("invalid frequency: %s\n", optarg); break; case 3: case 'k': options.opt_plm |= PFM_PLM0; break; case 4: case 'u': options.opt_plm |= PFM_PLM3; break; case 'V': case 5: printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n" "Copyright (C) 2004 Hewlett-Packard Company\n"); exit(0); case 6: es = (event_set_t *)malloc(sizeof(event_set_t)); if (es == NULL) fatal_error("cannot allocate new event set\n"); es->event_str = optarg; es->next = NULL; es->n_events = 0; if (all_events == NULL) all_events = es; else tail->next = es; tail = es; num_sets++; break; case 't': case 7: if (options.session_timeout) fatal_error("too many timeouts\n"); if (*optarg == '\0') fatal_error("--session-timeout needs an argument\n"); long_val = strtoul(optarg,&endptr, 10); if (*endptr != '\0') fatal_error("invalid number of seconds for timeout: %s\n", optarg); if (long_val >= UINT_MAX) fatal_error("timeout is too big, must be < %u\n", UINT_MAX); options.session_timeout = (unsigned int)long_val; break; case 'p': case 8: if (options.attach_pid) fatal_error("process to attach specified twice\n"); options.attach_pid = (pid_t)atoi(optarg); break; case 9: if (options.pin_cmd_cpu != -1) fatal_error("cannot pin command twice\n"); options.pin_cmd_cpu = atoi(optarg); break; case 10: if (options.pin_cpu != -1) fatal_error("cannot pin to more than one cpu\n"); options.pin_cpu = atoi(optarg); break; default: fatal_error(""); /* just quit silently now */ } } if (optind == argc && options.opt_is_system == 0 && options.attach_pid == 0) fatal_error("you need to specify a command to measure\n"); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); if ((options.cpu_mhz = get_cpu_speed()) == 0) fatal_error("can't get CPU speed\n"); /* * extract kernel clock resolution */ clock_getres(CLOCK_MONOTONIC, &ts); options.clock_res = ts.tv_sec * 1000000000 + ts.tv_nsec; /* * adjust frequency to be a multiple of clock resolution * otherwise kernel will fail pfm_create_evtsets() */ /* * f_ns = run period in ns (1s/hz) * default switch period is clock resolution */ if (options.smpl_freq_hz == 0) f_ns = options.clock_res; else f_ns = 1000000000 / options.smpl_freq_hz; /* round up period in nanoseconds */ d = (f_ns+options.clock_res-1) / options.clock_res; /* final period (multilple of clock_res */ f_final = d * options.clock_res; if (options.opt_ovfl_switch) printf("clock_res=%"PRIu64"ns(%.2fHz) ask period=%"PRIu64"ns(%.2fHz) get period=%"PRIu64"ns(%.2fHz)\n", options.clock_res, 1000000000.0 / options.clock_res, f_ns, 1000000000.0 / f_ns, f_final, 1000000000.0 / f_final); if (f_ns != f_final) printf("Not getting the expected frequency due to kernel/hw limitation\n"); /* adjust period */ options.smpl_freq_ns = f_final; /* not used */ options.smpl_freq_hz = 1000000000 / f_final; if (options.opt_plm == 0) options.opt_plm = PFM_PLM3; if (num_sets == 0) generate_default_sets(); return mainloop(argv+optind); }