/* $Id: perfex.c,v 1.33.2.8 2010/06/08 20:48:56 mikpe Exp $ * * NAME * perfex - a command-line interface to processor performance counters * * SYNOPSIS * perfex [-e event] .. [--nhlm_offcore_rsp_{0,1}=value] [--p4{pe,pmv}=value] [-o file] command * perfex { -i | -l | -L } * * DESCRIPTION * The given command is executed; after it is complete, perfex * prints the values of the various hardware performance counters. * * OPTIONS * -e event | --event=event * Specify an event to be counted. * Multiple event specifiers may be given, limited by the * number of available performance counters in the processor. * * The full syntax of an event specifier is "evntsel/evntsel2@pmc". * All three components are 32-bit processor-specific numbers, * written in decimal or hexadecimal notation. * * "evntsel" is the primary processor-specific event selection * code to use for this event. This field is mandatory. * * "/evntsel2" provides auxiliary event selection data for this * event. On a Pentium 4, "evntsel" is put in the counter's * CCCR register, and "evntsel2" is put in the associated ESCR * register. On an AMD Family 10h processor, "evntsel2" is put * in the high 32 bits of the counter's 64-bit EVNTSEL register. * On other processors "/evntsel2" should be omitted. * * "@pmc" describes which CPU counter number to assign this * event to. When omitted, the events are assigned in the * order listed, starting from 0. Either all or none of the * event specifiers should use the "@pmc" notation. * Explicit counter assignment via "@pmc" is required on * Pentium 4 and VIA C3 processors. It is also required for the * fixed-function counters on Core 2 and Atom processors. * * The counts, together with an event description are written * to the result file (default is stderr). * * --nhlm_offcore_rsp_{0,1}=value * Specify the value to be stored in the auxiliary control * registers OFFCORE_RSP_{0,1} on Nehalem processors. * * --p4pe=value | --p4_pebs_enable=value * --p4pmv=value | --p4_pebs_matrix_vert=value * Specify the value to be stored in the auxiliary control * register PEBS_ENABLE or PEBS_MATRIX_VERT, which are used * for replay tagging events on Pentium 4 processors. * Note: Intel's documentation states that bit 25 should be * set in PEBS_ENABLE, but this is not true and the driver * will disallow it. * * -i | --info * Instead of running a command, generate output which * identifies the current processor and its capabilities. * * -l | --list * Instead of running a command, generate output which * identifies the current processor and its capabilities, * and lists its countable events. * * -L | --long-list * Like -l, but list the events in a more detailed format. * * -o file | --output=file * Write the results to file instead of stderr. * * EXAMPLES * The following commands count the number of retired instructions * in user-mode on an Intel P6 processor: * * perfex -e 0x004100C0 some_program * perfex --event=0x004100C0 some_program * * The following command does the same on an Intel Pentium 4 processor: * * perfex -e 0x00039000/0x04000204@0x8000000C some_program * * Explanation: Program IQ_CCCR0 with required flags, ESCR select 4 * (== CRU_ESCR0), and Enable. Program CRU_ESCR0 with event 2 * (instr_retired), NBOGUSNTAG, CPL>0. Map this event to IQ_COUNTER0 * (0xC) with fast RDPMC enabled. * * The following command counts the number of L1 cache read misses * on a Pentium 4 processor: * * perfex -e 0x0003B000/0x12000204@0x8000000C --p4pe=0x01000001 --p4pmv=0x1 some_program * * Explanation: IQ_CCCR0 is bound to CRU_ESCR2, CRU_ESCR2 is set up * for replay_event with non-bogus uops and CPL>0, and PEBS_ENABLE * and PEBS_MATRIX_VERT are set up for the 1stL_cache_load_miss_retired * metric. Note that bit 25 is NOT set in PEBS_ENABLE. * * DEPENDENCIES * perfex only works on Linux systems which have been modified * to include the perfctr kernel extension. Perfctr is available at * http://user.it.uu.se/~mikpe/linux/perfctr/. * * NOTES * perfex is superficially similar to IRIX' perfex(1). * The -a, -mp, -s, and -x options are not yet implemented. * * Copyright (C) 1999-2010 Mikael Pettersson */ /* * Theory of operation: * - Parent creates a socketpair(). * - Parent forks. * - Child creates and sets up its perfctrs. * - Child sends its perfctr fd to parent via the socketpair(). * - Child exec:s the command. * - Parent waits for child to exit. * - Parent receives child's perfctr fd via the socketpair(). * - Parent retrieves child's final control and counts via the fd. */ #include #include #include #include #include #include /* for offsetof() */ #include #include #include /* for strerror() */ #include #include "libperfctr.h" #include "arch.h" /* * Our child-to-parent protocol is the following: * There is an int-sized data packet, with an optional 'struct cmsg_fd' * control message attached. * The data packet (which must be present, as control messages don't * work with zero-sized payloads) contains an 'int' status. * If status != 0, then it is an 'errno' value from the child's * perfctr setup code. */ struct cmsg_fd { struct cmsghdr hdr; int fd; /* 64-bit machines pad here, which causes problems since the kernel derives the number of fds from the size. The CMSG_FD_TRUE_SIZE macro gives the true payload size. */ }; #define CMSG_FD_TRUE_SIZE (offsetof(struct cmsg_fd, fd) + sizeof(int)) #define CMSG_FD_PADDED_SIZE sizeof(struct cmsg_fd) static int my_send(int sock, int fd, int status) { struct msghdr msg; struct iovec iov; struct cmsg_fd cmsg_fd; int buf[1]; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_flags = 0; buf[0] = status; iov.iov_base = buf; iov.iov_len = sizeof buf; msg.msg_iov = &iov; msg.msg_iovlen = 1; if (status != 0) { /* errno, don't send fd */ msg.msg_control = 0; msg.msg_controllen = 0; } else { cmsg_fd.hdr.cmsg_len = CMSG_FD_TRUE_SIZE; cmsg_fd.hdr.cmsg_level = SOL_SOCKET; cmsg_fd.hdr.cmsg_type = SCM_RIGHTS; cmsg_fd.fd = fd; msg.msg_control = &cmsg_fd; msg.msg_controllen = CMSG_FD_TRUE_SIZE; } return sendmsg(sock, &msg, 0) == sizeof buf ? 0 : -1; } static int my_send_fd(int sock, int fd) { return my_send(sock, fd, 0); } static int my_send_err(int sock) { return my_send(sock, -1, errno); } static int my_receive(int sock, int *fd) { struct msghdr msg; struct iovec iov; struct cmsg_fd cmsg_fd; int buf[1]; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_flags = 0; buf[0] = -1; iov.iov_base = buf; iov.iov_len = sizeof(buf); msg.msg_iov = &iov; msg.msg_iovlen = 1; memset(&cmsg_fd, ~0, sizeof cmsg_fd); msg.msg_control = &cmsg_fd; msg.msg_controllen = CMSG_FD_TRUE_SIZE; if (recvmsg(sock, &msg, 0) != sizeof buf) return -1; if (buf[0] == 0 && msg.msg_control == &cmsg_fd && msg.msg_controllen == CMSG_FD_PADDED_SIZE && cmsg_fd.hdr.cmsg_type == SCM_RIGHTS && cmsg_fd.hdr.cmsg_level == SOL_SOCKET && cmsg_fd.hdr.cmsg_len == CMSG_FD_TRUE_SIZE && cmsg_fd.fd >= 0) { *fd = cmsg_fd.fd; return 0; } if (msg.msg_controllen == 0 && buf[0] != 0) errno = buf[0]; else errno = EPROTO; return -1; } static int do_open_self(int creat) { int fd; fd = _vperfctr_open(creat); if (fd >= 0 && perfctr_abi_check_fd(fd) < 0) { close(fd); return -1; } return fd; } static int do_child(int sock, const struct vperfctr_control *control, char **argv) { int fd; fd = do_open_self(1); if (fd < 0) { my_send_err(sock); return 1; } if (_vperfctr_control(fd, control) < 0) { my_send_err(sock); return 1; } if (my_send_fd(sock, fd) < 0) { my_send_err(sock); /* well, we can try.. */ return 1; } close(fd); close(sock); execvp(argv[0], argv); perror(argv[0]); return 1; } static int do_parent(int sock, int child_pid, FILE *resfile, const struct perfctr_info *info) { int child_status; int fd; struct perfctr_sum_ctrs sum; struct vperfctr_control control; /* this can be done before or after the recvmsg() */ if (waitpid(child_pid, &child_status, 0) < 0) { perror("perfex: waitpid"); return 1; } if (!WIFEXITED(child_status)) { fprintf(stderr, "perfex: child did not exit normally\n"); return 1; } if (my_receive(sock, &fd) < 0) { perror("perfex: receiving fd/status"); return 1; } close(sock); /* XXX: surely we don't need to repeat the ABI check here? */ if (_vperfctr_read_sum(fd, &sum) < 0) { perror("perfex: read_sum"); return 1; } if (_vperfctr_read_control(fd, &control) < 0) { perror("perfex: read_control"); return 1; } close(fd); do_print(resfile, info, &control.cpu_control, &sum); return WEXITSTATUS(child_status); } static int do_perfex( const struct vperfctr_control *control, char **argv, FILE *resfile, const struct perfctr_info *info) { int pid; int sv[2]; if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) { perror("perfex: socketpair"); return 1; } pid = fork(); if (pid < 0) { perror("perfex: fork"); return 1; } if (pid == 0) { close(sv[0]); return do_child(sv[1], control, argv); } else { close(sv[1]); return do_parent(sv[0], pid, resfile, info); } } static int get_info(struct perfctr_info *info) { int fd; fd = do_open_self(0); if (fd < 0) { perror("perfex: open perfctrs"); return -1; } if (perfctr_info(fd, info) < 0) { perror("perfex: perfctr_info"); close(fd); return -1; } close(fd); return 0; } static struct perfctr_cpus_info *get_cpus_info(void) { int fd; struct perfctr_cpus_info *cpus_info; fd = do_open_self(0); if (fd < 0) { perror("perfex: open perfctrs"); return NULL; } cpus_info = perfctr_cpus_info(fd); if (!cpus_info) perror("perfex: perfctr_cpus_info"); close(fd); return cpus_info; } static int do_info(const struct perfctr_info *info) { struct perfctr_cpus_info *cpus_info; cpus_info = get_cpus_info(); printf("PerfCtr Info:\n"); perfctr_info_print(info); if (cpus_info) { perfctr_cpus_info_print(cpus_info); free(cpus_info); } return 0; } static void do_print_event(const struct perfctr_event *event, int long_format, const char *event_prefix) { printf("%s%s", event_prefix, event->name); if (long_format) printf(":0x%02X:0x%X:0x%X", event->evntsel, event->counters_set, event->unit_mask ? event->unit_mask->default_value : 0); printf("\n"); } static void do_print_event_set(const struct perfctr_event_set *event_set, int long_format) { unsigned int i; if (event_set->include) do_print_event_set(event_set->include, long_format); for(i = 0; i < event_set->nevents; ++i) do_print_event(&event_set->events[i], long_format, event_set->event_prefix); } static void do_list(const struct perfctr_info *info, int long_format) { const struct perfctr_event_set *event_set; unsigned int nrctrs; printf("CPU type %s\n", perfctr_info_cpu_name(info)); printf("%s time-stamp counter available\n", (info->cpu_features & PERFCTR_FEATURE_RDTSC) ? "One" : "No"); nrctrs = perfctr_info_nrctrs(info); printf("%u performance counter%s available\n", nrctrs, (nrctrs == 1) ? "" : "s"); printf("Overflow interrupts%s available\n", (info->cpu_features & PERFCTR_FEATURE_PCINT) ? "" : " not"); event_set = perfctr_cpu_event_set(info->cpu_type); if (!event_set || !event_set->nevents) { printf("\nThe user-space library does not include an event list for this CPU type (this is not an error)\n"); return; } printf("\nEvents Available:\n"); if (long_format) printf("Name:EvntSel:CounterSet:DefaultUnitMask\n"); do_print_event_set(event_set, long_format); return; } /* Hack while phasing out an old number parsing bug. */ static unsigned int strtoul_base = 16; static unsigned int quiet; unsigned long my_strtoul(const char *nptr, char **endptr) { unsigned long val1; val1 = strtoul(nptr, endptr, strtoul_base); if (strtoul_base == 16 && !quiet) { unsigned long val2 = strtoul(nptr, NULL, 0); if (val1 != val2) fprintf(stderr, "perfex: warning: string '%s' is base-dependent, assuming base 16." " Please prefix hexadecimal numbers with '0x'.\n", nptr); } return val1; } static const struct option long_options[] = { { "decimal", 0, NULL, 'd' }, { "event", 1, NULL, 'e' }, { "help", 0, NULL, 'h' }, { "hex", 0, NULL, 'x' }, { "info", 0, NULL, 'i' }, { "list", 0, NULL, 'l' }, { "long-list", 0, NULL, 'L' }, { "output", 1, NULL, 'o' }, ARCH_LONG_OPTIONS { 0 } }; static void do_usage(void) { fprintf(stderr, "Usage: perfex [options] [] ...\n"); fprintf(stderr, "\tperfex -i\n"); fprintf(stderr, "\tperfex -h\n"); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "\t-e | --event=\tEvent to be counted\n"); fprintf(stderr, "\t-h | --help\t\t\tPrint this help text\n"); fprintf(stderr, "\t-o | --output=\tWrite output to file (default is stderr)\n"); fprintf(stderr, "\t-i | --info\t\t\tPrint PerfCtr driver information\n"); fprintf(stderr, "\t-l | --list\t\t\tList available events\n"); fprintf(stderr, "\t-L | --long-list\t\tList available events in long format\n"); fprintf(stderr, "\t-d | --decimal\t\t\tAllow decimal numbers in event specifications\n"); fprintf(stderr, "\t-x | --hex\t\t\tOnly accept hexadecimal numbers in event specifications\n"); do_arch_usage(); } int main(int argc, char **argv) { struct perfctr_info info; struct vperfctr_control control; int n; FILE *resfile; /* prime info, as we'll need it in most cases */ if (get_info(&info)) return 1; /* this deliberately also clears CLOEXEC in control.flags */ memset(&control, 0, sizeof control); if (info.cpu_features & PERFCTR_FEATURE_RDTSC) control.cpu_control.tsc_on = 1; n = 0; resfile = stderr; for(;;) { /* the '+' is there to prevent permutation of argv[] */ int ch = getopt_long(argc, argv, "+de:hilLo:x", long_options, NULL); switch (ch) { case -1: /* no more options */ if (optind >= argc) { fprintf(stderr, "perfex: command missing\n"); return 1; } argv += optind; break; case 'h': do_usage(); return 0; case 'i': return do_info(&info); case 'l': do_list(&info, 0); return 0; case 'L': do_list(&info, 1); return 0; case 'o': if ((resfile = fopen(optarg, "w")) == NULL) { fprintf(stderr, "perfex: %s: %s\n", optarg, strerror(errno)); return 1; } continue; case 'd': strtoul_base = 0; continue; case 'x': strtoul_base = 16; quiet = 1; continue; case 'e': n = do_event_spec(n, optarg, &control.cpu_control); continue; default: if (do_arch_option(ch, optarg, &control.cpu_control) < 0) { do_usage(); return 1; } continue; } break; } return do_perfex(&control, argv, resfile, &info); }