/* * task_inherit.c - example of a task counting event in a tree of child processes * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define MAX_GROUPS 256 typedef struct { const char *events[MAX_GROUPS]; int num_groups; int format_group; int inherit; int print; int pin; pid_t pid; } options_t; static options_t options; static volatile int quit; int child(char **arg) { /* * execute the requested command */ execvp(arg[0], arg); errx(1, "cannot exec: %s\n", arg[0]); /* not reached */ } static void read_groups(perf_event_desc_t *fds, int num) { uint64_t *values = NULL; size_t new_sz, sz = 0; int i, evt; ssize_t ret; /* * { u64 nr; * { u64 time_enabled; } && PERF_FORMAT_ENABLED * { u64 time_running; } && PERF_FORMAT_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID * } cntr[nr]; * } && PERF_FORMAT_GROUP * * we do not use FORMAT_ID in this program */ for (evt = 0; evt < num; ) { int num_evts_to_read; if (options.format_group) { num_evts_to_read = perf_get_group_nevents(fds, num, evt); new_sz = sizeof(uint64_t) * (3 + num_evts_to_read); } else { num_evts_to_read = 1; new_sz = sizeof(uint64_t) * 3; } if (new_sz > sz) { sz = new_sz; values = realloc(values, sz); } if (!values) err(1, "cannot allocate memory for values\n"); ret = read(fds[evt].fd, values, new_sz); if (ret != (ssize_t)new_sz) { /* unsigned */ if (ret == -1) err(1, "cannot read values event %s", fds[evt].name); /* likely pinned and could not be loaded */ warnx("could not read event %d, tried to read %zu bytes, but got %zd", evt, new_sz, ret); } /* * propagate to save area */ for (i = evt; i < (evt + num_evts_to_read); i++) { if (options.format_group) values[0] = values[3 + (i - evt)]; /* * scaling because we may be sharing the PMU and * thus may be multiplexed */ fds[i].values[0] = values[0]; fds[i].values[1] = values[1]; fds[i].values[2] = values[2]; } evt += num_evts_to_read; } if (values) free(values); } static void print_counts(perf_event_desc_t *fds, int num) { double ratio; uint64_t val, delta; int i; read_groups(fds, num); for(i=0; i < num; i++) { val = perf_scale(fds[i].values); delta = perf_scale_delta(fds[i].values, fds[i].prev_values); ratio = perf_scale_ratio(fds[i].values); /* separate groups */ if (perf_is_group_leader(fds, i)) putchar('\n'); if (options.print) printf("%'20"PRIu64" %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n", val, delta, fds[i].name, (1.0-ratio)*100.0, fds[i].values[1], fds[i].values[2]); else printf("%'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n", val, fds[i].name, (1.0-ratio)*100.0, fds[i].values[1], fds[i].values[2]); fds[i].prev_values[0] = fds[i].values[0]; fds[i].prev_values[1] = fds[i].values[1]; fds[i].prev_values[2] = fds[i].values[2]; } } static void sig_handler(int n) { quit = 1; } int parent(char **arg) { perf_event_desc_t *fds = NULL; int status, ret, i, num_fds = 0, grp, group_fd; int ready[2], go[2]; char buf; pid_t pid; go[0] = go[1] = -1; if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed"); for (grp = 0; grp < options.num_groups; grp++) { int ret; ret = perf_setup_list_events(options.events[grp], &fds, &num_fds); if (ret || !num_fds) exit(1); } pid = options.pid; if (!pid) { ret = pipe(ready); if (ret) err(1, "cannot create pipe ready"); ret = pipe(go); if (ret) err(1, "cannot create pipe go"); /* * Create the child task */ if ((pid=fork()) == -1) err(1, "Cannot fork process"); /* * and launch the child code * * The pipe is used to avoid a race condition * between for() and exec(). We need the pid * of the new tak but we want to start measuring * at the first user level instruction. Thus we * need to prevent exec until we have attached * the events. */ if (pid == 0) { close(ready[0]); close(go[1]); /* * let the parent know we exist */ close(ready[1]); if (read(go[0], &buf, 1) == -1) err(1, "unable to read go_pipe"); exit(child(arg)); } close(ready[1]); close(go[0]); if (read(ready[0], &buf, 1) == -1) err(1, "unable to read child_ready_pipe"); close(ready[0]); } for(i=0; i < num_fds; i++) { int is_group_leader; /* boolean */ is_group_leader = perf_is_group_leader(fds, i); if (is_group_leader) { /* this is the group leader */ group_fd = -1; } else { group_fd = fds[fds[i].group_leader].fd; } /* * create leader disabled with enable_on-exec */ if (!options.pid) { fds[i].hw.disabled = is_group_leader; fds[i].hw.enable_on_exec = is_group_leader; } fds[i].hw.read_format = PERF_FORMAT_SCALE; /* request timing information necessary for scaling counts */ if (is_group_leader && options.format_group) fds[i].hw.read_format |= PERF_FORMAT_GROUP; if (options.inherit) fds[i].hw.inherit = 1; if (options.pin && is_group_leader) fds[i].hw.pinned = 1; fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0); if (fds[i].fd == -1) { warn("cannot attach event%d %s", i, fds[i].name); goto error; } } if (!options.pid && go[1] > -1) close(go[1]); if (options.print) { if (!options.pid) { while(waitpid(pid, &status, WNOHANG) == 0) { sleep(1); print_counts(fds, num_fds); } } else { while(quit == 0) { sleep(1); print_counts(fds, num_fds); } } } else { if (!options.pid) waitpid(pid, &status, 0); else pause(); print_counts(fds, num_fds); } for(i=0; i < num_fds; i++) close(fds[i].fd); perf_free_fds(fds, num_fds); /* free libpfm resources cleanly */ pfm_terminate(); return 0; error: free(fds); if (!options.pid) kill(SIGKILL, pid); /* free libpfm resources cleanly */ pfm_terminate(); return -1; } static void usage(void) { printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e event1,event2,...] cmd\n" "-h\t\tget help\n" "-i\t\tinherit across fork\n" "-f\t\tuse PERF_FORMAT_GROUP for reading up counts (experimental, not working)\n" "-p\t\tprint counts every second\n" "-P\t\tpin events\n" "-t pid\tmeasure existing pid\n" "-e ev,ev\tgroup of events to measure (multiple -e switches are allowed)\n" ); } int main(int argc, char **argv) { int c; setlocale(LC_ALL, ""); while ((c=getopt(argc, argv,"+he:ifpPt:")) != -1) { switch(c) { case 'e': if (options.num_groups < MAX_GROUPS) { options.events[options.num_groups++] = optarg; } else { errx(1, "you cannot specify more than %d groups.\n", MAX_GROUPS); } break; case 'f': options.format_group = 1; break; case 'p': options.print = 1; break; case 'P': options.pin = 1; break; case 'i': options.inherit = 1; break; case 't': options.pid = atoi(optarg); break; case 'h': usage(); exit(0); default: errx(1, "unknown error"); } } if (options.num_groups == 0) { options.events[0] = "cycles,instructions"; options.num_groups = 1; } if (!argv[optind] && !options.pid) errx(1, "you must specify a command to execute or a thread to attach to\n"); signal(SIGINT, sig_handler); return parent(argv+optind); }