/*
* task_cpu.c - example of per-thread remote monitoring with per-cpu breakdown
*
* Copyright (c) 2010 Google, Inc
* Contributed by Stephane Eranian <eranian@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdarg.h>
#include <signal.h>
#include <sys/wait.h>
#include <locale.h>
#include <sys/ioctl.h>
#include <err.h>
#include "perf_util.h"
/* upper bound on the number of -e event groups accepted on the command line */
#define MAX_GROUPS 256
/* capacity of the per-cpu descriptor table used by parent() */
#define MAX_CPUS 64

/*
 * Command line settings, filled in by main() and read by the rest of the file.
 */
typedef struct {
	const char *events[MAX_GROUPS];	/* one event list string per -e option */
	int num_groups;			/* number of valid entries in events[] */
	int format_group;		/* -f: read whole groups with PERF_FORMAT_GROUP */
	int inherit;			/* -i: set the inherit attribute on every event */
	int print;			/* -p: print counts every second */
	int pin;			/* -P: pin group leaders */
	int ncpus;			/* number of online processors */
	pid_t pid;			/* -t: task to attach to; 0 when a command is launched */
} options_t;

static options_t options;

/*
 * Set to 1 by the SIGINT handler and polled by the print loop.
 * sig_atomic_t is the only integer type the C standard guarantees can be
 * written safely from an asynchronous signal handler.
 */
static volatile sig_atomic_t quit;
/*
 * child - replace the current process image with the requested command
 *
 * arg: NULL-terminated argument vector; arg[0] is the program to execute
 *      and is looked up through PATH by execvp().
 *
 * Does not return: on success the process image is replaced, on failure
 * the process exits with status 1 after reporting why the exec failed.
 */
int
child(char **arg)
{
	execvp(arg[0], arg);

	/*
	 * execvp() only returns on failure and sets errno, so use err()
	 * rather than errx() to include the reason in the message.
	 */
	err(1, "cannot exec: %s", arg[0]);
	/* not reached */
}
/*
 * read_groups - read the current counts of every event into fds[].values
 *
 * fds: event descriptors whose fd members are open perf event descriptors
 * num: number of entries in fds
 *
 * For each event, values[0] receives the raw count, values[1] the
 * time_enabled and values[2] the time_running reported by the kernel.
 * With options.format_group set, one read on the group leader fetches
 * the counts of the whole group.
 *
 * Exits the process if memory cannot be allocated or read() fails.
 * A short read only produces a warning; the affected events then
 * report zero for all three values.
 */
static void
read_groups(perf_event_desc_t *fds, int num)
{
	uint64_t *values = NULL;
	size_t new_sz, sz = 0;
	int i, evt;
	ssize_t ret;

	/*
	 * { u64 nr;
	 *   { u64 time_enabled; } && PERF_FORMAT_ENABLED
	 *   { u64 time_running; } && PERF_FORMAT_RUNNING
	 *   { u64 value;
	 *     { u64 id; } && PERF_FORMAT_ID
	 *   } cntr[nr];
	 * } && PERF_FORMAT_GROUP
	 *
	 * we do not use FORMAT_ID in this program
	 */
	for (evt = 0; evt < num; ) {
		int num_evts_to_read;

		if (options.format_group) {
			num_evts_to_read = perf_get_group_nevents(fds, num, evt);
			/* a non-positive size would never advance evt below */
			if (num_evts_to_read < 1)
				errx(1, "cannot determine group size for event %d", evt);
			new_sz = sizeof(uint64_t) * (3 + num_evts_to_read);
		} else {
			num_evts_to_read = 1;
			new_sz = sizeof(uint64_t) * 3;
		}

		/* grow the scratch buffer only when this read needs more room */
		if (new_sz > sz) {
			uint64_t *tmp = realloc(values, new_sz);

			if (!tmp)
				err(1, "cannot allocate memory for values");
			values = tmp;
			sz = new_sz;
		}

		/*
		 * Clear the buffer before reading: on a short read the
		 * untouched part would otherwise hold uninitialized heap
		 * data or the counts of the previous event, and that would
		 * be propagated to the save area below.
		 */
		memset(values, 0, new_sz);

		ret = read(fds[evt].fd, values, new_sz);
		if (ret != (ssize_t)new_sz) { /* unsigned */
			if (ret == -1)
				err(1, "cannot read values event %s", fds[evt].name);

			/* likely pinned and could not be loaded */
			warnx("could not read event %d, tried to read %zu bytes, but got %zd",
				evt, new_sz, ret);
		}

		/*
		 * propagate to save area
		 */
		for (i = evt; i < (evt + num_evts_to_read); i++) {
			/*
			 * in group format slot 0 holds nr, which is not
			 * needed, so reuse it for the count of event i
			 */
			if (options.format_group)
				values[0] = values[3 + (i - evt)];
			/*
			 * scaling because we may be sharing the PMU and
			 * thus may be multiplexed
			 */
			fds[i].values[0] = values[0];
			fds[i].values[1] = values[1];
			fds[i].values[2] = values[2];
		}
		evt += num_evts_to_read;
	}
	free(values);
}
/*
 * print_counts - read and print the counts of all events monitored on one cpu
 *
 * fds: event descriptors for that cpu
 * num: number of entries in fds
 * cpu: cpu number, used only to label the output lines
 *
 * Each line shows the scaled count, the scaling percentage and the raw
 * enabled/running times. In periodic mode (options.print) the scaled
 * delta since the previous call is printed as well. A blank line is
 * emitted before each group leader to separate groups.
 */
static void
print_counts(perf_event_desc_t *fds, int num, int cpu)
{
	double ratio;
	uint64_t val, delta;
	int i;

	read_groups(fds, num);

	for (i = 0; i < num; i++) {
		val   = perf_scale(fds[i].values);
		delta = perf_scale_delta(fds[i].values, fds[i].prev_values);
		ratio = perf_scale_ratio(fds[i].values);

		/* separate groups */
		if (perf_is_group_leader(fds, i))
			putchar('\n');

		if (options.print)
			printf("CPU%-2d %'20"PRIu64" %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n",
				cpu,
				val,
				delta,
				fds[i].name,
				(1.0-ratio)*100.0,
				fds[i].values[1],
				fds[i].values[2]);
		else
			printf("CPU%-2d %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n",
				cpu,
				val,
				fds[i].name,
				(1.0-ratio)*100.0,
				fds[i].values[1],
				fds[i].values[2]);

		/*
		 * Remember this sample so that the next call computes the
		 * delta against it. Without this the delta column would
		 * always be measured against the initial prev_values.
		 */
		fds[i].prev_values[0] = fds[i].values[0];
		fds[i].prev_values[1] = fds[i].values[1];
		fds[i].prev_values[2] = fds[i].values[2];
	}
}
/*
 * SIGINT handler: raise the quit flag that the periodic print loop polls.
 * The signal number itself is not needed.
 */
static void sig_handler(int n)
{
	(void)n;
	quit = 1;
}
/*
 * parent - set up per-cpu events for a task, run the measurement, print results
 *
 * arg: NULL-terminated argv of the command to launch; only used when
 *      options.pid is 0 (no -t option given).
 *
 * One set of events is created per online cpu and attached to the target
 * task. The target is either the existing task options.pid or a child
 * forked here that execs arg once all events are attached.
 *
 * Returns 0 on success and -1 if an event could not be attached. Setup
 * failures (libpfm, pipes, fork, event parsing) terminate the process.
 */
int
parent(char **arg)
{
	perf_event_desc_t *fds, *fds_cpus[MAX_CPUS];
	int status, ret, i, num_fds = 0, grp, group_fd;
	int ready[2], go[2], cpu;
	char buf;
	pid_t pid;

	go[0] = go[1] = -1;

	if (pfm_initialize() != PFM_SUCCESS)
		errx(1, "libpfm initialization failed");

	/* fds_cpus[] has MAX_CPUS slots, so exactly MAX_CPUS cpus still fit */
	if (options.ncpus > MAX_CPUS)
		errx(1, "maximum number of cpus exceeded (%d)", MAX_CPUS);

	memset(fds_cpus, 0, sizeof(fds_cpus));

	for (cpu = 0; cpu < options.ncpus; cpu++) {
		/*
		 * Restart the event count once per cpu, not once per group,
		 * so that several -e groups accumulate in the same array.
		 * NOTE(review): assumes perf_setup_list_events() appends at
		 * index *num_fds, as the group-leader handling below implies.
		 */
		num_fds = 0;
		for (grp = 0; grp < options.num_groups; grp++) {
			ret = perf_setup_list_events(options.events[grp], &fds_cpus[cpu], &num_fds);
			if (ret || !num_fds)
				exit(1);
		}
	}

	pid = options.pid;
	if (!pid) {
		ret = pipe(ready);
		if (ret)
			err(1, "cannot create pipe ready");

		ret = pipe(go);
		if (ret)
			err(1, "cannot create pipe go");

		/*
		 * Create the child task
		 */
		if ((pid = fork()) == -1)
			err(1, "Cannot fork process");

		/*
		 * and launch the child code
		 *
		 * The pipe is used to avoid a race condition
		 * between fork() and exec(). We need the pid
		 * of the new task but we want to start measuring
		 * at the first user level instruction. Thus we
		 * need to prevent exec until we have attached
		 * the events.
		 */
		if (pid == 0) {
			close(ready[0]);
			close(go[1]);

			/*
			 * let the parent know we exist: closing the write
			 * end makes the parent's read() on ready[0] return
			 */
			close(ready[1]);

			/* block until the parent closes its end of the go pipe */
			if (read(go[0], &buf, 1) == -1)
				err(1, "unable to read go_pipe");

			exit(child(arg));
		}

		close(ready[1]);
		close(go[0]);

		if (read(ready[0], &buf, 1) == -1)
			err(1, "unable to read child_ready_pipe");

		close(ready[0]);
	}

	for (cpu = 0; cpu < options.ncpus; cpu++) {
		fds = fds_cpus[cpu];
		for (i = 0; i < num_fds; i++) {
			int is_group_leader; /* boolean */

			is_group_leader = perf_is_group_leader(fds, i);
			if (is_group_leader) {
				/* this is the group leader */
				group_fd = -1;
			} else {
				group_fd = fds[fds[i].group_leader].fd;
			}

			/*
			 * create leader disabled with enable_on-exec
			 */
			if (!options.pid) {
				fds[i].hw.disabled = is_group_leader;
				fds[i].hw.enable_on_exec = is_group_leader;
			}

			/* request timing information necessary for scaling counts */
			fds[i].hw.read_format = PERF_FORMAT_SCALE;

			if (is_group_leader && options.format_group)
				fds[i].hw.read_format |= PERF_FORMAT_GROUP;

			if (options.inherit)
				fds[i].hw.inherit = 1;

			if (options.pin && is_group_leader)
				fds[i].hw.pinned = 1;

			fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0);
			if (fds[i].fd == -1) {
				warn("cannot attach event%d %s", i, fds[i].name);
				goto error;
			}
		}
	}

	/* all events attached: release the child so that it can exec */
	if (!options.pid && go[1] > -1)
		close(go[1]);

	if (options.print) {
		if (!options.pid) {
			/* print every second for as long as the child is running */
			while (waitpid(pid, &status, WNOHANG) == 0) {
				sleep(1);
				for (cpu = 0; cpu < options.ncpus; cpu++) {
					fds = fds_cpus[cpu];
					print_counts(fds, num_fds, cpu);
				}
			}
		} else {
			/* print every second until the SIGINT handler sets quit */
			while (quit == 0) {
				sleep(1);
				for (cpu = 0; cpu < options.ncpus; cpu++) {
					fds = fds_cpus[cpu];
					print_counts(fds, num_fds, cpu);
				}
			}
		}
	} else {
		if (!options.pid)
			waitpid(pid, &status, 0);
		else {
			/* wait for a signal, then stop counting before the final read */
			pause();
			for (cpu = 0; cpu < options.ncpus; cpu++) {
				fds = fds_cpus[cpu];
				for (i = 0; i < num_fds; i++)
					ioctl(fds[i].fd, PERF_EVENT_IOC_DISABLE, 0);
			}
		}
		for (cpu = 0; cpu < options.ncpus; cpu++) {
			fds = fds_cpus[cpu];
			print_counts(fds, num_fds, cpu);
		}
	}

	for (cpu = 0; cpu < options.ncpus; cpu++) {
		fds = fds_cpus[cpu];
		for (i = 0; i < num_fds; i++)
			close(fds[i].fd);
		perf_free_fds(fds, num_fds);
	}

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
error:
	free(fds);
	/*
	 * The child we forked is still blocked on the go pipe: get rid of it.
	 * kill() takes the pid first and the signal second.
	 */
	if (!options.pid)
		kill(pid, SIGKILL);

	/* free libpfm resources cleanly */
	pfm_terminate();

	return -1;
}
/*
 * usage - print the command line synopsis and option summary on stdout
 *
 * The synopsis lists -f (not -g): that is the letter main() passes to
 * getopt() and the one described in the option list below.
 */
static void
usage(void)
{
	printf("usage: task_cpu [-h] [-i] [-f] [-p] [-P] [-t pid] [-e event1,event2,...] cmd\n"
		"-h\t\tget help\n"
		"-i\t\tinherit across fork\n"
		"-f\t\tuse PERF_FORMAT_GROUP for reading up counts (experimental, not working)\n"
		"-p\t\tprint counts every second\n"
		"-P\t\tpin events\n"
		"-t pid\tmeasure existing pid\n"
		"-e ev,ev\tgroup of events to measure (multiple -e switches are allowed)\n"
		);
}
/*
 * main - parse the command line, then hand over to parent()
 *
 * Options fill the global options structure. Without any -e option the
 * default event group "cycles,instructions" is measured. Either a command
 * to run (remaining arguments) or -t pid must be given.
 *
 * Returns the value returned by parent(); invalid usage terminates the
 * process with exit status 1.
 */
int
main(int argc, char **argv)
{
	char *end;
	long val;
	int c;

	/* needed for the %' thousands grouping flag used when printing counts */
	setlocale(LC_ALL, "");

	while ((c = getopt(argc, argv, "+he:ifpPt:")) != -1) {
		switch (c) {
		case 'e':
			if (options.num_groups >= MAX_GROUPS)
				errx(1, "you cannot specify more than %d groups.",
					MAX_GROUPS);
			options.events[options.num_groups++] = optarg;
			break;
		case 'f':
			options.format_group = 1;
			break;
		case 'p':
			options.print = 1;
			break;
		case 'P':
			options.pin = 1;
			break;
		case 'i':
			options.inherit = 1;
			break;
		case 't':
			/*
			 * strtol() instead of atoi(): reject arguments that
			 * are not entirely a number or that do not fit in a
			 * pid_t, rather than silently attaching to the wrong
			 * task or falling back to pid 0.
			 */
			val = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' || (long)(pid_t)val != val)
				errx(1, "invalid pid: %s", optarg);
			options.pid = (pid_t)val;
			break;
		case 'h':
			usage();
			exit(0);
		default:
			errx(1, "unknown error");
		}
	}

	options.ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (options.ncpus < 1)
		errx(1, "cannot determine number of online processors");

	if (options.num_groups == 0) {
		options.events[0] = "cycles,instructions";
		options.num_groups = 1;
	}

	if (!argv[optind] && !options.pid)
		errx(1, "you must specify a command to execute or a thread to attach to");

	signal(SIGINT, sig_handler);

	return parent(argv + optind);
}