Blob Blame History Raw
/* rtop.c - a simple PMU-based CPU utilization tool
 *
 * Copyright (c) 2009 Google, Inc
 * Contributed by Stephane Eranian <eranian@gmail.com>
 *
 * Based on:
 * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <getopt.h>
#include <curses.h>
#include <termios.h>
#include <signal.h>
#include <ctype.h>
#include <math.h>
#include <limits.h>
#include <err.h>

#include "perf_util.h"

/* rtop release string, printed by the version option */
#define RTOP_VERSION "0.2"
/*
 * Maximum number of CPUs (hardware threads) supported. This is also the
 * number of bits held by a rtop_cpumask_t.
 */
#define RTOP_MAX_CPUS		2048 /* MUST BE power of 2 */
/* number of bits in one mask word: bytes in an unsigned long times 8 */
#define RTOP_CPUMASK_BITS	(sizeof(unsigned long)<<3)
/* number of unsigned long words needed to hold RTOP_MAX_CPUS bits */
#define RTOP_CPUMASK_COUNT	(RTOP_MAX_CPUS/RTOP_CPUMASK_BITS)

/*
 * Bit operations on a CPU mask m for CPU number g: the word is selected by
 * g / RTOP_CPUMASK_BITS and the bit within it by g % RTOP_CPUMASK_BITS.
 * No range check is done on g, and g is evaluated twice, so it must not
 * have side effects.
 * RTOP_CPUMASK_ISSET yields the masked bit itself: zero when clear,
 * non-zero (not necessarily 1) when set.
 */
#define RTOP_CPUMASK_SET(m, g)		((m)[(g)/RTOP_CPUMASK_BITS] |=  (1UL << ((g) % RTOP_CPUMASK_BITS)))
#define RTOP_CPUMASK_CLEAR(m, g)	((m)[(g)/RTOP_CPUMASK_BITS] &= ~(1UL << ((g) % RTOP_CPUMASK_BITS)))
#define RTOP_CPUMASK_ISSET(m, g)	((m)[(g)/RTOP_CPUMASK_BITS] &   (1UL << ((g) % RTOP_CPUMASK_BITS)))

/* bitmask with one bit per possible CPU, stored as an array of words */
typedef unsigned long rtop_cpumask_t[RTOP_CPUMASK_COUNT];

/*
 * Program-wide configuration: command line settings plus the CPU topology
 * and clock information gathered at startup.
 */
typedef struct {
	struct {
		int	opt_verbose;	/* set to 1 by -v / --verbose */
		int	opt_delay;	/* refresh delay in second */
		int	opt_delay_set;	/* set to 1 once a delay was given on the command line */
	} program_opt_flags;
	rtop_cpumask_t	cpu_mask;	  /* which CPUs to use in system wide mode */
	long		online_cpus;	/* number of online CPUs, from sysconf(_SC_NPROCESSORS_ONLN) */
	long		selected_cpus;	/* number of CPUs being monitored (bits set in cpu_mask) */
	unsigned long	cpu_mhz;	/* value returned by find_cpu_speed(); 0 when it could not be determined */
} program_options_t;

/*
 * Shorthands so the nested flags can be written as options.opt_verbose etc.;
 * each name expands to the corresponding program_opt_flags member.
 */
#define opt_verbose program_opt_flags.opt_verbose
#define opt_delay program_opt_flags.opt_delay
#define opt_delay_set program_opt_flags.opt_delay_set


/* program configuration, filled in by main() and populate_cpumask() */
static program_options_t 	options;
/* tty attributes saved by setup_screen() and restored by close_screen() */
static struct termios		saved_tty;
/*
 * Set to 1 by the SIGINT handler and polled by the main loop. An object
 * shared with a signal handler must be volatile sig_atomic_t for the
 * access to be well defined.
 */
static volatile sig_atomic_t	time_to_quit;
/* terminal dimensions as last computed by get_term_size() */
static int			term_rows, term_cols;

/*
 * Ask the terminal on file descriptor 1 for its size and record it in
 * term_rows / term_cols. A reported height of 10 rows or less is not
 * trusted and an 80x24 screen is assumed instead.
 *
 * Exits the program if the size cannot be queried or if there are fewer
 * rows than selected CPUs.
 */
static void
get_term_size(void)
{
	struct winsize win;
	int usable;

	if (ioctl(1, TIOCGWINSZ, &win) != 0)
		err(1, "cannot determine screen size");

	usable = win.ws_row > 10;

	term_cols = usable ? win.ws_col : 80;
	term_rows = usable ? win.ws_row : 24;

	if (term_rows < options.selected_cpus)
		errx(1, "you need at least %ld rows on your terminal to display all CPUs", options.selected_cpus);
}

/*
 * SIGWINCH handler: the window was resized, so recompute the cached
 * terminal dimensions.
 */
static void
sigwinch_handler(int n)
{
	(void)n;	/* the signal number is not needed */

	get_term_size();
}

/*
 * Save the current tty attributes of file descriptor 0, determine the
 * terminal size and switch the terminal into curses mode.
 *
 * Exits the program if the tty attributes cannot be read.
 */
static void
setup_screen(void)
{
	/*
	 * saved so that close_screen() can restore them; err() is used so
	 * that the errno reason is part of the message (and, unlike the
	 * format string, it supplies the trailing newline itself)
	 */
	if (tcgetattr(0, &saved_tty) == -1)
		err(1, "cannot save tty settings");

	get_term_size();

	initscr();
	nocbreak();
	resizeterm(term_rows, term_cols);
}

/*
 * Leave curses mode, then put the tty on standard input back into the
 * state recorded in saved_tty.
 */
static void
close_screen(void)
{
	endwin();
	tcsetattr(STDIN_FILENO, TCSAFLUSH, &saved_tty);
}

/*
 * Report a fatal error while the terminal is in curses mode.
 *
 * The screen is closed first, then the printf-style message is written to
 * stderr and the process exits with status 1. This function does not return.
 *
 * fmt: printf-style format string (never modified), followed by its arguments.
 *      No newline is appended; the caller supplies it.
 */
static void
fatal_errorw(const char *fmt, ...)
{
	va_list ap;

	close_screen();

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(1);
}

/*
 * SIGINT handler: only raises the time_to_quit flag, which the main loop
 * checks before starting another refresh.
 */
static void
sigint_handler(int n)
{
	(void)n;	/* the signal number is not needed */

	time_to_quit = 1;
}

/*
 * Parse the leading decimal number in text (leading white space allowed)
 * and round it to the nearest integer.
 *
 * Returns 1 and stores the rounded value in *out on success. Returns 0 and
 * leaves *out untouched when text holds no number, or a negative, NaN or
 * out-of-range one.
 */
static int
parse_cpuinfo_number(const char *text, unsigned long *out)
{
	char *end;
	double v;

	errno = 0;
	v = strtod(text, &end);

	/* !(v >= 0.0) also rejects NaN, which compares false with everything */
	if (end == text || errno == ERANGE || !(v >= 0.0) || v >= (double)ULONG_MAX)
		return 0;

	*out = (unsigned long)(v + 0.5);
	return 1;
}

/*
 * Determine the CPU clock from /proc/cpuinfo.
 *
 * The first non-zero "cpu MHz" entry wins. If none is found, the last
 * "BogoMIPS" entry read is used as an approximation.
 *
 * Returns the rounded value, or 0 when the file cannot be opened or
 * contains neither field.
 */
static unsigned long
find_cpu_speed(void)
{
	FILE *fp;
	unsigned long mhz = 0, bogomips = 0;
	char line[128];

	fp = fopen("/proc/cpuinfo", "r");
	if (fp == NULL)
		return 0;

	while (fgets(line, sizeof(line), fp) != NULL) {
		char *sep = strchr(line, ':');

		/*
		 * No "key : value" separator: this is a blank line or the
		 * tail of a line longer than the buffer (e.g. "flags").
		 * Skip it rather than stop, otherwise fields located after
		 * a long line would never be seen.
		 */
		if (sep == NULL)
			continue;

		/* the key starts the line; the value follows the colon */
		if (!strncasecmp("cpu MHz", line, 7)) {
			if (parse_cpuinfo_number(sep + 1, &mhz) && mhz != 0)
				break;
		} else if (!strncasecmp("BogoMIPS", line, 8)) {
			parse_cpuinfo_number(sep + 1, &bogomips);
		}
	}
	fclose(fp);

	return mhz != 0 ? mhz : bogomips;
}

/*
 * Install the handlers for SIGINT (request to quit) and SIGWINCH (terminal
 * resized). Both are installed with an empty signal mask and no flags.
 *
 * Exits the program if a handler cannot be installed.
 */
static void
setup_signals(void)
{
	struct sigaction act;

	/*
	 * install SIGINT handler
	 */
	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_handler = sigint_handler;
	if (sigaction(SIGINT, &act, NULL) == -1)
		err(1, "cannot install SIGINT handler");

	/*
	 * install SIGWINCH handler
	 */
	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_handler = sigwinch_handler;
	if (sigaction(SIGWINCH, &act, NULL) == -1)
		err(1, "cannot install SIGWINCH handler");
}

/*
 * Long options accepted by rtop.
 *
 * For entries with a NULL flag pointer, getopt_long() returns the value in
 * the last column and main() dispatches on it. The "verbose" entry makes
 * getopt_long() store 1 into options.opt_verbose and return 0 instead.
 *
 * "delay" must be declared as taking an argument: main() reads optarg for
 * it, which getopt_long() only sets for options declared that way.
 */
static struct option rtop_cmd_options[]={
	{ "help", no_argument, NULL, 1 },
	{ "version", no_argument, NULL, 2 },
	{ "delay", required_argument, NULL, 3 },
	{ "cpu-list", required_argument, NULL, 4 },

	{ "verbose", no_argument, &options.opt_verbose, 1 },
	{ NULL, 0, NULL, 0 }
};

/* number of events opened per CPU (one for user cycles, one for kernel cycles) */
#define MAX_EVENTS	2

/*
 * Per-CPU measurement state used by mainloop().
 */
typedef struct {
	uint64_t prev_values[MAX_EVENTS];	/* scaled counts seen at the previous refresh */
	int fd[MAX_EVENTS];	/* event file descriptors; fd[0] is passed as the group fd when opening the others */
	int cpu;	/* number of the CPU these events are attached to */
} cpudesc_t;

/*
 * Layout of the data returned by a group read, as described by the
 * perf_event interface:
 *
 * 	{ u64		nr;
 * 	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
 * 	  { u64		time_running; } && PERF_FORMAT_RUNNING
 * 	  { u64		value;
 * 	    { u64	id;           } && PERF_FORMAT_ID
 * 	  }		cntr[nr];
 * 	}
 *
 * The structure below is that layout with both time fields present, no id
 * field and two counters.
 * NOTE(review): nothing in the visible part of this file references this
 * type; mainloop() reads into a plain uint64_t array instead.
 */
typedef struct {
	uint64_t nr;	/* number of counters in the group */
	uint64_t time_enabled;
	uint64_t time_running;
	uint64_t values[2];	/* one value per counter */
} rtop_grp_t;

static void
mainloop(void)
{
	struct perf_event_attr ev[MAX_EVENTS];
	unsigned long itc_delta;
	cpudesc_t *cpus;
	int i, j = 0, k, ncpus = 0;
	int num, ret;

	ncpus = options.selected_cpus;

	cpus = calloc(ncpus, sizeof(cpudesc_t));
	if (!cpus)
		err(1, "cannot allocate file descriptors");

	memset(ev, 0, sizeof(ev));

	/* measure user cycles */
	ev[0].type = PERF_TYPE_HARDWARE;
	ev[0].config = PERF_COUNT_HW_CPU_CYCLES;
	ev[0].read_format = PERF_FORMAT_SCALE|PERF_FORMAT_GROUP;
	ev[0].exclude_kernel = 1;
	ev[0].disabled = 1;
	ev[0].pinned = 0;

	/* measure kernel cycles */
	ev[1].type = PERF_TYPE_HARDWARE;
	ev[1].config = PERF_COUNT_HW_CPU_CYCLES;
	ev[1].exclude_user = 1;
	ev[1].disabled = 1;
	ev[1].pinned = 0;

	num = 2;

	for(i=0, k = 0; ncpus; i++) {
		if (RTOP_CPUMASK_ISSET(options.cpu_mask, i) == 0)
			continue;

		cpus[k].cpu = i;
		cpus[k].fd[0] = -1;
		for(j=0 ; j < num; j++) {
			cpus[k].fd[j] = perf_event_open(ev+j, -1, i, cpus[k].fd[0], 0);
			if (cpus[k].fd[j] == -1)
				fatal_errorw("cannot open event %d on CPU%d: %s\n", j, i, strerror(errno));
		}
		ncpus--;
		k++;
	}
	ncpus = options.selected_cpus;

	itc_delta = options.opt_delay * options.cpu_mhz * 1000000;

	for(i=0; i < ncpus; i++)
		for(j=0; j < num; j++)
			ioctl(cpus[i].fd[j], PERF_EVENT_IOC_ENABLE, 0);
	
	for(;time_to_quit == 0;) {

		sleep(options.opt_delay);

		move(0, 0);

		for(i=0; i < ncpus; i++) {
			uint64_t values[MAX_EVENTS];
			uint64_t raw_values[5];
			double k_cycles, u_cycles, i_cycles, ratio;

			/*
			 * given our events are in the same group, we can do a
			 * group read and get both counts + scaling information
			 */
			ret = read(cpus[i].fd[0], raw_values, sizeof(raw_values));
			if (ret != sizeof(raw_values))
				fatal_errorw("cannot read count for event %d on CPU%d\n", j, cpus[i].cpu);

			printw("nr=%"PRIu64"\n", raw_values[0]);
			printw("ena=%"PRIu64"\n", raw_values[1]);
			printw("run=%"PRIu64"\n", raw_values[2]);

			raw_values[0] = raw_values[3];
			values[0] = perf_scale(raw_values);

			raw_values[0] = raw_values[4];
			values[1] = perf_scale(raw_values);

			ratio = perf_scale_ratio(raw_values);

			k_cycles   = (double)(values[1] - cpus[i].prev_values[1])*100.0/ (double)itc_delta;
			u_cycles   = (double)(values[0] - cpus[i].prev_values[0])*100.0/ (double)itc_delta;
			i_cycles   = 100.0 - (k_cycles + u_cycles);

			cpus[i].prev_values[0] = values[0];
			cpus[i].prev_values[1] = values[1];
			/*
			 * adjust for rounding errors
			 */
			if (i_cycles < 0.0) i_cycles = 0.0;
			if (i_cycles > 100.0) i_cycles = 100.0;
			if (k_cycles > 100.0) k_cycles = 100.0;
			if (u_cycles > 100.0) u_cycles = 100.0;

			printw("CPU%-2ld %6.2f%% usr %6.2f%% sys %6.2f%% idle (scaling ratio %.2f%%)\n",
				i,
				u_cycles,
				k_cycles,
				i_cycles,
				ratio*100.0);
		}
		refresh();


	}
	for(i=0; i < ncpus; i++)
		for(j=0; j < num; j++)
			close(cpus[i].fd[j]);
	free(cpus);
}

/*
 * Fill options.cpu_mask, options.online_cpus and options.selected_cpus.
 *
 * cpu_list: NULL to select every online CPU, otherwise a comma separated
 *           list of CPU numbers or inclusive ranges, e.g. "0,2-5". Numbers
 *           are parsed with automatic base detection. A CPU listed more
 *           than once is counted once. A single trailing comma is
 *           tolerated.
 *
 * Exits the program with a message when the number of online CPUs cannot
 * be determined, when the list is malformed or selects nothing, or when it
 * names a CPU that is beyond RTOP_MAX_CPUS or not online.
 */
void
populate_cpumask(char *cpu_list)
{
	const char *arg = cpu_list;	/* whole argument, kept for error messages */
	char *p;
	unsigned long start_cpu, end_cpu = 0;
	unsigned long i, count = 0;

	options.online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (options.online_cpus == -1)
		errx(1, "cannot figure out the number of online processors");

	if (cpu_list == NULL)  {
		/* CPUs 0 .. RTOP_MAX_CPUS-1 fit in the mask, i.e. RTOP_MAX_CPUS of them */
		if (options.online_cpus > RTOP_MAX_CPUS)
			errx(1, "rtop can only handle to %d CPUs", RTOP_MAX_CPUS);

		for (i = 0; i < (unsigned long)options.online_cpus; i++)
			RTOP_CPUMASK_SET(options.cpu_mask, i);

		options.selected_cpus = options.online_cpus;

		return;
	}

	/* the cast keeps isdigit() defined for characters with a negative char value */
	while (isdigit((unsigned char)*cpu_list)) {
		p = NULL;
		start_cpu = strtoul(cpu_list, &p, 0); /* auto-detect base */

		if (start_cpu == ULONG_MAX || (*p != '\0' && *p != ',' && *p != '-'))
			goto invalid;

		if (*p == '-') {
			cpu_list = ++p;

			/* a range needs an explicit upper bound: reject "N-" */
			if (!isdigit((unsigned char)*cpu_list))
				goto invalid;

			p = NULL;
			end_cpu = strtoul(cpu_list, &p, 0); /* auto-detect base */

			if (end_cpu == ULONG_MAX || (*p != '\0' && *p != ','))
				goto invalid;
			if (end_cpu < start_cpu)
				goto invalid_range;
		} else {
			end_cpu = start_cpu;
		}

		if (start_cpu >= RTOP_MAX_CPUS || end_cpu >= RTOP_MAX_CPUS)
			goto too_big;

		for (; start_cpu <= end_cpu; start_cpu++) {

			if (start_cpu >= (unsigned long)options.online_cpus)
				goto not_online; /* XXX: assume contiguous range of CPUs */

			/* already selected: do not count it twice */
			if (RTOP_CPUMASK_ISSET(options.cpu_mask, start_cpu))
				continue;

			RTOP_CPUMASK_SET(options.cpu_mask, start_cpu);

			count++;
		}

		/* step over the ',' separator, if any */
		if (*p) ++p;

		cpu_list = p;
	}

	/*
	 * the loop stops at the first non-digit: anything left over, or a
	 * list that selected nothing, means the argument was malformed
	 */
	if (*cpu_list != '\0' || count == 0)
		goto invalid;

	options.selected_cpus = count;

	return;
invalid:
	errx(1, "invalid cpu list argument: %s", arg);
	/* no return */
not_online:
	errx(1, "cpu %lu is not online", start_cpu);
	/* no return */
invalid_range:
	errx(1, "cpu range %lu - %lu is invalid", start_cpu, end_cpu);
	/* no return */
too_big:
	errx(1, "rtop is limited to %d CPUs", RTOP_MAX_CPUS);
	/* no return */
}


/*
 * Print the command line summary on standard output.
 */
static void
usage(void)
{
	static const char help_text[] =
		"usage: rtop [options]:\n"
		"-h, --help\t\t\tdisplay this help and exit\n"
		"-v, --verbose\t\t\tverbose output\n"
		"-V, --version\t\t\tshow version and exit\n"
		"-d nsec, --delay=nsec\t\tnumber of seconds between refresh (default=1s)\n"
		"--cpu-list=cpu1,cpu2\t\tlist of CPUs to monitor(default=all)\n";

	fputs(help_text, stdout);
}

/*
 * Program entry point: parse the command line, gather the CPU frequency
 * and the set of CPUs to monitor, then run the display loop until
 * interrupted.
 *
 * Returns 0 on normal termination; invalid arguments and setup failures
 * exit with status 1.
 */
int
main(int argc, char **argv)
{
	int c;
	char *cpu_list = NULL;

	/*
	 * No privilege check is done here: if the events cannot be created,
	 * perf_event_open() fails in mainloop() and the reason is reported
	 * there.
	 */

	while ((c=getopt_long(argc, argv,"+vhVd:", rtop_cmd_options, 0)) != -1) {
		switch(c) {
			case   0: continue; /* fast path for options */
			case 'v': options.opt_verbose = 1;
				  break;
			case 1:
			case 'h':
				usage();
				exit(0);
			case 2:
			case 'V':
				printf("rtop version " RTOP_VERSION " Date: " __DATE__ "\n"
					"Copyright (C) 2009 Google, Inc\n");
				exit(0);
			case 3:
			case 'd': {
				char *end;
				long delay;

				/* optarg is only set when the option was declared with an argument */
				if (optarg == NULL)
					errx(1, "--delay needs an argument");

				errno = 0;
				delay = strtol(optarg, &end, 10);
				/*
				 * a delay of 0 is refused as well: the utilization is
				 * computed relative to the refresh interval, which
				 * would then be empty
				 */
				if (end == optarg || *end != '\0' || errno == ERANGE
				    || delay < 1 || delay > INT_MAX)
					errx(1, "invalid delay, must be a number of seconds >= 1");

				options.opt_delay = (int)delay;
				options.opt_delay_set = 1;
				break;
			}
			case 4:
				if (*optarg == '\0')
					errx(1, "--cpu-list needs an argument");
				cpu_list = optarg;
				break;
			default:
				errx(1, "unknown option");
		}
	}
	/*
	 * default refresh delay
	 */
	if (options.opt_delay_set == 0)
		options.opt_delay = 1;

	/* the percentages are relative to the clock rate: without it nothing meaningful can be shown */
	options.cpu_mhz = find_cpu_speed();
	if (options.cpu_mhz == 0)
		errx(1, "cannot determine the CPU frequency from /proc/cpuinfo");

	populate_cpumask(cpu_list);

	setup_signals();
	setup_screen();
	mainloop();
	close_screen();

	return 0;
}