Blob Blame History Raw
/*
 * multiplex.c - example of user-level event multiplexing
 *
 * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * This file is part of pfmon, a sample tool to measure performance
 * of applications on Linux/ia64.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 */

#ifndef _GNU_SOURCE
  #define _GNU_SOURCE /* for getline */
#endif
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>
#include <stdarg.h>
#include <getopt.h>
#include <fcntl.h>
#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/ptrace.h>

#include <perfmon/perfmon.h>
#include <perfmon/pfmlib.h>

#define MULTIPLEX_VERSION	"0.1"

#define MIN_FULL_PERIODS 100

#define SMPL_FREQ_IN_HZ	100

#define NUM_PMCS PMU_MAX_PMCS
#define NUM_PMDS PMU_MAX_PMDS

#define MAX_NUM_COUNTERS	32
#define MAX_PMU_NAME_LEN	32
typedef struct {
	struct {
		int opt_plm;	/* which privilege level to monitor (more than one possible) */
		int opt_debug;	/* print debug information */
		int opt_verbose;	/* verbose output */
		int opt_us_format;	/* print large numbers with comma for thousands */
	} program_opt_flags;

	unsigned long max_counters;	/* maximum number of counter for the platform */
	unsigned long smpl_freq;
	unsigned long smpl_period;

	unsigned long cpu_mhz;
	unsigned long full_periods;
} program_options_t;

#define opt_plm			program_opt_flags.opt_plm
#define opt_debug		program_opt_flags.opt_debug
#define opt_verbose		program_opt_flags.opt_verbose
#define opt_us_format		program_opt_flags.opt_us_format

typedef struct {
	char			*event_names[MAX_NUM_COUNTERS];
	pfmlib_input_param_t	pfm_inp;
	pfmlib_output_param_t	pfm_outp;
	pfarg_reg_t		pmcs[MAX_NUM_COUNTERS];
	pfarg_reg_t		pmds[MAX_NUM_COUNTERS];
	unsigned long		values[MAX_NUM_COUNTERS];
	unsigned long		n_runs;
	unsigned int		n_counters;
	unsigned int		n_pmcs;
} event_set_t;

typedef int	pfm_ctxid_t;

static pfm_ctxid_t ctxid;
static int current_set;
static program_options_t options;

/*
 * NO MORE THAN MAX_COUNTERS-1 (3) EVENTS PER SET
 */
static event_set_t events[]={
	{ {"BACK_END_BUBBLE_ALL","BACK_END_BUBBLE_L1D_FPU_RSE","BE_EXE_BUBBLE_ALL", },},
	{ {"BACK_END_BUBBLE_FE", "BACK_END_BUBBLE_L1D_FPU_RSE", "BE_RSE_BUBBLE_ALL",},},
	{ {"BE_L1D_FPU_BUBBLE_ALL", "BE_L1D_FPU_BUBBLE_L1D", "BE_EXE_BUBBLE_FRALL",},},
	{ {"BE_EXE_BUBBLE_GRALL", "BE_EXE_BUBBLE_GRGR", },},
	{ {"NOPS_RETIRED", "CPU_CYCLES", },}
};
#define N_SETS	(sizeof(events)/sizeof(event_set_t))

static void fatal_error(char *fmt,...) __attribute__((noreturn));

static void
vbprintf(char *fmt, ...)
{
	va_list ap;

	if (options.opt_verbose == 0) return;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}

static void
fatal_error(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(1);
}


static unsigned long
get_cpu_speed(void)
{
	FILE *fp1;	
	unsigned long f = 0;
	char buffer[128], *p, *value;

	memset(buffer, 0, sizeof(buffer));

	fp1 = fopen("/proc/cpuinfo", "r");
	if (fp1 == NULL) return 0;

	for (;;) {
		buffer[0] = '\0';

		p  = fgets(buffer, 127, fp1);
		if (p == NULL) goto end;

		/* skip  blank lines */
		if (*p == '\n') continue;

		p = strchr(buffer, ':');
		if (p == NULL) goto end;	

		/*
		 * p+2: +1 = space, +2= firt character
		 * strlen()-1 gets rid of \n
		 */
		*p = '\0';
		value = p+2;

		value[strlen(value)-1] = '\0';

		if (!strncmp("cpu MHz", buffer, 7)) {
			sscanf(value, "%lu", &f);
			goto end;
		}
	}
end:
	fclose(fp1);
	return f;
}


static void
update_set(pfm_ctxid_t ctxid, int set_idx)
{
	event_set_t *cset = events + set_idx;
	int count;
	int ret;
	int i;


	/*
	 * we do not read the last counter (cpu_cycles) to avoid overwriting
	 * the reg_value field which will be used for next round
	 *
	 * We need to retry the read in case we get EBUSY because it means that
	 * the child task context is not yet available from inspection by PFM_READ_PMDS.
	 *
	 */
	count = cset->n_counters - 1;

	ret = perfmonctl(ctxid, PFM_READ_PMDS, cset->pmds, count);
	if (ret == -1) {
		fatal_error("update_set reading set %d: %s\n", set_idx, strerror(errno));
	}

	/* update counts for this set */
	for (i=0; i < count; i++) {
		cset->values[i]        += cset->pmds[i].reg_value;
		cset->pmds[i].reg_value = 0UL; /* reset for next round */
	}
}


#if 0
static void
update_last_set(pfm_ctxid_t ctxid, int set_idx)
{
	event_set_t *cset = events + set_idx;
	unsigned long cycles;
	int i;

	/*
	 * this time we read ALL the counters (including CPU_CYCLES) because we
	 * need it to scale the last period
	 */
	if (perfmonctl(ctxid, PFM_READ_PMDS, cset->pmds, cset->n_counters) == -1) {
		fatal_error("update_last_set reading set %d\n", set_idx);
	}
	
	cycles = ~0UL - cset->pmds[cset->n_counters-1].reg_value;

	printf("last period = %4.1f%% of full period\n", (cycles*100.0)/options.smpl_period);

	/* this time we scale the value to the length of this last period */
	for (i=0; i < cset->n_counters-1; i++) {
		cset->values[i] += (cset->pmds[i].reg_value*cycles)/options.smpl_period;
	}
}
#endif

int
child(char **arg)
{
	ptrace(PTRACE_TRACEME, 0, NULL, NULL);

	execvp(arg[0], arg);
	/* not reached */

	exit(1);
}

static void
dec2sep(char *str2, char *str, char sep)
{
	int i, l, b, j, c=0;

	l = strlen(str2);
	if (l <= 3) {
		strcpy(str, str2);
		return;
	}
	b = l +  l /3 - (l%3 == 0); /* l%3=correction to avoid extraneous comma at the end */
	for(i=l, j=0; i >= 0; i--, j++) {
		if (j) c++;
		str[b-j] = str2[i];
		if (c == 3) {
			str[b-++j] = sep;
			c = 0;
		}
	}
}

static void
print_results(void)
{
	unsigned int i, j;
	event_set_t *e;
	char tmp1[32], tmp2[32];
	char mtotal_str[32], *mtotal;
	char stotal_str[32], *stotal;

       /*
 	* print the results
 	*
 	* It is important to realize, that the first event we specified may not
 	* be in PMD4. Not all events can be measured by any monitor. That's why
 	* we need to use the pc[] array to figure out where event i was allocated.
 	*
 	*/
	printf("%lu Hz period = %lu cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz);
	printf("%lu full periods\n", options.full_periods);
	printf("%lu event sets\n", N_SETS);
	printf("set        measured total     #runs         scaled total event name\n");
	printf("-------------------------------------------------------------------\n");

	for (i=0; i < N_SETS; i++) {
		e = events + i;
		for(j=0; j < e->n_counters-1; j++) {

			sprintf(tmp1, "%"PRIu64, e->values[j]);

			if (options.opt_us_format) {
				dec2sep(tmp1, mtotal_str, ',');
				mtotal = mtotal_str;
			} else {
				mtotal  = tmp1;
			}
			sprintf(tmp2, "%"PRIu64, (e->values[j]*options.full_periods)/e->n_runs);  /* stupid scaling */

			if (options.opt_us_format) {
				dec2sep(tmp2, stotal_str, ',');
				stotal = stotal_str;
			} else {
				stotal  = tmp2;
			}

			printf("%03d: %20s  %8"PRIu64" %20s %s\n",
				i,
				mtotal,
				e->n_runs,
				stotal,
				e->event_names[j]);
		}
	}
}

static void
switch_sets(void)
{
	event_set_t *cset;

	update_set(ctxid, current_set);
	current_set = (current_set+1) % N_SETS;


	cset        = events+current_set;
	cset->n_runs++;

	vbprintf("starting run %lu for set %d n_pmcs=%d pmd=%"PRIu64"\n",
		cset->n_runs, current_set, cset->n_pmcs,
		cset->pmds[cset->n_counters-1].reg_value);

	/*
	 * if one set as less events than another one, the left-over events will continue
	 * to count for nothing. That's fine because we will restore their values when
	 * the correspinding set is reloaded
	 */
	if (perfmonctl(ctxid, PFM_WRITE_PMCS, cset->pmcs, cset->n_pmcs) == -1) {
		fatal_error("overflow handler writing pmcs set %d : %d\n", current_set, errno);
	}

	if (perfmonctl(ctxid, PFM_WRITE_PMDS, cset->pmds, cset->n_counters) == -1) {
		fatal_error("overflow handler writing pmds set %d\n", current_set);
	}

	options.full_periods++;

	if (perfmonctl(ctxid, PFM_RESTART,NULL, 0) == -1) {
		perror("PFM_RESTART");
		exit(1);
	}

}

int
parent(char **arg)
{
	event_set_t *e;
	pfarg_context_t ctx[1];
	pfarg_load_t load_arg;
	event_set_t *cset;
	pfm_msg_t msg;
	struct pollfd ctx_pollfd;
	pfmlib_regmask_t impl_counters, used_pmcs;
	pfmlib_event_t cycle_event;
	unsigned int i, j, k, l,idx;
	int r, status, ret;
	unsigned int max_counters, allowed_counters;
	pid_t pid;

	pfm_get_num_counters(&max_counters);
	if (max_counters < 2) 
		fatal_error("not enough counter to do anything meaningful\n");

	allowed_counters = max_counters-1; /* reserve one slot for our sampling period */

	memset(&used_pmcs, 0, sizeof(used_pmcs));
	memset(&impl_counters, 0, sizeof(impl_counters));

	pfm_get_impl_counters(&impl_counters);

	memset(ctx, 0, sizeof(ctx));
	memset(&load_arg, 0, sizeof(load_arg));

	if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS) {
		fatal_error("Cannot find cycle event\n");
	}

	options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq;

	vbprintf("%lu Hz period = %lu cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz);

	for (i=0; i < N_SETS; i++) {

		e = events+i;

		memset(&e->pfm_inp,0, sizeof(pfmlib_input_param_t));
		memset(&e->pfm_outp,0, sizeof(pfmlib_output_param_t));

		for(j=0; e->event_names[j] && j < allowed_counters; j++) {

			if (pfm_find_event(e->event_names[j], &idx) != PFMLIB_SUCCESS) {
				fatal_error("Cannot find %s event\n", e->event_names[j]);
			}
			e->pfm_inp.pfp_events[j].event = idx;
		}

		if (e->event_names[j]) {
			fatal_error("cannot have more than %d events per set (CPU_CYCLES uses 1 slot)\n", allowed_counters);
		}
		e->pfm_inp.pfp_events[j]   = cycle_event;
		e->pfm_inp.pfp_event_count = j+1;
		e->pfm_inp.pfp_dfl_plm     = options.opt_plm;

		e->n_pmcs     = j+1; 		/* used pmcs +1=sampling period */
		e->n_counters = j+1;		/* used pmd/pmc counter pairs  +1=sampling period */

		vbprintf("PMU programming for set %d\n", i);

		if ((ret=pfm_dispatch_events(&e->pfm_inp, NULL, &e->pfm_outp, NULL)) != PFMLIB_SUCCESS) {
			fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret));
		}
		/*
		 * propagate from libpfm to kernel data structures
		 */
		for (j=0; j < e->n_counters; j++) {
			e->pmcs[j].reg_num   = e->pfm_outp.pfp_pmcs[j].reg_num;
			e->pmcs[j].reg_value = e->pfm_outp.pfp_pmcs[j].reg_value;

			e->pmds[j].reg_num   = e->pmcs[j].reg_num;

			pfm_regmask_set(&used_pmcs, e->pmcs[j].reg_num);
		}

		/* last counter contains our sampling counter */
		e->pmcs[j-1].reg_flags 	    |= PFM_REGFL_OVFL_NOTIFY;
		e->pmds[j-1].reg_value       = (~0) - options.smpl_period + 1;
		e->pmds[j-1].reg_short_reset = (~0) - options.smpl_period + 1;
		e->pmds[j-1].reg_long_reset  = (~0) - options.smpl_period + 1;

		for (j=0; j < e->n_counters-1; j++) {
			vbprintf("[pmd[%u]=0x%"PRIx64"/0x%"PRIx64"/0x%"PRIx64"]\n",
				e->pmds[j].reg_num,
				e->pmds[j].reg_value,
				e->pmds[j].reg_short_reset,
				e->pmds[j].reg_long_reset);
		}
		vbprintf("[pmd[%u]=0x%"PRIx64"/0x%"PRIx64"/0x%"PRIx64"]\n",
				e->pmds[j].reg_num,
				e->pmds[j].reg_value,
				e->pmds[j].reg_short_reset,
				e->pmds[j].reg_long_reset);

		/*
		 * we blank the unused pmcs to make sure every set uses all the counters, i.e.,
		 * cannot overflow due to some previous sampling periods that uses a counter
		 * beyond the number used by the current set
		 */
		for(j=0, k=e->n_pmcs, l=0; l < max_counters; j++) {
			if (pfm_regmask_isset(&impl_counters, j) == 0) continue;
			l++;
			if (pfm_regmask_isset(&used_pmcs, j)) continue;
			e->pmcs[k].reg_num   = j;
			e->pmcs[k].reg_value = 0UL;
			k++;
		}
		e->n_pmcs= k;
	}
	/*
	 * point to first set of counters
	 */
	current_set = 0;

	/*
	 * we block on counter overflow
	 */
	ctx[0].ctx_flags = PFM_FL_NOTIFY_BLOCK;

	/*
	 * attach the context to the task
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * extract context id
	 */
	ctxid = ctx[0].ctx_fd;

	/*
	 * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC))
		fatal_error("cannot set CLOEXEC: %s\n", strerror(errno));

	ctx_pollfd.fd     = ctxid;
	ctx_pollfd.events = POLLIN;

	cset  = events + current_set;
	cset->n_runs++;

	/*
	 * Now program the registers
	 *
	 * We don't use the save variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more thann coutning monitors.
	 */
	if (perfmonctl(ctxid, PFM_WRITE_PMCS, cset->pmcs, cset->n_pmcs) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	/*
	 * initialize the PMDs
	 */
	if (perfmonctl(ctxid, PFM_WRITE_PMDS, cset->pmds, cset->n_counters) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * now launch the child code
	 */
	if ((pid= fork()) == -1) fatal_error("Cannot fork process\n");
	if (pid == 0) exit(child(arg));

	/*
	 * wait for the child to exec
	 */
	r = waitpid(pid, &status, WUNTRACED);

	if (r < 0 || WIFEXITED(status))
		fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status));

	vbprintf("child created and stopped\n");

	/*
	 * the child is stopped, load context
	 */
	load_arg.load_pid = pid;
	if (perfmonctl(ctxid, PFM_LOAD_CONTEXT, &load_arg, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * make sure monitoring will be activated when the execution is resumed
	 */
	if (perfmonctl(ctxid, PFM_START, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
	}

	/*
	 * resume execution
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/*
	 * mainloop
	 */
	for(;;) {
		ret = read(ctxid, &msg, sizeof(msg));
		if (ret < 0) break;
		switch(msg.type) {
			case PFM_MSG_OVFL:
				switch_sets();
				break;
			case PFM_MSG_END:
				goto finish_line;
			default: printf("unknown message type %d\n", msg.type);
		}
	}
finish_line:

	if (options.full_periods < MIN_FULL_PERIODS) {
		fatal_error("Not enough periods (%lu) to print results\n", options.full_periods);
	}

	//update_last_set(pid, current_set);

	waitpid(pid, &status, 0);

	print_results();

	if (ctxid) close(ctxid);

	return 0;
}



static struct option multiplex_options[]={
	{ "help", 0, 0, 1},
	{ "freq", 1, 0, 2 },
	{ "kernel-level", 0, 0, 3 },
	{ "user-level", 0, 0, 4 },
	{ "version", 0, 0, 5 },

	{ "verbose", 0, &options.opt_verbose, 1 },
	{ "debug", 0, &options.opt_debug, 1 },
	{ "us-counter-format", 0, &options.opt_us_format, 1},
	{ 0, 0, 0, 0}
};

static void
print_usage(char **argv)
{
	printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]);

	printf(	"-h, --help\t\t\t\tdisplay this help and exit\n"
		"-V, --version\t\t\t\toutput version information and exit\n"
		"-u, --user-level\t\t\tmonitor at the user level for all events\n"
		"-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n"
		"-c, --us-counter-format\tprint large counts with comma for thousands\n"
		"--freq=number\t\t\t\tset sampling frequency in Hz\n"
		"--verbose\t\t\t\tprint more information during execution\n"
	);
}


int
main(int argc, char **argv)
{
	char *endptr = NULL;
	pfmlib_options_t pfmlib_options;
	int c, type;


	while ((c=getopt_long(argc, argv,"+vhkuVc", multiplex_options, 0)) != -1) {
		switch(c) {
			case   0: continue; /* fast path for options */

			case   1:
				  print_usage(argv);
				  exit(0);

			case 'v': options.opt_verbose = 1;
				  break;
			case  'c':
				  options.opt_us_format = 1;
				  break;
			case   2:
			case 'V':
				if (options.smpl_freq) fatal_error("sampling frequency set twice\n");
				options.smpl_freq = strtoul(optarg, &endptr, 10);
				if (*endptr != '\0')
					fatal_error("invalid freqyency: %s\n", optarg);
				break;
			case   3:
			case 'k':
				options.opt_plm |= PFM_PLM0;
				break;
			case   4:
			case 'u':
				options.opt_plm |= PFM_PLM3;
				break;
			case   5:
				printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n"
					"Copyright (C) 2002 Hewlett-Packard Company\n");
				exit(0);
			default:
				fatal_error(""); /* just quit silently now */
		}
	}

	if (optind == argc) fatal_error("you need to specify a command to measure\n");

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		fatal_error("can't initialize library\n");
	}
	/*
	 * Let's make sure we run this on the right CPU family
	 */
	pfm_get_pmu_type(&type);
	if (type != PFMLIB_ITANIUM2_PMU) {
		char model[MAX_PMU_NAME_LEN];
		pfm_get_pmu_name(model, MAX_PMU_NAME_LEN);
		fatal_error("this program does not work with %s PMU\n", model);
	}

	if ((options.cpu_mhz = get_cpu_speed()) == 0) {
		fatal_error("can't get CPU speed\n");
	}
	if (options.smpl_freq == 0UL) options.smpl_freq = SMPL_FREQ_IN_HZ;
	if (options.opt_plm == 0) options.opt_plm = PFM_PLM3;
	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */
	pfm_set_options(&pfmlib_options);

	return parent(argv+optind);
}