Blame src/components/perf_event/perf_event.c

Packit 577717
/*
Packit 577717
* File:    perf_event.c
Packit 577717
*
Packit 577717
* Author:  Corey Ashford
Packit 577717
*          cjashfor@us.ibm.com
Packit 577717
*          - based upon perfmon.c written by -
Packit 577717
*          Philip Mucci
Packit 577717
*          mucci@cs.utk.edu
Packit 577717
* Mods:    Gary Mohr
Packit 577717
*          gary.mohr@bull.com
Packit 577717
* Mods:    Vince Weaver
Packit 577717
*          vweaver1@eecs.utk.edu
Packit 577717
* Mods:	   Philip Mucci
Packit 577717
*	   mucci@eecs.utk.edu
Packit 577717
* Mods:    Gary Mohr
Packit 577717
*          gary.mohr@bull.com
Packit 577717
*          Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4.
Packit 577717
*          This adds several new event masks, including cpu=, u=, and k= which give the user
Packit 577717
*          the ability to set cpu number to use or control the domain (user, kernel, or both)
Packit 577717
*          in which the counter should be incremented.  These are event masks so it is now 
Packit 577717
*          possible to have multiple events in the same event set that count activity from 
Packit 577717
*          different cpu's or count activity in different domains.
Packit 577717
*/
Packit 577717
Packit 577717
Packit 577717
#include <fcntl.h>
Packit 577717
#include <string.h>
Packit 577717
#include <errno.h>
Packit 577717
#include <signal.h>
Packit 577717
#include <syscall.h>
Packit 577717
#include <sys/utsname.h>
Packit 577717
#include <sys/mman.h>
Packit 577717
#include <sys/ioctl.h>
Packit 577717
Packit 577717
/* PAPI-specific includes */
Packit 577717
#include "papi.h"
Packit 577717
#include "papi_memory.h"
Packit 577717
#include "papi_internal.h"
Packit 577717
#include "papi_vector.h"
Packit 577717
#include "extras.h"
Packit 577717
Packit 577717
/* libpfm4 includes */
Packit 577717
#include "papi_libpfm4_events.h"
Packit 577717
#include "pe_libpfm4_events.h"
Packit 577717
#include "perfmon/pfmlib.h"
Packit 577717
#include PEINCLUDE
Packit 577717
Packit 577717
/* Linux-specific includes */
Packit 577717
#include "mb.h"
Packit 577717
#include "linux-memory.h"
Packit 577717
#include "linux-timer.h"
Packit 577717
#include "linux-common.h"
Packit 577717
#include "linux-context.h"
Packit 577717
Packit 577717
#include "perf_event_lib.h"
Packit 577717
#include "perf_helpers.h"
Packit 577717
Packit 577717
/* Set to enable pre-Linux 2.6.34 perf_event workarounds   */
Packit 577717
/* If disabling them gets no complaints then we can remove */
Packit 577717
/* These in a future version of PAPI.                      */
Packit 577717
#define OBSOLETE_WORKAROUNDS 0
Packit 577717
Packit 577717
/* Defines for ctx->state */
Packit 577717
#define PERF_EVENTS_OPENED  0x01
Packit 577717
#define PERF_EVENTS_RUNNING 0x02
Packit 577717
Packit 577717
/* Forward declaration */
Packit 577717
papi_vector_t _perf_event_vector;
Packit 577717
Packit 577717
/* Globals */
Packit 577717
struct native_event_table_t perf_native_event_table;
Packit 577717
static int our_cidx;
Packit 577717
static int exclude_guest_unsupported;
Packit 577717
Packit 577717
/* The kernel developers say to never use a refresh value of 0        */
Packit 577717
/* See https://lkml.org/lkml/2011/5/24/172                            */
Packit 577717
/* However, on some platforms (like Power) a value of 1 does not work */
Packit 577717
/* We're still tracking down why this happens.                        */
Packit 577717
Packit 577717
#if defined(__powerpc__)
Packit 577717
#define PAPI_REFRESH_VALUE 0
Packit 577717
#else
Packit 577717
#define PAPI_REFRESH_VALUE 1
Packit 577717
#endif
Packit 577717
Packit 577717
static int _pe_set_domain( hwd_control_state_t *ctl, int domain);
Packit 577717
Packit 577717
#if (OBSOLETE_WORKAROUNDS==1)
Packit 577717
Packit 577717
/* Check for processor support */
Packit 577717
/* Can be used for generic checking, though in general we only     */
Packit 577717
/* check for pentium4 here because support was broken for multiple */
Packit 577717
/* kernel releases and the usual standard detections did not       */
Packit 577717
/* handle this.  So we check for pentium 4 explicitly.             */
Packit 577717
static int
Packit 577717
processor_supported(int vendor, int family) {
Packit 577717
Packit 577717
   /* Error out if kernel too early to support p4 */
Packit 577717
   if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) {
Packit 577717
      if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) {
Packit 577717
         PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
Packit 577717
         return PAPI_ENOSUPP;
Packit 577717
      }
Packit 577717
   }
Packit 577717
   return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
#endif
Packit 577717
Packit 577717
/* Fix up the config based on what CPU/Vendor we are running on */
Packit 577717
static int
Packit 577717
pe_vendor_fixups(papi_vector_t *vector)
Packit 577717
{
Packit 577717
     /* powerpc */
Packit 577717
     /* On IBM and Power6 Machines default domain should include supervisor */
Packit 577717
  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) {
Packit 577717
     vector->cmp_info.available_domains |=
Packit 577717
                  PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
Packit 577717
     if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) {
Packit 577717
        vector->cmp_info.default_domain =
Packit 577717
                  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
Packit 577717
     }
Packit 577717
  }
Packit 577717
Packit 577717
  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_MIPS ) {
Packit 577717
     vector->cmp_info.available_domains |= PAPI_DOM_KERNEL;
Packit 577717
  }
Packit 577717
Packit 577717
  if ((_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL) ||
Packit 577717
      (_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD)) {
Packit 577717
     vector->cmp_info.fast_real_timer = 1;
Packit 577717
  }
Packit 577717
Packit 577717
	/* ARM */
Packit 577717
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) {
Packit 577717
Packit 577717
		/* Some ARMv7 and earlier could not measure	*/
Packit 577717
		/* KERNEL and USER separately.			*/
Packit 577717
Packit 577717
		/* Whitelist CortexA7 and CortexA15		*/
Packit 577717
		/* There might be more				*/
Packit 577717
Packit 577717
		if ((_papi_hwi_system_info.hw_info.cpuid_family < 8) &&
Packit 577717
			(_papi_hwi_system_info.hw_info.cpuid_model!=0xc07) &&
Packit 577717
			(_papi_hwi_system_info.hw_info.cpuid_model!=0xc0f)) {
Packit 577717
Packit 577717
			vector->cmp_info.available_domains |=
Packit 577717
				PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
Packit 577717
			vector->cmp_info.default_domain =
Packit 577717
				PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	/* CRAY */
Packit 577717
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) {
Packit 577717
		vector->cmp_info.available_domains |= PAPI_DOM_OTHER;
Packit 577717
	}
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/******************************************************************/
Packit 577717
/******** Kernel Version Dependent Routines  **********************/
Packit 577717
/******************************************************************/
Packit 577717
Packit 577717
Packit 577717
/* PERF_FORMAT_GROUP allows reading an entire group's counts at once.  */
/* Before Linux 2.6.34 it did not work when reading results from       */
/* attached processes; rather than special-casing that, the workaround */
/* (when compiled in) disables group reads for all cases.              */
/*  commit was:  050735b08ca8a016bbace4445fa025b88fee770b              */
/*                                                                     */
/* Returns 1 if PERF_FORMAT_GROUP must be avoided, 0 otherwise.        */
static int
bug_format_group(void) {

	int broken = 0;

#if (OBSOLETE_WORKAROUNDS==1)
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
		broken = 1;
	}
#endif

	/* MIPS, as of version 3.1, does not support this properly */
	/* FIXME: is this still true? */
#if defined(__mips__)
	broken = 1;
#endif

	return broken;
}
Packit 577717
Packit 577717
#if (OBSOLETE_WORKAROUNDS==1)
Packit 577717
Packit 577717
Packit 577717
/* There's a bug prior to Linux 2.6.33 where if you are using */
Packit 577717
/* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and              */
Packit 577717
/* TOTAL_TIME_RUNNING fields will be zero unless you disable  */
Packit 577717
/* the counters first                                         */
Packit 577717
static int
Packit 577717
bug_sync_read(void) {
Packit 577717
Packit 577717
  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
Packit 577717
Packit 577717
  return 0;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
#endif
Packit 577717
Packit 577717
/* Set the F_SETOWN_EX flag on the fd.                          */
Packit 577717
/* This affects which thread an overflow signal gets sent to    */
Packit 577717
/* Handled in a subroutine to handle the fact that the behavior */
Packit 577717
/* is dependent on kernel version.                              */
Packit 577717
static int
Packit 577717
fcntl_setown_fd(int fd) {
Packit 577717
Packit 577717
	int ret;
Packit 577717
	struct f_owner_ex fown_ex;
Packit 577717
Packit 577717
	/* F_SETOWN_EX is not available until 2.6.32 */
Packit 577717
	/* but PAPI perf_event support didn't work on 2.6.31 anyay */
Packit 577717
Packit 577717
	/* set ownership of the descriptor */
Packit 577717
	fown_ex.type = F_OWNER_TID;
Packit 577717
	fown_ex.pid  = mygettid();
Packit 577717
	ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex );
Packit 577717
Packit 577717
	if ( ret == -1 ) {
Packit 577717
		PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s",
Packit 577717
			fd, strerror( errno ) );
Packit 577717
		return PAPI_ESYS;
Packit 577717
	}
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/* The read format on perf_event varies based on various flags that */
Packit 577717
/* are passed into it.  This helper avoids copying this logic       */
Packit 577717
/* multiple places.                                                 */
Packit 577717
static unsigned int
Packit 577717
get_read_format( unsigned int multiplex,
Packit 577717
		 unsigned int inherit,
Packit 577717
		 int format_group )
Packit 577717
{
Packit 577717
   unsigned int format = 0;
Packit 577717
Packit 577717
   /* if we need read format options for multiplexing, add them now */
Packit 577717
   if (multiplex) {
Packit 577717
      format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
Packit 577717
      format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
Packit 577717
   }
Packit 577717
Packit 577717
   /* if our kernel supports it and we are not using inherit, */
Packit 577717
   /* add the group read options                              */
Packit 577717
   if ( (!bug_format_group()) && !inherit) {
Packit 577717
      if (format_group) {
Packit 577717
	 format |= PERF_FORMAT_GROUP;
Packit 577717
      }
Packit 577717
   }
Packit 577717
Packit 577717
   SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
Packit 577717
	  multiplex, inherit, format_group, format);
Packit 577717
Packit 577717
   return format;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* attr.exclude_guest is enabled by default in recent libpfm4 */
Packit 577717
/* however older kernels will reject events with it set */
Packit 577717
/* because the reserved field is not all zeros */
Packit 577717
static int
Packit 577717
check_exclude_guest( void )
Packit 577717
{
Packit 577717
	int ev_fd;
Packit 577717
	struct perf_event_attr attr;
Packit 577717
Packit 577717
	exclude_guest_unsupported=0;
Packit 577717
Packit 577717
	/* First check that we can open a plain instructions event */
Packit 577717
	memset(&attr, 0 , sizeof(attr));
Packit 577717
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
Packit 577717
Packit 577717
	ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
Packit 577717
	if ( ev_fd == -1 ) {
Packit 577717
		PAPIERROR("Couldn't open hw_instructions in exclude_guest=0 test");
Packit 577717
		return -1;
Packit 577717
	}
Packit 577717
	close(ev_fd);
Packit 577717
Packit 577717
	/* Now try again with excude_guest */
Packit 577717
	memset(&attr, 0 , sizeof(attr));
Packit 577717
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
Packit 577717
	attr.exclude_guest=1;
Packit 577717
Packit 577717
	ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
Packit 577717
	if ( ev_fd == -1 ) {
Packit 577717
		if (errno==EINVAL) {
Packit 577717
			exclude_guest_unsupported=1;
Packit 577717
		}
Packit 577717
		else {
Packit 577717
		  PAPIERROR("Couldn't open hw_instructions in exclude_guest=1 test");
Packit 577717
		}
Packit 577717
	} else {
Packit 577717
		exclude_guest_unsupported=0;
Packit 577717
		close(ev_fd);
Packit 577717
	}
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/*****************************************************************/
Packit 577717
/********* End Kernel-version Dependent Routines  ****************/
Packit 577717
/*****************************************************************/
Packit 577717
Packit 577717
/*****************************************************************/
Packit 577717
/********* Begin perf_event low-level code ***********************/
Packit 577717
/*****************************************************************/
Packit 577717
Packit 577717
static void perf_event_dump_attr( struct perf_event_attr *hw_event,
Packit 577717
	pid_t pid, int cpu, int group_fd, unsigned long int flags) {
Packit 577717
Packit 577717
	/* Mark parameters as not used                   */
Packit 577717
	/* In the common case (no SUBDBG) the function   */
Packit 577717
	/* compiles into an empty function and complains */
Packit 577717
	/* about unused variables.                       */
Packit 577717
	(void)hw_event;
Packit 577717
	(void)pid;
Packit 577717
	(void)cpu;
Packit 577717
	(void)group_fd;
Packit 577717
	(void)flags;
Packit 577717
Packit 577717
	SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, "
Packit 577717
		"group_fd: %d, flags: %lx\n",
Packit 577717
		hw_event, pid, cpu, group_fd, flags);
Packit 577717
	SUBDBG("   type: %d\n",hw_event->type);
Packit 577717
	SUBDBG("   size: %d\n",hw_event->size);
Packit 577717
	SUBDBG("   config: %"PRIx64" (%"PRIu64")\n",
Packit 577717
		hw_event->config, hw_event->config);
Packit 577717
	SUBDBG("   sample_period: %"PRIu64"\n",hw_event->sample_period);
Packit 577717
	SUBDBG("   sample_type: %"PRIu64"\n",hw_event->sample_type);
Packit 577717
	SUBDBG("   read_format: %"PRIu64"\n",hw_event->read_format);
Packit 577717
	SUBDBG("   disabled: %d\n",hw_event->disabled);
Packit 577717
	SUBDBG("   inherit: %d\n",hw_event->inherit);
Packit 577717
	SUBDBG("   pinned: %d\n",hw_event->pinned);
Packit 577717
	SUBDBG("   exclusive: %d\n",hw_event->exclusive);
Packit 577717
	SUBDBG("   exclude_user: %d\n",hw_event->exclude_user);
Packit 577717
	SUBDBG("   exclude_kernel: %d\n",hw_event->exclude_kernel);
Packit 577717
	SUBDBG("   exclude_hv: %d\n",hw_event->exclude_hv);
Packit 577717
	SUBDBG("   exclude_idle: %d\n",hw_event->exclude_idle);
Packit 577717
	SUBDBG("   mmap: %d\n",hw_event->mmap);
Packit 577717
	SUBDBG("   comm: %d\n",hw_event->comm);
Packit 577717
	SUBDBG("   freq: %d\n",hw_event->freq);
Packit 577717
	SUBDBG("   inherit_stat: %d\n",hw_event->inherit_stat);
Packit 577717
	SUBDBG("   enable_on_exec: %d\n",hw_event->enable_on_exec);
Packit 577717
	SUBDBG("   task: %d\n",hw_event->task);
Packit 577717
	SUBDBG("   watermark: %d\n",hw_event->watermark);
Packit 577717
	SUBDBG("   precise_ip: %d\n",hw_event->precise_ip);
Packit 577717
	SUBDBG("   mmap_data: %d\n",hw_event->mmap_data);
Packit 577717
	SUBDBG("   sample_id_all: %d\n",hw_event->sample_id_all);
Packit 577717
	SUBDBG("   exclude_host: %d\n",hw_event->exclude_host);
Packit 577717
	SUBDBG("   exclude_guest: %d\n",hw_event->exclude_guest);
Packit 577717
	SUBDBG("   exclude_callchain_kernel: %d\n",
Packit 577717
		hw_event->exclude_callchain_kernel);
Packit 577717
	SUBDBG("   exclude_callchain_user: %d\n",
Packit 577717
		hw_event->exclude_callchain_user);
Packit 577717
	SUBDBG("   wakeup_events: %"PRIx32" (%"PRIu32")\n",
Packit 577717
		hw_event->wakeup_events, hw_event->wakeup_events);
Packit 577717
	SUBDBG("   bp_type: %"PRIx32" (%"PRIu32")\n",
Packit 577717
		hw_event->bp_type, hw_event->bp_type);
Packit 577717
	SUBDBG("   config1: %"PRIx64" (%"PRIu64")\n",
Packit 577717
		hw_event->config1, hw_event->config1);
Packit 577717
	SUBDBG("   config2: %"PRIx64" (%"PRIu64")\n",
Packit 577717
		hw_event->config2, hw_event->config2);
Packit 577717
	SUBDBG("   branch_sample_type: %"PRIx64" (%"PRIu64")\n",
Packit 577717
		hw_event->branch_sample_type, hw_event->branch_sample_type);
Packit 577717
	SUBDBG("   sample_regs_user: %"PRIx64" (%"PRIu64")\n",
Packit 577717
		hw_event->sample_regs_user, hw_event->sample_regs_user);
Packit 577717
	SUBDBG("   sample_stack_user: %"PRIx32" (%"PRIu32")\n",
Packit 577717
		hw_event->sample_stack_user, hw_event->sample_stack_user);
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
static int map_perf_event_errors_to_papi(int perf_event_error) {
Packit 577717
Packit 577717
   int ret;
Packit 577717
Packit 577717
   /* These mappings are approximate.
Packit 577717
      EINVAL in particular can mean lots of different things */
Packit 577717
   switch(perf_event_error) {
Packit 577717
      case EPERM:
Packit 577717
      case EACCES:
Packit 577717
           ret = PAPI_EPERM;
Packit 577717
	   break;
Packit 577717
      case ENODEV:
Packit 577717
      case EOPNOTSUPP:
Packit 577717
	   ret = PAPI_ENOSUPP;
Packit 577717
           break;
Packit 577717
      case ENOENT:
Packit 577717
	   ret = PAPI_ENOEVNT;
Packit 577717
           break;
Packit 577717
      case ENOSYS:
Packit 577717
      case EAGAIN:
Packit 577717
      case EBUSY:
Packit 577717
      case E2BIG:	/* Only happens if attr is the wrong size somehow */
Packit 577717
      case EBADF:	/* We are attempting to group with an invalid file descriptor */
Packit 577717
	   ret = PAPI_ESYS;
Packit 577717
	   break;
Packit 577717
      case ENOMEM:
Packit 577717
	   ret = PAPI_ENOMEM;
Packit 577717
	   break;
Packit 577717
      case EMFILE:	/* Out of file descriptors.  Typically max out at 1024 */
Packit 577717
           ret = PAPI_ECOUNT;
Packit 577717
           break;
Packit 577717
      case EINVAL:
Packit 577717
      default:
Packit 577717
	   ret = PAPI_EINVAL;
Packit 577717
           break;
Packit 577717
   }
Packit 577717
   return ret;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/** Check if the current set of options is supported by  */
Packit 577717
/*  perf_events.                                         */
Packit 577717
/*  We do this by temporarily opening an event with the  */
Packit 577717
/*  desired options then closing it again.  We use the   */
Packit 577717
/*  PERF_COUNT_HW_INSTRUCTION event as a dummy event     */
Packit 577717
/*  on the assumption it is available on all             */
Packit 577717
/*  platforms.                                           */
Packit 577717
Packit 577717
static int
Packit 577717
check_permissions( unsigned long tid,
Packit 577717
		   unsigned int cpu_num,
Packit 577717
		   unsigned int domain,
Packit 577717
		   unsigned int granularity,
Packit 577717
		   unsigned int multiplex,
Packit 577717
		   unsigned int inherit )
Packit 577717
{
Packit 577717
   int ev_fd;
Packit 577717
   struct perf_event_attr attr;
Packit 577717
Packit 577717
   long pid;
Packit 577717
Packit 577717
   /* clearing this will set a type of hardware and to count all domains */
Packit 577717
   memset(&attr, '\0', sizeof(attr));
Packit 577717
   attr.read_format = get_read_format(multiplex, inherit, 1);
Packit 577717
Packit 577717
   /* set the event id (config field) to instructios */
Packit 577717
   /* (an event that should always exist)            */
Packit 577717
   /* This was cycles but that is missing on Niagara */
Packit 577717
   attr.config = PERF_COUNT_HW_INSTRUCTIONS;
Packit 577717
Packit 577717
   /* now set up domains this event set will be counting */
Packit 577717
   if (!(domain & PAPI_DOM_SUPERVISOR)) {
Packit 577717
      attr.exclude_hv = 1;
Packit 577717
   }
Packit 577717
   if (!(domain & PAPI_DOM_USER)) {
Packit 577717
      attr.exclude_user = 1;
Packit 577717
   }
Packit 577717
   if (!(domain & PAPI_DOM_KERNEL)) {
Packit 577717
      attr.exclude_kernel = 1;
Packit 577717
   }
Packit 577717
Packit 577717
   if (granularity==PAPI_GRN_SYS) {
Packit 577717
      pid = -1;
Packit 577717
   } else {
Packit 577717
      pid = tid;
Packit 577717
   }
Packit 577717
Packit 577717
   SUBDBG("Calling sys_perf_event_open() from check_permissions\n");
Packit 577717
Packit 577717
	perf_event_dump_attr( &attr, pid, cpu_num, -1, 0 );
Packit 577717
Packit 577717
   ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 );
Packit 577717
   if ( ev_fd == -1 ) {
Packit 577717
      SUBDBG("sys_perf_event_open returned error.  Linux says, %s", 
Packit 577717
	     strerror( errno ) );
Packit 577717
      return map_perf_event_errors_to_papi(errno);
Packit 577717
   }
Packit 577717
Packit 577717
   /* now close it, this was just to make sure we have permissions */
Packit 577717
   /* to set these options                                         */
Packit 577717
   close(ev_fd);
Packit 577717
   return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/* Maximum size we ever expect to read from a perf_event fd   */
Packit 577717
/*  (this is the number of 64-bit values)                     */
Packit 577717
/* We use this to size the read buffers                       */
Packit 577717
/* The three is for event count, time_enabled, time_running   */
Packit 577717
/*  and the counter term is count value and count id for each */
Packit 577717
/*  possible counter value.                                   */
Packit 577717
#define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */
Packit 577717
/* implementations (e.g. x86 before 2.6.33) which don't do a static event */
Packit 577717
/* scheduability check in sys_perf_event_open.  It is also needed if the  */
Packit 577717
/* kernel is stealing an event, such as when NMI watchdog is enabled.     */
Packit 577717
Packit 577717
static int
Packit 577717
check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
Packit 577717
{
Packit 577717
   int retval = 0, cnt = -1;
Packit 577717
   ( void ) ctx;			 /*unused */
Packit 577717
   long long papi_pe_buffer[READ_BUFFER_SIZE];
Packit 577717
   int i,group_leader_fd;
Packit 577717
Packit 577717
   /* If the kernel isn't tracking scheduability right       */
Packit 577717
   /* Then we need to start/stop/read to force the event     */
Packit 577717
   /* to be scheduled and see if an error condition happens. */
Packit 577717
Packit 577717
   /* get the proper fd to start */
Packit 577717
   group_leader_fd=ctl->events[idx].group_leader_fd;
Packit 577717
   if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
Packit 577717
Packit 577717
   /* start the event */
Packit 577717
   retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
Packit 577717
   if (retval == -1) {
Packit 577717
      PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
   /* stop the event */
Packit 577717
   retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
Packit 577717
   if (retval == -1) {
Packit 577717
      PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
   /* See if a read returns any results */
Packit 577717
   cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
Packit 577717
   if ( cnt == -1 ) {
Packit 577717
      SUBDBG( "read returned an error!  Should never happen.\n" );
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
   if ( cnt == 0 ) {
Packit 577717
      /* We read 0 bytes if we could not schedule the event */
Packit 577717
      /* The kernel should have detected this at open       */
Packit 577717
      /* but various bugs (including NMI watchdog)          */
Packit 577717
      /* result in this behavior                            */
Packit 577717
Packit 577717
      return PAPI_ECNFLCT;
Packit 577717
Packit 577717
   } else {
Packit 577717
Packit 577717
      /* Reset all of the counters (opened so far) back to zero      */
Packit 577717
      /* from the above brief enable/disable call pair.              */
Packit 577717
Packit 577717
      /* We have to reset all events because reset of group leader      */
Packit 577717
      /* does not reset all.                                            */
Packit 577717
      /* we assume that the events are being added one by one and that  */
Packit 577717
      /* we do not need to reset higher events (doing so may reset ones */
Packit 577717
      /* that have not been initialized yet.                            */
Packit 577717
Packit 577717
      /* Note... PERF_EVENT_IOC_RESET does not reset time running       */
Packit 577717
      /* info if multiplexing, so we should avoid coming here if        */
Packit 577717
      /* we are multiplexing the event.                                 */
Packit 577717
      for( i = 0; i < idx; i++) {
Packit 577717
	 retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
Packit 577717
	 if (retval == -1) {
Packit 577717
	    PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
Packit 577717
		       "(fd %d)failed",
Packit 577717
		       i,ctl->num_events,idx,ctl->events[i].event_fd);
Packit 577717
	    return PAPI_ESYS;
Packit 577717
	 }
Packit 577717
      }
Packit 577717
   }
Packit 577717
   return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* Do some extra work on a perf_event fd if we're doing sampling  */
Packit 577717
/* This mostly means setting up the mmap buffer.                  */
Packit 577717
static int
Packit 577717
configure_fd_for_sampling( pe_control_t *ctl, int evt_idx )
Packit 577717
{
Packit 577717
   int ret;
Packit 577717
   int fd = ctl->events[evt_idx].event_fd;
Packit 577717
Packit 577717
   /* Register that we would like a SIGIO notification when a mmap'd page */
Packit 577717
   /* becomes full.                                                       */
Packit 577717
   ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
Packit 577717
   if ( ret ) {
Packit 577717
      PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
Packit 577717
		  "returned error: %s", fd, strerror( errno ) );
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
   /* Set the F_SETOWN_EX flag on the fd.                          */
Packit 577717
   /* This affects which thread an overflow signal gets sent to.   */
Packit 577717
   ret=fcntl_setown_fd(fd);
Packit 577717
   if (ret!=PAPI_OK) return ret;
Packit 577717
Packit 577717
   /* Set FD_CLOEXEC.  Otherwise if we do an exec with an overflow */
Packit 577717
   /* running, the overflow handler will continue into the exec()'d*/
Packit 577717
   /* process and kill it because no signal handler is set up.     */
Packit 577717
   ret=fcntl(fd, F_SETFD, FD_CLOEXEC);
Packit 577717
   if (ret) {
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
   /* when you explicitely declare that you want a particular signal,  */
Packit 577717
   /* even with you use the default signal, the kernel will send more  */
Packit 577717
   /* information concerning the event to the signal handler.          */
Packit 577717
   /*                                                                  */
Packit 577717
   /* In particular, it will send the file descriptor from which the   */
Packit 577717
   /* event is originating which can be quite useful when monitoring   */
Packit 577717
   /* multiple tasks from a single thread.                             */
Packit 577717
   ret = fcntl( fd, F_SETSIG, ctl->overflow_signal );
Packit 577717
   if ( ret == -1 ) {
Packit 577717
      PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
Packit 577717
		 ctl->overflow_signal, fd,
Packit 577717
		 strerror( errno ) );
Packit 577717
      return PAPI_ESYS;
Packit 577717
   }
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
set_up_mmap( pe_control_t *ctl, int evt_idx)
Packit 577717
{
Packit 577717
Packit 577717
	void *buf_addr;
Packit 577717
	int fd = ctl->events[evt_idx].event_fd;
Packit 577717
Packit 577717
	/* mmap() the sample buffer */
Packit 577717
	buf_addr = mmap( NULL,
Packit 577717
			ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
Packit 577717
			PROT_READ | PROT_WRITE,
Packit 577717
			MAP_SHARED,
Packit 577717
			fd, 0 );
Packit 577717
Packit 577717
	/* This may happen if we go over the limit in	*/
Packit 577717
	/* /proc/sys/kernel/perf_event_mlock_kb		*/
Packit 577717
	/* which defaults to 516k			*/
Packit 577717
	/* with regular rdpmc events on 4k page archs	*/
Packit 577717
	/* this is roughly 128 events			*/
Packit 577717
Packit 577717
	/* We sholdn't fail, just fall back to non-rdpmc	*/
Packit 577717
	/* Although not sure what happens if it's a sample	*/
Packit 577717
	/* event that fails to mmap.				*/
Packit 577717
Packit 577717
	if ( buf_addr == MAP_FAILED ) {
Packit 577717
		SUBDBG( "mmap(NULL,%d,%d,%d,%d,0): %s",
Packit 577717
			ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
Packit 577717
			PROT_READ | PROT_WRITE,
Packit 577717
			MAP_SHARED,
Packit 577717
			fd, strerror( errno ) );
Packit 577717
Packit 577717
		ctl->events[evt_idx].mmap_buf = NULL;
Packit 577717
Packit 577717
		/* Easier to just globally disable this, as it should	*/
Packit 577717
		/* be a fairly uncommon case hopefully.			*/
Packit 577717
		if (_perf_event_vector.cmp_info.fast_counter_read) {
Packit 577717
			PAPIERROR("Can't mmap, disabling fast_counter_read\n");
Packit 577717
			_perf_event_vector.cmp_info.fast_counter_read=0;
Packit 577717
		}
Packit 577717
		return PAPI_ESYS;
Packit 577717
	}
Packit 577717
Packit 577717
	SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );
Packit 577717
Packit 577717
	/* Set up the mmap buffer and its associated helpers */
Packit 577717
	ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr;
Packit 577717
	ctl->events[evt_idx].tail = 0;
Packit 577717
	ctl->events[evt_idx].mask =
Packit 577717
		( ctl->events[evt_idx].nr_mmap_pages - 1 ) * getpagesize() - 1;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/* Open all events in the control state */
Packit 577717
static int
Packit 577717
open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
Packit 577717
{
Packit 577717
Packit 577717
	int i, ret = PAPI_OK;
Packit 577717
	long pid;
Packit 577717
Packit 577717
	if (ctl->granularity==PAPI_GRN_SYS) {
Packit 577717
		pid = -1;
Packit 577717
	}
Packit 577717
	else {
Packit 577717
		pid = ctl->tid;
Packit 577717
	}
Packit 577717
Packit 577717
	for( i = 0; i < ctl->num_events; i++ ) {
Packit 577717
Packit 577717
		ctl->events[i].event_opened=0;
Packit 577717
Packit 577717
		/* set up the attr structure.			*/
Packit 577717
		/* We don't set up all fields here		*/
Packit 577717
		/* as some have already been set up previously.	*/
Packit 577717
Packit 577717
		/* Handle the broken exclude_guest problem */
Packit 577717
		/* libpfm4 sets this by default (PEBS events depend on it) */
Packit 577717
		/* but on older kernels that dont know about exclude_guest */
Packit 577717
		/* perf_event_open() will error out as a "reserved"        */
Packit 577717
		/* unknown bit is set to 1.                                */
Packit 577717
		/* Do we need to also watch for exclude_host, exclude_idle */
Packit 577717
		/* exclude_callchain*?					   */
Packit 577717
		if ((ctl->events[i].attr.exclude_guest) &&
Packit 577717
			(exclude_guest_unsupported)) {
Packit 577717
			SUBDBG("Disabling exclude_guest in event %d\n",i);
Packit 577717
			ctl->events[i].attr.exclude_guest=0;
Packit 577717
		}
Packit 577717
Packit 577717
		/* group leader (event 0) is special                */
Packit 577717
		/* If we're multiplexed, everyone is a group leader */
Packit 577717
		if (( i == 0 ) || (ctl->multiplexed)) {
Packit 577717
			ctl->events[i].attr.pinned = !ctl->multiplexed;
Packit 577717
			ctl->events[i].attr.disabled = 1;
Packit 577717
			ctl->events[i].group_leader_fd=-1;
Packit 577717
			ctl->events[i].attr.read_format = get_read_format(
Packit 577717
							ctl->multiplexed,
Packit 577717
							ctl->inherit,
Packit 577717
							!ctl->multiplexed );
Packit 577717
		} else {
Packit 577717
			ctl->events[i].attr.pinned=0;
Packit 577717
			ctl->events[i].attr.disabled = 0;
Packit 577717
			ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
Packit 577717
			ctl->events[i].attr.read_format = get_read_format(
Packit 577717
							ctl->multiplexed,
Packit 577717
							ctl->inherit,
Packit 577717
							0 );
Packit 577717
		}
Packit 577717
Packit 577717
		/* try to open */
Packit 577717
		perf_event_dump_attr(
Packit 577717
				&ctl->events[i].attr,
Packit 577717
				pid,
Packit 577717
				ctl->events[i].cpu,
Packit 577717
				ctl->events[i].group_leader_fd,
Packit 577717
				0 /* flags */ );
Packit 577717
Packit 577717
		ctl->events[i].event_fd = sys_perf_event_open(
Packit 577717
				&ctl->events[i].attr,
Packit 577717
				pid,
Packit 577717
				ctl->events[i].cpu,
Packit 577717
				ctl->events[i].group_leader_fd,
Packit 577717
				0 /* flags */ );
Packit 577717
Packit 577717
		/* Try to match Linux errors to PAPI errors */
Packit 577717
		if ( ctl->events[i].event_fd == -1 ) {
Packit 577717
			SUBDBG("sys_perf_event_open returned error "
Packit 577717
				"on event #%d.  Error: %s\n",
Packit 577717
				i, strerror( errno ) );
Packit 577717
			ret=map_perf_event_errors_to_papi(errno);
Packit 577717
Packit 577717
			goto open_pe_cleanup;
Packit 577717
		}
Packit 577717
Packit 577717
		SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
Packit 577717
			" group_leader/fd: %d, event_fd: %d,"
Packit 577717
			" read_format: %"PRIu64"\n",
Packit 577717
			pid, ctl->events[i].cpu,
Packit 577717
			ctl->events[i].group_leader_fd,
Packit 577717
			ctl->events[i].event_fd,
Packit 577717
			ctl->events[i].attr.read_format);
Packit 577717
Packit 577717
Packit 577717
		/* in many situations the kernel will indicate we opened fine */
Packit 577717
		/* yet things will fail later.  So we need to double check    */
Packit 577717
		/* we actually can use the events we've set up.               */
Packit 577717
Packit 577717
		/* This is not necessary if we are multiplexing, and in fact */
Packit 577717
		/* we cannot do this properly if multiplexed because         */
Packit 577717
		/* PERF_EVENT_IOC_RESET does not reset the time running info */
Packit 577717
		if (!ctl->multiplexed) {
Packit 577717
			ret = check_scheduability( ctx, ctl, i );
Packit 577717
Packit 577717
			if ( ret != PAPI_OK ) {
Packit 577717
				/* the last event did open, so we need to    */
Packit 577717
				/* bump the counter before doing the cleanup */
Packit 577717
				i++;
Packit 577717
				goto open_pe_cleanup;
Packit 577717
			}
Packit 577717
		}
Packit 577717
		ctl->events[i].event_opened=1;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Now that we've successfully opened all of the events, do whatever  */
Packit 577717
	/* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
Packit 577717
	/* and so on.                                                         */
Packit 577717
Packit 577717
Packit 577717
	/* Make things easier and give each event a mmap() buffer */
Packit 577717
	/* Keeping separate tracking for rdpmc vs regular events  */
Packit 577717
	/* Would be a pain.  Also perf always gives every event a */
Packit 577717
	/* mmap buffer.						  */
Packit 577717
Packit 577717
	for ( i = 0; i < ctl->num_events; i++ ) {
Packit 577717
Packit 577717
		/* Can't mmap() inherited events :( */
Packit 577717
		if (ctl->inherit) {
Packit 577717
			ctl->events[i].nr_mmap_pages = 0;
Packit 577717
			ctl->events[i].mmap_buf = NULL;
Packit 577717
		}
Packit 577717
		else {
Packit 577717
			/* Just a guess at how many pages would make this   */
Packit 577717
			/* relatively efficient.                            */
Packit 577717
			/* Note that it's "1 +" because of the need for a   */
Packit 577717
			/* control page, and the number following the "+"   */
Packit 577717
			/* must be a power of 2 (1, 4, 8, 16, etc) or zero. */
Packit 577717
			/* This is required to optimize dealing with        */
Packit 577717
			/* circular buffer wrapping of the mapped pages.    */
Packit 577717
			if (ctl->events[i].sampling) {
Packit 577717
				ctl->events[i].nr_mmap_pages = 1 + 2;
Packit 577717
			}
Packit 577717
			else if (_perf_event_vector.cmp_info.fast_counter_read) {
Packit 577717
				ctl->events[i].nr_mmap_pages = 1;
Packit 577717
			}
Packit 577717
			else {
Packit 577717
				ctl->events[i].nr_mmap_pages = 0;
Packit 577717
			}
Packit 577717
Packit 577717
			/* Set up the MMAP sample pages */
Packit 577717
			if (ctl->events[i].nr_mmap_pages) {
Packit 577717
				set_up_mmap(ctl,i);
Packit 577717
			} else {
Packit 577717
				ctl->events[i].mmap_buf = NULL;
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	for ( i = 0; i < ctl->num_events; i++ ) {
Packit 577717
Packit 577717
		/* If sampling is enabled, hook up signal handler */
Packit 577717
		if (ctl->events[i].attr.sample_period) {
Packit 577717
Packit 577717
			ret = configure_fd_for_sampling( ctl, i );
Packit 577717
			if ( ret != PAPI_OK ) {
Packit 577717
				/* We failed, and all of the fds are open */
Packit 577717
				/* so we need to clean up all of them */
Packit 577717
				i = ctl->num_events;
Packit 577717
				goto open_pe_cleanup;
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	/* Set num_evts only if completely successful */
Packit 577717
	ctx->state |= PERF_EVENTS_OPENED;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
Packit 577717
open_pe_cleanup:
Packit 577717
	/* We encountered an error, close up the fds we successfully opened.  */
Packit 577717
	/* We go backward in an attempt to close group leaders last, although */
Packit 577717
	/* That's probably not strictly necessary.                            */
Packit 577717
	while ( i > 0 ) {
Packit 577717
		i--;
Packit 577717
		if (ctl->events[i].event_fd>=0) {
Packit 577717
			close( ctl->events[i].event_fd );
Packit 577717
			ctl->events[i].event_opened=0;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	return ret;
Packit 577717
}
Packit 577717
Packit 577717
/* TODO: make code clearer -- vmw */
Packit 577717
static int
Packit 577717
close_event( pe_event_info_t *event )
Packit 577717
{
Packit 577717
	int munmap_error=0,close_error=0;
Packit 577717
Packit 577717
	if ( event->mmap_buf ) {
Packit 577717
		if (event->nr_mmap_pages==0) {
Packit 577717
			PAPIERROR("munmap and num pages is zero");
Packit 577717
		}
Packit 577717
		if ( munmap ( event->mmap_buf,
Packit 577717
				event->nr_mmap_pages * getpagesize() ) ) {
Packit 577717
			PAPIERROR( "munmap of fd = %d returned error: %s",
Packit 577717
							event->event_fd,
Packit 577717
							strerror( errno ) );
Packit 577717
			event->mmap_buf=NULL;
Packit 577717
			munmap_error=1;
Packit 577717
		}
Packit 577717
	}
Packit 577717
	if ( close( event->event_fd ) ) {
Packit 577717
		PAPIERROR( "close of fd = %d returned error: %s",
Packit 577717
			event->event_fd, strerror( errno ) );
Packit 577717
		close_error=1;
Packit 577717
	}
Packit 577717
Packit 577717
	event->event_opened=0;
Packit 577717
Packit 577717
	if ((close_error || munmap_error)) {
Packit 577717
		return PAPI_ESYS;
Packit 577717
	}
Packit 577717
Packit 577717
	return 0;
Packit 577717
}
Packit 577717
Packit 577717
/* Close all of the opened events */
Packit 577717
static int
Packit 577717
close_pe_events( pe_context_t *ctx, pe_control_t *ctl )
Packit 577717
{
Packit 577717
	int i,result;
Packit 577717
	int num_closed=0;
Packit 577717
	int events_not_opened=0;
Packit 577717
Packit 577717
	/* should this be a more serious error? */
Packit 577717
	if ( ctx->state & PERF_EVENTS_RUNNING ) {
Packit 577717
		SUBDBG("Closing without stopping first\n");
Packit 577717
	}
Packit 577717
Packit 577717
	/* Close child events first */
Packit 577717
	/* Is that necessary? -- vmw */
Packit 577717
	for( i=0; i<ctl->num_events; i++ ) {
Packit 577717
		if (ctl->events[i].event_opened) {
Packit 577717
			if (ctl->events[i].group_leader_fd!=-1) {
Packit 577717
				result=close_event(&ctl->events[i]);
Packit 577717
				if (result!=0) return result;
Packit 577717
				else num_closed++;
Packit 577717
			}
Packit 577717
		}
Packit 577717
		else {
Packit 577717
			events_not_opened++;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	/* Close the group leaders last */
Packit 577717
	for( i=0; i<ctl->num_events; i++ ) {
Packit 577717
		if (ctl->events[i].event_opened) {
Packit 577717
			if (ctl->events[i].group_leader_fd==-1) {
Packit 577717
				result=close_event(&ctl->events[i]);
Packit 577717
				if (result!=0) return result;
Packit 577717
				else num_closed++;
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	if (ctl->num_events!=num_closed) {
Packit 577717
		if (ctl->num_events!=(num_closed+events_not_opened)) {
Packit 577717
			PAPIERROR("Didn't close all events: "
Packit 577717
				"Closed %d Not Opened: %d Expected %d",
Packit 577717
				num_closed,events_not_opened,ctl->num_events);
Packit 577717
			return PAPI_EBUG;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	ctl->num_events=0;
Packit 577717
Packit 577717
	ctx->state &= ~PERF_EVENTS_OPENED;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/********************************************************************/
Packit 577717
/********************************************************************/
Packit 577717
/*     Functions that are exported via the component interface      */
Packit 577717
/********************************************************************/
Packit 577717
/********************************************************************/
Packit 577717
Packit 577717
/********************* DOMAIN RELATED *******************************/
Packit 577717
Packit 577717
Packit 577717
/* set the domain. */
Packit 577717
/* perf_events allows per-event control of this, */
Packit 577717
/* papi allows it to be set at the event level or at the event set level. */
Packit 577717
/* this will set the event set level domain values */
Packit 577717
/* but they only get used if no event level domain mask (u= or k=) */
Packit 577717
/* was specified. */
Packit 577717
static int
Packit 577717
_pe_set_domain( hwd_control_state_t *ctl, int domain)
Packit 577717
{
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain);
Packit 577717
	pe_ctl->domain = domain;
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/********************* THREAD RELATED *******************************/
Packit 577717
Packit 577717
Packit 577717
/* Shutdown a thread */
Packit 577717
static int
Packit 577717
_pe_shutdown_thread( hwd_context_t *ctx )
Packit 577717
{
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
Packit 577717
	pe_ctx->initialized=0;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/* Initialize a thread */
Packit 577717
static int
Packit 577717
_pe_init_thread( hwd_context_t *hwd_ctx )
Packit 577717
{
Packit 577717
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;
Packit 577717
Packit 577717
	/* clear the context structure and mark as initialized */
Packit 577717
	memset( pe_ctx, 0, sizeof ( pe_context_t ) );
Packit 577717
	pe_ctx->initialized=1;
Packit 577717
	pe_ctx->event_table=&perf_native_event_table;
Packit 577717
	pe_ctx->cidx=our_cidx;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/**************************** COUNTER RELATED *******************/
Packit 577717
Packit 577717
Packit 577717
/* reset the hardware counters */
Packit 577717
/* Note: PAPI_reset() does not necessarily call this */
Packit 577717
/* unless the events are actually running.           */
Packit 577717
static int
Packit 577717
_pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
Packit 577717
{
Packit 577717
	int i, ret;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	( void ) ctx;			 /*unused */
Packit 577717
Packit 577717
	/* We need to reset all of the events, not just the group leaders */
Packit 577717
	for( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
		ret = ioctl( pe_ctl->events[i].event_fd,
Packit 577717
				PERF_EVENT_IOC_RESET, NULL );
Packit 577717
		if ( ret == -1 ) {
Packit 577717
			PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
Packit 577717
					"returned error, Linux says: %s",
Packit 577717
					pe_ctl->events[i].event_fd,
Packit 577717
					strerror( errno ) );
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* write (set) the hardware counters */
Packit 577717
/* Currently we do not support this.   */
Packit 577717
static int
Packit 577717
_pe_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
Packit 577717
		long long *from )
Packit 577717
{
Packit 577717
	( void ) ctx;			 /*unused */
Packit 577717
	( void ) ctl;			 /*unused */
Packit 577717
	( void ) from;			 /*unused */
Packit 577717
	/*
Packit 577717
	 * Counters cannot be written.  Do we need to virtualize the
Packit 577717
	 * counters so that they can be written, or perhaps modify code so that
Packit 577717
	 * they can be written? FIXME ?
Packit 577717
	 */
Packit 577717
Packit 577717
	return PAPI_ENOSUPP;
Packit 577717
}
Packit 577717
Packit 577717
/*
Packit 577717
 * perf_event provides a complicated read interface.
Packit 577717
 *  the info returned by read() varies depending on whether
Packit 577717
 *  you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
Packit 577717
 *  PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
Packit 577717
 *
Packit 577717
 * To simplify things we just always ask for everything.  This might
Packit 577717
 * lead to overhead when reading more than we need, but it makes the
Packit 577717
 * read code a lot simpler than the original implementation we had here.
Packit 577717
 *
Packit 577717
 * For more info on the layout see include/uapi/linux/perf_event.h
Packit 577717
 *
Packit 577717
 */
Packit 577717
Packit 577717
Packit 577717
/* When we read with rdpmc, we must read each counter individually */
Packit 577717
/* Because of this we don't need separate multiplexing support */
Packit 577717
/* This is all handled by mmap_read_self() */
Packit 577717
static int
Packit 577717
_pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
Packit 577717
		long long **events, int flags )
Packit 577717
{
Packit 577717
	SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
Packit 577717
		ctx, ctl, events, flags);
Packit 577717
Packit 577717
	( void ) flags;			/*unused */
Packit 577717
	( void ) ctx;			/*unused */
Packit 577717
	int i;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
	unsigned long long count, enabled, running, adjusted;
Packit 577717
Packit 577717
	/* we must read each counter individually */
Packit 577717
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
Packit 577717
		count = mmap_read_self(pe_ctl->events[i].mmap_buf,
Packit 577717
						&enabled,&running);
Packit 577717
Packit Service d25979
		/* TODO: error checking? */
Packit 577717
Packit 577717
		/* Handle multiplexing case */
Packit Service d25979
		if (enabled!=running) {
Packit 577717
			adjusted = (enabled * 128LL) / running;
Packit 577717
			adjusted = adjusted * count;
Packit 577717
			adjusted = adjusted / 128LL;
Packit 577717
			count = adjusted;
Packit 577717
		}
Packit 577717
Packit 577717
		pe_ctl->counts[i] = count;
Packit 577717
	}
Packit 577717
	/* point PAPI to the values we read */
Packit 577717
	*events = pe_ctl->counts;
Packit 577717
Packit 577717
	SUBDBG("EXIT: *events: %p\n", *events);
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
static int
Packit 577717
_pe_read_multiplexed( pe_control_t *pe_ctl )
Packit 577717
{
Packit 577717
	int i,ret=-1;
Packit 577717
	long long papi_pe_buffer[READ_BUFFER_SIZE];
Packit 577717
	long long tot_time_running, tot_time_enabled, scale;
Packit 577717
Packit 577717
	/* perf_event does not support FORMAT_GROUP on multiplex */
Packit 577717
	/* so we have to handle separate events when multiplexing */
Packit 577717
Packit 577717
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
Packit 577717
		ret = read( pe_ctl->events[i].event_fd,
Packit 577717
				papi_pe_buffer,
Packit 577717
				sizeof ( papi_pe_buffer ) );
Packit 577717
		if ( ret == -1 ) {
Packit 577717
			PAPIERROR("read returned an error: ",
Packit 577717
					strerror( errno ));
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		/* We should read 3 64-bit values from the counter */
Packit 577717
		if (ret<(signed)(3*sizeof(long long))) {
Packit 577717
			PAPIERROR("Error!  short read");
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
Packit 577717
				pe_ctl->events[i].event_fd,
Packit 577717
				(long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
Packit 577717
		SUBDBG("read: %lld %lld %lld\n",
Packit 577717
				papi_pe_buffer[0],
Packit 577717
				papi_pe_buffer[1],
Packit 577717
				papi_pe_buffer[2]);
Packit 577717
Packit 577717
		tot_time_enabled = papi_pe_buffer[1];
Packit 577717
		tot_time_running = papi_pe_buffer[2];
Packit 577717
Packit 577717
		SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
Packit 577717
				"tot_time_enabled %lld) / "
Packit 577717
				"tot_time_running %lld\n",
Packit 577717
				i, 0,papi_pe_buffer[0],
Packit 577717
				tot_time_enabled,tot_time_running);
Packit 577717
Packit 577717
		if (tot_time_running == tot_time_enabled) {
Packit 577717
			/* No scaling needed */
Packit 577717
			pe_ctl->counts[i] = papi_pe_buffer[0];
Packit 577717
		} else if (tot_time_running && tot_time_enabled) {
Packit 577717
			/* Scale to give better results */
Packit 577717
			/* avoid truncation.            */
Packit 577717
			/* Why use 100?  Would 128 be faster? */
Packit 577717
			scale = (tot_time_enabled * 100LL) / tot_time_running;
Packit 577717
			scale = scale * papi_pe_buffer[0];
Packit 577717
			scale = scale / 100LL;
Packit 577717
			pe_ctl->counts[i] = scale;
Packit 577717
		} else {
Packit 577717
			/* This should not happen, but Phil reports it sometime does. */
Packit 577717
			SUBDBG("perf_event kernel bug(?) count, enabled, "
Packit 577717
				"running: %lld, %lld, %lld\n",
Packit 577717
				papi_pe_buffer[0],tot_time_enabled,
Packit 577717
				tot_time_running);
Packit 577717
Packit 577717
			pe_ctl->counts[i] = papi_pe_buffer[0];
Packit 577717
		}
Packit 577717
	}
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/* For cases where we can't group counters together */
Packit 577717
/* But must read them out individually */
Packit 577717
/* This includes when INHERIT is set, as well as various bugs */
Packit 577717
Packit 577717
static int
Packit 577717
_pe_read_nogroup( pe_control_t *pe_ctl ) {
Packit 577717
Packit 577717
	int i,ret=-1;
Packit 577717
	long long papi_pe_buffer[READ_BUFFER_SIZE];
Packit 577717
Packit 577717
	/* we must read each counter individually */
Packit 577717
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
		ret = read( pe_ctl->events[i].event_fd,
Packit 577717
				papi_pe_buffer,
Packit 577717
				sizeof ( papi_pe_buffer ) );
Packit 577717
		if ( ret == -1 ) {
Packit 577717
			PAPIERROR("read returned an error: ",
Packit 577717
				strerror( errno ));
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		/* we should read one 64-bit value from each counter */
Packit 577717
		if (ret!=sizeof(long long)) {
Packit 577717
			PAPIERROR("Error!  short read");
Packit 577717
			PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d",
Packit 577717
				pe_ctl->events[i].event_fd,
Packit 577717
				(long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
Packit 577717
			pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
Packit 577717
			pe_ctl->events[i].cpu, ret);
Packit 577717
		SUBDBG("read: %lld\n",papi_pe_buffer[0]);
Packit 577717
Packit 577717
		pe_ctl->counts[i] = papi_pe_buffer[0];
Packit 577717
	}
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
_pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
Packit 577717
	       long long **events, int flags )
Packit 577717
{
Packit 577717
	SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
Packit 577717
		ctx, ctl, events, flags);
Packit 577717
Packit 577717
	( void ) flags;			 /*unused */
Packit 577717
	( void ) ctx;			 /*unused */
Packit 577717
	int i, j, ret = -1;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
	long long papi_pe_buffer[READ_BUFFER_SIZE];
Packit 577717
Packit 577717
	/* Handle fast case */
Packit 577717
	if ((_perf_event_vector.cmp_info.fast_counter_read) && (!pe_ctl->inherit)) {
Packit 577717
		return _pe_rdpmc_read( ctx, ctl, events, flags);
Packit 577717
	}
Packit 577717
Packit 577717
	/* Handle case where we are multiplexing */
Packit 577717
	if (pe_ctl->multiplexed) {
Packit 577717
		_pe_read_multiplexed(pe_ctl);
Packit 577717
	}
Packit 577717
Packit 577717
	/* Handle cases where we cannot use FORMAT GROUP */
Packit 577717
	else if (bug_format_group() || pe_ctl->inherit) {
Packit 577717
		_pe_read_nogroup(pe_ctl);
Packit 577717
	}
Packit 577717
Packit 577717
	/* Handle common case where we are using FORMAT_GROUP	*/
Packit 577717
	/* We assume only one group leader, in position 0	*/
Packit 577717
Packit 577717
	/* By reading the leader file descriptor, we get a series */
Packit 577717
	/* of 64-bit values.  The first is the total number of    */
Packit 577717
	/* events, followed by the counts for them.               */
Packit 577717
Packit 577717
	else {
Packit 577717
		if (pe_ctl->events[0].group_leader_fd!=-1) {
Packit 577717
			PAPIERROR("Was expecting group leader");
Packit 577717
		}
Packit 577717
Packit 577717
		ret = read( pe_ctl->events[0].event_fd,
Packit 577717
			papi_pe_buffer,
Packit 577717
			sizeof ( papi_pe_buffer ) );
Packit 577717
Packit 577717
		if ( ret == -1 ) {
Packit 577717
			PAPIERROR("read returned an error: ",
Packit 577717
				strerror( errno ));
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		/* we read 1 64-bit value (number of events) then     */
Packit 577717
		/* num_events more 64-bit values that hold the counts */
Packit 577717
		if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
Packit 577717
			PAPIERROR("Error! short read");
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
Packit 577717
			pe_ctl->events[0].event_fd,
Packit 577717
			(long)pe_ctl->tid, pe_ctl->events[0].cpu, ret);
Packit 577717
Packit 577717
		for(j=0;j
Packit 577717
			SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
Packit 577717
		}
Packit 577717
Packit 577717
		/* Make sure the kernel agrees with how many events we have */
Packit 577717
		if (papi_pe_buffer[0]!=pe_ctl->num_events) {
Packit 577717
			PAPIERROR("Error!  Wrong number of events");
Packit 577717
			return PAPI_ESYS;
Packit 577717
		}
Packit 577717
Packit 577717
		/* put the count values in their proper location */
Packit 577717
		for(i=0;i<pe_ctl->num_events;i++) {
Packit 577717
			pe_ctl->counts[i] = papi_pe_buffer[1+i];
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	/* point PAPI to the values we read */
Packit 577717
	*events = pe_ctl->counts;
Packit 577717
Packit 577717
	SUBDBG("EXIT: *events: %p\n", *events);
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
#if (OBSOLETE_WORKAROUNDS==1)
Packit 577717
/* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */
Packit 577717
/* fields are always 0 unless the counter is disabled.  So if we are on   */
Packit 577717
/* one of these kernels, then we must disable events before reading.      */
Packit 577717
/* Elsewhere though we disable multiplexing on kernels before 2.6.34 */
Packit 577717
/* so maybe this isn't even necessary.                               */
Packit 577717
static int
Packit 577717
_pe_read_bug_sync( hwd_context_t *ctx, hwd_control_state_t *ctl,
Packit 577717
	       long long **events, int flags )
Packit 577717
{
Packit 577717
Packit 577717
	( void ) flags;			 /*unused */
Packit 577717
	int i, ret = -1;
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
	int result;
Packit 577717
Packit 577717
	if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
Packit 577717
		 for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
			/* disable only the group leaders */
Packit 577717
			if ( pe_ctl->events[i].group_leader_fd == -1 ) {
Packit 577717
				ret = ioctl( pe_ctl->events[i].event_fd,
Packit 577717
					PERF_EVENT_IOC_DISABLE, NULL );
Packit 577717
				if ( ret == -1 ) {
Packit 577717
					PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
Packit 577717
						"returned an error: ", strerror( errno ));
Packit 577717
					return PAPI_ESYS;
Packit 577717
				}
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	result=_pe_read( ctx, ctl, events, flags );
Packit 577717
Packit 577717
	/* If we disabled the counters due to the sync_read_bug(), */
Packit 577717
	/* then we need to re-enable them now.                     */
Packit 577717
Packit 577717
	if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
Packit 577717
		for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
			if ( pe_ctl->events[i].group_leader_fd == -1 ) {
Packit 577717
				/* this should refresh any overflow counters too */
Packit 577717
				ret = ioctl( pe_ctl->events[i].event_fd,
Packit 577717
					PERF_EVENT_IOC_ENABLE, NULL );
Packit 577717
				if ( ret == -1 ) {
Packit 577717
					/* Should never happen */
Packit 577717
					PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: ",
Packit 577717
						strerror( errno ));
Packit 577717
					return PAPI_ESYS;
Packit 577717
				}
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	return result;
Packit 577717
}
Packit 577717
Packit 577717
#endif
Packit 577717
Packit 577717
/* Start counting events */
Packit 577717
static int
Packit 577717
_pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
Packit 577717
{
Packit 577717
	int ret;
Packit 577717
	int i;
Packit 577717
	int did_something = 0;
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	/* Reset the counters first.  Is this necessary? */
Packit 577717
	ret = _pe_reset( pe_ctx, pe_ctl );
Packit 577717
	if ( ret ) {
Packit 577717
		return ret;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Enable all of the group leaders                */
Packit 577717
	/* All group leaders have a group_leader_fd of -1 */
Packit 577717
	for( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
		if (pe_ctl->events[i].group_leader_fd == -1) {
Packit 577717
			SUBDBG("ioctl(enable): fd: %d\n",
Packit 577717
				pe_ctl->events[i].event_fd);
Packit 577717
			ret=ioctl( pe_ctl->events[i].event_fd,
Packit 577717
				PERF_EVENT_IOC_ENABLE, NULL) ;
Packit 577717
Packit 577717
			/* ioctls always return -1 on failure */
Packit 577717
			if (ret == -1) {
Packit 577717
				PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
Packit 577717
				return PAPI_ESYS;
Packit 577717
			}
Packit 577717
Packit 577717
			did_something++;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	if (!did_something) {
Packit 577717
		PAPIERROR("Did not enable any counters");
Packit 577717
		return PAPI_EBUG;
Packit 577717
	}
Packit 577717
Packit 577717
	pe_ctx->state |= PERF_EVENTS_RUNNING;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
/* Stop all of the counters */
Packit 577717
static int
Packit 577717
_pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
Packit 577717
{
Packit 577717
	SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl);
Packit 577717
Packit 577717
	int ret;
Packit 577717
	int i;
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	/* Just disable the group leaders */
Packit 577717
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
Packit 577717
		if ( pe_ctl->events[i].group_leader_fd == -1 ) {
Packit 577717
			ret=ioctl( pe_ctl->events[i].event_fd,
Packit 577717
				PERF_EVENT_IOC_DISABLE, NULL);
Packit 577717
			if ( ret == -1 ) {
Packit 577717
				PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
Packit 577717
					"returned error, Linux says: %s",
Packit 577717
					pe_ctl->events[i].event_fd, strerror( errno ) );
Packit 577717
				return PAPI_EBUG;
Packit 577717
			}
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	pe_ctx->state &= ~PERF_EVENTS_RUNNING;
Packit 577717
Packit 577717
	SUBDBG( "EXIT:\n");
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/*********************** CONTROL STATE RELATED *******************/
Packit 577717
Packit 577717
Packit 577717
/* This function clears the current contents of the control structure and
Packit 577717
   updates it with whatever resources are allocated for all the native events
Packit 577717
   in the native info structure array. */
Packit 577717
Packit 577717
static int
Packit 577717
_pe_update_control_state( hwd_control_state_t *ctl,
Packit 577717
			       NativeInfo_t *native,
Packit 577717
			       int count, hwd_context_t *ctx )
Packit 577717
{
Packit 577717
	SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n",
Packit 577717
		ctl, native, count, ctx);
Packit 577717
	int i;
Packit 577717
	int j;
Packit 577717
	int ret;
Packit 577717
	int skipped_events=0;
Packit 577717
	struct native_event_t *ntv_evt;
Packit 577717
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	/* close all of the existing fds and start over again */
Packit 577717
	/* In theory we could have finer-grained control and know if             */
Packit 577717
	/* things were changed, but it's easier to tear things down and rebuild. */
Packit 577717
	close_pe_events( pe_ctx, pe_ctl );
Packit 577717
Packit 577717
	/* Calling with count==0 should be OK, it's how things are deallocated */
Packit 577717
	/* when an eventset is destroyed.                                      */
Packit 577717
	if ( count == 0 ) {
Packit 577717
		SUBDBG( "EXIT: Called with count == 0\n" );
Packit 577717
		return PAPI_OK;
Packit 577717
	}
Packit 577717
Packit 577717
	/* set up all the events */
Packit 577717
	for( i = 0; i < count; i++ ) {
Packit 577717
		if ( native ) {
Packit 577717
			/* get the native event pointer used for this papi event */
Packit 577717
			int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code));
Packit 577717
			if (ntv_idx < -1) {
Packit 577717
				SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code);
Packit 577717
				continue;
Packit 577717
			}
Packit 577717
			/* if native index is -1, then we have an event without a mask and need to find the right native index to use */
Packit 577717
			if (ntv_idx == -1) {
Packit 577717
				/* find the native event index we want by matching for the right papi event code */
Packit 577717
				for (j=0 ; j<pe_ctx->event_table->num_native_events ; j++) {
Packit 577717
					if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) {
Packit 577717
						ntv_idx = j;
Packit 577717
					}
Packit 577717
				}
Packit 577717
			}
Packit 577717
Packit 577717
			/* if native index is still negative, we did not find event we wanted so just return error */
Packit 577717
			if (ntv_idx < 0) {
Packit 577717
				SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code);
Packit 577717
				continue;
Packit 577717
			}
Packit 577717
Packit 577717
			/* this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use */
Packit 577717
			ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx]));
Packit 577717
			SUBDBG("ntv_evt: %p\n", ntv_evt);
Packit 577717
Packit 577717
			SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events);
Packit 577717
Packit 577717
			/* Move this events hardware config values and other attributes to the perf_events attribute structure */
Packit 577717
			memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t));
Packit 577717
Packit 577717
			/* may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain) */
Packit 577717
			/* only done if the event mask which controls each counting domain was not provided */
Packit 577717
Packit 577717
			/* get pointer to allocated name, will be NULL when adding preset events to event set */
Packit 577717
			char *aName = ntv_evt->allocated_name;
Packit 577717
			if ((aName == NULL)  ||  (strstr(aName, ":u=") == NULL)) {
Packit 577717
				SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER));
Packit 577717
				pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER);
Packit 577717
			}
Packit 577717
			if ((aName == NULL)  ||  (strstr(aName, ":k=") == NULL)) {
Packit 577717
				SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL));
Packit 577717
				pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL);
Packit 577717
			}
Packit 577717
Packit 577717
			// libpfm4 supports mh (monitor host) and mg (monitor guest) event masks
Packit 577717
			// perf_events supports exclude_hv and exclude_idle attributes
Packit 577717
			// PAPI_set_domain supports PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes
Packit 577717
			// not sure how these perf_event attributes, and PAPI domain attributes relate to each other
Packit 577717
			// if that can be figured out then there should probably be code here to set some perf_events attributes based on what was set in a PAPI_set_domain call
Packit 577717
			// the code sample below is one possibility
Packit 577717
//			if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) {
Packit 577717
//				SUBDBG("set exclude_hv attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_hv, !(pe_ctl->domain & PAPI_DOM_SUPERVISOR));
Packit 577717
//				pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
Packit 577717
//			}
Packit 577717
Packit 577717
Packit 577717
			// set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
Packit 577717
			pe_ctl->events[i].cpu = ntv_evt->cpu;
Packit 577717
			// if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called)
Packit 577717
			if (pe_ctl->events[i].cpu == -1) {
Packit 577717
				pe_ctl->events[i].cpu = pe_ctl->cpu;
Packit 577717
			}
Packit 577717
      } else {
Packit 577717
    	  /* This case happens when called from _pe_set_overflow and _pe_ctl */
Packit 577717
          /* Those callers put things directly into the pe_ctl structure so it is already set for the open call */
Packit 577717
      }
Packit 577717
Packit 577717
      /* Copy the inherit flag into the attribute block that will be passed to the kernel */
Packit 577717
      pe_ctl->events[i].attr.inherit = pe_ctl->inherit;
Packit 577717
Packit 577717
      /* Set the position in the native structure */
Packit 577717
      /* We just set up events linearly           */
Packit 577717
      if ( native ) {
Packit 577717
    	  native[i].ni_position = i;
Packit 577717
    	  SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n",
Packit 577717
			i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners);
Packit 577717
      }
Packit 577717
   }
Packit 577717
Packit 577717
	if (count <= skipped_events) {
Packit 577717
		SUBDBG("EXIT: No events to count, they all contained invalid umasks\n");
Packit 577717
		return PAPI_ENOEVNT;
Packit 577717
	}
Packit 577717
Packit 577717
	pe_ctl->num_events = count - skipped_events;
Packit 577717
Packit 577717
	/* actually open the events */
Packit 577717
	ret = open_pe_events( pe_ctx, pe_ctl );
Packit 577717
	if ( ret != PAPI_OK ) {
Packit 577717
		SUBDBG("EXIT: open_pe_events returned: %d\n", ret);
Packit 577717
      		/* Restore values ? */
Packit 577717
		return ret;
Packit 577717
	}
Packit 577717
Packit 577717
	SUBDBG( "EXIT: PAPI_OK\n" );
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/* Set various options on a control state */
Packit 577717
static int
Packit 577717
_pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
Packit 577717
{
Packit 577717
   int ret;
Packit 577717
   pe_context_t *pe_ctx = ( pe_context_t *) ctx;
Packit 577717
   pe_control_t *pe_ctl = NULL;
Packit 577717
Packit 577717
   switch ( code ) {
Packit 577717
      case PAPI_MULTIPLEX:
Packit 577717
	   pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
Packit 577717
	   ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
Packit 577717
				    pe_ctl->granularity,
Packit 577717
				    1, pe_ctl->inherit );
Packit 577717
           if (ret != PAPI_OK) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
Packit 577717
	   /* looks like we are allowed, so set multiplexed attribute */
Packit 577717
	   pe_ctl->multiplexed = 1;
Packit 577717
	   ret = _pe_update_control_state( pe_ctl, NULL,
Packit 577717
						pe_ctl->num_events, pe_ctx );
Packit 577717
	   if (ret != PAPI_OK) {
Packit 577717
	      pe_ctl->multiplexed = 0;
Packit 577717
	   }
Packit 577717
	   return ret;
Packit 577717
Packit 577717
      case PAPI_ATTACH:
Packit 577717
	   pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
Packit 577717
	   ret = check_permissions( option->attach.tid, pe_ctl->cpu,
Packit 577717
				  pe_ctl->domain, pe_ctl->granularity,
Packit 577717
				  pe_ctl->multiplexed,
Packit 577717
				    pe_ctl->inherit );
Packit 577717
	   if (ret != PAPI_OK) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
Packit 577717
	   pe_ctl->tid = option->attach.tid;
Packit 577717
Packit 577717
	   /* If events have been already been added, something may */
Packit 577717
	   /* have been done to the kernel, so update */
Packit 577717
	   ret =_pe_update_control_state( pe_ctl, NULL,
Packit 577717
						pe_ctl->num_events, pe_ctx);
Packit 577717
Packit 577717
	   return ret;
Packit 577717
Packit 577717
      case PAPI_DETACH:
Packit 577717
	   pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
Packit 577717
Packit 577717
	   pe_ctl->tid = 0;
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_CPU_ATTACH:
Packit 577717
	   pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
Packit 577717
	   ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num,
Packit 577717
				    pe_ctl->domain, pe_ctl->granularity,
Packit 577717
				    pe_ctl->multiplexed,
Packit 577717
				    pe_ctl->inherit );
Packit 577717
           if (ret != PAPI_OK) {
Packit 577717
	       return ret;
Packit 577717
	   }
Packit 577717
	   /* looks like we are allowed so set cpu number */
Packit 577717
Packit 577717
	   /* this tells the kernel not to count for a thread   */
Packit 577717
	   /* should we warn if we try to set both?  perf_event */
Packit 577717
	   /* will reject it.                                   */
Packit 577717
	   pe_ctl->tid = -1;
Packit 577717
Packit 577717
	   pe_ctl->cpu = option->cpu.cpu_num;
Packit 577717
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_DOMAIN:
Packit 577717
	   pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
Packit 577717
	   ret = check_permissions( pe_ctl->tid, pe_ctl->cpu,
Packit 577717
				    option->domain.domain,
Packit 577717
				    pe_ctl->granularity,
Packit 577717
				    pe_ctl->multiplexed,
Packit 577717
				    pe_ctl->inherit );
Packit 577717
           if (ret != PAPI_OK) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
	   /* looks like we are allowed, so set event set level counting domains */
Packit 577717
       pe_ctl->domain = option->domain.domain;
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_GRANUL:
Packit 577717
	   pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );
Packit 577717
Packit 577717
	   /* FIXME: we really don't support this yet */
Packit 577717
Packit 577717
           switch ( option->granularity.granularity  ) {
Packit 577717
              case PAPI_GRN_PROCG:
Packit 577717
              case PAPI_GRN_SYS_CPU:
Packit 577717
              case PAPI_GRN_PROC:
Packit 577717
		   return PAPI_ECMP;
Packit 577717
Packit 577717
	      /* Currently we only support thread and CPU granularity */
Packit 577717
              case PAPI_GRN_SYS:
Packit 577717
	 	   pe_ctl->granularity=PAPI_GRN_SYS;
Packit 577717
		   pe_ctl->cpu=_papi_getcpu();
Packit 577717
		   break;
Packit 577717
Packit 577717
              case PAPI_GRN_THR:
Packit 577717
	 	   pe_ctl->granularity=PAPI_GRN_THR;
Packit 577717
		   break;
Packit 577717
Packit 577717
Packit 577717
              default:
Packit 577717
		   return PAPI_EINVAL;
Packit 577717
	   }
Packit 577717
           return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_INHERIT:
Packit 577717
	   pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
Packit 577717
	   ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
Packit 577717
				  pe_ctl->granularity, pe_ctl->multiplexed,
Packit 577717
				    option->inherit.inherit );
Packit 577717
           if (ret != PAPI_OK) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
	   /* looks like we are allowed, so set the requested inheritance */
Packit 577717
	   if (option->inherit.inherit) {
Packit 577717
	      /* children will inherit counters */
Packit 577717
	      pe_ctl->inherit = 1;
Packit 577717
	   } else {
Packit 577717
	      /* children won't inherit counters */
Packit 577717
	      pe_ctl->inherit = 0;
Packit 577717
	   }
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_DATA_ADDRESS:
Packit 577717
	   return PAPI_ENOSUPP;
Packit 577717
#if 0
Packit 577717
	   pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
Packit 577717
	   ret = set_default_domain( pe_ctl, option->address_range.domain );
Packit 577717
	   if ( ret != PAPI_OK ) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
	   set_drange( pe_ctx, pe_ctl, option );
Packit 577717
	   return PAPI_OK;
Packit 577717
#endif
Packit 577717
      case PAPI_INSTR_ADDRESS:
Packit 577717
	   return PAPI_ENOSUPP;
Packit 577717
#if 0
Packit 577717
	   pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
Packit 577717
	   ret = set_default_domain( pe_ctl, option->address_range.domain );
Packit 577717
	   if ( ret != PAPI_OK ) {
Packit 577717
	      return ret;
Packit 577717
	   }
Packit 577717
	   set_irange( pe_ctx, pe_ctl, option );
Packit 577717
	   return PAPI_OK;
Packit 577717
#endif
Packit 577717
Packit 577717
      case PAPI_DEF_ITIMER:
Packit 577717
	   /* What should we be checking for here?                   */
Packit 577717
	   /* This seems like it should be OS-specific not component */
Packit 577717
	   /* specific.                                              */
Packit 577717
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      case PAPI_DEF_MPX_NS:
Packit 577717
	   /* Defining a given ns per set is not current supported */
Packit 577717
	   return PAPI_ENOSUPP;
Packit 577717
Packit 577717
      case PAPI_DEF_ITIMER_NS:
Packit 577717
	   /* We don't support this... */
Packit 577717
	   return PAPI_OK;
Packit 577717
Packit 577717
      default:
Packit 577717
	   return PAPI_ENOSUPP;
Packit 577717
   }
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* Initialize a new control state */
Packit 577717
static int
Packit 577717
_pe_init_control_state( hwd_control_state_t *ctl )
Packit 577717
{
Packit 577717
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
Packit 577717
Packit 577717
	/* clear the contents */
Packit 577717
	memset( pe_ctl, 0, sizeof ( pe_control_t ) );
Packit 577717
Packit 577717
	/* Set the domain */
Packit 577717
	_pe_set_domain( ctl, _perf_event_vector.cmp_info.default_domain );
Packit 577717
Packit 577717
	/* default granularity */
Packit 577717
	pe_ctl->granularity= _perf_event_vector.cmp_info.default_granularity;
Packit 577717
Packit 577717
	/* overflow signal */
Packit 577717
	pe_ctl->overflow_signal=_perf_event_vector.cmp_info.hardware_intr_sig;
Packit 577717
Packit 577717
	pe_ctl->cidx=our_cidx;
Packit 577717
Packit 577717
	/* Set cpu number in the control block to show events */
Packit 577717
	/* are not tied to specific cpu                       */
Packit 577717
	pe_ctl->cpu = -1;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/****************** EVENT NAME HANDLING CODE *****************/
Packit 577717
Packit 577717
static int
Packit 577717
_pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
Packit 577717
{
Packit 577717
	return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier, our_cidx,
Packit 577717
			&perf_native_event_table);
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
_pe_ntv_name_to_code( const char *name, unsigned int *event_code)
Packit 577717
{
Packit 577717
	return _pe_libpfm4_ntv_name_to_code(name,event_code, our_cidx,
Packit 577717
			&perf_native_event_table);
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
_pe_ntv_code_to_name(unsigned int EventCode,
Packit 577717
			char *ntv_name, int len)
Packit 577717
{
Packit 577717
	return _pe_libpfm4_ntv_code_to_name(EventCode,
Packit 577717
					ntv_name, len,
Packit 577717
					&perf_native_event_table);
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
_pe_ntv_code_to_descr( unsigned int EventCode,
Packit 577717
			char *ntv_descr, int len)
Packit 577717
{
Packit 577717
Packit 577717
	return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
Packit 577717
					&perf_native_event_table);
Packit 577717
}
Packit 577717
Packit 577717
static int
Packit 577717
_pe_ntv_code_to_info(unsigned int EventCode,
Packit 577717
			PAPI_event_info_t *info) {
Packit 577717
Packit 577717
	return _pe_libpfm4_ntv_code_to_info(EventCode, info,
Packit 577717
					&perf_native_event_table);
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/*********************** SAMPLING / PROFILING *******************/
Packit 577717
Packit 577717
Packit 577717
/* Find a native event specified by a profile index */
Packit 577717
static int
Packit 577717
find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags,
Packit 577717
		unsigned int *native_index, int *profile_index )
Packit 577717
{
Packit 577717
	int pos, esi_index, count;
Packit 577717
Packit 577717
	for ( count = 0; count < ESI->profile.event_counter; count++ ) {
Packit 577717
		esi_index = ESI->profile.EventIndex[count];
Packit 577717
		pos = ESI->EventInfoArray[esi_index].pos[0];
Packit 577717
Packit 577717
		if ( pos == evt_idx ) {
Packit 577717
			*profile_index = count;
Packit 577717
			*native_index = ESI->NativeInfoArray[pos].ni_event &
Packit 577717
					PAPI_NATIVE_AND_MASK;
Packit 577717
			*flags = ESI->profile.flags;
Packit 577717
			SUBDBG( "Native event %d is at profile index %d, flags %d\n",
Packit 577717
				*native_index, *profile_index, *flags );
Packit 577717
			return PAPI_OK;
Packit 577717
		}
Packit 577717
	}
Packit 577717
	PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d",
Packit 577717
			count, ESI->profile.event_counter );
Packit 577717
	return PAPI_EBUG;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* What exactly does this do? */
Packit 577717
static int
Packit 577717
process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx )
Packit 577717
{
Packit 577717
	int ret, flags, profile_index;
Packit 577717
	unsigned native_index;
Packit 577717
	pe_control_t *ctl;
Packit 577717
Packit 577717
	ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx,
Packit 577717
			&flags, &native_index, &profile_index );
Packit 577717
	if ( ret != PAPI_OK ) {
Packit 577717
		return ret;
Packit 577717
	}
Packit 577717
Packit 577717
	ctl= (*thr)->running_eventset[cidx]->ctl_state;
Packit 577717
Packit 577717
	mmap_read( cidx, thr, &(ctl->events[evt_idx]), profile_index );
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
/*
Packit 577717
 * This function is used when hardware overflows are working or when
Packit 577717
 * software overflows are forced
Packit 577717
 */
Packit 577717
Packit 577717
static void
Packit 577717
_pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
Packit 577717
{
Packit 577717
	( void ) n;                           /*unused */
Packit 577717
	_papi_hwi_context_t hw_context;
Packit 577717
	int found_evt_idx = -1, fd = info->si_fd;
Packit 577717
	caddr_t address;
Packit 577717
	ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
Packit 577717
	int i;
Packit 577717
	pe_control_t *ctl;
Packit 577717
	int cidx = _perf_event_vector.cmp_info.CmpIdx;
Packit 577717
Packit 577717
	if ( thread == NULL ) {
Packit 577717
		PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
Packit 577717
		return;
Packit 577717
	}
Packit 577717
Packit 577717
	if ( thread->running_eventset[cidx] == NULL ) {
Packit 577717
		PAPIERROR( "thread->running_eventset == NULL in "
Packit 577717
				"_papi_pe_dispatch_timer for fd %d!",fd );
Packit 577717
		return;
Packit 577717
	}
Packit 577717
Packit 577717
	if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
Packit 577717
		PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
Packit 577717
			"_papi_pe_dispatch_timer for fd %d!", fd );
Packit 577717
		return;
Packit 577717
	}
Packit 577717
Packit 577717
	hw_context.si = info;
Packit 577717
	hw_context.ucontext = ( hwd_ucontext_t * ) uc;
Packit 577717
Packit 577717
	if ( thread->running_eventset[cidx]->overflow.flags &
Packit 577717
			PAPI_OVERFLOW_FORCE_SW ) {
Packit 577717
		address = GET_OVERFLOW_ADDRESS( hw_context );
Packit 577717
		_papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
Packit 577717
					address, NULL, 0,
Packit 577717
					0, &thread, cidx );
Packit 577717
		return;
Packit 577717
	}
Packit 577717
Packit 577717
	if ( thread->running_eventset[cidx]->overflow.flags !=
Packit 577717
		PAPI_OVERFLOW_HARDWARE ) {
Packit 577717
			PAPIERROR( "thread->running_eventset->overflow.flags "
Packit 577717
				"is set to something other than "
Packit 577717
				"PAPI_OVERFLOW_HARDWARE or "
Packit 577717
				"PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)",
Packit 577717
				fd,
Packit 577717
				thread->running_eventset[cidx]->overflow.flags);
Packit 577717
	}
Packit 577717
Packit 577717
	/* convoluted way to get ctl */
Packit 577717
	ctl= thread->running_eventset[cidx]->ctl_state;
Packit 577717
Packit 577717
	/* See if the fd is one that's part of the this thread's context */
Packit 577717
	for( i=0; i < ctl->num_events; i++ ) {
Packit 577717
		if ( fd == ctl->events[i].event_fd ) {
Packit 577717
			found_evt_idx = i;
Packit 577717
			break;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	if ( found_evt_idx == -1 ) {
Packit 577717
		PAPIERROR( "Unable to find fd %d among the open event fds "
Packit 577717
				"_papi_hwi_dispatch_timer!", fd );
Packit 577717
		return;
Packit 577717
	}
Packit 577717
Packit 577717
	if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) {
Packit 577717
		PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed");
Packit 577717
	}
Packit 577717
Packit 577717
	if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) &&
Packit 577717
		!( thread->running_eventset[cidx]->profile.flags &
Packit 577717
		PAPI_PROFIL_FORCE_SW ) ) {
Packit 577717
		process_smpl_buf( found_evt_idx, &thread, cidx );
Packit 577717
	}
Packit 577717
	else {
Packit 577717
		uint64_t ip;
Packit 577717
		unsigned int head;
Packit 577717
		pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
Packit 577717
		unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize(  );
Packit 577717
Packit 577717
	/*
Packit 577717
	* Read up the most recent IP from the sample in the mmap buffer.  To
Packit 577717
	* do this, we make the assumption that all of the records in the
Packit 577717
	* mmap buffer are the same size, and that they all contain the IP as
Packit 577717
	* their only record element.  This means that we can use the
Packit 577717
	* data_head element from the user page and move backward one record
Packit 577717
	* from that point and read the data.  Since we don't actually need
Packit 577717
	* to access the header of the record, we can just subtract 8 (size
Packit 577717
	* of the IP) from data_head and read up that word from the mmap
Packit 577717
	* buffer.  After we subtract 8, we account for mmap buffer wrapping
Packit 577717
	* by AND'ing this offset with the buffer mask.
Packit 577717
	*/
Packit 577717
		head = mmap_read_head( pe );
Packit 577717
Packit 577717
		if ( head == 0 ) {
Packit 577717
			PAPIERROR( "Attempting to access memory "
Packit 577717
				"which may be inaccessable" );
Packit 577717
			return;
Packit 577717
		}
Packit 577717
		ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
Packit 577717
	/*
Packit 577717
	* Update the tail to the current head pointer.
Packit 577717
	*
Packit 577717
	* Note: that if we were to read the record at the tail pointer,
Packit 577717
	* rather than the one at the head (as you might otherwise think
Packit 577717
	* would be natural), we could run into problems.  Signals don't
Packit 577717
	* stack well on Linux, particularly if not using RT signals, and if
Packit 577717
	* they come in rapidly enough, we can lose some.  Overtime, the head
Packit 577717
	* could catch up to the tail and monitoring would be stopped, and
Packit 577717
	* since no more signals are coming in, this problem will never be
Packit 577717
	* resolved, resulting in a complete loss of overflow notification
Packit 577717
	* from that point on.  So the solution we use here will result in
Packit 577717
	* only the most recent IP value being read every time there are two
Packit 577717
	* or more samples in the buffer (for that one overflow signal).  But
Packit 577717
	* the handler will always bring up the tail, so the head should
Packit 577717
	* never run into the tail.
Packit 577717
	*/
Packit 577717
		mmap_write_tail( pe, head );
Packit 577717
Packit 577717
	/*
Packit 577717
	* The fourth parameter is supposed to be a vector of bits indicating
Packit 577717
	* the overflowed hardware counters, but it's not really clear that
Packit 577717
	* it's useful, because the actual hardware counters used are not
Packit 577717
	* exposed to the PAPI user.  For now, I'm just going to set the bit
Packit 577717
	* that indicates which event register in the array overflowed.  The
Packit 577717
	* result is that the overflow vector will not be identical to the
Packit 577717
	* perfmon implementation, and part of that is due to the fact that
Packit 577717
	* which hardware register is actually being used is opaque at the
Packit 577717
	* user level (the kernel event dispatcher hides that info).
Packit 577717
	*/
Packit 577717
Packit 577717
		_papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
Packit 577717
					( caddr_t ) ( unsigned long ) ip,
Packit 577717
					NULL, ( 1 << found_evt_idx ), 0,
Packit 577717
					&thread, cidx );
Packit 577717
Packit 577717
	}
Packit 577717
Packit 577717
	/* Restart the counters */
Packit 577717
	if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) {
Packit 577717
		PAPIERROR( "overflow refresh failed", 0 );
Packit 577717
	}
Packit 577717
}
Packit 577717
Packit 577717
/* Stop profiling */
Packit 577717
/* FIXME: does this actually stop anything? */
Packit 577717
/* It looks like it is only actually called from PAPI_stop() */
Packit 577717
/* So the event will be destroyed soon after anyway. */
Packit 577717
static int
Packit 577717
_pe_stop_profiling( ThreadInfo_t *thread, EventSetInfo_t *ESI )
Packit 577717
{
Packit 577717
	int i, ret = PAPI_OK;
Packit 577717
	pe_control_t *ctl;
Packit 577717
	int cidx;
Packit 577717
Packit 577717
	ctl=ESI->ctl_state;
Packit 577717
Packit 577717
	cidx=ctl->cidx;
Packit 577717
Packit 577717
	/* Loop through all of the events and process those which have mmap */
Packit 577717
	/* buffers attached.                                                */
Packit 577717
	for ( i = 0; i < ctl->num_events; i++ ) {
Packit 577717
		/* Use the mmap_buf field as an indicator */
Packit 577717
		/* of this fd being used for profiling.   */
Packit 577717
		if ( ctl->events[i].profiling ) {
Packit 577717
			/* Process any remaining samples in the sample buffer */
Packit 577717
			ret = process_smpl_buf( i, &thread, cidx );
Packit 577717
			if ( ret ) {
Packit 577717
				PAPIERROR( "process_smpl_buf returned error %d", ret );
Packit 577717
				return ret;
Packit 577717
			}
Packit 577717
			ctl->events[i].profiling=0;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	return ret;
Packit 577717
}
Packit 577717
Packit 577717
/* Set up an event to cause overflow */
Packit 577717
/* If threshold==0 then disable overflow for that event */
Packit 577717
static int
Packit 577717
_pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
Packit 577717
{
Packit 577717
	SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n",
Packit 577717
		ESI, EventIndex, threshold);
Packit 577717
Packit 577717
	pe_context_t *ctx;
Packit 577717
	pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
Packit 577717
	int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;
Packit 577717
	int cidx;
Packit 577717
Packit 577717
	cidx = ctl->cidx;
Packit 577717
	ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
Packit 577717
Packit 577717
	/* pos[0] is the first native event */
Packit 577717
	/* derived events might be made up of multiple native events */
Packit 577717
	evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
Packit 577717
Packit 577717
	SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
Packit 577717
		evt_idx,EventIndex,ESI->EventSetIndex);
Packit 577717
Packit 577717
	if (evt_idx<0) {
Packit 577717
		SUBDBG("EXIT: evt_idx: %d\n", evt_idx);
Packit 577717
		return PAPI_EINVAL;
Packit 577717
	}
Packit 577717
Packit 577717
	/* It's an error to disable overflow if it wasn't set in the	*/
Packit 577717
	/* first place.							*/
Packit 577717
	if (( threshold == 0 ) &&
Packit 577717
		( ctl->events[evt_idx].attr.sample_period == 0 ) ) {
Packit 577717
			SUBDBG("EXIT: PAPI_EINVAL, Tried to clear "
Packit 577717
				"sample threshold when it was not set\n");
Packit 577717
			return PAPI_EINVAL;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Set the sample period to threshold */
Packit 577717
	ctl->events[evt_idx].attr.sample_period = threshold;
Packit 577717
Packit 577717
	if (threshold == 0) {
Packit 577717
		ctl->events[evt_idx].sampling = 0;
Packit 577717
	}
Packit 577717
	else {
Packit 577717
		ctl->events[evt_idx].sampling = 1;
Packit 577717
Packit 577717
		/* Setting wakeup_events to one means issue a wakeup on every */
Packit 577717
		/* counter overflow (not mmap page overflow).                 */
Packit 577717
		ctl->events[evt_idx].attr.wakeup_events = 1;
Packit 577717
		/* We need the IP to pass to the overflow handler */
Packit 577717
		ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
Packit 577717
	}
Packit 577717
Packit 577717
Packit 577717
	/* Check to see if any events in the EventSet are setup to sample */
Packit 577717
	/* Do we actually handle multiple overflow events at once? --vmw  */
Packit 577717
	for ( i = 0; i < ctl->num_events; i++ ) {
Packit 577717
		if ( ctl->events[i].attr.sample_period ) {
Packit 577717
			found_non_zero_sample_period = 1;
Packit 577717
			break;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	if ( found_non_zero_sample_period ) {
Packit 577717
		/* turn on internal overflow flag for this event set */
Packit 577717
		ctl->overflow = 1;
Packit 577717
Packit 577717
		/* Enable the signal handler */
Packit 577717
		retval = _papi_hwi_start_signal(
Packit 577717
				    ctl->overflow_signal,
Packit 577717
				    1, ctl->cidx );
Packit 577717
		if (retval != PAPI_OK) {
Packit 577717
			SUBDBG("Call to _papi_hwi_start_signal "
Packit 577717
				"returned: %d\n", retval);
Packit 577717
		}
Packit 577717
	} else {
Packit 577717
Packit 577717
		/* turn off internal overflow flag for this event set */
Packit 577717
		ctl->overflow = 0;
Packit 577717
Packit 577717
		/* Remove the signal handler, if there are no remaining */
Packit 577717
		/* non-zero sample_periods set                          */
Packit 577717
		retval = _papi_hwi_stop_signal(ctl->overflow_signal);
Packit 577717
		if ( retval != PAPI_OK ) {
Packit 577717
			SUBDBG("Call to _papi_hwi_stop_signal "
Packit 577717
				"returned: %d\n", retval);
Packit 577717
			return retval;
Packit 577717
		}
Packit 577717
	}
Packit 577717
Packit 577717
	retval = _pe_update_control_state( ctl, NULL,
Packit 577717
				((pe_control_t *)(ESI->ctl_state) )->num_events,
Packit 577717
				ctx );
Packit 577717
Packit 577717
	SUBDBG("EXIT: return: %d\n", retval);
Packit 577717
Packit 577717
	return retval;
Packit 577717
}
Packit 577717
Packit 577717
/* Enable/disable profiling */
Packit 577717
/* If threshold is zero, we disable */
Packit 577717
static int
Packit 577717
_pe_set_profile( EventSetInfo_t *ESI, int EventIndex, int threshold )
Packit 577717
{
Packit 577717
	int ret;
Packit 577717
	int evt_idx;
Packit 577717
	pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );
Packit 577717
Packit 577717
	/* Since you can't profile on a derived event,	*/
Packit 577717
	/* the event is always the first and only event	*/
Packit 577717
	/* in the native event list.			*/
Packit 577717
	evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
Packit 577717
Packit 577717
	/* If threshold is zero we want to *disable*    */
Packit 577717
	/* profiling on the event                       */
Packit 577717
	if ( threshold == 0 ) {
Packit 577717
//		SUBDBG( "MUNMAP(%p,%"PRIu64")\n",
Packit 577717
//			ctl->events[evt_idx].mmap_buf,
Packit 577717
//			( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
Packit 577717
//			getpagesize() );
Packit 577717
Packit 577717
//		if ( ctl->events[evt_idx].mmap_buf ) {
Packit 577717
//			munmap( ctl->events[evt_idx].mmap_buf,
Packit 577717
//				ctl->events[evt_idx].nr_mmap_pages *
Packit 577717
//				getpagesize() );
Packit 577717
//		}
Packit 577717
//		ctl->events[evt_idx].mmap_buf = NULL;
Packit 577717
//		ctl->events[evt_idx].nr_mmap_pages = 0;
Packit 577717
Packit 577717
		/* no longer sample on IP */
Packit 577717
		ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
Packit 577717
Packit 577717
		/* Clear any residual overflow flags */
Packit 577717
		/* ??? old warning says "This should be handled somewhere else" */
Packit 577717
		ESI->state &= ~( PAPI_OVERFLOWING );
Packit 577717
		ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );
Packit 577717
Packit 577717
		ctl->events[evt_idx].profiling=0;
Packit 577717
Packit 577717
	} else {
Packit 577717
Packit 577717
		/* Otherwise, we are *enabling* profiling */
Packit 577717
Packit 577717
		/* Look up the native event code */
Packit 577717
Packit 577717
		if ( ESI->profile.flags & (PAPI_PROFIL_DATA_EAR |
Packit 577717
						PAPI_PROFIL_INST_EAR)) {
Packit 577717
			/* Not supported yet... */
Packit 577717
			return PAPI_ENOSUPP;
Packit 577717
		}
Packit 577717
Packit 577717
		if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
Packit 577717
			/* This requires an ability to randomly alter the    */
Packit 577717
			/* sample_period within a given range.		     */
Packit 577717
			/* Linux currently does not have this ability. FIXME */
Packit 577717
			return PAPI_ENOSUPP;
Packit 577717
		}
Packit 577717
		ctl->events[evt_idx].profiling=1;
Packit 577717
	}
Packit 577717
Packit 577717
	ret = _pe_set_overflow( ESI, EventIndex, threshold );
Packit 577717
	if ( ret != PAPI_OK ) return ret;
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/************ INITIALIZATION / SHUTDOWN CODE *********************/
Packit 577717
Packit 577717
Packit 577717
/* Shutdown the perf_event component */
Packit 577717
static int
Packit 577717
_pe_shutdown_component( void ) {
Packit 577717
Packit 577717
	/* deallocate our event table */
Packit 577717
	_pe_libpfm4_shutdown(&_perf_event_vector, &perf_native_event_table);
Packit 577717
Packit 577717
	/* Shutdown libpfm4 */
Packit 577717
	_papi_libpfm4_shutdown(&_perf_event_vector);
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
/* Check the mmap page for rdpmc support */
Packit 577717
static int _pe_detect_rdpmc(void) {
Packit 577717
Packit 577717
	struct perf_event_attr pe;
Packit 577717
	int fd,rdpmc_exists=1;
Packit 577717
	void *addr;
Packit 577717
	struct perf_event_mmap_page *our_mmap;
Packit 577717
	int page_size=getpagesize();
Packit 577717
Packit 577717
#if defined(__i386__) || defined (__x86_64__)
Packit 577717
#else
Packit 577717
	/* We only support rdpmc on x86 for now */
Packit 577717
        return 0;
Packit 577717
#endif
Packit 577717
Packit 577717
	/* There were various subtle bugs in rdpmc support before	*/
Packit 577717
	/* the Linux 4.13 release.					*/
Packit 577717
	if (_papi_os_info.os_version < LINUX_VERSION(4,13,0)) {
Packit 577717
		return 0;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Create a fake instructions event so we can read a mmap page */
Packit 577717
	memset(&pe,0,sizeof(struct perf_event_attr));
Packit 577717
Packit 577717
	pe.type=PERF_TYPE_HARDWARE;
Packit 577717
	pe.size=sizeof(struct perf_event_attr);
Packit 577717
	pe.config=PERF_COUNT_HW_INSTRUCTIONS;
Packit 577717
	pe.exclude_kernel=1;
Packit 577717
	pe.disabled=1;
Packit 577717
Packit 577717
	perf_event_dump_attr(&pe,0,-1,-1,0);
Packit 577717
	fd=sys_perf_event_open(&pe,0,-1,-1,0);
Packit 577717
Packit 577717
	/* This hopefully won't happen? */
Packit 577717
	/* Though there is a chance this is the first */
Packit 577717
	/* attempt to open a perf_event */
Packit 577717
	if (fd<0) {
Packit 577717
		SUBDBG("FAILED perf_event_open trying to detect rdpmc support");
Packit 577717
		return PAPI_ESYS;
Packit 577717
	}
Packit 577717
Packit 577717
	/* create the mmap page */
Packit 577717
	addr=mmap(NULL, page_size, PROT_READ, MAP_SHARED,fd,0);
Packit 577717
	if (addr == MAP_FAILED) {
Packit 577717
		SUBDBG("FAILED mmap trying to detect rdpmc support");
Packit 577717
		close(fd);
Packit 577717
		return PAPI_ESYS;
Packit 577717
	}
Packit 577717
Packit 577717
	/* get the rdpmc info from the mmap page */
Packit 577717
	our_mmap=(struct perf_event_mmap_page *)addr;
Packit 577717
Packit 577717
	/* If cap_usr_rdpmc bit is set to 1, we have support! */
Packit 577717
	if (our_mmap->cap_usr_rdpmc!=0) {
Packit 577717
		rdpmc_exists=1;
Packit 577717
	}
Packit 577717
	else if ((!our_mmap->cap_bit0_is_deprecated) && (our_mmap->cap_bit0)) {
Packit 577717
		/* 3.4 to 3.11 had somewhat broken rdpmc support */
Packit 577717
		/* This convoluted test is the "official" way to detect this */
Packit 577717
		/* To make things easier we don't support these kernels */
Packit 577717
		rdpmc_exists=0;
Packit 577717
	}
Packit 577717
	else {
Packit 577717
		rdpmc_exists=0;
Packit 577717
	}
Packit 577717
Packit 577717
	/* close the fake event */
Packit 577717
	munmap(addr,page_size);
Packit 577717
	close(fd);
Packit 577717
Packit 577717
	return rdpmc_exists;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
static int
Packit 577717
_pe_handle_paranoid(papi_vector_t *component) {
Packit 577717
Packit 577717
	FILE *fff;
Packit 577717
	int paranoid_level;
Packit 577717
	int retval;
Packit 577717
Packit 577717
	/* The is the official way to detect if perf_event support exists */
Packit 577717
	/* The file is called perf_counter_paranoid on 2.6.31             */
Packit 577717
	/* currently we are lazy and do not support 2.6.31 kernels        */
Packit 577717
Packit 577717
	fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
Packit 577717
	if (fff==NULL) {
Packit 577717
		strncpy(component->cmp_info.disabled_reason,
Packit 577717
			"perf_event support not detected",PAPI_MAX_STR_LEN);
Packit 577717
		return PAPI_ENOCMP;
Packit 577717
	}
Packit 577717
Packit 577717
	/* 3 (vendor patch) means completely disabled */
Packit 577717
	/* 2 means no kernel measurements allowed   */
Packit 577717
	/* 1 means normal counter access            */
Packit 577717
	/* 0 means you can access CPU-specific data */
Packit 577717
	/* -1 means no restrictions                 */
Packit 577717
	retval=fscanf(fff,"%d",&paranoid_level);
Packit 577717
	if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
Packit 577717
	fclose(fff);
Packit 577717
Packit 577717
	if (paranoid_level==3) {
Packit 577717
		strncpy(component->cmp_info.disabled_reason,
Packit 577717
			"perf_event support disabled by Linux with paranoid=3",PAPI_MAX_STR_LEN);
Packit 577717
		return PAPI_ENOCMP;
Packit 577717
	}
Packit 577717
Packit 577717
	if ((paranoid_level==2) && (getuid()!=0)) {
Packit 577717
		SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts");
Packit 577717
		component->cmp_info.available_domains &=~PAPI_DOM_KERNEL;
Packit 577717
	}
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
#if (OBSOLETE_WORKAROUNDS==1)
/* Kernel-version based workarounds.                                */
/* perf_event has had many bugs that PAPI needed to work around;   */
/* for the most part they were all fixed by Linux 2.6.34           */
/* (May 2010).  They are not easy to auto-detect, so the decision  */
/* was made from the uname() version number.  Vendors such as      */
/* Redhat backport fixes, so a kernel reporting 2.6.32 may already */
/* contain them.  As of PAPI 5.6 the workarounds are disabled by   */
/* default; the code is kept here, ifdefed out, for the time being.*/
/*   component - vector whose settings/function pointers are       */
/*               adjusted                                           */
/* Always returns PAPI_OK.                                          */
static int
_pe_version_workarounds(papi_vector_t *component) {

	int cpu_status;

	/* Before 2.6.34 kernel multiplexing is broken, so fall back	*/
	/* to software multiplexing.  The kernel fix was probably git	*/
	/* commit 45e16a6834b6af098702e5ea6c9a40de42ff77d8		*/
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
		component->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS;
		component->cmp_info.kernel_multiplex = 0;
	}

	/* An unsupported processor only earns a warning, because	*/
	/* software events should still work on it.			*/
	cpu_status = processor_supported(
				_papi_hwi_system_info.hw_info.vendor,
				_papi_hwi_system_info.hw_info.cpuid_family);
	if (cpu_status != PAPI_OK) {
		fprintf(stderr,"warning, your processor is unsupported\n");
	}

	/* Swap in the alternate read routine when the sync-read bug	*/
	/* is present.							*/
	if (bug_sync_read()) {
		component->read = _pe_read_bug_sync;
	}

	return PAPI_OK;

}

#endif
Packit 577717
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/* Initialize the perf_event component */
Packit 577717
static int
Packit 577717
_pe_init_component( int cidx )
Packit 577717
{
Packit 577717
Packit 577717
	int retval;
Packit 577717
Packit 577717
	our_cidx=cidx;
Packit 577717
Packit 577717
	/* Update component behavior based on paranoid setting */
Packit 577717
	retval=_pe_handle_paranoid(_papi_hwd[cidx]);
Packit 577717
	if (retval!=PAPI_OK) return retval;
Packit 577717
Packit 577717
#if (OBSOLETE_WORKAROUNDS==1)
Packit 577717
	/* Handle any kernel version related workarounds */
Packit 577717
	_pe_version_workarounds(_papi_hwd[cidx]);
Packit 577717
#endif
Packit 577717
Packit 577717
	/* Setup mmtimers, if appropriate */
Packit 577717
	retval=mmtimer_setup();
Packit 577717
	if (retval) {
Packit 577717
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
			"Error initializing mmtimer",PAPI_MAX_STR_LEN);
Packit 577717
		return retval;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Set the overflow signal */
Packit 577717
	_papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2;
Packit 577717
Packit 577717
	/* Run Vendor-specific fixups */
Packit 577717
	pe_vendor_fixups(_papi_hwd[cidx]);
Packit 577717
Packit 577717
	/* Detect if we can use rdpmc (or equivalent) */
Packit 577717
	retval=_pe_detect_rdpmc();
Packit 577717
	_papi_hwd[cidx]->cmp_info.fast_counter_read = retval;
Packit 577717
	if (retval < 0 ) {
Packit 577717
		/* Don't actually fail here, as could be a surivable bug? */
Packit 577717
		/* If perf_event_open/mmap truly are failing we will      */
Packit 577717
		/* likely catch it pretty quickly elsewhere.              */
Packit 577717
		_papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
Packit 577717
	}
Packit 577717
Packit 577717
#if (USE_PERFEVENT_RDPMC==1)
Packit 577717
Packit 577717
#else
Packit 577717
	/* Force fast_counter_read off if --enable-perfevent-rdpmc=no */
Packit 577717
	_papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
Packit 577717
#endif
Packit 577717
Packit 577717
	/* Run the libpfm4-specific setup */
Packit 577717
	retval = _papi_libpfm4_init(_papi_hwd[cidx]);
Packit 577717
	if (retval) {
Packit 577717
Packit 577717
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
			"Error initializing libpfm4",PAPI_MAX_STR_LEN);
Packit 577717
		return retval;
Packit 577717
Packit 577717
	}
Packit 577717
Packit 577717
	/* Now that libpfm4 is initialized */
Packit 577717
	/* Try to setup the perf_event component events */
Packit 577717
Packit 577717
	retval = _pe_libpfm4_init(_papi_hwd[cidx], cidx,
Packit 577717
				&perf_native_event_table,
Packit 577717
				PMU_TYPE_CORE | PMU_TYPE_OS);
Packit 577717
	if (retval) {
Packit 577717
		switch(retval) {
Packit 577717
			case PAPI_ENOMEM:
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Error libpfm4 memory allocation",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
				break;
Packit 577717
			case PAPI_ENOSUPP:
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Error libpfm4 no PMUs found",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
				break;
Packit 577717
			case PAPI_ENOCMP:
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Error libpfm4 no default PMU found",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
				break;
Packit 577717
			case PAPI_ECOUNT:
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Error libpfm4 too many default PMUs found",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
				break;
Packit 577717
			case PAPI_ENOEVNT:
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Error loading preset events",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
				break;
Packit 577717
			default:
Packit 577717
				printf("PAPI error %d\n",retval);
Packit 577717
				strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
Packit 577717
					"Unknown libpfm4 related error",
Packit 577717
					PAPI_MAX_STR_LEN);
Packit 577717
Packit 577717
		}
Packit 577717
		return retval;
Packit 577717
	}
Packit 577717
Packit 577717
	/* Detect NMI watchdog which can steal counters */
Packit 577717
	/* FIXME: on Intel we should also halve the count if SMT enabled */
Packit 577717
	if (_linux_detect_nmi_watchdog()) {
Packit 577717
		if (_papi_hwd[cidx]->cmp_info.num_cntrs>0) {
Packit 577717
			_papi_hwd[cidx]->cmp_info.num_cntrs--;
Packit 577717
		}
Packit 577717
		SUBDBG("The Linux nmi_watchdog is using one of the performance "
Packit 577717
			"counters, reducing the total number available.\n");
Packit 577717
	}
Packit 577717
Packit 577717
	/* check for exclude_guest issue */
Packit 577717
	check_exclude_guest();
Packit 577717
Packit 577717
	return PAPI_OK;
Packit 577717
Packit 577717
}
Packit 577717
Packit 577717
Packit 577717
Packit 577717
/* Our component vector */
Packit 577717
Packit 577717
papi_vector_t _perf_event_vector = {
Packit 577717
   .cmp_info = {
Packit 577717
       /* component information (unspecified values initialized to 0) */
Packit 577717
      .name = "perf_event",
Packit 577717
      .short_name = "perf",
Packit 577717
      .version = "5.0",
Packit 577717
      .description = "Linux perf_event CPU counters",
Packit 577717
Packit 577717
      .default_domain = PAPI_DOM_USER,
Packit 577717
      .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
Packit 577717
      .default_granularity = PAPI_GRN_THR,
Packit 577717
      .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS,
Packit 577717
Packit 577717
      .hardware_intr = 1,
Packit 577717
      .kernel_profile = 1,
Packit 577717
Packit 577717
      /* component specific cmp_info initializations */
Packit 577717
      .fast_virtual_timer = 0,
Packit 577717
      .attach = 1,
Packit 577717
      .attach_must_ptrace = 1,
Packit 577717
      .cpu = 1,
Packit 577717
      .inherit = 1,
Packit 577717
      .cntr_umasks = 1,
Packit 577717
Packit 577717
	.kernel_multiplex = 1,
Packit 577717
	.num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS,
Packit 577717
Packit 577717
Packit 577717
  },
Packit 577717
Packit 577717
  /* sizes of framework-opaque component-private structures */
Packit 577717
  .size = {
Packit 577717
      .context = sizeof ( pe_context_t ),
Packit 577717
      .control_state = sizeof ( pe_control_t ),
Packit 577717
      .reg_value = sizeof ( int ),
Packit 577717
      .reg_alloc = sizeof ( int ),
Packit 577717
  },
Packit 577717
Packit 577717
  /* function pointers in this component */
Packit 577717
  .init_component =        _pe_init_component,
Packit 577717
  .shutdown_component =    _pe_shutdown_component,
Packit 577717
  .init_thread =           _pe_init_thread,
Packit 577717
  .init_control_state =    _pe_init_control_state,
Packit 577717
  .dispatch_timer =        _pe_dispatch_timer,
Packit 577717
Packit 577717
  /* function pointers from the shared perf_event lib */
Packit 577717
  .start =                 _pe_start,
Packit 577717
  .stop =                  _pe_stop,
Packit 577717
  .read =                  _pe_read,
Packit 577717
  .shutdown_thread =       _pe_shutdown_thread,
Packit 577717
  .ctl =                   _pe_ctl,
Packit 577717
  .update_control_state =  _pe_update_control_state,
Packit 577717
  .set_domain =            _pe_set_domain,
Packit 577717
  .reset =                 _pe_reset,
Packit 577717
  .set_overflow =          _pe_set_overflow,
Packit 577717
  .set_profile =           _pe_set_profile,
Packit 577717
  .stop_profiling =        _pe_stop_profiling,
Packit 577717
  .write =                 _pe_write,
Packit 577717
Packit 577717
Packit 577717
  /* from counter name mapper */
Packit 577717
  .ntv_enum_events =   _pe_ntv_enum_events,
Packit 577717
  .ntv_name_to_code =  _pe_ntv_name_to_code,
Packit 577717
  .ntv_code_to_name =  _pe_ntv_code_to_name,
Packit 577717
  .ntv_code_to_descr = _pe_ntv_code_to_descr,
Packit 577717
  .ntv_code_to_info =  _pe_ntv_code_to_info,
Packit 577717
};